1#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::{Compress, Compression, FlushCompress, Status};
6use sley_core::{GitError, ObjectFormat, ObjectId, Result};
7use sley_formats::Bundle;
8use sley_object::{EncodedObject, ObjectType};
9use std::borrow::Borrow;
10use std::cell::RefCell;
11use std::collections::{HashMap, HashSet};
12use std::fmt;
13use std::ops::Range;
14use std::sync::Arc;
15
/// Location and size bookkeeping for one object stored in a pack.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackEntry {
    /// Object id of the (resolved) object.
    pub oid: ObjectId,
    /// Number of compressed bytes the entry's zlib stream occupies in the pack.
    pub compressed_size: u64,
    /// Size declared in the entry header, which the parser checks against the inflated length.
    pub uncompressed_size: u64,
    /// Byte offset of the entry header from the start of the pack.
    pub offset: u64,
}
23
/// Default for [`PackWriteOptions::window`], as installed by [`PackWriteOptions::new`].
pub const DEFAULT_PACK_WINDOW: usize = 10;

/// Default for [`PackWriteOptions::depth`], as installed by [`PackWriteOptions::new`].
pub const DEFAULT_PACK_DEPTH: usize = 50;

// NOTE(review): not referenced in this part of the file. Judging by the name, this is the
// object count below which payload compression stays single-threaded — confirm at the use site.
const PACK_PARALLEL_COMPRESSION_MIN_OBJECTS: usize = 64;

// NOTE(review): presumably the upper bound on compression worker threads — confirm at the
// use site.
const PACK_PARALLEL_COMPRESSION_MAX_THREADS: usize = 4;
47
/// Tunables for the pack writer. Build with [`PackWriteOptions::new`] and the `with_*` methods.
#[derive(Debug, Clone)]
pub struct PackWriteOptions {
    /// Delta search window handed to the delta planner (default [`DEFAULT_PACK_WINDOW`]).
    pub window: usize,
    /// Delta chain depth handed to the delta planner (default [`DEFAULT_PACK_DEPTH`]).
    /// `write_undeltified` sets this to 0.
    pub depth: usize,
    /// When `true`, deltas against in-pack bases are written as ofs-delta entries (relative
    /// offset); when `false`, they are written as ref-delta entries carrying the base object id.
    pub prefer_ofs_delta: bool,
    /// Objects outside the pack that may serve as delta bases (thin pack). Every key must use
    /// the same object format as the pack being written.
    pub thin_bases: HashMap<ObjectId, EncodedObject>,
    /// Handed to the delta planner, which returns the write order.
    /// NOTE(review): presumably `false` keeps the caller's object order — confirm in the planner.
    pub reorder: bool,
    /// Compression level passed to payload compression; `with_compression_level` caps it at 9.
    pub compression_level: u32,
}
78
impl Default for PackWriteOptions {
    /// Same as [`PackWriteOptions::new`].
    fn default() -> Self {
        Self::new()
    }
}
84
85impl PackWriteOptions {
86 pub fn new() -> Self {
90 Self {
91 window: DEFAULT_PACK_WINDOW,
92 depth: DEFAULT_PACK_DEPTH,
93 prefer_ofs_delta: true,
94 thin_bases: HashMap::new(),
95 reorder: true,
96 compression_level: 6,
97 }
98 }
99
100 pub fn with_window(mut self, window: usize) -> Self {
102 self.window = window;
103 self
104 }
105
106 pub fn with_depth(mut self, depth: usize) -> Self {
108 self.depth = depth;
109 self
110 }
111
112 pub fn with_prefer_ofs_delta(mut self, prefer_ofs_delta: bool) -> Self {
115 self.prefer_ofs_delta = prefer_ofs_delta;
116 self
117 }
118
119 pub fn with_thin_bases(mut self, thin_bases: HashMap<ObjectId, EncodedObject>) -> Self {
121 self.thin_bases = thin_bases;
122 self
123 }
124
125 pub fn with_reorder(mut self, reorder: bool) -> Self {
128 self.reorder = reorder;
129 self
130 }
131
132 pub fn with_compression_level(mut self, level: u32) -> Self {
134 self.compression_level = level.min(9);
135 self
136 }
137}
138
/// Repack configuration flags.
///
/// NOTE(review): nothing in this part of the file reads these fields; the per-field notes are
/// inferred from the names and should be confirmed against the consumer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepackPolicy {
    /// Presumably: write bitmap indexes when repacking.
    pub write_bitmaps: bool,
    /// Presumably: write cruft packs when repacking.
    pub cruft_packs: bool,
    /// Presumably: the factor for geometric repacking; `None` when geometric repacking is off.
    pub geometric_factor: Option<u8>,
}
145
/// A fully parsed pack whose delta entries have been resolved.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackFile {
    /// Pack format version read from the header; the parser accepts 2 and 3.
    pub version: u32,
    /// The pack's objects after delta resolution.
    pub entries: Vec<PackObject>,
    /// Digest of all pack bytes before the trailer, verified against the trailer at parse time.
    pub checksum: ObjectId,
}
152
/// One object from a pack: where it was stored and its decoded content.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackObject {
    /// Location and size information for the object inside the pack.
    pub entry: PackEntry,
    /// The decoded object (type and body).
    pub object: EncodedObject,
}
158
/// Per-object statistics produced by [`PackFile::verify_pack_stats`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackVerifyStat {
    /// Object id of the resolved object.
    pub oid: ObjectId,
    /// Type of the resolved object (for delta entries, the type after resolution).
    pub object_type: ObjectType,
    /// Size declared in the on-disk entry header; for delta entries this is the delta payload
    /// size, not the size of the resolved object.
    pub size: u64,
    /// Bytes the entry occupies in the pack: distance to the next entry, or to the trailer for
    /// the last entry.
    pub size_in_pack: u64,
    /// Byte offset of the entry header from the start of the pack.
    pub offset: u64,
    /// 0 for non-delta entries, otherwise the base's depth plus one (1 when the base is not
    /// found in the pack).
    pub delta_depth: u32,
    /// Base object id for delta entries, `None` for non-delta entries.
    pub base_oid: Option<ObjectId>,
}
180
/// Result of [`PackFile::verify_pack_stats`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackVerifyStats {
    /// One record per pack entry, sorted by pack offset.
    pub objects: Vec<PackVerifyStat>,
    /// The verified pack checksum.
    pub checksum: ObjectId,
}
188
/// Output of the pack writer (and of [`PackFile::index_pack`]).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackWrite {
    /// Complete pack bytes, including the trailing checksum.
    pub pack: Vec<u8>,
    /// Serialized version-2 pack index for `pack`.
    pub index: Vec<u8>,
    /// Checksum of the pack (the value stored in its trailer).
    pub checksum: ObjectId,
    /// Index entries; when produced by the writer they are in the order the objects were written.
    pub entries: Vec<PackIndexEntry>,
    /// Number of entries written as deltas. `index_pack` always reports 0 here.
    pub delta_count: u32,
}
197
/// An object paired with its already-known id, for the `write_packed_with_known_ids*` entry
/// points, which use the supplied id instead of computing one.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PackInput<'a> {
    /// Id of `object`; must use the same object format as the pack being written.
    pub oid: &'a ObjectId,
    /// The object to store.
    pub object: &'a EncodedObject,
}
203
/// Result of building an index for an existing pack (see `PackIndex::write_v2_for_pack`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexBuild {
    /// Serialized index bytes.
    pub index: Vec<u8>,
    /// Checksum of the pack the index describes.
    pub pack_checksum: ObjectId,
    /// Entries recorded in the index.
    pub entries: Vec<PackIndexEntry>,
}
210
/// An owned, fully decoded pack index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndex {
    /// Index format version.
    pub version: u32,
    /// The 256-entry fanout table.
    /// NOTE(review): by pack-index convention entry `b` counts the objects whose first id byte
    /// is `<= b` — the parsing code that fills it is outside this view.
    pub fanout: [u32; 256],
    /// Decoded index entries.
    pub entries: Vec<PackIndexEntry>,
    /// Checksum of the pack this index belongs to.
    pub pack_checksum: ObjectId,
    /// Checksum of the index file itself.
    pub index_checksum: ObjectId,
}
219
/// A borrowed view over serialized pack index bytes (version 1 or 2) that answers lookups
/// by reading the tables in place.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexView<'a> {
    /// Index format version (1 or 2).
    pub version: u32,
    /// Number of objects, taken from the last fanout entry.
    pub count: usize,
    /// The 256-entry fanout table; `find` uses entries `b - 1` and `b` as the search bounds for
    /// ids whose first byte is `b`.
    pub fanout: [u32; 256],
    /// Checksum of the pack this index belongs to, read from the index trailer.
    pub pack_checksum: ObjectId,
    /// Checksum of the index itself, read from the last hash-length bytes.
    pub index_checksum: ObjectId,
    // The complete index file bytes the table ranges point into.
    bytes: &'a [u8],
    // Object format the index was parsed with; lookups for ids of another format return `None`.
    format: ObjectFormat,
    // Byte ranges of the version-specific tables inside `bytes`.
    tables: PackIndexViewTables,
}
231
/// A thread-shareable owner of index bytes, used behind `Arc<dyn PackIndexByteSource>`.
pub trait PackIndexByteSource: fmt::Debug + Send + Sync {
    /// Returns the underlying bytes.
    fn as_bytes(&self) -> &[u8];
}
235
/// Blanket implementation: anything that is `AsRef<[u8]>`, `Debug`, `Send` and `Sync` (sized
/// or not) can serve as a byte source.
impl<T> PackIndexByteSource for T
where
    T: AsRef<[u8]> + fmt::Debug + Send + Sync + ?Sized,
{
    fn as_bytes(&self) -> &[u8] {
        self.as_ref()
    }
}
244
/// Byte source backed by a reference-counted byte slice.
#[derive(Debug)]
struct SharedIndexBytes(Arc<[u8]>);

impl PackIndexByteSource for SharedIndexBytes {
    /// Returns the shared slice.
    fn as_bytes(&self) -> &[u8] {
        self.0.as_ref()
    }
}
253
/// Owning counterpart of [`PackIndexView`]: the same fields, but the index bytes are held
/// through a shared `Arc<dyn PackIndexByteSource>` instead of a borrowed slice.
#[derive(Debug, Clone)]
pub struct PackIndexViewData {
    /// Index format version.
    pub version: u32,
    /// Number of objects in the index.
    pub count: usize,
    /// The 256-entry fanout table.
    pub fanout: [u32; 256],
    /// Checksum of the pack this index belongs to.
    pub pack_checksum: ObjectId,
    /// Checksum of the index itself.
    pub index_checksum: ObjectId,
    // Shared owner of the serialized index bytes.
    bytes: Arc<dyn PackIndexByteSource>,
    // Object format of the ids in the index.
    format: ObjectFormat,
    // Byte ranges of the version-specific tables inside the index bytes.
    tables: PackIndexViewTables,
}
265
/// One record of a pack index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexEntry {
    /// Id of the object.
    pub oid: ObjectId,
    /// CRC-32 of the object's on-disk pack entry (header, any delta base reference, and
    /// compressed payload), as computed by the pack writer.
    pub crc32: u32,
    /// Byte offset of the entry from the start of the pack.
    pub offset: u64,
}
272
/// Result of looking up an object id in a [`PackIndexView`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PackIndexLookup {
    /// CRC-32 from the index; always 0 for version-1 indexes, which store no CRC table.
    pub crc32: u32,
    /// Byte offset of the object's entry in the pack.
    pub offset: u64,
}
278
/// Byte ranges, relative to the start of the index bytes, of the tables a view reads from.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PackIndexViewTables {
    /// Version 1 layout: a single table of records, each a 4-byte big-endian pack offset
    /// followed by the object id.
    V1 {
        entry_table: Range<usize>,
    },
    /// Version 2 layout: separate, parallel tables.
    V2 {
        /// Object ids, `hash_len` bytes each.
        oid_table: Range<usize>,
        /// CRC-32 values, 4 bytes each.
        crc_table: Range<usize>,
        /// 4-byte offsets; entries with the top bit set refer into `large_offset_table`.
        small_offset_table: Range<usize>,
        /// 8-byte offsets for entries flagged in `small_offset_table`.
        large_offset_table: Range<usize>,
    },
}
291
/// Decoded pack reverse index.
///
/// NOTE(review): the code that fills this struct is outside this view; the field notes follow
/// the field names and should be confirmed against the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackReverseIndex {
    /// Format version.
    pub version: u32,
    /// Object format of the checksums.
    pub format: ObjectFormat,
    /// Position table — presumably index positions listed in pack-offset order.
    pub positions: Vec<u32>,
    /// Checksum of the associated pack.
    pub pack_checksum: ObjectId,
    /// Checksum of this file.
    pub index_checksum: ObjectId,
}
300
/// Decoded pack mtimes table.
///
/// NOTE(review): the code that fills this struct is outside this view; the field notes follow
/// the field names and should be confirmed against the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackMtimes {
    /// Format version.
    pub version: u32,
    /// Object format of the checksums.
    pub format: ObjectFormat,
    /// One modification time per object — presumably in index order.
    pub mtimes: Vec<u32>,
    /// Checksum of the associated pack.
    pub pack_checksum: ObjectId,
    /// Checksum of this file.
    pub index_checksum: ObjectId,
}
309
/// Decoded pack bitmap index.
///
/// NOTE(review): the code that fills this struct is outside this view; the field notes follow
/// the field names and should be confirmed against the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapIndex {
    /// Format version.
    pub version: u16,
    /// Object format of the checksums.
    pub format: ObjectFormat,
    /// Raw option flags from the header.
    pub options: u16,
    /// Checksum of the associated pack.
    pub pack_checksum: ObjectId,
    /// Checksum of this file.
    pub index_checksum: ObjectId,
    /// The four per-object-type bitmaps.
    pub type_bitmaps: PackBitmapTypeBitmaps,
    /// Stored bitmap entries.
    pub entries: Vec<PackBitmapEntry>,
    /// Optional name-hash cache; `None` when absent.
    pub name_hash_cache: Option<Vec<u32>>,
}
321
/// One bitmap per object type, as stored in a [`PackBitmapIndex`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapTypeBitmaps {
    /// Bitmap for commits.
    pub commits: EwahBitmap,
    /// Bitmap for trees.
    pub trees: EwahBitmap,
    /// Bitmap for blobs.
    pub blobs: EwahBitmap,
    /// Bitmap for tags.
    pub tags: EwahBitmap,
}
329
/// One stored bitmap of a [`PackBitmapIndex`].
///
/// NOTE(review): field meanings follow the names; the reader and writer are outside this view.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapEntry {
    /// Position of the object this bitmap belongs to.
    pub object_position: u32,
    /// Presumably how many entries back the XOR base bitmap is (0 meaning none) — confirm.
    pub xor_offset: u8,
    /// Raw per-entry flags.
    pub flags: u8,
    /// The bitmap itself.
    pub bitmap: EwahBitmap,
}
343
/// An EWAH bitmap kept in its serialized component form.
///
/// NOTE(review): field meanings follow the names; the codec is outside this view.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EwahBitmap {
    /// Number of bits the bitmap covers.
    pub bit_size: u32,
    /// The compressed 64-bit words.
    pub words: Vec<u64>,
    /// Presumably the position of the last run-length word in `words` — confirm.
    pub rlw_position: u32,
}
350
/// Decoded multi-pack index.
///
/// NOTE(review): the parser and writer are outside this view; the field notes follow the
/// field names and should be confirmed there.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndex {
    /// Format version.
    pub version: u8,
    /// Object format of the ids.
    pub format: ObjectFormat,
    /// Number of packs covered.
    pub pack_count: u32,
    /// Names of the covered packs.
    pub pack_names: Vec<String>,
    /// Number of objects covered.
    pub object_count: u32,
    /// The 256-entry fanout table.
    pub fanout: [u32; 256],
    /// Per-object records.
    pub objects: Vec<MultiPackIndexEntry>,
    /// Optional reverse-index data; `None` when absent.
    pub reverse_index: Option<Vec<u32>>,
    /// Optional bitmapped-pack ranges; `None` when absent.
    pub bitmapped_packs: Option<Vec<MultiPackBitmapPack>>,
    /// The chunks found in the file.
    pub chunks: Vec<MultiPackIndexChunk>,
    /// File checksum.
    pub checksum: ObjectId,
}
365
/// Lookup structure over shared multi-pack-index bytes.
///
/// NOTE(review): the methods that use these fields are outside this view; the notes follow the
/// field names. The `*_offset` fields are presumably byte positions of the corresponding
/// tables inside `bytes`.
#[derive(Debug, Clone)]
pub struct MultiPackIndexOidLookup {
    // Object format of the ids.
    format: ObjectFormat,
    // Number of packs covered.
    pack_count: u32,
    // Names of the covered packs.
    pack_names: Vec<String>,
    // The 256-entry fanout table.
    fanout: [u32; 256],
    // Number of objects covered.
    object_count: usize,
    // Where the object id lookup table starts.
    oid_lookup_offset: usize,
    // Where the object offsets table starts.
    object_offsets_offset: usize,
    // Where the large offsets table starts, if the file has one.
    large_offsets_offset: Option<usize>,
    // Size of the large offsets table (unit not visible here — bytes or entries; confirm).
    large_offsets_len: usize,
    // Shared owner of the serialized bytes.
    bytes: Arc<dyn PackIndexByteSource>,
}
379
/// One object record of a [`MultiPackIndex`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndexEntry {
    /// Id of the object.
    pub oid: ObjectId,
    /// Integer id of the pack that holds the object — presumably an index into
    /// `MultiPackIndex::pack_names`; confirm.
    pub pack_int_id: u32,
    /// Byte offset of the object in that pack.
    pub offset: u64,
    /// NOTE(review): presumably forces the writer to store `offset` in the large-offset table
    /// even when it would fit the small one — confirm in the writer.
    pub force_large_offset: bool,
}
387
/// Bitmap range of one pack inside a multi-pack bitmap.
///
/// NOTE(review): semantics inferred from the names; confirm against the reader.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackBitmapPack {
    /// First bitmap position belonging to the pack.
    pub bitmap_pos: u32,
    /// Number of bitmap positions belonging to the pack.
    pub bitmap_nr: u32,
}
393
/// Location of one chunk inside a multi-pack-index file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndexChunk {
    /// Four-byte chunk identifier.
    pub id: [u8; 4],
    /// Offset of the chunk (presumably from the start of the file — confirm).
    pub offset: u64,
    /// Length of the chunk in bytes.
    pub len: u64,
}
400
/// Entry type as recorded in a pack entry header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PackObjectKind {
    /// A complete commit object.
    Commit,
    /// A complete tree object.
    Tree,
    /// A complete blob object.
    Blob,
    /// A complete tag object.
    Tag,
    /// A delta whose base is identified by an offset within the same pack.
    OfsDelta,
    /// A delta whose base is identified by object id.
    RefDelta,
}
410
/// Intermediate parse result for one pack entry, before delta resolution.
#[derive(Debug, Clone, PartialEq, Eq)]
enum ParsedPackEntry {
    /// A non-delta entry, already decoded into a complete object.
    Resolved(PackObject),
    /// A delta entry whose base still has to be located and applied.
    Delta {
        /// How the base object is identified.
        base: DeltaBase,
        /// Compressed bytes consumed by the delta's zlib stream.
        compressed_size: u64,
        /// Inflated delta size as declared in the entry header.
        delta_size: u64,
        /// Byte offset of the entry header from the start of the pack.
        offset: u64,
        /// The inflated delta instructions.
        delta: Vec<u8>,
    },
}
422
/// How a delta entry identifies its base object.
#[derive(Debug, Clone, PartialEq, Eq)]
enum DeltaBase {
    /// Pack offset of the base entry. `verify_pack_stats` looks this value up directly among
    /// entry offsets, so it is absolute, not relative to the delta.
    Offset(u64),
    /// Object id of the base, which may live outside the pack (thin packs).
    Ref(ObjectId),
}
428
/// Raw per-entry facts collected by `verify_pack_stats` while re-walking the pack bytes.
struct OnDiskEntry {
    // Byte offset of the entry header from the start of the pack.
    offset: u64,
    // Base reference for delta entries; `None` for non-delta entries.
    base: Option<DeltaBase>,
    // Size declared in the entry header (the delta size for delta entries).
    stream_size: u64,
}
437
438impl PackFile {
    /// Parses a self-contained pack using the SHA-1 object format. See [`PackFile::parse`].
    pub fn parse_sha1(bytes: &[u8]) -> Result<Self> {
        Self::parse(bytes, ObjectFormat::Sha1)
    }
442
    /// Parses a self-contained pack in the given object format.
    ///
    /// No external delta bases are offered: the base lookup always answers `Ok(None)`.
    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
        Self::parse_with_base(bytes, format, |_| Ok(None))
    }
446
    /// Parses the pack embedded in `bundle`, using the bundle's own object format.
    pub fn parse_bundle(bundle: &Bundle) -> Result<Self> {
        Self::parse(&bundle.pack, bundle.format)
    }
450
451 pub fn index_pack(bytes: &[u8], format: ObjectFormat) -> Result<PackWrite> {
452 let PackIndexBuild {
453 index,
454 pack_checksum,
455 entries,
456 } = PackIndex::write_v2_for_pack(bytes, format)?;
457 Ok(PackWrite {
458 pack: bytes.to_vec(),
459 index,
460 checksum: pack_checksum,
461 entries,
462 delta_count: 0,
463 })
464 }
465
    /// Parses a pack that may be thin, i.e. may contain deltas whose bases are not in the pack.
    ///
    /// `external_base` is handed to delta resolution to supply such bases; it returns
    /// `Ok(None)` when it does not know the requested object.
    pub fn parse_thin<F>(bytes: &[u8], format: ObjectFormat, external_base: F) -> Result<Self>
    where
        F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
    {
        Self::parse_with_base(bytes, format, external_base)
    }
472
473 fn parse_with_base<F>(bytes: &[u8], format: ObjectFormat, mut external_base: F) -> Result<Self>
474 where
475 F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
476 {
477 let trailer_len = format.raw_len();
478 if bytes.len() < 12 + trailer_len {
479 return Err(GitError::InvalidFormat("pack file too short".into()));
480 }
481 let trailer_offset = bytes.len() - trailer_len;
482 let checksum = sley_core::digest_bytes(format, &bytes[..trailer_offset])?;
483 let expected = ObjectId::from_raw(format, &bytes[trailer_offset..])?;
484 if checksum != expected {
485 return Err(GitError::InvalidFormat(format!(
486 "pack checksum mismatch: expected {expected}, got {checksum}"
487 )));
488 }
489
490 if &bytes[..4] != b"PACK" {
491 return Err(GitError::InvalidFormat("missing PACK signature".into()));
492 }
493 let version = u32_be(&bytes[4..8]);
494 if version != 2 && version != 3 {
495 return Err(GitError::Unsupported(format!("pack version {version}")));
496 }
497 let count = u32_be(&bytes[8..12]) as usize;
498 let mut offset = 12usize;
499 let mut entries = Vec::with_capacity(count);
500 for _ in 0..count {
501 let entry_offset = offset;
502 let header = parse_entry_header(bytes, &mut offset)?;
503 let base =
504 match header.kind {
505 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
506 parse_ofs_delta_base_offset(bytes, &mut offset, entry_offset as u64)?,
507 )),
508 PackObjectKind::RefDelta => {
509 let hash_len = format.raw_len();
510 if offset + hash_len > trailer_offset {
511 return Err(GitError::InvalidFormat(
512 "truncated ref-delta base object id".into(),
513 ));
514 }
515 let oid = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
516 offset += hash_len;
517 Some(DeltaBase::Ref(oid))
518 }
519 _ => None,
520 };
521 let mut body = Vec::new();
522 let consumed = inflate_into(
523 &bytes[offset..trailer_offset],
524 &mut body,
525 header.size.min(usize::MAX as u64) as usize,
526 )?;
527 if body.len() as u64 != header.size {
528 return Err(GitError::InvalidObject(format!(
529 "pack object declared {} bytes, decoded {}",
530 header.size,
531 body.len()
532 )));
533 }
534 if consumed == 0 {
535 return Err(GitError::InvalidFormat(
536 "empty compressed pack entry".into(),
537 ));
538 }
539 offset = offset
540 .checked_add(consumed)
541 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
542 if offset > trailer_offset {
543 return Err(GitError::InvalidFormat(
544 "pack entry extends past checksum".into(),
545 ));
546 }
547 if let Some(base) = base {
548 entries.push(ParsedPackEntry::Delta {
549 base,
550 compressed_size: consumed as u64,
551 delta_size: header.size,
552 offset: entry_offset as u64,
553 delta: body,
554 });
555 } else {
556 let object_type = match header.kind {
557 PackObjectKind::Commit => ObjectType::Commit,
558 PackObjectKind::Tree => ObjectType::Tree,
559 PackObjectKind::Blob => ObjectType::Blob,
560 PackObjectKind::Tag => ObjectType::Tag,
561 PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
562 };
563 let object = EncodedObject::new(object_type, body);
564 let oid = object.object_id(format)?;
565 entries.push(ParsedPackEntry::Resolved(PackObject {
566 entry: PackEntry {
567 oid,
568 compressed_size: consumed as u64,
569 uncompressed_size: header.size,
570 offset: entry_offset as u64,
571 },
572 object,
573 }));
574 }
575 }
576 if offset != trailer_offset {
577 return Err(GitError::InvalidFormat(format!(
578 "pack has {} trailing bytes before checksum",
579 trailer_offset - offset
580 )));
581 }
582 Ok(Self {
583 version,
584 entries: resolve_pack_entries(entries, format, &mut external_base)?,
585 checksum,
586 })
587 }
588
    /// Verifies a self-contained pack and reports per-entry statistics (size, size in pack,
    /// offset, delta depth and base id).
    ///
    /// The pack is first parsed and resolved with [`PackFile::parse`], which validates it.
    /// The raw bytes are then walked a second time to recover the on-disk facts the resolved
    /// objects no longer carry: the delta base references and the header-declared sizes.
    ///
    /// # Errors
    /// Propagates any parse error, and returns `InvalidFormat` if an on-disk entry has no
    /// resolved counterpart at the same offset.
    pub fn verify_pack_stats(bytes: &[u8], format: ObjectFormat) -> Result<PackVerifyStats> {
        // Full parse first: everything below relies on the pack having passed validation.
        let pack = Self::parse(bytes, format)?;

        let trailer_len = format.raw_len();
        // Cannot underflow: `parse` rejected packs shorter than header plus trailer.
        let trailer_offset = bytes.len() - trailer_len;
        let count = u32_be(&bytes[8..12]) as usize;
        let mut offset = 12usize;
        let mut on_disk: Vec<OnDiskEntry> = Vec::with_capacity(count);
        // Second walk over the raw entries, mirroring the loop in `parse_with_base`.
        for _ in 0..count {
            let entry_offset = offset as u64;
            let header = parse_entry_header(bytes, &mut offset)?;
            let stream_size = header.size;
            let base =
                match header.kind {
                    PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
                        parse_ofs_delta_base_offset(bytes, &mut offset, entry_offset)?,
                    )),
                    PackObjectKind::RefDelta => {
                        let hash_len = format.raw_len();
                        if offset + hash_len > trailer_offset {
                            return Err(GitError::InvalidFormat(
                                "truncated ref-delta base object id".into(),
                            ));
                        }
                        let oid = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
                        offset += hash_len;
                        Some(DeltaBase::Ref(oid))
                    }
                    _ => None,
                };
            // The payload is inflated only to learn how many compressed bytes it spans; the
            // decoded body is discarded.
            let mut body = Vec::new();
            let consumed = inflate_into(
                &bytes[offset..trailer_offset],
                &mut body,
                header.size.min(usize::MAX as u64) as usize,
            )?;
            offset = offset
                .checked_add(consumed)
                .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
            on_disk.push(OnDiskEntry {
                offset: entry_offset,
                base,
                stream_size,
            });
        }

        // Lookup tables keyed by pack offset: resolved object, its id, and the on-disk index.
        let mut resolved_by_offset: HashMap<u64, &PackObject> =
            HashMap::with_capacity(pack.entries.len());
        for object in &pack.entries {
            resolved_by_offset.insert(object.entry.offset, object);
        }
        let mut oid_by_offset: HashMap<u64, ObjectId> = HashMap::with_capacity(on_disk.len());
        for entry in &on_disk {
            if let Some(object) = resolved_by_offset.get(&entry.offset) {
                oid_by_offset.insert(entry.offset, object.entry.oid);
            }
        }
        let mut index_by_offset: HashMap<u64, usize> = HashMap::with_capacity(on_disk.len());
        for (idx, entry) in on_disk.iter().enumerate() {
            index_by_offset.insert(entry.offset, idx);
        }

        // Map every entry offset to the offset of the entry that follows it (the trailer for
        // the last one); the difference is the entry's size in the pack.
        let mut sorted_offsets: Vec<u64> = on_disk.iter().map(|entry| entry.offset).collect();
        sorted_offsets.sort_unstable();
        let mut next_offset: HashMap<u64, u64> = HashMap::with_capacity(sorted_offsets.len());
        for window in sorted_offsets.windows(2) {
            next_offset.insert(window[0], window[1]);
        }
        if let Some(last) = sorted_offsets.last() {
            next_offset.insert(*last, trailer_offset as u64);
        }

        // Memoised delta depths, `None` until computed.
        let mut depth = vec![None; on_disk.len()];
        // Depth of entry `idx`: 0 for non-delta entries, otherwise the base's depth plus one.
        // Recurses along the base chain and caches every result in `depth`.
        fn resolve_depth(
            idx: usize,
            on_disk: &[OnDiskEntry],
            index_by_offset: &HashMap<u64, usize>,
            offset_of_oid: &HashMap<ObjectId, u64>,
            depth: &mut [Option<u32>],
        ) -> u32 {
            if let Some(d) = depth[idx] {
                return d;
            }
            let computed = match &on_disk[idx].base {
                None => 0,
                Some(base) => {
                    let base_idx = match base {
                        DeltaBase::Offset(off) => index_by_offset.get(off).copied(),
                        DeltaBase::Ref(oid) => offset_of_oid
                            .get(oid)
                            .and_then(|off| index_by_offset.get(off).copied()),
                    };
                    match base_idx {
                        Some(bi) => {
                            resolve_depth(bi, on_disk, index_by_offset, offset_of_oid, depth) + 1
                        }
                        // Base not present in this pack: count the delta itself as depth 1.
                        None => 1,
                    }
                }
            };
            depth[idx] = Some(computed);
            computed
        }
        // Reverse map (object id -> offset) so ref-delta bases can be located in the pack.
        let mut offset_of_oid: HashMap<ObjectId, u64> = HashMap::with_capacity(oid_by_offset.len());
        for (off, oid) in &oid_by_offset {
            offset_of_oid.insert(*oid, *off);
        }
        for idx in 0..on_disk.len() {
            resolve_depth(idx, &on_disk, &index_by_offset, &offset_of_oid, &mut depth);
        }

        let mut stats = Vec::with_capacity(on_disk.len());
        for (idx, entry) in on_disk.iter().enumerate() {
            let off = entry.offset;
            let object = resolved_by_offset.get(&off).ok_or_else(|| {
                GitError::InvalidFormat("pack offset missing from resolved set".into())
            })?;
            let size_in_pack = next_offset
                .get(&off)
                .copied()
                .unwrap_or(trailer_offset as u64)
                .saturating_sub(off);
            let base_oid = match &entry.base {
                None => None,
                Some(DeltaBase::Offset(base_off)) => oid_by_offset.get(base_off).copied(),
                Some(DeltaBase::Ref(oid)) => Some(*oid),
            };
            stats.push(PackVerifyStat {
                oid: object.entry.oid,
                object_type: object.object.object_type,
                // Header-declared size: for delta entries this is the delta size, not the
                // size of the resolved object.
                size: entry.stream_size,
                size_in_pack,
                offset: off,
                delta_depth: depth[idx].unwrap_or(0),
                base_oid,
            });
        }
        stats.sort_by_key(|stat| stat.offset);

        Ok(PackVerifyStats {
            objects: stats,
            checksum: pack.checksum,
        })
    }
769
    /// Writes a pack using the SHA-1 object format. See [`PackFile::write_undeltified`].
    pub fn write_undeltified_sha1<T>(objects: &[T]) -> Result<PackWrite>
    where
        T: Borrow<EncodedObject>,
    {
        Self::write_undeltified(objects, ObjectFormat::Sha1)
    }
776
    /// Writes `objects` into a pack using default options except for a delta depth of 0 and
    /// reordering disabled.
    ///
    /// NOTE(review): as the name says, depth 0 is expected to make the planner emit no deltas
    /// and `reorder = false` to keep the caller's order — the planner is outside this view.
    pub fn write_undeltified<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
    where
        T: Borrow<EncodedObject>,
    {
        let options = PackWriteOptions::new().with_depth(0).with_reorder(false);
        Self::write_packed_impl(objects, format, &options)
    }
789
    /// Writes `objects` into a pack with the default [`PackWriteOptions`], computing each
    /// object's id from its content.
    pub fn write_packed<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
    where
        T: Borrow<EncodedObject>,
    {
        Self::write_packed_with_options(objects, format, &PackWriteOptions::new())
    }
804
    /// Writes `objects` into a pack with caller-supplied options, computing each object's id
    /// from its content.
    pub fn write_packed_with_options<T>(
        objects: &[T],
        format: ObjectFormat,
        options: &PackWriteOptions,
    ) -> Result<PackWrite>
    where
        T: Borrow<EncodedObject>,
    {
        Self::write_packed_impl(objects, format, options)
    }
818
    /// Writes a pack from objects whose ids the caller already knows, with the default
    /// [`PackWriteOptions`]. The supplied ids are used as given; they are not recomputed.
    pub fn write_packed_with_known_ids(
        inputs: &[PackInput<'_>],
        format: ObjectFormat,
    ) -> Result<PackWrite> {
        Self::write_packed_with_known_ids_and_options(inputs, format, &PackWriteOptions::new())
    }
833
834 pub fn write_packed_with_known_ids_and_options(
837 inputs: &[PackInput<'_>],
838 format: ObjectFormat,
839 options: &PackWriteOptions,
840 ) -> Result<PackWrite> {
841 if inputs.len() > u32::MAX as usize {
842 return Err(GitError::InvalidFormat("too many pack objects".into()));
843 }
844 let mut objects = Vec::with_capacity(inputs.len());
845 let mut object_ids = Vec::with_capacity(inputs.len());
846 for input in inputs {
847 if input.oid.format() != format {
848 return Err(GitError::InvalidObjectId(format!(
849 "pack object id {} uses {}, pack uses {}",
850 input.oid,
851 input.oid.format().name(),
852 format.name()
853 )));
854 }
855 objects.push(input.object);
856 object_ids.push(*input.oid);
857 }
858 Self::write_packed_from_parts(objects, object_ids, format, options)
859 }
860
    /// Writes a thin pack: `external_bases` are offered to the delta planner as bases that
    /// are not themselves stored in the pack. Deltas against them are written as ref-delta
    /// entries naming the base id. All other options are the defaults.
    pub fn write_thin<T>(
        objects: &[T],
        format: ObjectFormat,
        external_bases: HashMap<ObjectId, EncodedObject>,
    ) -> Result<PackWrite>
    where
        T: Borrow<EncodedObject>,
    {
        let options = PackWriteOptions::new().with_thin_bases(external_bases);
        Self::write_packed_impl(objects, format, &options)
    }
880
881 fn write_packed_impl<T>(
882 objects: &[T],
883 format: ObjectFormat,
884 options: &PackWriteOptions,
885 ) -> Result<PackWrite>
886 where
887 T: Borrow<EncodedObject>,
888 {
889 if objects.len() > u32::MAX as usize {
890 return Err(GitError::InvalidFormat("too many pack objects".into()));
891 }
892 let objects: Vec<&EncodedObject> = objects.iter().map(Borrow::borrow).collect();
893
894 let mut object_ids: Vec<ObjectId> = Vec::with_capacity(objects.len());
897 for object in &objects {
898 object_ids.push(object.object_id(format)?);
899 }
900 Self::write_packed_from_parts(objects, object_ids, format, options)
901 }
902
903 fn write_packed_from_parts(
904 objects: Vec<&EncodedObject>,
905 object_ids: Vec<ObjectId>,
906 format: ObjectFormat,
907 options: &PackWriteOptions,
908 ) -> Result<PackWrite> {
909 let mut seen = HashSet::with_capacity(object_ids.len());
910 for oid in &object_ids {
911 if !seen.insert(oid) {
912 return Err(GitError::InvalidFormat(format!(
913 "pack contains duplicate object id {oid}"
914 )));
915 }
916 }
917
918 for oid in options.thin_bases.keys() {
920 if oid.format() != format {
921 return Err(GitError::InvalidObjectId(
922 "thin pack base object id format does not match pack format".into(),
923 ));
924 }
925 }
926
927 let (plan, order) = plan_pack_deltas(&objects, &object_ids, options)?;
933
934 let mut pack = Vec::new();
935 pack.extend_from_slice(b"PACK");
936 pack.extend_from_slice(&2u32.to_be_bytes());
937 pack.extend_from_slice(&(objects.len() as u32).to_be_bytes());
938
939 let mut index_entries = Vec::with_capacity(objects.len());
940 let mut delta_count = 0u32;
941 let mut written_offsets: Vec<Option<u64>> = vec![None; objects.len()];
944
945 let compressed_payloads =
946 compress_planned_payloads(&objects, &plan, &order, options.compression_level)?;
947
948 for (order_pos, &idx) in order.iter().enumerate() {
949 let offset = pack.len() as u64;
950 let mut entry_bytes = Vec::new();
951 match &plan[idx].base {
952 PlannedBase::None => {
953 write_entry_header(
954 &mut entry_bytes,
955 objects[idx].object_type,
956 objects[idx].body.len() as u64,
957 );
958 }
959 PlannedBase::InPack { base_idx, delta } => {
960 delta_count += 1;
961 let base_offset = written_offsets[*base_idx].ok_or_else(|| {
962 GitError::InvalidFormat(
963 "in-pack delta base emitted after dependent object".into(),
964 )
965 })?;
966 if options.prefer_ofs_delta {
967 write_pack_entry_header_kind(&mut entry_bytes, 6, delta.len() as u64);
968 let relative = offset.checked_sub(base_offset).ok_or_else(|| {
969 GitError::InvalidFormat("ofs-delta base offset is after delta".into())
970 })?;
971 write_ofs_delta_offset(&mut entry_bytes, relative)?;
972 } else {
973 write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
974 entry_bytes.extend_from_slice(object_ids[*base_idx].as_bytes());
975 }
976 }
977 PlannedBase::External { base_oid, delta } => {
978 delta_count += 1;
979 write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
980 entry_bytes.extend_from_slice(base_oid.as_bytes());
981 }
982 }
983 entry_bytes.extend_from_slice(&compressed_payloads[order_pos]);
984 let crc32 = crc32fast::hash(&entry_bytes);
985 pack.extend_from_slice(&entry_bytes);
986 written_offsets[idx] = Some(offset);
987 index_entries.push(PackIndexEntry {
988 oid: object_ids[idx].clone(),
989 crc32,
990 offset,
991 });
992 }
993
994 let checksum = sley_core::digest_bytes(format, &pack)?;
995 pack.extend_from_slice(checksum.as_bytes());
996 let index = PackIndex::write_v2(format, &index_entries, &checksum)?;
997 Ok(PackWrite {
998 pack,
999 index,
1000 checksum,
1001 entries: index_entries,
1002 delta_count,
1003 })
1004 }
1005}
1006
1007impl<'a> PackIndexView<'a> {
    /// Parses a SHA-1 pack index with full verification.
    ///
    /// Despite the name this simply forwards to [`PackIndexView::parse`], which also accepts
    /// version-1 indexes (data without the version-2 magic is parsed as version 1).
    pub fn parse_v2_sha1(bytes: &'a [u8]) -> Result<Self> {
        Self::parse(bytes, ObjectFormat::Sha1)
    }
1011
    /// Parses a pack index, verifying the trailing index checksum and validating every entry.
    pub fn parse(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
        Self::parse_impl(bytes, format, true, true)
    }
1015
    /// Parses a pack index without recomputing the index checksum; entries are still
    /// validated.
    pub fn parse_without_checksum(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
        Self::parse_impl(bytes, format, false, true)
    }
1022
    /// Parses a pack index skipping both the checksum verification and the per-entry
    /// validation. Only the table layout is checked, so this is meant for index data the
    /// caller already trusts.
    pub fn parse_trusted_without_checksum(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
        Self::parse_impl(bytes, format, false, false)
    }
1032
    /// Returns the number of objects in the index.
    pub fn count(&self) -> usize {
        self.count
    }
1036
    /// Returns the 256-entry fanout table.
    pub fn fanout(&self) -> &[u32; 256] {
        &self.fanout
    }
1040
1041 pub fn find(&self, oid: &ObjectId) -> Option<PackIndexLookup> {
1042 if oid.format() != self.format {
1043 return None;
1044 }
1045 let bucket = usize::from(oid.as_bytes()[0]);
1046 let mut start = if bucket == 0 {
1047 0
1048 } else {
1049 self.fanout[bucket - 1] as usize
1050 };
1051 let mut end = self.fanout[bucket] as usize;
1052 let target = oid.as_bytes();
1053
1054 while start < end {
1055 let mid = start + (end - start) / 2;
1056 match self.oid_bytes_at(mid).cmp(target) {
1057 std::cmp::Ordering::Less => start = mid + 1,
1058 std::cmp::Ordering::Equal => return self.lookup_at(mid),
1059 std::cmp::Ordering::Greater => end = mid,
1060 }
1061 }
1062 None
1063 }
1064
    /// Parses pack index bytes into a view.
    ///
    /// Data that does not start with the version-2 magic (`0xff 't' 'O' 'c'`) is handed to
    /// `parse_v1_impl`. For version 2 the layout is: magic, version, fanout, id table, CRC
    /// table, 4-byte offset table, 8-byte offset table, pack checksum, index checksum.
    ///
    /// * `verify_checksum` — recompute the digest of everything before the trailing index
    ///   checksum and compare. When `false`, extra bytes between the computed end of the
    ///   large-offset table and the trailer are also tolerated and folded into that table.
    /// * `validate_entries` — additionally run `validate_v2_entries` on the result.
    ///
    /// # Errors
    /// `InvalidFormat` for short data, checksum mismatch or a table layout that does not end
    /// exactly at the trailer; `Unsupported` for a version other than 2.
    fn parse_impl(
        bytes: &'a [u8],
        format: ObjectFormat,
        verify_checksum: bool,
        validate_entries: bool,
    ) -> Result<Self> {
        let hash_len = format.raw_len();
        if bytes.len() < 4 {
            return Err(GitError::InvalidFormat("pack index too short".into()));
        }
        // No version-2 magic: treat the data as a version-1 index.
        if bytes[..4] != [0xff, b't', b'O', b'c'] {
            return Self::parse_v1_impl(bytes, format, verify_checksum, validate_entries);
        }
        // Minimum size: magic + version, fanout, and the two trailing checksums.
        if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
            return Err(GitError::InvalidFormat("pack index too short".into()));
        }
        let version = u32_be(&bytes[4..8]);
        if version != 2 {
            return Err(GitError::Unsupported(format!(
                "pack index version {version}"
            )));
        }
        let index_checksum_offset = bytes.len() - hash_len;
        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
        if verify_checksum {
            let actual_index_checksum =
                sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
            if actual_index_checksum != index_checksum {
                return Err(GitError::InvalidFormat(format!(
                    "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
                )));
            }
        }

        let mut offset = 8usize;
        let fanout = read_pack_index_fanout(bytes, &mut offset)?;
        // The last fanout entry is the total number of objects.
        let count = fanout[255] as usize;
        let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
        offset = oid_table.end;
        let crc_table = checked_range(offset, count, 4, bytes.len())?;
        offset = crc_table.end;
        let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
        offset = small_offset_table.end;

        // Entries with the top bit set keep their real offset in the 8-byte table, so the
        // number of such entries gives that table's expected length.
        let large_offset_count = (0..count)
            .filter(|idx| {
                let start = small_offset_table.start + idx * 4;
                u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
            })
            .count();
        let mut large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
        offset = large_offset_table.end;

        let expected_trailer_offset = bytes.len() - hash_len * 2;
        if offset != expected_trailer_offset {
            if !verify_checksum && offset < expected_trailer_offset {
                // Lenient mode: treat the surplus bytes as part of the large-offset table.
                large_offset_table = large_offset_table.start..expected_trailer_offset;
                offset = expected_trailer_offset;
            } else {
                return Err(GitError::InvalidFormat(format!(
                    "pack index has {} unexpected bytes before trailer",
                    expected_trailer_offset.saturating_sub(offset)
                )));
            }
        }
        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;

        let view = Self {
            version,
            count,
            fanout,
            pack_checksum,
            index_checksum,
            bytes,
            format,
            tables: PackIndexViewTables::V2 {
                oid_table,
                crc_table,
                small_offset_table,
                large_offset_table,
            },
        };
        if validate_entries {
            view.validate_v2_entries()?;
        }
        Ok(view)
    }
1152
    /// Parses a version-1 pack index: fanout, then one record per object (4-byte offset
    /// followed by the object id), then the pack checksum and the index checksum.
    ///
    /// `verify_checksum` and `validate_entries` have the same meaning as in `parse_impl`,
    /// except that version 1 never tolerates stray bytes before the trailer.
    ///
    /// # Errors
    /// `InvalidFormat` for short data, checksum mismatch, or an entry table that does not end
    /// exactly at the trailer.
    fn parse_v1_impl(
        bytes: &'a [u8],
        format: ObjectFormat,
        verify_checksum: bool,
        validate_entries: bool,
    ) -> Result<Self> {
        let hash_len = format.raw_len();
        // Minimum size: the fanout table plus the two trailing checksums.
        if bytes.len() < 256 * 4 + 2 * hash_len {
            return Err(GitError::InvalidFormat("pack index too short".into()));
        }
        let index_checksum_offset = bytes.len() - hash_len;
        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
        if verify_checksum {
            let actual_index_checksum =
                sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
            if actual_index_checksum != index_checksum {
                return Err(GitError::InvalidFormat(format!(
                    "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
                )));
            }
        }

        // Version 1 has no header: the fanout starts at byte 0.
        let mut offset = 0usize;
        let fanout = read_pack_index_fanout(bytes, &mut offset)?;
        let count = fanout[255] as usize;
        let entry_len = hash_len
            .checked_add(4)
            .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
        let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
        offset = entry_table.end;
        let expected_trailer_offset = bytes.len() - hash_len * 2;
        if offset != expected_trailer_offset {
            return Err(GitError::InvalidFormat(format!(
                "pack index has {} unexpected bytes before trailer",
                expected_trailer_offset.saturating_sub(offset)
            )));
        }
        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;

        let view = Self {
            version: 1,
            count,
            fanout,
            pack_checksum,
            index_checksum,
            bytes,
            format,
            tables: PackIndexViewTables::V1 { entry_table },
        };
        if validate_entries {
            view.validate_v1_entries()?;
        }
        Ok(view)
    }
1207
1208 fn validate_v2_entries(&self) -> Result<()> {
1209 let PackIndexViewTables::V2 {
1210 oid_table,
1211 small_offset_table,
1212 large_offset_table,
1213 ..
1214 } = &self.tables
1215 else {
1216 unreachable!("v2 validation only runs for v2 views");
1217 };
1218 let oid_table = self.slice(oid_table.clone());
1219 let small_offset_table = self.slice(small_offset_table.clone());
1220 let large_offset_table = self.slice(large_offset_table.clone());
1221 let hash_len = self.format.raw_len();
1222 for idx in 0..self.count {
1223 let oid_start = idx * hash_len;
1224 let oid_bytes = &oid_table[oid_start..oid_start + hash_len];
1225 if idx > 0 && oid_bytes <= &oid_table[oid_start - hash_len..oid_start] {
1226 return Err(GitError::InvalidFormat(
1227 "pack index object ids are not strictly ascending".into(),
1228 ));
1229 }
1230 validate_pack_index_oid_fanout(idx, oid_bytes, &self.fanout)?;
1231
1232 let offset_start = idx * 4;
1233 let raw_offset = u32_be(&small_offset_table[offset_start..offset_start + 4]);
1234 pack_index_v2_offset(raw_offset, large_offset_table)?;
1235 }
1236 Ok(())
1237 }
1238
1239 fn validate_v1_entries(&self) -> Result<()> {
1240 let PackIndexViewTables::V1 { entry_table } = &self.tables else {
1241 unreachable!("v1 validation only runs for v1 views");
1242 };
1243 let entry_table = self.slice(entry_table.clone());
1244 let hash_len = self.format.raw_len();
1245 let entry_len = hash_len
1246 .checked_add(4)
1247 .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1248 for idx in 0..self.count {
1249 let start = idx * entry_len;
1250 let oid_start = start + 4;
1251 let oid_bytes = &entry_table[oid_start..start + entry_len];
1252 if idx > 0 {
1253 let previous_oid_start = oid_start - entry_len;
1254 let previous_oid = &entry_table[previous_oid_start..previous_oid_start + hash_len];
1255 if previous_oid >= oid_bytes {
1256 return Err(GitError::InvalidFormat(
1257 "pack index object ids are not strictly sorted".into(),
1258 ));
1259 }
1260 }
1261 validate_pack_index_oid_fanout(idx, oid_bytes, &self.fanout)?;
1262 }
1263 Ok(())
1264 }
1265
1266 fn oid_bytes_at(&self, idx: usize) -> &'a [u8] {
1267 let hash_len = self.format.raw_len();
1268 match &self.tables {
1269 PackIndexViewTables::V1 { entry_table } => {
1270 let entry_table = self.slice(entry_table.clone());
1271 let entry_len = hash_len + 4;
1272 let start = idx * entry_len + 4;
1273 &entry_table[start..start + hash_len]
1274 }
1275 PackIndexViewTables::V2 { oid_table, .. } => {
1276 let oid_table = self.slice(oid_table.clone());
1277 let start = idx * hash_len;
1278 &oid_table[start..start + hash_len]
1279 }
1280 }
1281 }
1282
1283 fn lookup_at(&self, idx: usize) -> Option<PackIndexLookup> {
1284 if idx >= self.count {
1285 return None;
1286 }
1287 let hash_len = self.format.raw_len();
1288 match &self.tables {
1289 PackIndexViewTables::V1 { entry_table } => {
1290 let entry_table = self.slice(entry_table.clone());
1291 let entry_len = hash_len + 4;
1292 let start = idx * entry_len;
1293 Some(PackIndexLookup {
1294 crc32: 0,
1295 offset: u64::from(u32_be(&entry_table[start..start + 4])),
1296 })
1297 }
1298 PackIndexViewTables::V2 {
1299 crc_table,
1300 small_offset_table,
1301 large_offset_table,
1302 ..
1303 } => {
1304 let crc_table = self.slice(crc_table.clone());
1305 let small_offset_table = self.slice(small_offset_table.clone());
1306 let large_offset_table = self.slice(large_offset_table.clone());
1307 let crc_start = idx * 4;
1308 let raw_offset = u32_be(&small_offset_table[crc_start..crc_start + 4]);
1309 Some(PackIndexLookup {
1310 crc32: u32_be(&crc_table[crc_start..crc_start + 4]),
1311 offset: pack_index_v2_offset(raw_offset, large_offset_table).ok()?,
1312 })
1313 }
1314 }
1315 }
1316
1317 fn slice(&self, range: Range<usize>) -> &'a [u8] {
1318 &self.bytes[range]
1319 }
1320}
1321
1322impl PackIndexViewData {
1323 pub fn parse(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1324 Self::parse_source(Arc::new(SharedIndexBytes(bytes)), format)
1325 }
1326
1327 pub fn parse_without_checksum(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1331 Self::parse_source_without_checksum(Arc::new(SharedIndexBytes(bytes)), format)
1332 }
1333
1334 pub fn parse_trusted_without_checksum(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1337 Self::parse_trusted_source_without_checksum(Arc::new(SharedIndexBytes(bytes)), format)
1338 }
1339
1340 pub fn parse_source(bytes: Arc<dyn PackIndexByteSource>, format: ObjectFormat) -> Result<Self> {
1341 Self::parse_impl(bytes, format, true, true)
1342 }
1343
1344 pub fn parse_source_without_checksum(
1345 bytes: Arc<dyn PackIndexByteSource>,
1346 format: ObjectFormat,
1347 ) -> Result<Self> {
1348 Self::parse_impl(bytes, format, false, true)
1349 }
1350
1351 pub fn parse_trusted_source_without_checksum(
1352 bytes: Arc<dyn PackIndexByteSource>,
1353 format: ObjectFormat,
1354 ) -> Result<Self> {
1355 Self::parse_impl(bytes, format, false, false)
1356 }
1357
1358 pub fn count(&self) -> usize {
1359 self.count
1360 }
1361
1362 pub fn fanout(&self) -> &[u32; 256] {
1363 &self.fanout
1364 }
1365
1366 pub fn find(&self, oid: &ObjectId) -> Option<PackIndexLookup> {
1367 self.as_view().find(oid)
1368 }
1369
1370 pub fn as_view(&self) -> PackIndexView<'_> {
1371 PackIndexView {
1372 version: self.version,
1373 count: self.count,
1374 fanout: self.fanout,
1375 pack_checksum: self.pack_checksum,
1376 index_checksum: self.index_checksum,
1377 bytes: self.bytes.as_bytes(),
1378 format: self.format,
1379 tables: self.tables.clone(),
1380 }
1381 }
1382
1383 fn parse_impl(
1384 bytes: Arc<dyn PackIndexByteSource>,
1385 format: ObjectFormat,
1386 verify_checksum: bool,
1387 validate_entries: bool,
1388 ) -> Result<Self> {
1389 let (version, count, fanout, pack_checksum, index_checksum, tables) = {
1390 let view = PackIndexView::parse_impl(
1391 bytes.as_bytes(),
1392 format,
1393 verify_checksum,
1394 validate_entries,
1395 )?;
1396 (
1397 view.version,
1398 view.count,
1399 view.fanout,
1400 view.pack_checksum,
1401 view.index_checksum,
1402 view.tables,
1403 )
1404 };
1405 Ok(Self {
1406 version,
1407 count,
1408 fanout,
1409 pack_checksum,
1410 index_checksum,
1411 bytes,
1412 format,
1413 tables,
1414 })
1415 }
1416}
1417
1418impl PackIndex {
1419 pub fn write_v2_for_pack_sha1(pack_bytes: &[u8]) -> Result<PackIndexBuild> {
1420 Self::write_v2_for_pack(pack_bytes, ObjectFormat::Sha1)
1421 }
1422
1423 pub fn write_v2_for_pack(pack_bytes: &[u8], format: ObjectFormat) -> Result<PackIndexBuild> {
1424 let trailer_len = format.raw_len();
1425 if pack_bytes.len() < 12 + trailer_len {
1426 return Err(GitError::InvalidFormat("pack file too short".into()));
1427 }
1428 let trailer_offset = pack_bytes.len() - trailer_len;
1429 let pack_checksum = sley_core::digest_bytes(format, &pack_bytes[..trailer_offset])?;
1430 let expected = ObjectId::from_raw(format, &pack_bytes[trailer_offset..])?;
1431 if pack_checksum != expected {
1432 return Err(GitError::InvalidFormat(format!(
1433 "pack checksum mismatch: expected {expected}, got {pack_checksum}"
1434 )));
1435 }
1436
1437 if &pack_bytes[..4] != b"PACK" {
1438 return Err(GitError::InvalidFormat("missing PACK signature".into()));
1439 }
1440 let version = u32_be(&pack_bytes[4..8]);
1441 if version != 2 && version != 3 {
1442 return Err(GitError::Unsupported(format!("pack version {version}")));
1443 }
1444 let count = u32_be(&pack_bytes[8..12]) as usize;
1445 let mut offset = 12usize;
1446 let mut parsed_entries = Vec::with_capacity(count);
1447 let mut raw_entries = Vec::with_capacity(count);
1448 for _ in 0..count {
1449 let entry_offset = offset;
1450 let header = parse_entry_header(pack_bytes, &mut offset)?;
1451 let base = match header.kind {
1452 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
1453 pack_bytes,
1454 &mut offset,
1455 entry_offset as u64,
1456 )?)),
1457 PackObjectKind::RefDelta => {
1458 let hash_len = format.raw_len();
1459 if offset + hash_len > trailer_offset {
1460 return Err(GitError::InvalidFormat(
1461 "truncated ref-delta base object id".into(),
1462 ));
1463 }
1464 let oid = ObjectId::from_raw(format, &pack_bytes[offset..offset + hash_len])?;
1465 offset += hash_len;
1466 Some(DeltaBase::Ref(oid))
1467 }
1468 _ => None,
1469 };
1470 let mut body = Vec::new();
1471 let consumed = inflate_into(
1472 &pack_bytes[offset..trailer_offset],
1473 &mut body,
1474 header.size.min(usize::MAX as u64) as usize,
1475 )?;
1476 if body.len() as u64 != header.size {
1477 return Err(GitError::InvalidObject(format!(
1478 "pack object declared {} bytes, decoded {}",
1479 header.size,
1480 body.len()
1481 )));
1482 }
1483 if consumed == 0 {
1484 return Err(GitError::InvalidFormat(
1485 "empty compressed pack entry".into(),
1486 ));
1487 }
1488 offset = offset
1489 .checked_add(consumed)
1490 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
1491 if offset > trailer_offset {
1492 return Err(GitError::InvalidFormat(
1493 "pack entry extends past checksum".into(),
1494 ));
1495 }
1496 raw_entries.push((
1497 entry_offset as u64,
1498 crc32fast::hash(&pack_bytes[entry_offset..offset]),
1499 ));
1500 if let Some(base) = base {
1501 parsed_entries.push(ParsedPackEntry::Delta {
1502 base,
1503 compressed_size: consumed as u64,
1504 delta_size: header.size,
1505 offset: entry_offset as u64,
1506 delta: body,
1507 });
1508 } else {
1509 let object_type = match header.kind {
1510 PackObjectKind::Commit => ObjectType::Commit,
1511 PackObjectKind::Tree => ObjectType::Tree,
1512 PackObjectKind::Blob => ObjectType::Blob,
1513 PackObjectKind::Tag => ObjectType::Tag,
1514 PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
1515 };
1516 let object = EncodedObject::new(object_type, body);
1517 let oid = object.object_id(format)?;
1518 parsed_entries.push(ParsedPackEntry::Resolved(PackObject {
1519 entry: PackEntry {
1520 oid,
1521 compressed_size: consumed as u64,
1522 uncompressed_size: header.size,
1523 offset: entry_offset as u64,
1524 },
1525 object,
1526 }));
1527 }
1528 }
1529 if offset != trailer_offset {
1530 return Err(GitError::InvalidFormat(format!(
1531 "pack has {} trailing bytes before checksum",
1532 trailer_offset - offset
1533 )));
1534 }
1535
1536 let resolved = resolve_pack_entries(parsed_entries, format, &mut |_| Ok(None))?;
1537 let entries = resolved
1538 .iter()
1539 .zip(raw_entries)
1540 .map(|(object, (offset, crc32))| PackIndexEntry {
1541 oid: object.entry.oid,
1542 crc32,
1543 offset,
1544 })
1545 .collect::<Vec<_>>();
1546 let index = PackIndex::write_v2(format, &entries, &pack_checksum)?;
1547 Ok(PackIndexBuild {
1548 index,
1549 pack_checksum,
1550 entries,
1551 })
1552 }
1553
1554 pub fn parse_v2_sha1(bytes: &[u8]) -> Result<Self> {
1555 Self::parse(bytes, ObjectFormat::Sha1)
1556 }
1557
1558 pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
1559 Self::parse_impl(bytes, format, true)
1560 }
1561
1562 pub fn parse_without_checksum(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
1563 Self::parse_impl(bytes, format, false)
1564 }
1565
1566 fn parse_impl(bytes: &[u8], format: ObjectFormat, verify_checksum: bool) -> Result<Self> {
1567 let hash_len = format.raw_len();
1568 if bytes.len() < 4 {
1569 return Err(GitError::InvalidFormat("pack index too short".into()));
1570 }
1571 if bytes[..4] != [0xff, b't', b'O', b'c'] {
1572 return Self::parse_v1_impl(bytes, format, verify_checksum);
1573 }
1574 if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
1575 return Err(GitError::InvalidFormat("pack index too short".into()));
1576 }
1577 let version = u32_be(&bytes[4..8]);
1578 if version != 2 {
1579 return Err(GitError::Unsupported(format!(
1580 "pack index version {version}"
1581 )));
1582 }
1583 let index_checksum_offset = bytes.len() - hash_len;
1584 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1585 if verify_checksum {
1586 let actual_index_checksum =
1587 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1588 if actual_index_checksum != index_checksum {
1589 return Err(GitError::InvalidFormat(format!(
1590 "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1591 )));
1592 }
1593 }
1594
1595 let mut offset = 8usize;
1596 let mut fanout = [0u32; 256];
1597 let mut previous = 0u32;
1598 for slot in &mut fanout {
1599 *slot = u32_be(&bytes[offset..offset + 4]);
1600 if *slot < previous {
1601 return Err(GitError::InvalidFormat(
1602 "pack index fanout is not monotonic".into(),
1603 ));
1604 }
1605 previous = *slot;
1606 offset += 4;
1607 }
1608 let count = fanout[255] as usize;
1609 let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
1610 offset = oid_table.end;
1611 let crc_table = checked_range(offset, count, 4, bytes.len())?;
1612 offset = crc_table.end;
1613 let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
1614 offset = small_offset_table.end;
1615
1616 let large_offset_count = (0..count)
1617 .filter(|idx| {
1618 let start = small_offset_table.start + idx * 4;
1619 u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
1620 })
1621 .count();
1622 let mut large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
1623 offset = large_offset_table.end;
1624
1625 let expected_trailer_offset = bytes.len() - hash_len * 2;
1626 if offset != expected_trailer_offset {
1627 if !verify_checksum && offset < expected_trailer_offset {
1628 large_offset_table = large_offset_table.start..expected_trailer_offset;
1629 offset = expected_trailer_offset;
1630 } else {
1631 return Err(GitError::InvalidFormat(format!(
1632 "pack index has {} unexpected bytes before trailer",
1633 expected_trailer_offset.saturating_sub(offset)
1634 )));
1635 }
1636 }
1637 let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1638
1639 let mut entries = Vec::with_capacity(count);
1640 for idx in 0..count {
1641 let oid_start = oid_table.start + idx * hash_len;
1642 let crc_start = crc_table.start + idx * 4;
1643 let offset_start = small_offset_table.start + idx * 4;
1644 let oid_bytes = &bytes[oid_start..oid_start + hash_len];
1645 if idx > 0 && oid_bytes <= &bytes[oid_start - hash_len..oid_start] {
1649 return Err(GitError::InvalidFormat(
1650 "pack index object ids are not strictly ascending".into(),
1651 ));
1652 }
1653 let expected_min = if oid_bytes[0] == 0 {
1654 0
1655 } else {
1656 fanout[usize::from(oid_bytes[0] - 1)]
1657 };
1658 if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
1659 return Err(GitError::InvalidFormat(
1660 "pack index object id is outside its fanout bucket".into(),
1661 ));
1662 }
1663 let raw_offset = u32_be(&bytes[offset_start..offset_start + 4]);
1664 let offset = if raw_offset & 0x8000_0000 == 0 {
1665 u64::from(raw_offset)
1666 } else {
1667 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
1668 let large_start = large_offset_table.start + large_idx * 8;
1669 if large_idx >= large_offset_table.len() / 8 {
1670 return Err(GitError::InvalidFormat(
1671 "pack index large offset points past table".into(),
1672 ));
1673 }
1674 u64_be(&bytes[large_start..large_start + 8])
1675 };
1676 entries.push(PackIndexEntry {
1677 oid: ObjectId::from_raw(format, oid_bytes)?,
1678 crc32: u32_be(&bytes[crc_start..crc_start + 4]),
1679 offset,
1680 });
1681 }
1682 Ok(Self {
1683 version,
1684 fanout,
1685 entries,
1686 pack_checksum,
1687 index_checksum,
1688 })
1689 }
1690
1691 fn parse_v1_impl(bytes: &[u8], format: ObjectFormat, verify_checksum: bool) -> Result<Self> {
1692 let hash_len = format.raw_len();
1693 if bytes.len() < 256 * 4 + 2 * hash_len {
1694 return Err(GitError::InvalidFormat("pack index too short".into()));
1695 }
1696 let index_checksum_offset = bytes.len() - hash_len;
1697 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1698 if verify_checksum {
1699 let actual_index_checksum =
1700 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1701 if actual_index_checksum != index_checksum {
1702 return Err(GitError::InvalidFormat(format!(
1703 "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1704 )));
1705 }
1706 }
1707
1708 let mut offset = 0usize;
1709 let mut fanout = [0u32; 256];
1710 let mut previous = 0u32;
1711 for slot in &mut fanout {
1712 *slot = u32_be(&bytes[offset..offset + 4]);
1713 if *slot < previous {
1714 return Err(GitError::InvalidFormat(
1715 "pack index fanout is not monotonic".into(),
1716 ));
1717 }
1718 previous = *slot;
1719 offset += 4;
1720 }
1721 let count = fanout[255] as usize;
1722 let entry_len = hash_len
1723 .checked_add(4)
1724 .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1725 let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
1726 offset = entry_table.end;
1727 let expected_trailer_offset = bytes.len() - hash_len * 2;
1728 if offset != expected_trailer_offset {
1729 return Err(GitError::InvalidFormat(format!(
1730 "pack index has {} unexpected bytes before trailer",
1731 expected_trailer_offset.saturating_sub(offset)
1732 )));
1733 }
1734 let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1735
1736 let mut entries = Vec::with_capacity(count);
1737 let mut previous_oid: Option<ObjectId> = None;
1738 for idx in 0..count {
1739 let start = entry_table.start + idx * entry_len;
1740 let oid = ObjectId::from_raw(format, &bytes[start + 4..start + entry_len])?;
1741 if let Some(previous) = &previous_oid
1742 && previous.as_bytes() >= oid.as_bytes()
1743 {
1744 return Err(GitError::InvalidFormat(
1745 "pack index object ids are not strictly sorted".into(),
1746 ));
1747 }
1748 previous_oid = Some(oid);
1749 entries.push(PackIndexEntry {
1750 oid,
1751 crc32: 0,
1752 offset: u64::from(u32_be(&bytes[start..start + 4])),
1753 });
1754 }
1755 Ok(Self {
1756 version: 1,
1757 fanout,
1758 entries,
1759 pack_checksum,
1760 index_checksum,
1761 })
1762 }
1763
1764 pub fn find(&self, oid: &ObjectId) -> Option<&PackIndexEntry> {
1765 self.entries
1766 .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
1767 .ok()
1768 .map(|idx| &self.entries[idx])
1769 }
1770
1771 pub fn write_v2_sha1(entries: &[PackIndexEntry], pack_checksum: &ObjectId) -> Result<Vec<u8>> {
1772 Self::write_v2(ObjectFormat::Sha1, entries, pack_checksum)
1773 }
1774
1775 pub fn write_v2(
1776 format: ObjectFormat,
1777 entries: &[PackIndexEntry],
1778 pack_checksum: &ObjectId,
1779 ) -> Result<Vec<u8>> {
1780 if pack_checksum.format() != format {
1781 return Err(GitError::InvalidObjectId(
1782 "pack checksum format does not match index format".into(),
1783 ));
1784 }
1785 let mut entries = entries.iter().collect::<Vec<_>>();
1786 entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
1787 for pair in entries.windows(2) {
1788 if pair[0].oid.as_bytes() == pair[1].oid.as_bytes() {
1789 return Err(GitError::InvalidFormat(format!(
1790 "pack index contains duplicate object id {}",
1791 pair[0].oid
1792 )));
1793 }
1794 }
1795 let mut fanout = [0u32; 256];
1796 for entry in &entries {
1797 if entry.oid.format() != format {
1798 return Err(GitError::InvalidObjectId(
1799 "pack index entry format does not match index format".into(),
1800 ));
1801 }
1802 let first = entry.oid.as_bytes()[0] as usize;
1803 fanout[first] = fanout[first]
1804 .checked_add(1)
1805 .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1806 }
1807 let mut running = 0u32;
1808 for slot in &mut fanout {
1809 running = running
1810 .checked_add(*slot)
1811 .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1812 *slot = running;
1813 }
1814
1815 let mut index = Vec::new();
1816 index.extend_from_slice(&[0xff, b't', b'O', b'c']);
1817 index.extend_from_slice(&2u32.to_be_bytes());
1818 for count in fanout {
1819 index.extend_from_slice(&count.to_be_bytes());
1820 }
1821 for entry in &entries {
1822 index.extend_from_slice(entry.oid.as_bytes());
1823 }
1824 for entry in &entries {
1825 index.extend_from_slice(&entry.crc32.to_be_bytes());
1826 }
1827
1828 let mut large_offsets = Vec::new();
1829 for entry in &entries {
1830 if entry.offset < 0x8000_0000 {
1831 index.extend_from_slice(&(entry.offset as u32).to_be_bytes());
1832 } else {
1833 if large_offsets.len() > 0x7fff_ffff {
1834 return Err(GitError::InvalidFormat(
1835 "too many large pack offsets".into(),
1836 ));
1837 }
1838 let large_idx = large_offsets.len() as u32;
1839 index.extend_from_slice(&(0x8000_0000 | large_idx).to_be_bytes());
1840 large_offsets.push(entry.offset);
1841 }
1842 }
1843 for offset in large_offsets {
1844 index.extend_from_slice(&offset.to_be_bytes());
1845 }
1846 index.extend_from_slice(pack_checksum.as_bytes());
1847 let index_checksum = sley_core::digest_bytes(format, &index)?;
1848 index.extend_from_slice(index_checksum.as_bytes());
1849 Ok(index)
1850 }
1851
1852 pub fn write_v1(
1858 format: ObjectFormat,
1859 entries: &[PackIndexEntry],
1860 pack_checksum: &ObjectId,
1861 ) -> Result<Vec<u8>> {
1862 if pack_checksum.format() != format {
1863 return Err(GitError::InvalidObjectId(
1864 "pack checksum format does not match index format".into(),
1865 ));
1866 }
1867 let mut entries = entries.iter().collect::<Vec<_>>();
1868 entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
1869 for pair in entries.windows(2) {
1870 if pair[0].oid.as_bytes() == pair[1].oid.as_bytes() {
1871 return Err(GitError::InvalidFormat(format!(
1872 "pack index contains duplicate object id {}",
1873 pair[0].oid
1874 )));
1875 }
1876 }
1877 let mut fanout = [0u32; 256];
1878 for entry in &entries {
1879 if entry.oid.format() != format {
1880 return Err(GitError::InvalidObjectId(
1881 "pack index entry format does not match index format".into(),
1882 ));
1883 }
1884 if entry.offset > 0xffff_ffff {
1885 return Err(GitError::InvalidFormat(
1886 "pack offset too large for a version-1 index".into(),
1887 ));
1888 }
1889 let first = entry.oid.as_bytes()[0] as usize;
1890 fanout[first] = fanout[first]
1891 .checked_add(1)
1892 .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1893 }
1894 let mut running = 0u32;
1895 for slot in &mut fanout {
1896 running = running
1897 .checked_add(*slot)
1898 .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1899 *slot = running;
1900 }
1901
1902 let mut index = Vec::new();
1903 for count in fanout {
1904 index.extend_from_slice(&count.to_be_bytes());
1905 }
1906 for entry in &entries {
1907 index.extend_from_slice(&(entry.offset as u32).to_be_bytes());
1908 index.extend_from_slice(entry.oid.as_bytes());
1909 }
1910 index.extend_from_slice(pack_checksum.as_bytes());
1911 let index_checksum = sley_core::digest_bytes(format, &index)?;
1912 index.extend_from_slice(index_checksum.as_bytes());
1913 Ok(index)
1914 }
1915}
1916
1917pub fn pack_order_index_positions(entries: &[PackIndexEntry]) -> Vec<u32> {
1922 let mut oid_sorted: Vec<usize> = (0..entries.len()).collect();
1923 oid_sorted.sort_by(|&a, &b| entries[a].oid.as_bytes().cmp(entries[b].oid.as_bytes()));
1924 let mut index_position = vec![0u32; entries.len()];
1925 for (position, &entry) in oid_sorted.iter().enumerate() {
1926 index_position[entry] = position as u32;
1927 }
1928 let mut by_offset: Vec<usize> = (0..entries.len()).collect();
1929 by_offset.sort_by_key(|&entry| entries[entry].offset);
1930 by_offset
1931 .into_iter()
1932 .map(|entry| index_position[entry])
1933 .collect()
1934}
1935
1936impl PackReverseIndex {
1937 pub fn write(
1938 format: ObjectFormat,
1939 positions: &[u32],
1940 pack_checksum: &ObjectId,
1941 ) -> Result<Vec<u8>> {
1942 if pack_checksum.format() != format {
1943 return Err(GitError::InvalidObjectId(
1944 "pack checksum format does not match reverse index format".into(),
1945 ));
1946 }
1947 validate_position_permutation(positions)?;
1948
1949 let mut out = Vec::new();
1950 out.extend_from_slice(b"RIDX");
1951 out.extend_from_slice(&1u32.to_be_bytes());
1952 out.extend_from_slice(&hash_function_id(format).to_be_bytes());
1953 for position in positions {
1954 out.extend_from_slice(&position.to_be_bytes());
1955 }
1956 out.extend_from_slice(pack_checksum.as_bytes());
1957 let checksum = sley_core::digest_bytes(format, &out)?;
1958 out.extend_from_slice(checksum.as_bytes());
1959 Ok(out)
1960 }
1961
1962 pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
1963 let hash_len = format.raw_len();
1964 let table_len = object_count
1965 .checked_mul(4)
1966 .ok_or_else(|| GitError::InvalidFormat("reverse index table overflow".into()))?;
1967 let min_len = 12usize
1968 .checked_add(table_len)
1969 .and_then(|len| len.checked_add(hash_len * 2))
1970 .ok_or_else(|| GitError::InvalidFormat("reverse index length overflow".into()))?;
1971 if bytes.len() < min_len {
1972 return Err(GitError::InvalidFormat("reverse index too short".into()));
1973 }
1974 if bytes.len() != min_len {
1975 return Err(GitError::InvalidFormat(format!(
1976 "reverse index has {} trailing bytes",
1977 bytes.len() - min_len
1978 )));
1979 }
1980 if &bytes[..4] != b"RIDX" {
1981 return Err(GitError::InvalidFormat(
1982 "missing reverse index signature".into(),
1983 ));
1984 }
1985 let version = u32_be(&bytes[4..8]);
1986 if version != 1 {
1987 return Err(GitError::Unsupported(format!(
1988 "reverse index version {version}"
1989 )));
1990 }
1991 let hash_id = u32_be(&bytes[8..12]);
1992 if hash_id != hash_function_id(format) {
1993 return Err(GitError::InvalidFormat(format!(
1994 "reverse index hash id {hash_id} does not match {}",
1995 format.name()
1996 )));
1997 }
1998
1999 let index_checksum_offset = bytes.len() - hash_len;
2000 let actual_index_checksum =
2001 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
2002 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
2003 if actual_index_checksum != index_checksum {
2004 return Err(GitError::InvalidFormat(format!(
2005 "reverse index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2006 )));
2007 }
2008
2009 let pack_checksum_offset = index_checksum_offset - hash_len;
2010 let pack_checksum =
2011 ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
2012 let mut positions = Vec::with_capacity(object_count);
2013 let mut offset = 12usize;
2014 for _ in 0..object_count {
2015 let position = u32_be(&bytes[offset..offset + 4]);
2016 positions.push(position);
2017 offset += 4;
2018 }
2019 validate_position_permutation(&positions)?;
2020
2021 Ok(Self {
2022 version,
2023 format,
2024 positions,
2025 pack_checksum,
2026 index_checksum,
2027 })
2028 }
2029}
2030
2031impl PackMtimes {
2032 pub fn write(
2033 format: ObjectFormat,
2034 mtimes: &[u32],
2035 pack_checksum: &ObjectId,
2036 ) -> Result<Vec<u8>> {
2037 if pack_checksum.format() != format {
2038 return Err(GitError::InvalidObjectId(
2039 "pack checksum format does not match mtimes format".into(),
2040 ));
2041 }
2042
2043 let mut out = Vec::new();
2044 out.extend_from_slice(b"MTME");
2045 out.extend_from_slice(&1u32.to_be_bytes());
2046 out.extend_from_slice(&hash_function_id(format).to_be_bytes());
2047 for mtime in mtimes {
2048 out.extend_from_slice(&mtime.to_be_bytes());
2049 }
2050 out.extend_from_slice(pack_checksum.as_bytes());
2051 let checksum = sley_core::digest_bytes(format, &out)?;
2052 out.extend_from_slice(checksum.as_bytes());
2053 Ok(out)
2054 }
2055
2056 pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
2057 let hash_len = format.raw_len();
2058 let table_len = object_count
2059 .checked_mul(4)
2060 .ok_or_else(|| GitError::InvalidFormat("mtimes table overflow".into()))?;
2061 let expected_len = 12usize
2062 .checked_add(table_len)
2063 .and_then(|len| len.checked_add(hash_len * 2))
2064 .ok_or_else(|| GitError::InvalidFormat("mtimes length overflow".into()))?;
2065 if bytes.len() < expected_len {
2066 return Err(GitError::InvalidFormat("mtimes file too short".into()));
2067 }
2068 if bytes.len() != expected_len {
2069 return Err(GitError::InvalidFormat(format!(
2070 "mtimes file has {} trailing bytes",
2071 bytes.len() - expected_len
2072 )));
2073 }
2074 if &bytes[..4] != b"MTME" {
2075 return Err(GitError::InvalidFormat("missing mtimes signature".into()));
2076 }
2077 let version = u32_be(&bytes[4..8]);
2078 if version != 1 {
2079 return Err(GitError::Unsupported(format!("mtimes version {version}")));
2080 }
2081 let hash_id = u32_be(&bytes[8..12]);
2082 if hash_id != hash_function_id(format) {
2083 return Err(GitError::InvalidFormat(format!(
2084 "mtimes hash id {hash_id} does not match {}",
2085 format.name()
2086 )));
2087 }
2088
2089 let index_checksum_offset = bytes.len() - hash_len;
2090 let actual_index_checksum =
2091 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
2092 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
2093 if actual_index_checksum != index_checksum {
2094 return Err(GitError::InvalidFormat(format!(
2095 "mtimes checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2096 )));
2097 }
2098
2099 let pack_checksum_offset = index_checksum_offset - hash_len;
2100 let pack_checksum =
2101 ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
2102 let mut mtimes = Vec::with_capacity(object_count);
2103 let mut offset = 12usize;
2104 for _ in 0..object_count {
2105 mtimes.push(u32_be(&bytes[offset..offset + 4]));
2106 offset += 4;
2107 }
2108
2109 Ok(Self {
2110 version,
2111 format,
2112 mtimes,
2113 pack_checksum,
2114 index_checksum,
2115 })
2116 }
2117}
2118
2119impl PackBitmapIndex {
2120 pub const OPTION_FULL_DAG: u16 = 0x0001;
2121 pub const OPTION_HASH_CACHE: u16 = 0x0004;
2122
2123 pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
2124 let hash_len = format.raw_len();
2125 let min_len = 12usize
2126 .checked_add(hash_len * 2)
2127 .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
2128 if bytes.len() < min_len {
2129 return Err(GitError::InvalidFormat("bitmap index too short".into()));
2130 }
2131 if &bytes[..4] != b"BITM" {
2132 return Err(GitError::InvalidFormat(
2133 "missing bitmap index signature".into(),
2134 ));
2135 }
2136 let version = u16_be(&bytes[4..6]);
2137 if version != 1 {
2138 return Err(GitError::Unsupported(format!(
2139 "bitmap index version {version}"
2140 )));
2141 }
2142 let options = u16_be(&bytes[6..8]);
2143 let known_options = Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE;
2144 if options & !known_options != 0 {
2145 return Err(GitError::Unsupported(format!(
2146 "bitmap index options {:#06x}",
2147 options & !known_options
2148 )));
2149 }
2150 let entry_count = u32_be(&bytes[8..12]) as usize;
2151 let checksum_offset = bytes.len() - hash_len;
2152 let actual_index_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
2153 let index_checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
2154 if actual_index_checksum != index_checksum {
2155 return Err(GitError::InvalidFormat(format!(
2156 "bitmap index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2157 )));
2158 }
2159
2160 let pack_checksum_end = 12usize
2161 .checked_add(hash_len)
2162 .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
2163 let pack_checksum = ObjectId::from_raw(format, &bytes[12..pack_checksum_end])?;
2164 let mut offset = pack_checksum_end;
2165 let commits = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2166 let trees = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2167 let blobs = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2168 let tags = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2169
2170 let mut entries = Vec::with_capacity(entry_count);
2171 for idx in 0..entry_count {
2172 if checksum_offset.saturating_sub(offset) < 6 {
2173 return Err(GitError::InvalidFormat(
2174 "truncated bitmap index entry".into(),
2175 ));
2176 }
2177 let object_position = u32_be(&bytes[offset..offset + 4]);
2178 offset += 4;
2179 if object_position as usize >= object_count {
2180 return Err(GitError::InvalidFormat(
2181 "bitmap index entry points past object table".into(),
2182 ));
2183 }
2184 let xor_offset = bytes[offset];
2185 offset += 1;
2186 if xor_offset as usize > idx || xor_offset > 160 {
2187 return Err(GitError::InvalidFormat(
2188 "bitmap index entry has invalid XOR offset".into(),
2189 ));
2190 }
2191 let flags = bytes[offset];
2192 offset += 1;
2193 let bitmap = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2194 entries.push(PackBitmapEntry {
2195 object_position,
2196 xor_offset,
2197 flags,
2198 bitmap,
2199 });
2200 }
2201
2202 let name_hash_cache = if options & Self::OPTION_HASH_CACHE != 0 {
2203 let cache_len = object_count
2204 .checked_mul(4)
2205 .ok_or_else(|| GitError::InvalidFormat("bitmap hash cache overflow".into()))?;
2206 if checksum_offset.saturating_sub(offset) < cache_len {
2207 return Err(GitError::InvalidFormat(
2208 "truncated bitmap hash cache".into(),
2209 ));
2210 }
2211 let mut cache = Vec::with_capacity(object_count);
2212 for _ in 0..object_count {
2213 cache.push(u32_be(&bytes[offset..offset + 4]));
2214 offset += 4;
2215 }
2216 Some(cache)
2217 } else {
2218 None
2219 };
2220
2221 if offset != checksum_offset {
2222 return Err(GitError::InvalidFormat(format!(
2223 "bitmap index has {} trailing bytes",
2224 checksum_offset - offset
2225 )));
2226 }
2227
2228 Ok(Self {
2229 version,
2230 format,
2231 options,
2232 pack_checksum,
2233 index_checksum,
2234 type_bitmaps: PackBitmapTypeBitmaps {
2235 commits,
2236 trees,
2237 blobs,
2238 tags,
2239 },
2240 entries,
2241 name_hash_cache,
2242 })
2243 }
2244
2245 pub fn entry_for_index_position(&self, position: u32) -> Option<&PackBitmapEntry> {
2248 self.entries
2249 .iter()
2250 .find(|entry| entry.object_position == position)
2251 }
2252}
2253
2254fn parse_bitmap_ewah(
2255 bytes: &[u8],
2256 offset: &mut usize,
2257 checksum_offset: usize,
2258 _object_count: usize,
2259) -> Result<EwahBitmap> {
2260 if checksum_offset.saturating_sub(*offset) < 12 {
2261 return Err(GitError::InvalidFormat("truncated EWAH bitmap".into()));
2262 }
2263 let bit_size = u32_be(&bytes[*offset..*offset + 4]);
2264 *offset += 4;
2265 let word_count = u32_be(&bytes[*offset..*offset + 4]) as usize;
2266 *offset += 4;
2267 let words_len = word_count
2268 .checked_mul(8)
2269 .ok_or_else(|| GitError::InvalidFormat("EWAH word table overflow".into()))?;
2270 if checksum_offset.saturating_sub(*offset) < words_len + 4 {
2271 return Err(GitError::InvalidFormat("truncated EWAH word table".into()));
2272 }
2273 let mut words = Vec::with_capacity(word_count);
2274 for _ in 0..word_count {
2275 words.push(u64_be(&bytes[*offset..*offset + 8]));
2276 *offset += 8;
2277 }
2278 let rlw_position = u32_be(&bytes[*offset..*offset + 4]);
2279 *offset += 4;
2280 validate_ewah_words(bit_size, &words, rlw_position)?;
2281 Ok(EwahBitmap {
2282 bit_size,
2283 words,
2284 rlw_position,
2285 })
2286}
2287
2288fn validate_ewah_words(bit_size: u32, words: &[u64], rlw_position: u32) -> Result<()> {
2289 if words.is_empty() {
2290 if rlw_position != 0 || bit_size != 0 {
2291 return Err(GitError::InvalidFormat(
2292 "EWAH bitmap has invalid empty RLW".into(),
2293 ));
2294 }
2295 return Ok(());
2296 }
2297 if rlw_position as usize >= words.len() {
2298 return Err(GitError::InvalidFormat(
2299 "EWAH RLW position points past word table".into(),
2300 ));
2301 }
2302 let mut word_idx = 0usize;
2303 let mut decoded_words = 0u64;
2304 while word_idx < words.len() {
2305 let rlw = words[word_idx];
2306 let run_words = (rlw >> 1) & 0xffff_ffff;
2307 let literal_words = (rlw >> 33) as usize;
2308 word_idx += 1;
2309 word_idx = word_idx
2310 .checked_add(literal_words)
2311 .ok_or_else(|| GitError::InvalidFormat("EWAH literal word overflow".into()))?;
2312 if word_idx > words.len() {
2313 return Err(GitError::InvalidFormat(
2314 "EWAH literal words extend past word table".into(),
2315 ));
2316 }
2317 decoded_words = decoded_words
2318 .checked_add(run_words)
2319 .and_then(|value| value.checked_add(literal_words as u64))
2320 .ok_or_else(|| GitError::InvalidFormat("EWAH decoded size overflow".into()))?;
2321 }
2322 let decoded_bits = decoded_words
2323 .checked_mul(64)
2324 .ok_or_else(|| GitError::InvalidFormat("EWAH decoded bit size overflow".into()))?;
2325 if decoded_bits < u64::from(bit_size) {
2326 return Err(GitError::InvalidFormat(
2327 "EWAH bitmap decodes fewer bits than declared".into(),
2328 ));
2329 }
2330 Ok(())
2331}
2332
2333impl MultiPackIndex {
2334 pub fn write(
2335 format: ObjectFormat,
2336 version: u8,
2337 pack_names: &[String],
2338 objects: &[MultiPackIndexEntry],
2339 ) -> Result<Vec<u8>> {
2340 Self::write_with_reverse_index(format, version, pack_names, objects, None)
2341 }
2342
2343 pub fn write_with_reverse_index(
2352 format: ObjectFormat,
2353 version: u8,
2354 pack_names: &[String],
2355 objects: &[MultiPackIndexEntry],
2356 preferred_pack: Option<u32>,
2357 ) -> Result<Vec<u8>> {
2358 Self::write_with_bitmap_packs(format, version, pack_names, objects, preferred_pack, None)
2359 }
2360
2361 pub fn write_with_bitmap_packs(
2362 format: ObjectFormat,
2363 version: u8,
2364 pack_names: &[String],
2365 objects: &[MultiPackIndexEntry],
2366 preferred_pack: Option<u32>,
2367 bitmapped_packs: Option<&[MultiPackBitmapPack]>,
2368 ) -> Result<Vec<u8>> {
2369 if let Some(preferred) = preferred_pack
2370 && preferred as usize >= pack_names.len()
2371 {
2372 return Err(GitError::InvalidFormat(format!(
2373 "preferred pack {preferred} out of range for {} packs",
2374 pack_names.len()
2375 )));
2376 }
2377 if version != 1 && version != 2 {
2378 return Err(GitError::Unsupported(format!(
2379 "multi-pack-index version {version}"
2380 )));
2381 }
2382 if pack_names.len() > u32::MAX as usize {
2383 return Err(GitError::InvalidFormat(
2384 "too many multi-pack-index packs".into(),
2385 ));
2386 }
2387 if objects.len() > u32::MAX as usize {
2388 return Err(GitError::InvalidFormat(
2389 "too many multi-pack-index objects".into(),
2390 ));
2391 }
2392 if let Some(bitmapped_packs) = bitmapped_packs {
2393 if bitmapped_packs.len() != pack_names.len() {
2394 return Err(GitError::InvalidFormat(
2395 "multi-pack-index BTMP pack count mismatch".into(),
2396 ));
2397 }
2398 for pack in bitmapped_packs {
2399 let bitmap_end = u64::from(pack.bitmap_pos)
2400 .checked_add(u64::from(pack.bitmap_nr))
2401 .ok_or_else(|| {
2402 GitError::InvalidFormat("multi-pack-index BTMP range overflow".into())
2403 })?;
2404 if bitmap_end > objects.len() as u64 {
2405 return Err(GitError::InvalidFormat(
2406 "multi-pack-index BTMP range points past object table".into(),
2407 ));
2408 }
2409 }
2410 }
2411 validate_midx_pack_names(pack_names)?;
2412 if version == 1 && pack_names.windows(2).any(|pair| pair[0] > pair[1]) {
2413 return Err(GitError::InvalidFormat(
2414 "multi-pack-index v1 pack names must be sorted".into(),
2415 ));
2416 }
2417
2418 let mut objects = objects.iter().collect::<Vec<_>>();
2419 objects.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
2420 let mut previous_oid: Option<&ObjectId> = None;
2421 for object in &objects {
2422 if object.oid.format() != format {
2423 return Err(GitError::InvalidObjectId(
2424 "multi-pack-index object format does not match index format".into(),
2425 ));
2426 }
2427 if let Some(previous) = previous_oid
2428 && previous.as_bytes() == object.oid.as_bytes()
2429 {
2430 return Err(GitError::InvalidFormat(
2431 "multi-pack-index contains duplicate object ids".into(),
2432 ));
2433 }
2434 if object.pack_int_id as usize >= pack_names.len() {
2435 return Err(GitError::InvalidFormat(
2436 "multi-pack-index object points past pack table".into(),
2437 ));
2438 }
2439 previous_oid = Some(&object.oid);
2440 }
2441
2442 let mut large_offsets = Vec::new();
2443 let mut chunks = vec![
2444 (*b"PNAM", write_midx_pack_names(pack_names)),
2445 (*b"OIDF", write_midx_oid_fanout(&objects)?),
2446 (*b"OIDL", write_midx_oid_lookup(&objects)),
2447 (
2448 *b"OOFF",
2449 write_midx_object_offsets(&objects, &mut large_offsets)?,
2450 ),
2451 ];
2452 if !large_offsets.is_empty() {
2453 chunks.push((*b"LOFF", large_offsets));
2454 }
2455 if let Some(preferred) = preferred_pack {
2456 let mut pseudo: Vec<u32> = (0..objects.len() as u32).collect();
2459 pseudo.sort_by_key(|&midx_pos| {
2460 let object = objects[midx_pos as usize];
2461 (
2462 object.pack_int_id != preferred,
2463 object.pack_int_id,
2464 object.offset,
2465 )
2466 });
2467 let mut ridx = Vec::with_capacity(pseudo.len() * 4);
2468 for midx_pos in pseudo {
2469 ridx.extend_from_slice(&midx_pos.to_be_bytes());
2470 }
2471 chunks.push((*b"RIDX", ridx));
2472 }
2473 if let Some(bitmapped_packs) = bitmapped_packs {
2474 let mut btmp = Vec::with_capacity(bitmapped_packs.len() * 8);
2475 for pack in bitmapped_packs {
2476 btmp.extend_from_slice(&pack.bitmap_pos.to_be_bytes());
2477 btmp.extend_from_slice(&pack.bitmap_nr.to_be_bytes());
2478 }
2479 chunks.push((*b"BTMP", btmp));
2480 }
2481 write_multi_pack_index_chunks(format, version, pack_names.len() as u32, &chunks)
2482 }
2483
2484 pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
2485 Self::parse_impl(bytes, format, true)
2486 }
2487
2488 pub fn parse_without_checksum(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
2489 Self::parse_impl(bytes, format, false)
2490 }
2491
2492 fn parse_impl(bytes: &[u8], format: ObjectFormat, verify_checksum: bool) -> Result<Self> {
2493 let hash_len = format.raw_len();
2494 if bytes.len() < 12 + 12 + hash_len {
2495 return Err(GitError::InvalidFormat(
2496 "multi-pack-index file too short".into(),
2497 ));
2498 }
2499 if &bytes[..4] != b"MIDX" {
2500 return Err(GitError::InvalidFormat(
2501 "missing multi-pack-index signature".into(),
2502 ));
2503 }
2504 let version = bytes[4];
2505 if version != 1 && version != 2 {
2506 return Err(GitError::Unsupported(format!(
2507 "multi-pack-index version {version}"
2508 )));
2509 }
2510 let hash_id = bytes[5];
2511 if u32::from(hash_id) != hash_function_id(format) {
2512 return Err(GitError::InvalidFormat(format!(
2513 "multi-pack-index hash id {hash_id} does not match {}",
2514 format.name()
2515 )));
2516 }
2517 let chunk_count = bytes[6] as usize;
2518 let base_midx_count = bytes[7];
2519 if base_midx_count != 0 {
2520 return Err(GitError::Unsupported(format!(
2521 "multi-pack-index base count {base_midx_count}"
2522 )));
2523 }
2524 let pack_count = u32_be(&bytes[8..12]);
2525 let lookup_len = (chunk_count + 1)
2526 .checked_mul(12)
2527 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2528 let data_start = 12usize
2529 .checked_add(lookup_len)
2530 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2531 let checksum_offset = bytes.len() - hash_len;
2532 if data_start > checksum_offset {
2533 return Err(GitError::InvalidFormat(
2534 "truncated multi-pack-index chunk lookup".into(),
2535 ));
2536 }
2537
2538 let checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
2539 if verify_checksum {
2540 let actual_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
2541 if actual_checksum != checksum {
2542 return Err(GitError::InvalidFormat(format!(
2543 "multi-pack-index checksum mismatch: expected {checksum}, got {actual_checksum}"
2544 )));
2545 }
2546 }
2547
2548 let mut entries = Vec::with_capacity(chunk_count + 1);
2549 let mut offset = 12usize;
2550 for _ in 0..=chunk_count {
2551 let id = [
2552 bytes[offset],
2553 bytes[offset + 1],
2554 bytes[offset + 2],
2555 bytes[offset + 3],
2556 ];
2557 let chunk_offset = u64_be(&bytes[offset + 4..offset + 12]);
2558 entries.push((id, chunk_offset));
2559 offset += 12;
2560 }
2561 let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
2562 return Err(GitError::InvalidFormat(
2563 "multi-pack-index chunk lookup is empty".into(),
2564 ));
2565 };
2566 if terminator_id != [0, 0, 0, 0] {
2567 return Err(GitError::InvalidFormat(
2568 "multi-pack-index chunk lookup missing terminator".into(),
2569 ));
2570 }
2571 if terminator_offset != checksum_offset as u64 {
2572 return Err(GitError::InvalidFormat(
2573 "multi-pack-index terminator does not point at checksum".into(),
2574 ));
2575 }
2576
2577 let mut chunks = Vec::with_capacity(chunk_count);
2578 let mut previous_offset = data_start as u64;
2579 let mut reported_unaligned = false;
2580 for pair in entries.windows(2) {
2581 let (id, chunk_offset) = pair[0];
2582 let (_next_id, next_offset) = pair[1];
2583 if id == [0, 0, 0, 0] {
2584 return Err(GitError::InvalidFormat(
2585 "multi-pack-index chunk id is zero before terminator".into(),
2586 ));
2587 }
2588 if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
2589 return Err(GitError::InvalidFormat(
2590 "multi-pack-index chunk offsets are not monotonic".into(),
2591 ));
2592 }
2593 if chunk_offset % 4 != 0 && !reported_unaligned {
2594 eprintln!(
2595 "error: chunk id {:08x} not 4-byte aligned",
2596 u32::from_be_bytes(id)
2597 );
2598 reported_unaligned = true;
2599 }
2600 if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
2601 return Err(GitError::InvalidFormat(
2602 "multi-pack-index chunk length is invalid".into(),
2603 ));
2604 }
2605 chunks.push(MultiPackIndexChunk {
2606 id,
2607 offset: chunk_offset,
2608 len: next_offset - chunk_offset,
2609 });
2610 previous_offset = chunk_offset;
2611 }
2612
2613 let pack_names = parse_midx_pack_names(bytes, &chunks, pack_count as usize, version)?;
2614 let (fanout, object_count) = parse_midx_oid_fanout(bytes, &chunks)?;
2615 let object_ids = parse_midx_object_ids(bytes, &chunks, format, object_count, &fanout)?;
2616 let objects = parse_midx_object_offsets(bytes, &chunks, object_ids, pack_count)?;
2617 let reverse_index = parse_midx_reverse_index(bytes, &chunks, object_count)?;
2618 let bitmapped_packs =
2619 parse_midx_bitmapped_packs(bytes, &chunks, pack_count as usize, object_count)?;
2620
2621 Ok(Self {
2622 version,
2623 format,
2624 pack_count,
2625 pack_names,
2626 object_count: object_count as u32,
2627 fanout,
2628 objects,
2629 reverse_index,
2630 bitmapped_packs,
2631 chunks,
2632 checksum,
2633 })
2634 }
2635
2636 pub fn find(&self, oid: &ObjectId) -> Option<&MultiPackIndexEntry> {
2637 self.objects
2638 .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
2639 .ok()
2640 .map(|idx| &self.objects[idx])
2641 }
2642}
2643
2644impl MultiPackIndexOidLookup {
2645 pub fn parse(bytes: Arc<dyn PackIndexByteSource>, format: ObjectFormat) -> Result<Self> {
2646 let raw = bytes.as_bytes();
2647 let hash_len = format.raw_len();
2648 if raw.len() < 12 + 12 + hash_len {
2649 return Err(GitError::InvalidFormat(
2650 "multi-pack-index file too short".into(),
2651 ));
2652 }
2653 if &raw[..4] != b"MIDX" {
2654 return Err(GitError::InvalidFormat(
2655 "missing multi-pack-index signature".into(),
2656 ));
2657 }
2658 let version = raw[4];
2659 if version != 1 && version != 2 {
2660 return Err(GitError::Unsupported(format!(
2661 "multi-pack-index version {version}"
2662 )));
2663 }
2664 let hash_id = raw[5];
2665 if u32::from(hash_id) != hash_function_id(format) {
2666 return Err(GitError::InvalidFormat(format!(
2667 "multi-pack-index hash id {hash_id} does not match {}",
2668 format.name()
2669 )));
2670 }
2671 let chunk_count = raw[6] as usize;
2672 let base_midx_count = raw[7];
2673 if base_midx_count != 0 {
2674 return Err(GitError::Unsupported(format!(
2675 "multi-pack-index base count {base_midx_count}"
2676 )));
2677 }
2678 let pack_count = u32_be(&raw[8..12]);
2679 let lookup_len = (chunk_count + 1)
2680 .checked_mul(12)
2681 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2682 let data_start = 12usize
2683 .checked_add(lookup_len)
2684 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2685 let checksum_offset = raw.len() - hash_len;
2686 if data_start > checksum_offset {
2687 return Err(GitError::InvalidFormat(
2688 "truncated multi-pack-index chunk lookup".into(),
2689 ));
2690 }
2691
2692 let mut entries = Vec::with_capacity(chunk_count + 1);
2693 let mut offset = 12usize;
2694 for _ in 0..=chunk_count {
2695 let id = [
2696 raw[offset],
2697 raw[offset + 1],
2698 raw[offset + 2],
2699 raw[offset + 3],
2700 ];
2701 let chunk_offset = u64_be(&raw[offset + 4..offset + 12]);
2702 entries.push((id, chunk_offset));
2703 offset += 12;
2704 }
2705 let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
2706 return Err(GitError::InvalidFormat(
2707 "multi-pack-index chunk lookup is empty".into(),
2708 ));
2709 };
2710 if terminator_id != [0, 0, 0, 0] {
2711 return Err(GitError::InvalidFormat(
2712 "multi-pack-index chunk lookup missing terminator".into(),
2713 ));
2714 }
2715 if terminator_offset != checksum_offset as u64 {
2716 return Err(GitError::InvalidFormat(
2717 "multi-pack-index terminator does not point at checksum".into(),
2718 ));
2719 }
2720
2721 let mut chunks = Vec::with_capacity(chunk_count);
2722 let mut previous_offset = data_start as u64;
2723 let mut reported_unaligned = false;
2724 for pair in entries.windows(2) {
2725 let (id, chunk_offset) = pair[0];
2726 let (_next_id, next_offset) = pair[1];
2727 if id == [0, 0, 0, 0] {
2728 return Err(GitError::InvalidFormat(
2729 "multi-pack-index chunk id is zero before terminator".into(),
2730 ));
2731 }
2732 if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
2733 return Err(GitError::InvalidFormat(
2734 "multi-pack-index chunk offsets are not monotonic".into(),
2735 ));
2736 }
2737 if chunk_offset % 4 != 0 && !reported_unaligned {
2738 eprintln!(
2739 "error: chunk id {:08x} not 4-byte aligned",
2740 u32::from_be_bytes(id)
2741 );
2742 reported_unaligned = true;
2743 }
2744 if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
2745 return Err(GitError::InvalidFormat(
2746 "multi-pack-index chunk length is invalid".into(),
2747 ));
2748 }
2749 chunks.push(MultiPackIndexChunk {
2750 id,
2751 offset: chunk_offset,
2752 len: next_offset - chunk_offset,
2753 });
2754 previous_offset = chunk_offset;
2755 }
2756
2757 let pack_names = parse_midx_pack_names(raw, &chunks, pack_count as usize, version)?;
2758 let (fanout, object_count) = parse_midx_oid_fanout(raw, &chunks)?;
2759 let oid_lookup = midx_chunk_data(raw, &chunks, *b"OIDL", true)?
2760 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
2761 let expected_len = object_count.checked_mul(hash_len).ok_or_else(|| {
2762 GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into())
2763 })?;
2764 if oid_lookup.len() != expected_len {
2765 return Err(GitError::InvalidFormat(
2766 "error: multi-pack-index OID lookup chunk is the wrong size\nfatal: multi-pack-index required OID lookup chunk missing or corrupted".into(),
2767 ));
2768 }
2769 let object_offsets = midx_chunk_data(raw, &chunks, *b"OOFF", true)?
2770 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
2771 let expected_offsets_len = object_count.checked_mul(8).ok_or_else(|| {
2772 GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into())
2773 })?;
2774 if object_offsets.len() != expected_offsets_len {
2775 return Err(GitError::InvalidFormat(
2776 "error: multi-pack-index object offset chunk is the wrong size\nfatal: multi-pack-index required object offsets chunk missing or corrupted".into(),
2777 ));
2778 }
2779 let large_offsets = midx_chunk_data(raw, &chunks, *b"LOFF", false)?;
2780 if let Some(large_offsets) = large_offsets
2781 && large_offsets.len() % 8 != 0
2782 {
2783 return Err(GitError::InvalidFormat(
2784 "multi-pack-index LOFF chunk has invalid length".into(),
2785 ));
2786 }
2787 let oid_lookup_offset = oid_lookup.as_ptr() as usize - raw.as_ptr() as usize;
2788 let object_offsets_offset = object_offsets.as_ptr() as usize - raw.as_ptr() as usize;
2789 let (large_offsets_offset, large_offsets_len) = match large_offsets {
2790 Some(large_offsets) => (
2791 Some(large_offsets.as_ptr() as usize - raw.as_ptr() as usize),
2792 large_offsets.len(),
2793 ),
2794 None => (None, 0),
2795 };
2796 Ok(Self {
2797 format,
2798 pack_count,
2799 pack_names,
2800 fanout,
2801 object_count,
2802 oid_lookup_offset,
2803 object_offsets_offset,
2804 large_offsets_offset,
2805 large_offsets_len,
2806 bytes,
2807 })
2808 }
2809
2810 pub fn contains(&self, oid: &ObjectId) -> bool {
2811 self.find_position(oid).is_some()
2812 }
2813
2814 pub fn find(&self, oid: &ObjectId) -> Result<Option<MultiPackIndexEntry>> {
2815 let Some(position) = self.find_position(oid) else {
2816 return Ok(None);
2817 };
2818 let bytes = self.bytes.as_bytes();
2819 let hash_len = self.format.raw_len();
2820 let oid_start = self
2821 .oid_lookup_offset
2822 .checked_add(position * hash_len)
2823 .ok_or_else(|| {
2824 GitError::InvalidFormat("multi-pack-index OIDL offset overflow".into())
2825 })?;
2826 let oid = ObjectId::from_raw(self.format, &bytes[oid_start..oid_start + hash_len])?;
2827 let offset_start = self
2828 .object_offsets_offset
2829 .checked_add(position * 8)
2830 .ok_or_else(|| {
2831 GitError::InvalidFormat("multi-pack-index OOFF offset overflow".into())
2832 })?;
2833 let data = &bytes[offset_start..offset_start + 8];
2834 let pack_int_id = u32_be(&data[..4]);
2835 if pack_int_id >= self.pack_count {
2836 return Err(GitError::InvalidFormat(
2837 "multi-pack-index object points past pack table".into(),
2838 ));
2839 }
2840 let raw_offset = u32_be(&data[4..8]);
2841 let offset = if raw_offset & 0x8000_0000 == 0 {
2842 u64::from(raw_offset)
2843 } else {
2844 let Some(large_offsets_offset) = self.large_offsets_offset else {
2845 return Err(GitError::InvalidFormat(
2846 "multi-pack-index large offset missing LOFF chunk".into(),
2847 ));
2848 };
2849 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
2850 let large_start = large_idx.checked_mul(8).ok_or_else(|| {
2851 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
2852 })?;
2853 let large_end = large_start.checked_add(8).ok_or_else(|| {
2854 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
2855 })?;
2856 if large_end > self.large_offsets_len {
2857 return Err(GitError::InvalidFormat(
2858 "fatal: multi-pack-index large offset out of bounds".into(),
2859 ));
2860 }
2861 let start = large_offsets_offset + large_start;
2862 u64_be(&bytes[start..start + 8])
2863 };
2864 Ok(Some(MultiPackIndexEntry {
2865 oid,
2866 pack_int_id,
2867 offset,
2868 force_large_offset: raw_offset & 0x8000_0000 != 0,
2869 }))
2870 }
2871
2872 pub fn pack_name(&self, pack_int_id: u32) -> Option<&str> {
2873 self.pack_names
2874 .get(pack_int_id as usize)
2875 .map(String::as_str)
2876 }
2877
2878 fn find_position(&self, oid: &ObjectId) -> Option<usize> {
2879 if oid.format() != self.format || self.object_count == 0 {
2880 return None;
2881 }
2882 let first = oid.as_bytes()[0] as usize;
2883 let start = if first == 0 {
2884 0
2885 } else {
2886 self.fanout[first - 1] as usize
2887 };
2888 let end = self.fanout[first] as usize;
2889 if start >= end || end > self.object_count {
2890 return None;
2891 }
2892 let hash_len = self.format.raw_len();
2893 let table_start = self.oid_lookup_offset;
2894 let table_end = table_start + self.object_count * hash_len;
2895 let bytes = self.bytes.as_bytes();
2896 let table = &bytes[table_start..table_end];
2897 let needle = oid.as_bytes();
2898 let mut low = start;
2899 let mut high = end;
2900 while low < high {
2901 let mid = low + (high - low) / 2;
2902 let raw = &table[mid * hash_len..(mid + 1) * hash_len];
2903 match raw.cmp(needle) {
2904 std::cmp::Ordering::Less => low = mid + 1,
2905 std::cmp::Ordering::Equal => return Some(mid),
2906 std::cmp::Ordering::Greater => high = mid,
2907 }
2908 }
2909 None
2910 }
2911}
2912
2913fn validate_midx_pack_names(pack_names: &[String]) -> Result<()> {
2914 for name in pack_names {
2915 if name.is_empty() {
2916 return Err(GitError::InvalidFormat(
2917 "multi-pack-index pack name is empty".into(),
2918 ));
2919 }
2920 if name
2921 .bytes()
2922 .any(|byte| byte == 0 || matches!(byte, b'/' | b'\\'))
2923 {
2924 return Err(GitError::InvalidFormat(
2925 "multi-pack-index pack name contains an invalid byte".into(),
2926 ));
2927 }
2928 }
2929 Ok(())
2930}
2931
/// Builds the `PNAM` chunk payload.
///
/// Each name is written followed by a NUL terminator, in the given order;
/// the result is then zero-padded up to the next multiple of four bytes.
fn write_midx_pack_names(pack_names: &[String]) -> Vec<u8> {
    let mut out: Vec<u8> = pack_names
        .iter()
        .flat_map(|name| name.bytes().chain(std::iter::once(0u8)))
        .collect();
    let padding = (4 - out.len() % 4) % 4;
    out.extend(std::iter::repeat(0u8).take(padding));
    out
}
2943
2944fn write_midx_oid_fanout(objects: &[&MultiPackIndexEntry]) -> Result<Vec<u8>> {
2945 let mut counts = [0u32; 256];
2946 for object in objects {
2947 let first = object.oid.as_bytes()[0] as usize;
2948 counts[first] = counts[first]
2949 .checked_add(1)
2950 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
2951 }
2952 let mut running = 0u32;
2953 let mut out = Vec::with_capacity(256 * 4);
2954 for count in counts {
2955 running = running
2956 .checked_add(count)
2957 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
2958 out.extend_from_slice(&running.to_be_bytes());
2959 }
2960 Ok(out)
2961}
2962
2963fn write_midx_oid_lookup(objects: &[&MultiPackIndexEntry]) -> Vec<u8> {
2964 let mut out = Vec::new();
2965 for object in objects {
2966 out.extend_from_slice(object.oid.as_bytes());
2967 }
2968 out
2969}
2970
2971fn write_midx_object_offsets(
2972 objects: &[&MultiPackIndexEntry],
2973 large_offsets: &mut Vec<u8>,
2974) -> Result<Vec<u8>> {
2975 let mut out = Vec::new();
2976 for object in objects {
2977 out.extend_from_slice(&object.pack_int_id.to_be_bytes());
2978 if object.offset < 0x8000_0000 && !object.force_large_offset {
2979 out.extend_from_slice(&(object.offset as u32).to_be_bytes());
2980 } else {
2981 let large_idx = large_offsets.len() / 8;
2982 if large_idx > 0x7fff_ffff {
2983 return Err(GitError::InvalidFormat(
2984 "too many multi-pack-index large offsets".into(),
2985 ));
2986 }
2987 out.extend_from_slice(&(0x8000_0000 | large_idx as u32).to_be_bytes());
2988 large_offsets.extend_from_slice(&object.offset.to_be_bytes());
2989 }
2990 }
2991 Ok(out)
2992}
2993
2994fn write_multi_pack_index_chunks(
2995 format: ObjectFormat,
2996 version: u8,
2997 pack_count: u32,
2998 chunks: &[([u8; 4], Vec<u8>)],
2999) -> Result<Vec<u8>> {
3000 if chunks.len() > u8::MAX as usize {
3001 return Err(GitError::InvalidFormat(
3002 "too many multi-pack-index chunks".into(),
3003 ));
3004 }
3005 let lookup_len = (chunks.len() + 1)
3006 .checked_mul(12)
3007 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
3008 let mut out = Vec::new();
3009 out.extend_from_slice(b"MIDX");
3010 out.push(version);
3011 out.push(hash_function_id(format) as u8);
3012 out.push(chunks.len() as u8);
3013 out.push(0);
3014 out.extend_from_slice(&pack_count.to_be_bytes());
3015 let mut chunk_offset = (12usize)
3016 .checked_add(lookup_len)
3017 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?
3018 as u64;
3019 for (id, data) in chunks {
3020 out.extend_from_slice(id);
3021 out.extend_from_slice(&chunk_offset.to_be_bytes());
3022 chunk_offset = chunk_offset
3023 .checked_add(data.len() as u64)
3024 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index size overflow".into()))?;
3025 }
3026 out.extend_from_slice(&[0, 0, 0, 0]);
3027 out.extend_from_slice(&chunk_offset.to_be_bytes());
3028 for (_id, data) in chunks {
3029 out.extend_from_slice(data);
3030 }
3031 let checksum = sley_core::digest_bytes(format, &out)?;
3032 out.extend_from_slice(checksum.as_bytes());
3033 Ok(out)
3034}
3035
3036#[derive(Debug, Clone, Copy, PartialEq, Eq)]
3037struct EntryHeader {
3038 kind: PackObjectKind,
3039 size: u64,
3040}
3041
/// Cache of fully decoded pack objects keyed by their offset in the pack.
///
/// Both methods take `&self`, so an implementation that actually stores
/// anything needs interior mutability.
pub trait PackDeltaCache {
    /// Returns the object previously stored for `offset`, if any.
    fn get(&self, offset: u64) -> Option<Arc<EncodedObject>>;
    /// Records `object` as the decoded result for the entry at `offset`.
    fn insert(&self, offset: u64, object: Arc<EncodedObject>);
}
3061
3062struct NoopDeltaCache;
3065
3066impl PackDeltaCache for NoopDeltaCache {
3067 fn get(&self, _offset: u64) -> Option<Arc<EncodedObject>> {
3068 None
3069 }
3070 fn insert(&self, _offset: u64, _object: Arc<EncodedObject>) {}
3071}
3072
thread_local! {
    /// Per-thread zlib decompressor shared by `inflate_into` and
    /// `inflate_prefix`; each of them resets it before use.
    static INFLATE: RefCell<flate2::Decompress> = RefCell::new(flate2::Decompress::new(true));
}
3081
/// Upper bound assumed for how many output bytes one compressed input byte
/// can expand to.
// NOTE(review): presumably chosen from deflate's worst-case expansion ratio; confirm.
const MAX_INFLATE_EXPANSION: usize = 1032;

/// Largest up-front reservation made for an inflate output buffer (64 MiB).
const MAX_INFLATE_RESERVE: usize = 64 * 1024 * 1024;

/// Picks how many bytes to reserve before inflating.
///
/// The caller-supplied `size_hint` is capped by what `compressed_len` input
/// bytes could expand to (`compressed_len * MAX_INFLATE_EXPANSION`,
/// saturating), and the result is then kept within
/// `64..=MAX_INFLATE_RESERVE`.
fn bounded_inflate_reserve(size_hint: usize, compressed_len: usize) -> usize {
    let input_ceiling = compressed_len.saturating_mul(MAX_INFLATE_EXPANSION);
    let wanted = std::cmp::min(size_hint, input_ceiling);
    wanted.max(64).min(MAX_INFLATE_RESERVE)
}
3113
3114fn inflate_into(compressed: &[u8], out: &mut Vec<u8>, size_hint: usize) -> Result<usize> {
3123 INFLATE.with(|cell| {
3124 let mut decompress = cell.borrow_mut();
3125 decompress.reset(true);
3126 out.reserve(bounded_inflate_reserve(size_hint, compressed.len()));
3127 let mut input = compressed;
3128 let mut consumed_total = 0usize;
3129 loop {
3130 if out.len() == out.capacity() {
3133 out.reserve(out.len().max(64));
3134 }
3135 let before_in = decompress.total_in();
3136 let before_out = decompress.total_out();
3137 let status = decompress
3138 .decompress_vec(input, out, flate2::FlushDecompress::None)
3139 .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
3140 let consumed = (decompress.total_in() - before_in) as usize;
3141 let produced = decompress.total_out() - before_out;
3142 input = &input[consumed..];
3143 consumed_total += consumed;
3144 match status {
3145 flate2::Status::StreamEnd => return Ok(consumed_total),
3146 _ if consumed == 0 && produced == 0 => {
3147 return Err(GitError::InvalidObject("truncated zlib stream".into()));
3148 }
3149 _ => {}
3150 }
3151 }
3152 })
3153}
3154
3155fn inflate_prefix(compressed: &[u8], max_out: usize, out: &mut Vec<u8>) -> Result<()> {
3159 INFLATE.with(|cell| {
3160 let mut decompress = cell.borrow_mut();
3161 decompress.reset(true);
3162 out.reserve(max_out.max(16));
3163 let mut input = compressed;
3164 while out.len() < max_out {
3165 if out.len() == out.capacity() {
3166 out.reserve(out.len().max(16));
3167 }
3168 let before_in = decompress.total_in();
3169 let before_out = decompress.total_out();
3170 let status = decompress
3171 .decompress_vec(input, out, flate2::FlushDecompress::None)
3172 .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
3173 let consumed = (decompress.total_in() - before_in) as usize;
3174 let produced = decompress.total_out() - before_out;
3175 input = &input[consumed..];
3176 if status == flate2::Status::StreamEnd || (consumed == 0 && produced == 0) {
3177 break;
3178 }
3179 }
3180 Ok(())
3181 })
3182}
3183
3184pub fn read_object_at_arc<F>(
3192 pack_bytes: &[u8],
3193 offset: u64,
3194 format: ObjectFormat,
3195 resolve_ref_base: F,
3196) -> Result<Arc<EncodedObject>>
3197where
3198 F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
3199{
3200 read_object_at_with_cache_arc(
3201 pack_bytes,
3202 offset,
3203 format,
3204 resolve_ref_base,
3205 &NoopDeltaCache,
3206 )
3207}
3208
3209pub fn read_object_at_with_cache_arc<F, C>(
3218 pack_bytes: &[u8],
3219 offset: u64,
3220 format: ObjectFormat,
3221 mut resolve_ref_base: F,
3222 cache: &C,
3223) -> Result<Arc<EncodedObject>>
3224where
3225 F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
3226 C: PackDeltaCache + ?Sized,
3227{
3228 read_object_at_inner(pack_bytes, offset, format, &mut resolve_ref_base, cache)
3229}
3230
3231fn read_object_at_inner<F, C>(
3232 pack_bytes: &[u8],
3233 offset: u64,
3234 format: ObjectFormat,
3235 resolve_ref_base: &mut F,
3236 cache: &C,
3237) -> Result<Arc<EncodedObject>>
3238where
3239 F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
3240 C: PackDeltaCache + ?Sized,
3241{
3242 if let Some(object) = cache.get(offset) {
3245 return Ok(object);
3246 }
3247 let trailer_offset = pack_bytes
3248 .len()
3249 .checked_sub(format.raw_len())
3250 .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
3251 let mut cursor = usize::try_from(offset)
3252 .ok()
3253 .filter(|&value| value < trailer_offset)
3254 .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
3255 let header = parse_entry_header(pack_bytes, &mut cursor)?;
3256 let base = match header.kind {
3257 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
3258 pack_bytes,
3259 &mut cursor,
3260 offset,
3261 )?)),
3262 PackObjectKind::RefDelta => {
3263 let hash_len = format.raw_len();
3264 if cursor + hash_len > trailer_offset {
3265 return Err(GitError::InvalidFormat(
3266 "truncated ref-delta base object id".into(),
3267 ));
3268 }
3269 let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
3270 cursor += hash_len;
3271 Some(DeltaBase::Ref(oid))
3272 }
3273 _ => None,
3274 };
3275 let mut body = Vec::new();
3276 inflate_into(
3277 &pack_bytes[cursor..trailer_offset],
3278 &mut body,
3279 header.size.min(usize::MAX as u64) as usize,
3280 )?;
3281 if body.len() as u64 != header.size {
3282 return Err(GitError::InvalidObject(format!(
3283 "pack object declared {} bytes, decoded {}",
3284 header.size,
3285 body.len()
3286 )));
3287 }
3288 let object = match base {
3289 None => {
3290 let object_type = match header.kind {
3291 PackObjectKind::Commit => ObjectType::Commit,
3292 PackObjectKind::Tree => ObjectType::Tree,
3293 PackObjectKind::Blob => ObjectType::Blob,
3294 PackObjectKind::Tag => ObjectType::Tag,
3295 PackObjectKind::OfsDelta | PackObjectKind::RefDelta => {
3296 return Err(GitError::InvalidFormat(
3297 "delta pack entry decoded without a base".into(),
3298 ));
3299 }
3300 };
3301 Arc::new(EncodedObject::new(object_type, body))
3302 }
3303 Some(DeltaBase::Offset(base_offset)) => {
3304 let base =
3305 read_object_at_inner(pack_bytes, base_offset, format, resolve_ref_base, cache)?;
3306 let resolved = apply_pack_delta(&base.body, &body)?;
3307 Arc::new(EncodedObject::new(base.object_type, resolved))
3308 }
3309 Some(DeltaBase::Ref(base_oid)) => {
3310 let base = resolve_ref_base(&base_oid)?
3311 .ok_or_else(|| GitError::not_found(format!("ref-delta base object {base_oid}")))?;
3312 let resolved = apply_pack_delta(&base.body, &body)?;
3313 Arc::new(EncodedObject::new(base.object_type, resolved))
3314 }
3315 };
3316 cache.insert(offset, Arc::clone(&object));
3320 Ok(object)
3321}
3322
3323pub fn read_object_header_at<F>(
3333 pack_bytes: &[u8],
3334 offset: u64,
3335 format: ObjectFormat,
3336 mut resolve_ref_base_type: F,
3337) -> Result<(ObjectType, u64)>
3338where
3339 F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
3340{
3341 read_object_header_at_inner(
3342 pack_bytes,
3343 offset,
3344 format,
3345 &mut resolve_ref_base_type,
3346 &mut NoopHeaderTypeCache,
3347 )
3348}
3349
/// Cache of resolved pack entry headers, keyed by the entry's pack offset.
///
/// The header readers in this file look an offset up before parsing the entry
/// and record every `(object type, size)` pair they resolve.
pub trait HeaderTypeCache {
    /// Returns the cached `(object type, size)` pair for `pack_offset`, if any.
    fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)>;
    /// Records the resolved `(object type, size)` pair for `pack_offset`.
    fn put(&mut self, pack_offset: u64, header: (ObjectType, u64));
}
3372
/// [`HeaderTypeCache`] that never remembers anything; used by
/// `read_object_header_at` when the caller supplies no cache.
struct NoopHeaderTypeCache;

impl HeaderTypeCache for NoopHeaderTypeCache {
    /// Always reports a miss.
    fn get(&self, _pack_offset: u64) -> Option<(ObjectType, u64)> {
        None
    }
    /// Discards the header.
    fn put(&mut self, _pack_offset: u64, _header: (ObjectType, u64)) {}
}
3381
3382pub fn read_object_header_at_with_cache<F, C>(
3388 pack_bytes: &[u8],
3389 offset: u64,
3390 format: ObjectFormat,
3391 mut resolve_ref_base_type: F,
3392 type_cache: &mut C,
3393) -> Result<(ObjectType, u64)>
3394where
3395 F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
3396 C: HeaderTypeCache + ?Sized,
3397{
3398 if let Some(header) = type_cache.get(offset) {
3399 return Ok(header);
3400 }
3401 read_object_header_at_inner(
3402 pack_bytes,
3403 offset,
3404 format,
3405 &mut resolve_ref_base_type,
3406 type_cache,
3407 )
3408}
3409
3410fn read_object_header_at_inner<F, C>(
3411 pack_bytes: &[u8],
3412 offset: u64,
3413 format: ObjectFormat,
3414 resolve_ref_base_type: &mut F,
3415 type_cache: &mut C,
3416) -> Result<(ObjectType, u64)>
3417where
3418 F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
3419 C: HeaderTypeCache + ?Sized,
3420{
3421 let trailer_offset = pack_bytes
3422 .len()
3423 .checked_sub(format.raw_len())
3424 .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
3425 let mut cursor = usize::try_from(offset)
3426 .ok()
3427 .filter(|&value| value < trailer_offset)
3428 .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
3429 let header = parse_entry_header(pack_bytes, &mut cursor)?;
3430 let resolved = match header.kind {
3431 PackObjectKind::Commit => (ObjectType::Commit, header.size),
3432 PackObjectKind::Tree => (ObjectType::Tree, header.size),
3433 PackObjectKind::Blob => (ObjectType::Blob, header.size),
3434 PackObjectKind::Tag => (ObjectType::Tag, header.size),
3435 PackObjectKind::OfsDelta => {
3436 let base_offset = parse_ofs_delta_base_offset(pack_bytes, &mut cursor, offset)?;
3437 let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
3438 let base_type = match type_cache.get(base_offset) {
3441 Some((base_type, _)) => base_type,
3442 None => {
3443 let (base_type, _) = read_object_header_at_inner(
3444 pack_bytes,
3445 base_offset,
3446 format,
3447 resolve_ref_base_type,
3448 type_cache,
3449 )?;
3450 base_type
3451 }
3452 };
3453 (base_type, size)
3454 }
3455 PackObjectKind::RefDelta => {
3456 let hash_len = format.raw_len();
3457 if cursor + hash_len > trailer_offset {
3458 return Err(GitError::InvalidFormat(
3459 "truncated ref-delta base object id".into(),
3460 ));
3461 }
3462 let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
3463 cursor += hash_len;
3464 let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
3465 let base_type = resolve_ref_base_type(&oid)?
3466 .ok_or_else(|| GitError::not_found(format!("ref-delta base object {oid}")))?;
3467 (base_type, size)
3468 }
3469 };
3470 type_cache.put(offset, resolved);
3473 Ok(resolved)
3474}
3475
/// Length passed to `inflate_prefix` when only the two size varints at the
/// start of a delta stream are needed (see `delta_result_size_from_stream`).
const DELTA_HEADER_PREFIX_LEN: usize = 32;
3480
3481fn delta_result_size_from_stream(compressed: &[u8]) -> Result<u64> {
3484 let mut prefix = Vec::new();
3485 inflate_prefix(compressed, DELTA_HEADER_PREFIX_LEN, &mut prefix)?;
3486 decoded_delta_result_size(&prefix)
3487}
3488
3489fn parse_entry_header(bytes: &[u8], offset: &mut usize) -> Result<EntryHeader> {
3490 let first = next_byte(bytes, offset)?;
3491 let mut size = u64::from(first & 0x0f);
3492 let kind = match (first >> 4) & 0x07 {
3493 1 => PackObjectKind::Commit,
3494 2 => PackObjectKind::Tree,
3495 3 => PackObjectKind::Blob,
3496 4 => PackObjectKind::Tag,
3497 6 => PackObjectKind::OfsDelta,
3498 7 => PackObjectKind::RefDelta,
3499 other => {
3500 return Err(GitError::InvalidFormat(format!(
3501 "invalid pack object type {other}"
3502 )));
3503 }
3504 };
3505 let mut shift = 4;
3506 let mut byte = first;
3507 while byte & 0x80 != 0 {
3508 byte = next_byte(bytes, offset)?;
3509 let part = u64::from(byte & 0x7f);
3510 size = size
3511 .checked_add(
3512 part.checked_shl(shift)
3513 .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?,
3514 )
3515 .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?;
3516 shift += 7;
3517 }
3518 Ok(EntryHeader { kind, size })
3519}
3520
3521fn parse_ofs_delta_base_offset(bytes: &[u8], offset: &mut usize, entry_offset: u64) -> Result<u64> {
3522 let mut byte = next_byte(bytes, offset)?;
3523 let mut relative = u64::from(byte & 0x7f);
3524 while byte & 0x80 != 0 {
3525 byte = next_byte(bytes, offset)?;
3526 relative = relative
3527 .checked_add(1)
3528 .and_then(|value| value.checked_shl(7))
3529 .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
3530 .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
3531 }
3532 entry_offset
3533 .checked_sub(relative)
3534 .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
3535}
3536
3537fn resolve_pack_entries<F>(
3538 parsed: Vec<ParsedPackEntry>,
3539 format: ObjectFormat,
3540 external_base: &mut F,
3541) -> Result<Vec<PackObject>>
3542where
3543 F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
3544{
3545 let mut offset_to_index = HashMap::with_capacity(parsed.len());
3546 for (idx, entry) in parsed.iter().enumerate() {
3547 offset_to_index.insert(parsed_entry_offset(entry), idx);
3548 }
3549
3550 let mut resolved = vec![None; parsed.len()];
3551 let mut oid_to_index = HashMap::new();
3552 let mut unresolved = 0usize;
3553 for (idx, entry) in parsed.iter().enumerate() {
3554 match entry {
3555 ParsedPackEntry::Resolved(object) => {
3556 oid_to_index.insert(object.entry.oid, idx);
3557 resolved[idx] = Some(object.clone());
3558 }
3559 ParsedPackEntry::Delta { .. } => unresolved += 1,
3560 }
3561 }
3562
3563 while unresolved != 0 {
3564 let mut progress = false;
3565 for idx in 0..parsed.len() {
3566 if resolved[idx].is_some() {
3567 continue;
3568 }
3569 let ParsedPackEntry::Delta {
3570 base,
3571 compressed_size,
3572 delta_size,
3573 offset,
3574 delta,
3575 } = &parsed[idx]
3576 else {
3577 continue;
3578 };
3579 let Some(base_object) = delta_base_object(
3580 base,
3581 &offset_to_index,
3582 &oid_to_index,
3583 &resolved,
3584 external_base,
3585 )?
3586 else {
3587 continue;
3588 };
3589 let body = apply_pack_delta(base_object.body(), delta)?;
3590 let object = EncodedObject::new(base_object.object_type(), body);
3591 let oid = object.object_id(format)?;
3592 let pack_object = PackObject {
3593 entry: PackEntry {
3594 oid,
3595 compressed_size: *compressed_size,
3596 uncompressed_size: object.body.len() as u64,
3597 offset: *offset,
3598 },
3599 object,
3600 };
3601 if pack_object.entry.uncompressed_size != decoded_delta_result_size(delta)? {
3602 return Err(GitError::InvalidObject(
3603 "resolved delta size does not match delta header".into(),
3604 ));
3605 }
3606 if *delta_size != delta.len() as u64 {
3607 return Err(GitError::InvalidObject(format!(
3608 "pack delta declared {delta_size} bytes, decoded {}",
3609 delta.len()
3610 )));
3611 }
3612 oid_to_index.insert(oid, idx);
3613 resolved[idx] = Some(pack_object);
3614 unresolved -= 1;
3615 progress = true;
3616 }
3617 if !progress {
3618 return Err(GitError::Unsupported("unresolved delta base".into()));
3619 }
3620 }
3621
3622 resolved
3623 .into_iter()
3624 .map(|entry| entry.ok_or_else(|| GitError::InvalidFormat("unresolved pack entry".into())))
3625 .collect()
3626}
3627
3628fn parsed_entry_offset(entry: &ParsedPackEntry) -> u64 {
3629 match entry {
3630 ParsedPackEntry::Resolved(object) => object.entry.offset,
3631 ParsedPackEntry::Delta { offset, .. } => *offset,
3632 }
3633}
3634
3635enum DeltaBaseObject<'a> {
3636 Borrowed(&'a EncodedObject),
3637 Owned(EncodedObject),
3638}
3639
3640impl DeltaBaseObject<'_> {
3641 fn object_type(&self) -> ObjectType {
3642 match self {
3643 Self::Borrowed(object) => object.object_type,
3644 Self::Owned(object) => object.object_type,
3645 }
3646 }
3647
3648 fn body(&self) -> &[u8] {
3649 match self {
3650 Self::Borrowed(object) => &object.body,
3651 Self::Owned(object) => &object.body,
3652 }
3653 }
3654}
3655
3656fn delta_base_object<'a, F>(
3657 base: &DeltaBase,
3658 offset_to_index: &HashMap<u64, usize>,
3659 oid_to_index: &HashMap<ObjectId, usize>,
3660 resolved: &'a [Option<PackObject>],
3661 external_base: &mut F,
3662) -> Result<Option<DeltaBaseObject<'a>>>
3663where
3664 F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
3665{
3666 match base {
3667 DeltaBase::Offset(offset) => {
3668 let Some(index) = offset_to_index.get(offset).copied() else {
3669 return Err(GitError::InvalidFormat(format!(
3670 "ofs-delta base offset {offset} not found"
3671 )));
3672 };
3673 Ok(resolved[index]
3674 .as_ref()
3675 .map(|object| DeltaBaseObject::Borrowed(&object.object)))
3676 }
3677 DeltaBase::Ref(oid) => {
3678 if let Some(index) = oid_to_index.get(oid).copied() {
3679 return Ok(resolved[index]
3680 .as_ref()
3681 .map(|object| DeltaBaseObject::Borrowed(&object.object)));
3682 }
3683 external_base(oid).map(|object| object.map(DeltaBaseObject::Owned))
3684 }
3685 }
3686}
3687
3688fn apply_pack_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
3689 let mut cursor = 0usize;
3690 let base_size = read_delta_varint(delta, &mut cursor)?;
3691 if base_size != base.len() as u64 {
3692 return Err(GitError::InvalidObject(format!(
3693 "delta base size mismatch: expected {base_size}, got {}",
3694 base.len()
3695 )));
3696 }
3697 let result_size = read_delta_varint(delta, &mut cursor)?;
3698 let result_size_hint = usize::try_from(result_size).unwrap_or(usize::MAX);
3707 let mut result = Vec::with_capacity(bounded_inflate_reserve(result_size_hint, delta.len()));
3708 while cursor < delta.len() {
3709 let command = delta[cursor];
3710 cursor += 1;
3711 if command & 0x80 != 0 {
3712 let copy_offset =
3713 read_delta_copy_value(delta, &mut cursor, command, &[0x01, 0x02, 0x04, 0x08])?;
3714 let mut copy_size =
3715 read_delta_copy_value(delta, &mut cursor, command, &[0x10, 0x20, 0x40])?;
3716 if copy_size == 0 {
3717 copy_size = 0x10000;
3718 }
3719 let start = usize::try_from(copy_offset)
3720 .map_err(|_| GitError::InvalidObject("delta copy offset overflows usize".into()))?;
3721 let len = usize::try_from(copy_size)
3722 .map_err(|_| GitError::InvalidObject("delta copy size overflows usize".into()))?;
3723 let end = start
3724 .checked_add(len)
3725 .ok_or_else(|| GitError::InvalidObject("delta copy range overflow".into()))?;
3726 let Some(slice) = base.get(start..end) else {
3727 return Err(GitError::InvalidObject(
3728 "delta copy range exceeds base object".into(),
3729 ));
3730 };
3731 result.extend_from_slice(slice);
3732 } else if command != 0 {
3733 let len = usize::from(command);
3734 let end = cursor
3735 .checked_add(len)
3736 .ok_or_else(|| GitError::InvalidObject("delta insert range overflow".into()))?;
3737 let Some(slice) = delta.get(cursor..end) else {
3738 return Err(GitError::InvalidObject(
3739 "delta insert range exceeds delta data".into(),
3740 ));
3741 };
3742 result.extend_from_slice(slice);
3743 cursor = end;
3744 } else {
3745 return Err(GitError::InvalidObject(
3746 "delta contains reserved zero command".into(),
3747 ));
3748 }
3749 }
3750 if result.len() as u64 != result_size {
3751 return Err(GitError::InvalidObject(format!(
3752 "delta result size mismatch: expected {result_size}, got {}",
3753 result.len()
3754 )));
3755 }
3756 Ok(result)
3757}
3758
3759fn decoded_delta_result_size(delta: &[u8]) -> Result<u64> {
3760 let mut cursor = 0usize;
3761 let _ = read_delta_varint(delta, &mut cursor)?;
3762 read_delta_varint(delta, &mut cursor)
3763}
3764
/// Number of bytes in each block hashed by the delta index; also the shortest
/// match `DeltaIndex::delta` will emit as a copy command.
const DELTA_BLOCK_SIZE: usize = 16;

/// Distance in bytes between consecutive anchors indexed in a base object.
const DELTA_INDEX_STRIDE: usize = DELTA_BLOCK_SIZE;

/// Number of low hash bits used to select a bucket in the delta index.
const DELTA_BUCKET_BITS: usize = 12;
/// Number of buckets in the delta index.
const DELTA_BUCKET_COUNT: usize = 1 << DELTA_BUCKET_BITS;
/// Mask that reduces a block hash to a bucket number.
const DELTA_BUCKET_MASK: usize = DELTA_BUCKET_COUNT - 1;
3780
/// Hash index over fixed-size blocks of a base object, used to find copyable
/// ranges when building a delta against that base.
struct DeltaIndex<'a> {
    /// The base object's bytes.
    base: &'a [u8],
    /// Indexed blocks, grouped so that each bucket occupies a contiguous run.
    blocks: Vec<DeltaBlock>,
    /// `buckets[b]..buckets[b + 1]` is the range of `blocks` belonging to
    /// bucket `b`; the vector has one more element than there are buckets.
    buckets: Vec<usize>,
}

/// One indexed block of a base object.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct DeltaBlock {
    /// `block_hash` of the block's bytes.
    hash: u32,
    /// Offset of the block's first byte in the base.
    offset: usize,
}
3798
3799impl<'a> DeltaIndex<'a> {
3800 fn new(base: &'a [u8]) -> Self {
3801 let mut buckets = vec![0usize; DELTA_BUCKET_COUNT + 1];
3802 let mut anchors = Vec::with_capacity(delta_anchor_count(base.len()));
3803 for_each_delta_anchor(base.len(), |offset| {
3804 let hash = block_hash(&base[offset..offset + DELTA_BLOCK_SIZE]);
3805 buckets[delta_bucket(hash) + 1] += 1;
3806 anchors.push(DeltaBlock { hash, offset });
3807 });
3808 for idx in 1..buckets.len() {
3809 buckets[idx] += buckets[idx - 1];
3810 }
3811
3812 let mut next_offsets = buckets[..DELTA_BUCKET_COUNT].to_vec();
3813 let mut blocks = vec![DeltaBlock { hash: 0, offset: 0 }; anchors.len()];
3814 for anchor in anchors {
3815 let bucket = delta_bucket(anchor.hash);
3816 let next = &mut next_offsets[bucket];
3817 blocks[*next] = anchor;
3818 *next += 1;
3819 }
3820
3821 Self {
3822 base,
3823 blocks,
3824 buckets,
3825 }
3826 }
3827
3828 fn candidate_blocks(&self, hash: u32) -> impl Iterator<Item = &DeltaBlock> {
3829 let bucket = delta_bucket(hash);
3830 let start = self.buckets[bucket];
3831 let end = self.buckets[bucket + 1];
3832 self.blocks[start..end]
3833 .iter()
3834 .filter(move |block| block.hash == hash)
3835 }
3836
3837 fn has_hash(&self, hash: u32) -> bool {
3838 self.candidate_blocks(hash).next().is_some()
3839 }
3840
3841 fn has_shared_anchor(&self, target: &[u8]) -> bool {
3842 if target.len() < DELTA_BLOCK_SIZE || self.blocks.is_empty() {
3843 return false;
3844 }
3845 let last = target.len() - DELTA_BLOCK_SIZE;
3846 for offset in (0..=last).step_by(DELTA_INDEX_STRIDE) {
3847 let hash = block_hash(&target[offset..offset + DELTA_BLOCK_SIZE]);
3848 if self.has_hash(hash) {
3849 return true;
3850 }
3851 }
3852 if !last.is_multiple_of(DELTA_INDEX_STRIDE) {
3853 let hash = block_hash(&target[last..last + DELTA_BLOCK_SIZE]);
3854 if self.has_hash(hash) {
3855 return true;
3856 }
3857 }
3858 false
3859 }
3860
3861 fn delta(&self, target: &[u8]) -> Option<Vec<u8>> {
3863 if !self.has_shared_anchor(target) {
3864 return None;
3865 }
3866 let base = self.base;
3867 let mut delta = Vec::new();
3868 write_delta_varint(&mut delta, base.len() as u64);
3869 write_delta_varint(&mut delta, target.len() as u64);
3870
3871 let mut pending_insert_start = 0usize;
3872 let mut pos = 0usize;
3873 while pos < target.len() {
3874 let mut best_len = 0usize;
3875 let mut best_offset = 0usize;
3876 if pos + DELTA_BLOCK_SIZE <= target.len() {
3877 let hash = block_hash(&target[pos..pos + DELTA_BLOCK_SIZE]);
3878 for candidate in self.candidate_blocks(hash).take(DELTA_MAX_CHAIN) {
3879 let candidate = candidate.offset;
3882 let max_len = (base.len() - candidate).min(target.len() - pos);
3883 let mut len = 0usize;
3884 while len < max_len && base[candidate + len] == target[pos + len] {
3885 len += 1;
3886 }
3887 if len > best_len {
3888 best_len = len;
3889 best_offset = candidate;
3890 }
3891 }
3892 }
3893
3894 if best_len >= DELTA_BLOCK_SIZE {
3895 if pending_insert_start < pos {
3896 write_delta_insert(&mut delta, &target[pending_insert_start..pos]);
3897 }
3898 write_delta_copy(&mut delta, best_offset as u64, best_len as u64);
3899 pos += best_len;
3900 pending_insert_start = pos;
3901 } else {
3902 pos += 1;
3903 }
3904 }
3905 if pending_insert_start < target.len() {
3906 write_delta_insert(&mut delta, &target[pending_insert_start..]);
3907 }
3908 Some(delta)
3909 }
3910}
3911
3912fn for_each_delta_anchor(mut len: usize, mut visit: impl FnMut(usize)) {
3913 if len < DELTA_BLOCK_SIZE {
3914 return;
3915 }
3916 len -= DELTA_BLOCK_SIZE;
3917 for offset in (0..=len).step_by(DELTA_INDEX_STRIDE) {
3918 visit(offset);
3919 }
3920 if !len.is_multiple_of(DELTA_INDEX_STRIDE) {
3921 visit(len);
3922 }
3923}
3924
3925fn delta_anchor_count(len: usize) -> usize {
3926 if len < DELTA_BLOCK_SIZE {
3927 return 0;
3928 }
3929 let last = len - DELTA_BLOCK_SIZE;
3930 (last / DELTA_INDEX_STRIDE) + 1 + usize::from(!last.is_multiple_of(DELTA_INDEX_STRIDE))
3931}
3932
3933fn delta_bucket(hash: u32) -> usize {
3934 (hash as usize) & DELTA_BUCKET_MASK
3935}
3936
/// Maximum number of same-hash candidate blocks `DeltaIndex::delta` compares
/// at each target position.
const DELTA_MAX_CHAIN: usize = 64;
3940
/// Hashes a block of bytes: starting from zero, each byte multiplies the
/// running hash by `0x0100_0193` (wrapping) and is then XORed in.
fn block_hash(block: &[u8]) -> u32 {
    block.iter().fold(0u32, |hash, &byte| {
        hash.wrapping_mul(0x0100_0193) ^ u32::from(byte)
    })
}
3953
/// How an object will be stored in the pack being written.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PlannedBase {
    /// Stored whole; the payload is the object's own body.
    None,
    /// Stored as `delta` against the object at index `base_idx` of the same
    /// object list.
    InPack { base_idx: usize, delta: Vec<u8> },
    /// Stored as `delta` against an object outside the pack, identified by
    /// `base_oid`.
    External { base_oid: ObjectId, delta: Vec<u8> },
}

/// Storage decision for a single object.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PlannedEntry {
    /// Chosen base, if any, together with the delta payload.
    base: PlannedBase,
}
3971
3972fn compress_planned_payloads(
3973 objects: &[&EncodedObject],
3974 plan: &[PlannedEntry],
3975 order: &[usize],
3976 compression_level: u32,
3977) -> Result<Vec<Vec<u8>>> {
3978 if order.is_empty() {
3979 return Ok(Vec::new());
3980 }
3981
3982 let worker_count = std::thread::available_parallelism()
3983 .map(|threads| threads.get())
3984 .unwrap_or(1)
3985 .min(PACK_PARALLEL_COMPRESSION_MAX_THREADS)
3986 .min(order.len());
3987 if worker_count <= 1 || order.len() < PACK_PARALLEL_COMPRESSION_MIN_OBJECTS {
3988 let mut payloads = Vec::with_capacity(order.len());
3989 for &idx in order {
3990 payloads.push(compressed_payload(
3991 planned_payload(objects, plan, idx),
3992 compression_level,
3993 )?);
3994 }
3995 return Ok(payloads);
3996 }
3997
3998 let chunk_len = order.len().div_ceil(worker_count);
3999 let mut payloads: Vec<Vec<u8>> = std::iter::repeat_with(Vec::new).take(order.len()).collect();
4000 std::thread::scope(|scope| {
4001 let mut handles = Vec::new();
4002 for (chunk_idx, chunk) in order.chunks(chunk_len).enumerate() {
4003 let chunk_start = chunk_idx * chunk_len;
4004 handles.push(scope.spawn(move || -> Result<Vec<(usize, Vec<u8>)>> {
4005 let mut chunk_payloads = Vec::with_capacity(chunk.len());
4006 for (offset, &idx) in chunk.iter().enumerate() {
4007 chunk_payloads.push((
4008 chunk_start + offset,
4009 compressed_payload(
4010 planned_payload(objects, plan, idx),
4011 compression_level,
4012 )?,
4013 ));
4014 }
4015 Ok(chunk_payloads)
4016 }));
4017 }
4018
4019 let mut first_error = None;
4020 for handle in handles {
4021 match handle.join() {
4022 Ok(Ok(chunk_payloads)) => {
4023 if first_error.is_none() {
4024 for (pos, payload) in chunk_payloads {
4025 payloads[pos] = payload;
4026 }
4027 }
4028 }
4029 Ok(Err(err)) => {
4030 first_error.get_or_insert(err);
4031 }
4032 Err(_) => {
4033 first_error.get_or_insert_with(|| {
4034 GitError::InvalidObject("pack compression worker panicked".into())
4035 });
4036 }
4037 }
4038 }
4039
4040 match first_error {
4041 Some(err) => Err(err),
4042 None => Ok(()),
4043 }
4044 })?;
4045 Ok(payloads)
4046}
4047
4048fn planned_payload<'a>(
4049 objects: &'a [&'a EncodedObject],
4050 plan: &'a [PlannedEntry],
4051 idx: usize,
4052) -> &'a [u8] {
4053 match &plan[idx].base {
4054 PlannedBase::None => &objects[idx].body,
4055 PlannedBase::InPack { delta, .. } | PlannedBase::External { delta, .. } => delta,
4056 }
4057}
4058
4059fn compressed_payload(body: &[u8], compression_level: u32) -> Result<Vec<u8>> {
4060 let mut out = Vec::new();
4061 write_compressed_payload(&mut out, body, compression_level)?;
4062 Ok(out)
4063}
4064
/// Maximum number of thin-pack (external) bases tried as delta bases for each
/// object.
const DELTA_MAX_EXTERNAL_BASES: usize = 64;

/// A recently planned object kept in the sliding delta window together with
/// the index built over its body.
struct DeltaWindowEntry<'a> {
    /// Position of the object in the object list.
    idx: usize,
    /// Delta index over that object's body.
    index: DeltaIndex<'a>,
}
4073
4074fn delta_type_rank(object_type: ObjectType) -> u8 {
4077 match object_type {
4078 ObjectType::Commit => 0,
4079 ObjectType::Tree => 1,
4080 ObjectType::Blob => 2,
4081 ObjectType::Tag => 3,
4082 }
4083}
4084
4085fn plan_pack_deltas(
4115 objects: &[&EncodedObject],
4116 object_ids: &[ObjectId],
4117 options: &PackWriteOptions,
4118) -> Result<(Vec<PlannedEntry>, Vec<usize>)> {
4119 let count = objects.len();
4120 let mut plan: Vec<PlannedEntry> = (0..count)
4121 .map(|_| PlannedEntry {
4122 base: PlannedBase::None,
4123 })
4124 .collect();
4125
4126 let mut order: Vec<usize> = (0..count).collect();
4130 if options.reorder && options.depth > 0 {
4131 order.sort_by(|&left, &right| {
4132 delta_type_rank(objects[left].object_type)
4133 .cmp(&delta_type_rank(objects[right].object_type))
4134 .then_with(|| objects[right].body.len().cmp(&objects[left].body.len()))
4135 .then_with(|| {
4136 object_ids[left]
4137 .as_bytes()
4138 .cmp(object_ids[right].as_bytes())
4139 })
4140 });
4141 }
4142
4143 if options.depth == 0 {
4144 return Ok((plan, order));
4145 }
4146
4147 let mut external_indexes: Vec<(ObjectId, ObjectType, DeltaIndex<'_>)> =
4150 Vec::with_capacity(options.thin_bases.len());
4151 for (oid, object) in &options.thin_bases {
4152 external_indexes.push((*oid, object.object_type, DeltaIndex::new(&object.body)));
4153 }
4154
4155 let mut depth = vec![0usize; count];
4158 let mut window: std::collections::VecDeque<DeltaWindowEntry<'_>> =
4160 std::collections::VecDeque::new();
4161
4162 for &idx in &order {
4163 let target = &objects[idx].body;
4164 let target_type = objects[idx].object_type;
4165
4166 let mut best_delta: Option<Vec<u8>> = None;
4167 let mut best_base = PlannedBase::None;
4168
4169 for base_entry in window.iter().rev() {
4171 let base_idx = base_entry.idx;
4172 if objects[base_idx].object_type != target_type {
4173 continue;
4174 }
4175 if depth[base_idx] + 1 > options.depth {
4178 continue;
4179 }
4180 let Some(delta) = base_entry.index.delta(target) else {
4181 continue;
4182 };
4183 if !delta_is_acceptable(&delta, target.len()) {
4184 continue;
4185 }
4186 if best_delta
4187 .as_ref()
4188 .is_none_or(|current| delta.len() < current.len())
4189 {
4190 best_delta = Some(delta);
4191 best_base = PlannedBase::InPack {
4192 base_idx,
4193 delta: Vec::new(),
4194 };
4195 }
4196 }
4197
4198 for (base_oid, base_type, base_index) in
4201 external_indexes.iter().take(DELTA_MAX_EXTERNAL_BASES)
4202 {
4203 if *base_type != target_type {
4204 continue;
4205 }
4206 let Some(delta) = base_index.delta(target) else {
4207 continue;
4208 };
4209 if !delta_is_acceptable(&delta, target.len()) {
4210 continue;
4211 }
4212 if best_delta
4213 .as_ref()
4214 .is_none_or(|current| delta.len() < current.len())
4215 {
4216 best_delta = Some(delta);
4217 best_base = PlannedBase::External {
4218 base_oid: *base_oid,
4219 delta: Vec::new(),
4220 };
4221 }
4222 }
4223
4224 if let Some(delta) = best_delta {
4225 match best_base {
4226 PlannedBase::InPack { base_idx, .. } => {
4227 depth[idx] = depth[base_idx] + 1;
4228 plan[idx].base = PlannedBase::InPack { base_idx, delta };
4229 }
4230 PlannedBase::External { base_oid, .. } => {
4231 depth[idx] = 1;
4232 plan[idx].base = PlannedBase::External { base_oid, delta };
4233 }
4234 PlannedBase::None => {}
4235 }
4236 }
4237
4238 window.push_back(DeltaWindowEntry {
4240 idx,
4241 index: DeltaIndex::new(&objects[idx].body),
4242 });
4243 while window.len() > options.window {
4244 window.pop_front();
4245 }
4246 }
4247
4248 Ok((plan, order))
4249}
4250
/// A delta is worth using only when it is non-empty and strictly smaller than
/// the object it encodes.
fn delta_is_acceptable(delta: &[u8], target_len: usize) -> bool {
    (1..target_len).contains(&delta.len())
}
4258
/// Appends `value` as a little-endian base-128 varint: seven bits per byte,
/// with the high bit set on every byte except the last.
fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
    while value >= 0x80 {
        out.push(((value & 0x7f) as u8) | 0x80);
        value >>= 7;
    }
    out.push(value as u8);
}
4272
/// Appends copy commands that copy `size` bytes starting at `offset` in the
/// base, splitting the range into chunks of at most `0x10000` bytes.
///
/// Each command byte has the high bit set; bits 0-3 flag which of the low
/// four offset bytes follow and bits 4-6 flag which of the low three size
/// bytes follow. Zero bytes are omitted, and a full `0x10000` chunk is encoded
/// as size zero. Only the low 32 bits of `offset` are encoded.
fn write_delta_copy(out: &mut Vec<u8>, mut offset: u64, mut size: u64) {
    const MAX_CHUNK: u64 = 0x10000;
    while size != 0 {
        let chunk = size.min(MAX_CHUNK);
        let encoded_size = if chunk == MAX_CHUNK { 0 } else { chunk };

        // Reserve the command byte, then fill it in once the flags are known.
        let command_at = out.len();
        out.push(0x80);
        let mut command = 0x80u8;
        for (bit, &byte) in offset.to_le_bytes()[..4].iter().enumerate() {
            if byte != 0 {
                command |= 1 << bit;
                out.push(byte);
            }
        }
        for (bit, &byte) in encoded_size.to_le_bytes()[..3].iter().enumerate() {
            if byte != 0 {
                command |= 0x10 << bit;
                out.push(byte);
            }
        }
        out[command_at] = command;

        offset += chunk;
        size -= chunk;
    }
}
4302
/// Appends insert commands for `bytes`: each command is a length byte of at
/// most `0x7f` followed by that many literal bytes.
fn write_delta_insert(out: &mut Vec<u8>, bytes: &[u8]) {
    for literal in bytes.chunks(0x7f) {
        out.push(literal.len() as u8);
        out.extend_from_slice(literal);
    }
}
4311
4312fn read_delta_varint(delta: &[u8], cursor: &mut usize) -> Result<u64> {
4313 let mut value = 0u64;
4314 let mut shift = 0u32;
4315 loop {
4316 let Some(byte) = delta.get(*cursor).copied() else {
4317 return Err(GitError::InvalidObject("truncated delta size".into()));
4318 };
4319 *cursor += 1;
4320 value = value
4321 .checked_add(
4322 u64::from(byte & 0x7f)
4323 .checked_shl(shift)
4324 .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?,
4325 )
4326 .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
4327 if byte & 0x80 == 0 {
4328 return Ok(value);
4329 }
4330 shift = shift
4331 .checked_add(7)
4332 .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
4333 }
4334}
4335
4336fn read_delta_copy_value(
4337 delta: &[u8],
4338 cursor: &mut usize,
4339 command: u8,
4340 masks: &[u8],
4341) -> Result<u64> {
4342 let mut value = 0u64;
4343 for (shift, mask) in masks.iter().enumerate() {
4344 if command & mask != 0 {
4345 let Some(byte) = delta.get(*cursor).copied() else {
4346 return Err(GitError::InvalidObject(
4347 "truncated delta copy command".into(),
4348 ));
4349 };
4350 *cursor += 1;
4351 value |= u64::from(byte) << (shift * 8);
4352 }
4353 }
4354 Ok(value)
4355}
4356
4357fn write_compressed_payload(out: &mut Vec<u8>, body: &[u8], compression_level: u32) -> Result<()> {
4358 let mut compressor = Compress::new(Compression::new(compression_level.min(9)), true);
4359 out.reserve(zlib_compress_bound(body.len()));
4360 let status = compressor
4361 .compress_vec(body, out, FlushCompress::Finish)
4362 .map_err(|err| GitError::InvalidObject(format!("zlib compression failed: {err}")))?;
4363 if status != Status::StreamEnd || compressor.total_in() != body.len() as u64 {
4364 return Err(GitError::InvalidObject(
4365 "zlib compression did not finish pack entry".into(),
4366 ));
4367 }
4368 Ok(())
4369}
4370
/// Upper bound used to size the output buffer for compressing `len` bytes:
/// `len + len/4096 + len/16384 + len/2^25 + 13`, saturating at `usize::MAX`.
fn zlib_compress_bound(len: usize) -> usize {
    [12u32, 14, 25]
        .iter()
        .fold(len, |bound, &shift| bound.saturating_add(len >> shift))
        .saturating_add(13)
}
4377
4378fn write_entry_header(out: &mut Vec<u8>, object_type: ObjectType, size: u64) {
4379 let type_code = match object_type {
4380 ObjectType::Commit => 1,
4381 ObjectType::Tree => 2,
4382 ObjectType::Blob => 3,
4383 ObjectType::Tag => 4,
4384 };
4385 write_pack_entry_header_kind(out, type_code, size);
4386}
4387
/// Appends a pack entry header for a raw type code.
///
/// The first byte holds the type code in bits 4-6 and the low four size bits;
/// the remaining size bits follow seven at a time, least significant first.
/// The high bit of each byte signals that another byte follows.
fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, mut size: u64) {
    let continuation = |remaining: u64| -> u8 {
        if remaining != 0 {
            0x80
        } else {
            0
        }
    };

    let low_nibble = (size as u8) & 0x0f;
    size >>= 4;
    out.push((type_code << 4) | low_nibble | continuation(size));
    while size != 0 {
        let low_bits = (size as u8) & 0x7f;
        size >>= 7;
        out.push(low_bits | continuation(size));
    }
}
4404
4405fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: u64) -> Result<()> {
4406 if relative == 0 {
4407 return Err(GitError::InvalidFormat(
4408 "ofs-delta relative offset cannot be zero".into(),
4409 ));
4410 }
4411 let mut value = relative;
4412 let mut bytes = vec![(value & 0x7f) as u8];
4413 value >>= 7;
4414 while value != 0 {
4415 value -= 1;
4416 bytes.push(((value & 0x7f) as u8) | 0x80);
4417 value >>= 7;
4418 }
4419 bytes.reverse();
4420 out.extend_from_slice(&bytes);
4421 Ok(())
4422}
4423
4424fn next_byte(bytes: &[u8], offset: &mut usize) -> Result<u8> {
4425 let Some(byte) = bytes.get(*offset).copied() else {
4426 return Err(GitError::InvalidFormat(
4427 "truncated pack entry header".into(),
4428 ));
4429 };
4430 *offset += 1;
4431 Ok(byte)
4432}
4433
/// Decodes the first two bytes of `bytes` as a big-endian `u16`.
///
/// Panics when `bytes` holds fewer than two bytes.
fn u16_be(bytes: &[u8]) -> u16 {
    (u16::from(bytes[0]) << 8) | u16::from(bytes[1])
}
4437
/// Decodes the first four bytes of `bytes` as a big-endian `u32`.
///
/// Panics when `bytes` holds fewer than four bytes.
fn u32_be(bytes: &[u8]) -> u32 {
    (u32::from(bytes[0]) << 24)
        | (u32::from(bytes[1]) << 16)
        | (u32::from(bytes[2]) << 8)
        | u32::from(bytes[3])
}
4441
/// Decodes the first eight bytes of `bytes` as a big-endian `u64`.
///
/// Panics when `bytes` holds fewer than eight bytes.
fn u64_be(bytes: &[u8]) -> u64 {
    (u64::from(bytes[0]) << 56)
        | (u64::from(bytes[1]) << 48)
        | (u64::from(bytes[2]) << 40)
        | (u64::from(bytes[3]) << 32)
        | (u64::from(bytes[4]) << 24)
        | (u64::from(bytes[5]) << 16)
        | (u64::from(bytes[6]) << 8)
        | u64::from(bytes[7])
}
4447
4448fn read_pack_index_fanout(bytes: &[u8], offset: &mut usize) -> Result<[u32; 256]> {
4449 let mut fanout = [0u32; 256];
4450 let mut previous = 0u32;
4451 for slot in &mut fanout {
4452 *slot = u32_be(&bytes[*offset..*offset + 4]);
4453 if *slot < previous {
4454 return Err(GitError::InvalidFormat(
4455 "pack index fanout is not monotonic".into(),
4456 ));
4457 }
4458 previous = *slot;
4459 *offset += 4;
4460 }
4461 Ok(fanout)
4462}
4463
4464fn validate_pack_index_oid_fanout(idx: usize, oid_bytes: &[u8], fanout: &[u32; 256]) -> Result<()> {
4465 let expected_min = if oid_bytes[0] == 0 {
4466 0
4467 } else {
4468 fanout[usize::from(oid_bytes[0] - 1)]
4469 };
4470 if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
4471 return Err(GitError::InvalidFormat(
4472 "pack index object id is outside its fanout bucket".into(),
4473 ));
4474 }
4475 Ok(())
4476}
4477
4478fn pack_index_v2_offset(raw_offset: u32, large_offset_table: &[u8]) -> Result<u64> {
4479 if raw_offset & 0x8000_0000 == 0 {
4480 return Ok(u64::from(raw_offset));
4481 }
4482 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
4483 let large_start = large_idx
4484 .checked_mul(8)
4485 .ok_or_else(|| GitError::InvalidFormat("pack index large offset overflow".into()))?;
4486 let large_end = large_start
4487 .checked_add(8)
4488 .ok_or_else(|| GitError::InvalidFormat("pack index large offset overflow".into()))?;
4489 if large_end > large_offset_table.len() {
4490 return Err(GitError::InvalidFormat(
4491 "pack index large offset points past table".into(),
4492 ));
4493 }
4494 Ok(u64_be(&large_offset_table[large_start..large_end]))
4495}
4496
4497fn checked_range(
4498 start: usize,
4499 count: usize,
4500 width: usize,
4501 total: usize,
4502) -> Result<std::ops::Range<usize>> {
4503 let len = count
4504 .checked_mul(width)
4505 .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
4506 let end = start
4507 .checked_add(len)
4508 .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
4509 if end > total {
4510 return Err(GitError::InvalidFormat("truncated pack index table".into()));
4511 }
4512 Ok(start..end)
4513}
4514
4515fn validate_position_permutation(positions: &[u32]) -> Result<()> {
4516 let mut seen = vec![false; positions.len()];
4517 for position in positions {
4518 let idx = *position as usize;
4519 if idx >= positions.len() {
4520 return Err(GitError::InvalidFormat(
4521 "reverse index position points past object table".into(),
4522 ));
4523 }
4524 if seen[idx] {
4525 return Err(GitError::InvalidFormat(
4526 "reverse index position is duplicated".into(),
4527 ));
4528 }
4529 seen[idx] = true;
4530 }
4531 Ok(())
4532}
4533
4534fn parse_midx_pack_names(
4535 bytes: &[u8],
4536 chunks: &[MultiPackIndexChunk],
4537 pack_count: usize,
4538 version: u8,
4539) -> Result<Vec<String>> {
4540 let data = midx_chunk_data(bytes, chunks, *b"PNAM", true)?
4541 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing PNAM chunk".into()))?;
4542 let mut names = Vec::with_capacity(pack_count);
4543 let mut offset = 0usize;
4544 while names.len() < pack_count {
4545 let Some(relative_end) = data[offset..].iter().position(|byte| *byte == 0) else {
4546 return Err(GitError::InvalidFormat(
4547 "fatal: multi-pack-index pack-name chunk is too short".into(),
4548 ));
4549 };
4550 let name_bytes = &data[offset..offset + relative_end];
4551 if name_bytes.is_empty() {
4552 return Err(GitError::InvalidFormat(
4553 "multi-pack-index PNAM entry is empty".into(),
4554 ));
4555 }
4556 let name = std::str::from_utf8(name_bytes)
4557 .map_err(|err| GitError::InvalidFormat(err.to_string()))?;
4558 if name.bytes().any(|byte| matches!(byte, b'/' | b'\\')) {
4559 return Err(GitError::InvalidFormat(
4560 "multi-pack-index PNAM entry contains a path separator".into(),
4561 ));
4562 }
4563 names.push(name.to_string());
4564 offset += relative_end + 1;
4565 }
4566 let padding = &data[offset..];
4567 if padding.len() > 3 || padding.iter().any(|byte| *byte != 0) {
4568 return Err(GitError::InvalidFormat(
4569 "multi-pack-index PNAM padding is invalid".into(),
4570 ));
4571 }
4572 if version == 1 && names.windows(2).any(|pair| pair[0] > pair[1]) {
4573 return Err(GitError::InvalidFormat(
4574 "multi-pack-index v1 PNAM entries are not sorted".into(),
4575 ));
4576 }
4577 Ok(names)
4578}
4579
4580fn parse_midx_oid_fanout(
4581 bytes: &[u8],
4582 chunks: &[MultiPackIndexChunk],
4583) -> Result<([u32; 256], usize)> {
4584 let data = midx_chunk_data(bytes, chunks, *b"OIDF", true)?
4585 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDF chunk".into()))?;
4586 if data.len() != 256 * 4 {
4587 return Err(GitError::InvalidFormat(
4588 "error: multi-pack-index OID fanout is of the wrong size\nfatal: multi-pack-index required OID fanout chunk missing or corrupted".into(),
4589 ));
4590 }
4591 let mut fanout = [0u32; 256];
4592 let mut previous = 0u32;
4593 for (idx, slot) in fanout.iter_mut().enumerate() {
4594 let start = idx * 4;
4595 *slot = u32_be(&data[start..start + 4]);
4596 if *slot < previous {
4597 return Err(GitError::InvalidFormat(
4598 format!(
4599 "error: oid fanout out of order: fanout[{}] = {:x} > {:x} = fanout[{idx}]\nfatal: multi-pack-index required OID fanout chunk missing or corrupted",
4600 idx - 1,
4601 previous,
4602 *slot
4603 ),
4604 ));
4605 }
4606 previous = *slot;
4607 }
4608 Ok((fanout, fanout[255] as usize))
4609}
4610
4611fn parse_midx_object_ids(
4612 bytes: &[u8],
4613 chunks: &[MultiPackIndexChunk],
4614 format: ObjectFormat,
4615 object_count: usize,
4616 fanout: &[u32; 256],
4617) -> Result<Vec<ObjectId>> {
4618 let data = midx_chunk_data(bytes, chunks, *b"OIDL", true)?
4619 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
4620 let expected_len = object_count
4621 .checked_mul(format.raw_len())
4622 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into()))?;
4623 if data.len() != expected_len {
4624 return Err(GitError::InvalidFormat(
4625 "error: multi-pack-index OID lookup chunk is the wrong size\nfatal: multi-pack-index required OID lookup chunk missing or corrupted".into(),
4626 ));
4627 }
4628
4629 let mut ids = Vec::with_capacity(object_count);
4630 let mut counts = [0u32; 256];
4631 let mut previous_oid: Option<ObjectId> = None;
4632 for idx in 0..object_count {
4633 let start = idx * format.raw_len();
4634 let oid = ObjectId::from_raw(format, &data[start..start + format.raw_len()])?;
4635 if let Some(previous) = &previous_oid
4636 && previous.as_bytes() >= oid.as_bytes()
4637 {
4638 return Err(GitError::InvalidFormat(
4639 "multi-pack-index OIDL object ids are not strictly sorted".into(),
4640 ));
4641 }
4642 counts[oid.as_bytes()[0] as usize] = counts[oid.as_bytes()[0] as usize]
4643 .checked_add(1)
4644 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
4645 previous_oid = Some(oid);
4646 ids.push(oid);
4647 }
4648
4649 let mut running = 0u32;
4650 for (idx, count) in counts.iter().enumerate() {
4651 running = running
4652 .checked_add(*count)
4653 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
4654 if fanout[idx] != running {
4655 return Err(GitError::InvalidFormat(
4656 "multi-pack-index OIDF fanout does not match OIDL".into(),
4657 ));
4658 }
4659 }
4660 Ok(ids)
4661}
4662
4663fn parse_midx_object_offsets(
4664 bytes: &[u8],
4665 chunks: &[MultiPackIndexChunk],
4666 object_ids: Vec<ObjectId>,
4667 pack_count: u32,
4668) -> Result<Vec<MultiPackIndexEntry>> {
4669 let data = midx_chunk_data(bytes, chunks, *b"OOFF", true)?
4670 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
4671 let expected_len = object_ids
4672 .len()
4673 .checked_mul(8)
4674 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into()))?;
4675 if data.len() != expected_len {
4676 return Err(GitError::InvalidFormat(
4677 "error: multi-pack-index object offset chunk is the wrong size\nfatal: multi-pack-index required object offsets chunk missing or corrupted".into(),
4678 ));
4679 }
4680 let large_offsets = midx_chunk_data(bytes, chunks, *b"LOFF", false)?;
4681 if let Some(large_offsets) = large_offsets
4682 && large_offsets.len() % 8 != 0
4683 {
4684 return Err(GitError::InvalidFormat(
4685 "multi-pack-index LOFF chunk has invalid length".into(),
4686 ));
4687 }
4688
4689 let mut entries = Vec::with_capacity(object_ids.len());
4690 for (idx, oid) in object_ids.into_iter().enumerate() {
4691 let start = idx * 8;
4692 let pack_int_id = u32_be(&data[start..start + 4]);
4693 if pack_int_id >= pack_count {
4694 return Err(GitError::InvalidFormat(
4695 "multi-pack-index object points past pack table".into(),
4696 ));
4697 }
4698 let raw_offset = u32_be(&data[start + 4..start + 8]);
4699 let offset = if raw_offset & 0x8000_0000 == 0 {
4700 u64::from(raw_offset)
4701 } else {
4702 let Some(large_offsets) = large_offsets else {
4703 return Err(GitError::InvalidFormat(
4704 "multi-pack-index large offset missing LOFF chunk".into(),
4705 ));
4706 };
4707 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
4708 let large_start = large_idx.checked_mul(8).ok_or_else(|| {
4709 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
4710 })?;
4711 let large_end = large_start.checked_add(8).ok_or_else(|| {
4712 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
4713 })?;
4714 if large_end > large_offsets.len() {
4715 return Err(GitError::InvalidFormat(
4716 "fatal: multi-pack-index large offset out of bounds".into(),
4717 ));
4718 }
4719 u64_be(&large_offsets[large_start..large_end])
4720 };
4721 entries.push(MultiPackIndexEntry {
4722 oid,
4723 pack_int_id,
4724 offset,
4725 force_large_offset: raw_offset & 0x8000_0000 != 0,
4726 });
4727 }
4728 Ok(entries)
4729}
4730
4731fn parse_midx_reverse_index(
4732 bytes: &[u8],
4733 chunks: &[MultiPackIndexChunk],
4734 object_count: usize,
4735) -> Result<Option<Vec<u32>>> {
4736 let Some(data) = midx_chunk_data(bytes, chunks, *b"RIDX", false)? else {
4737 return Ok(None);
4738 };
4739 let expected_len = object_count
4740 .checked_mul(4)
4741 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index RIDX chunk overflow".into()))?;
4742 if data.len() != expected_len {
4743 return Err(GitError::InvalidFormat(
4744 "multi-pack-index reverse-index chunk is the wrong size".into(),
4745 ));
4746 }
4747 let mut positions = Vec::with_capacity(object_count);
4748 for idx in 0..object_count {
4749 let start = idx * 4;
4750 positions.push(u32_be(&data[start..start + 4]));
4751 }
4752 validate_position_permutation(&positions)?;
4753 Ok(Some(positions))
4754}
4755
4756fn parse_midx_bitmapped_packs(
4757 bytes: &[u8],
4758 chunks: &[MultiPackIndexChunk],
4759 pack_count: usize,
4760 object_count: usize,
4761) -> Result<Option<Vec<MultiPackBitmapPack>>> {
4762 let Some(data) = midx_chunk_data(bytes, chunks, *b"BTMP", false)? else {
4763 return Ok(None);
4764 };
4765 let expected_len = pack_count
4766 .checked_mul(8)
4767 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index BTMP chunk overflow".into()))?;
4768 if data.len() != expected_len {
4769 return Err(GitError::InvalidFormat(
4770 "multi-pack-index BTMP chunk has invalid length".into(),
4771 ));
4772 }
4773 let mut entries = Vec::with_capacity(pack_count);
4774 for idx in 0..pack_count {
4775 let start = idx * 8;
4776 let bitmap_pos = u32_be(&data[start..start + 4]);
4777 let bitmap_nr = u32_be(&data[start + 4..start + 8]);
4778 let bitmap_end = u64::from(bitmap_pos)
4779 .checked_add(u64::from(bitmap_nr))
4780 .ok_or_else(|| {
4781 GitError::InvalidFormat("multi-pack-index BTMP range overflow".into())
4782 })?;
4783 if bitmap_end > object_count as u64 {
4784 return Err(GitError::InvalidFormat(
4785 "multi-pack-index BTMP range points past object table".into(),
4786 ));
4787 }
4788 entries.push(MultiPackBitmapPack {
4789 bitmap_pos,
4790 bitmap_nr,
4791 });
4792 }
4793 Ok(Some(entries))
4794}
4795
4796fn midx_chunk_data<'a>(
4797 bytes: &'a [u8],
4798 chunks: &[MultiPackIndexChunk],
4799 id: [u8; 4],
4800 required: bool,
4801) -> Result<Option<&'a [u8]>> {
4802 let Some(chunk) = chunks.iter().find(|chunk| chunk.id == id) else {
4803 if required {
4804 return Err(GitError::InvalidFormat(format!(
4805 "multi-pack-index missing {} chunk",
4806 std::str::from_utf8(&id).unwrap_or("required")
4807 )));
4808 }
4809 return Ok(None);
4810 };
4811 let start = usize::try_from(chunk.offset)
4812 .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk offset overflow".into()))?;
4813 let len = usize::try_from(chunk.len)
4814 .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk length overflow".into()))?;
4815 let end = start
4816 .checked_add(len)
4817 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index chunk range overflow".into()))?;
4818 let Some(data) = bytes.get(start..end) else {
4819 return Err(GitError::InvalidFormat(
4820 "multi-pack-index chunk extends past file".into(),
4821 ));
4822 };
4823 Ok(Some(data))
4824}
4825
4826fn hash_function_id(format: ObjectFormat) -> u32 {
4827 match format {
4828 ObjectFormat::Sha1 => 1,
4829 ObjectFormat::Sha256 => 2,
4830 }
4831}
4832
/// Largest run length a single run-length word (RLW) can store. The run
/// length occupies the 32 bits directly above the run bit, so it is read as
/// `(rlw >> 1) & EWAH_MAX_RUNNING_LEN`.
const EWAH_MAX_RUNNING_LEN: u64 = 0xffff_ffff;

/// Largest literal-word count a single RLW can store. The count lives in the
/// bits from position 33 upward, which leaves 31 bits for it.
const EWAH_MAX_LITERAL_LEN: u64 = 0x7fff_ffff;

/// A 64-bit word with every bit set. Such words are encoded as part of a run
/// of ones rather than as literals.
const EWAH_ALL_ONES: u64 = u64::MAX;
4843
4844impl EwahBitmap {
4845 pub fn from_words(bit_size: u32, words: &[u64]) -> Result<Self> {
4859 let required_words = bit_size.div_ceil(64) as usize;
4860 if required_words > words.len() {
4861 return Err(GitError::InvalidFormat(format!(
4862 "EWAH bit_size {bit_size} requires {required_words} words but only {} supplied",
4863 words.len()
4864 )));
4865 }
4866 let significant = &words[..required_words];
4869 let mut builder = EwahBuilder::new(bit_size);
4870 for &word in significant {
4871 if word == 0 {
4872 builder.add_empty_words(false, 1);
4873 } else if word == EWAH_ALL_ONES {
4874 builder.add_empty_words(true, 1);
4875 } else {
4876 builder.add_literal(word);
4877 }
4878 }
4879 builder.finish()
4880 }
4881
4882 pub fn from_positions(bit_size: u32, positions: &[u32]) -> Result<Self> {
4888 let word_count = bit_size.div_ceil(64) as usize;
4889 let mut words = vec![0u64; word_count];
4890 for &position in positions {
4891 if position >= bit_size {
4892 return Err(GitError::InvalidFormat(format!(
4893 "EWAH bit position {position} out of range for bit_size {bit_size}"
4894 )));
4895 }
4896 let word_index = (position / 64) as usize;
4897 let bit_index = position % 64;
4898 words[word_index] |= 1u64 << bit_index;
4899 }
4900 Self::from_words(bit_size, &words)
4901 }
4902
4903 pub fn empty() -> Self {
4906 Self {
4907 bit_size: 0,
4908 words: Vec::new(),
4909 rlw_position: 0,
4910 }
4911 }
4912
4913 pub fn to_words(&self) -> Result<Vec<u64>> {
4919 let mut out = Vec::new();
4920 let mut word_idx = 0usize;
4921 while word_idx < self.words.len() {
4922 let rlw = self.words[word_idx];
4923 let run_bit = rlw & 1;
4924 let run_words = (rlw >> 1) & EWAH_MAX_RUNNING_LEN;
4925 let literal_words = (rlw >> 33) as usize;
4926 word_idx += 1;
4927 let fill = if run_bit == 1 { EWAH_ALL_ONES } else { 0 };
4928 for _ in 0..run_words {
4929 out.push(fill);
4930 }
4931 let literal_end = word_idx
4932 .checked_add(literal_words)
4933 .filter(|end| *end <= self.words.len())
4934 .ok_or_else(|| {
4935 GitError::InvalidFormat("EWAH literal words extend past word table".into())
4936 })?;
4937 out.extend_from_slice(&self.words[word_idx..literal_end]);
4938 word_idx = literal_end;
4939 }
4940 let required_words = (self.bit_size as usize).div_ceil(64);
4941 if out.len() < required_words {
4942 out.resize(required_words, 0);
4943 }
4944 out.truncate(required_words);
4945 Ok(out)
4946 }
4947
4948 pub fn to_positions(&self) -> Result<Vec<u32>> {
4950 let words = self.to_words()?;
4951 let mut positions = Vec::new();
4952 for (word_index, word) in words.iter().enumerate() {
4953 let mut remaining = *word;
4954 while remaining != 0 {
4955 let bit = remaining.trailing_zeros();
4956 let position = (word_index as u64) * 64 + u64::from(bit);
4957 if position < u64::from(self.bit_size) {
4958 positions.push(position as u32);
4960 }
4961 remaining &= remaining - 1;
4962 }
4963 }
4964 Ok(positions)
4965 }
4966
4967 pub fn to_bytes(&self) -> Vec<u8> {
4971 let mut out = Vec::with_capacity(12 + self.words.len() * 8);
4972 self.append_bytes(&mut out);
4973 out
4974 }
4975
4976 fn append_bytes(&self, out: &mut Vec<u8>) {
4977 out.extend_from_slice(&self.bit_size.to_be_bytes());
4978 out.extend_from_slice(&(self.words.len() as u32).to_be_bytes());
4979 for word in &self.words {
4980 out.extend_from_slice(&word.to_be_bytes());
4981 }
4982 out.extend_from_slice(&self.rlw_position.to_be_bytes());
4983 }
4984}
4985
/// Incrementally assembles the compressed word stream of an `EwahBitmap`.
struct EwahBuilder {
    /// Number of bits the finished bitmap covers; copied into the result
    /// unchanged.
    bit_size: u32,
    /// Compressed stream built so far: run-length words interleaved with the
    /// literal words that follow them. It always starts with one run-length
    /// word.
    words: Vec<u64>,
    /// Index in `words` of the run-length word currently being filled.
    rlw_position: usize,
}
4998
4999impl EwahBuilder {
5000 fn new(bit_size: u32) -> Self {
5001 Self {
5003 bit_size,
5004 words: vec![0u64],
5005 rlw_position: 0,
5006 }
5007 }
5008
5009 fn rlw(&self) -> u64 {
5010 self.words[self.rlw_position]
5011 }
5012
5013 fn set_rlw(&mut self, value: u64) {
5014 self.words[self.rlw_position] = value;
5015 }
5016
5017 fn rlw_running_len(&self) -> u64 {
5018 (self.rlw() >> 1) & EWAH_MAX_RUNNING_LEN
5019 }
5020
5021 fn rlw_running_bit(&self) -> bool {
5022 self.rlw() & 1 == 1
5023 }
5024
5025 fn rlw_literal_len(&self) -> u64 {
5026 self.rlw() >> 33
5027 }
5028
5029 fn set_running_bit(&mut self, bit: bool) {
5030 let mut value = self.rlw();
5031 value &= !1;
5032 value |= u64::from(bit);
5033 self.set_rlw(value);
5034 }
5035
5036 fn set_running_len(&mut self, len: u64) {
5037 let mut value = self.rlw();
5038 value &= !(EWAH_MAX_RUNNING_LEN << 1);
5039 value |= (len & EWAH_MAX_RUNNING_LEN) << 1;
5040 self.set_rlw(value);
5041 }
5042
5043 fn set_literal_len(&mut self, len: u64) {
5044 let mut value = self.rlw();
5045 value &= (1u64 << 33) - 1;
5046 value |= (len & EWAH_MAX_LITERAL_LEN) << 33;
5047 self.set_rlw(value);
5048 }
5049
5050 fn push_rlw(&mut self) {
5052 self.rlw_position = self.words.len();
5053 self.words.push(0);
5054 }
5055
5056 fn add_empty_words(&mut self, value: bool, mut number: u64) {
5064 while number > 0 {
5065 let can_extend = self.rlw_literal_len() == 0
5069 && (self.rlw_running_len() == 0 || self.rlw_running_bit() == value)
5070 && self.rlw_running_len() < EWAH_MAX_RUNNING_LEN;
5071 if !can_extend {
5072 self.push_rlw();
5073 }
5074 if self.rlw_running_len() == 0 {
5075 self.set_running_bit(value);
5076 }
5077 let available = EWAH_MAX_RUNNING_LEN - self.rlw_running_len();
5078 let take = available.min(number);
5079 self.set_running_len(self.rlw_running_len() + take);
5080 number -= take;
5081 }
5082 }
5083
5084 fn add_literal(&mut self, word: u64) {
5087 if self.rlw_literal_len() >= EWAH_MAX_LITERAL_LEN {
5088 self.push_rlw();
5089 }
5090 let literal_len = self.rlw_literal_len();
5091 self.set_literal_len(literal_len + 1);
5092 self.words.push(word);
5093 }
5094
5095 fn finish(self) -> Result<EwahBitmap> {
5096 let rlw_position = u32::try_from(self.rlw_position)
5097 .map_err(|_| GitError::InvalidFormat("EWAH RLW position overflow".into()))?;
5098 if self.words.len() > u32::MAX as usize {
5099 return Err(GitError::InvalidFormat("EWAH word table overflow".into()));
5100 }
5101 Ok(EwahBitmap {
5102 bit_size: self.bit_size,
5103 words: self.words,
5104 rlw_position,
5105 })
5106 }
5107}
5108
/// Collects the inputs for a pack bitmap index: the type of every object in
/// the pack and the commits for which a reachability bitmap is stored.
#[derive(Debug, Clone)]
pub struct PackBitmapWriter {
    /// Object format of the index; the pack checksum must use the same one.
    format: ObjectFormat,
    /// Checksum of the pack, copied into the built index.
    pack_checksum: ObjectId,
    /// Number of objects, i.e. the length of the type list given to `new`.
    object_count: u32,
    /// Positions whose object type is commit, in ascending order.
    commit_positions: Vec<u32>,
    /// Positions whose object type is tree, in ascending order.
    tree_positions: Vec<u32>,
    /// Positions whose object type is blob, in ascending order.
    blob_positions: Vec<u32>,
    /// Positions whose object type is tag, in ascending order.
    tag_positions: Vec<u32>,
    /// Optional name-hash cache; when present it has one value per object.
    name_hash_cache: Option<Vec<u32>>,
    /// Commits registered through `add_commit`, in insertion order.
    selected: Vec<SelectedCommit>,
}
5133
/// One commit registered with `PackBitmapWriter::add_commit`.
#[derive(Debug, Clone)]
struct SelectedCommit {
    /// Value written as the entry's `object_position` in the built index.
    commit_index_position: u32,
    /// Flags byte written with the entry.
    flags: u8,
    /// Positions set in the entry's bitmap; `add_commit` appends the commit's
    /// own position to the caller-supplied list.
    reachable: Vec<u32>,
}
5143
5144impl PackBitmapWriter {
5145 pub const FLAG_NONE: u8 = 0;
5149
5150 pub fn new(
5157 format: ObjectFormat,
5158 pack_checksum: ObjectId,
5159 object_types: &[ObjectType],
5160 ) -> Result<Self> {
5161 if object_types.len() > u32::MAX as usize {
5162 return Err(GitError::InvalidFormat(
5163 "too many objects for a pack bitmap".into(),
5164 ));
5165 }
5166 if pack_checksum.format() != format {
5167 return Err(GitError::InvalidObjectId(
5168 "pack checksum format does not match bitmap format".into(),
5169 ));
5170 }
5171 let object_count = object_types.len() as u32;
5172 let mut commit_positions = Vec::new();
5173 let mut tree_positions = Vec::new();
5174 let mut blob_positions = Vec::new();
5175 let mut tag_positions = Vec::new();
5176 for (index, object_type) in object_types.iter().enumerate() {
5177 let position = index as u32;
5178 match object_type {
5179 ObjectType::Commit => commit_positions.push(position),
5180 ObjectType::Tree => tree_positions.push(position),
5181 ObjectType::Blob => blob_positions.push(position),
5182 ObjectType::Tag => tag_positions.push(position),
5183 }
5184 }
5185 Ok(Self {
5186 format,
5187 pack_checksum,
5188 object_count,
5189 commit_positions,
5190 tree_positions,
5191 blob_positions,
5192 tag_positions,
5193 name_hash_cache: None,
5194 selected: Vec::new(),
5195 })
5196 }
5197
5198 pub fn with_name_hash_cache(mut self, cache: Vec<u32>) -> Result<Self> {
5204 if cache.len() != self.object_count as usize {
5205 return Err(GitError::InvalidFormat(format!(
5206 "name hash cache has {} entries but pack has {} objects",
5207 cache.len(),
5208 self.object_count
5209 )));
5210 }
5211 self.name_hash_cache = Some(cache);
5212 Ok(self)
5213 }
5214
5215 pub fn add_commit(
5227 &mut self,
5228 commit_position: u32,
5229 commit_index_position: u32,
5230 reachable: &[u32],
5231 ) -> Result<()> {
5232 if commit_position >= self.object_count {
5233 return Err(GitError::InvalidFormat(format!(
5234 "commit position {commit_position} out of range for {} objects",
5235 self.object_count
5236 )));
5237 }
5238 if commit_index_position >= self.object_count {
5239 return Err(GitError::InvalidFormat(format!(
5240 "commit index position {commit_index_position} out of range for {} objects",
5241 self.object_count
5242 )));
5243 }
5244 if !self.commit_positions.contains(&commit_position) {
5245 return Err(GitError::InvalidFormat(format!(
5246 "bitmap commit position {commit_position} is not a commit object"
5247 )));
5248 }
5249 for &position in reachable {
5250 if position >= self.object_count {
5251 return Err(GitError::InvalidFormat(format!(
5252 "reachable position {position} out of range for {} objects",
5253 self.object_count
5254 )));
5255 }
5256 }
5257 let mut reachable = reachable.to_vec();
5258 reachable.push(commit_position);
5259 self.selected.push(SelectedCommit {
5260 commit_index_position,
5261 flags: Self::FLAG_NONE,
5262 reachable,
5263 });
5264 Ok(())
5265 }
5266
5267 pub fn build(&self) -> Result<PackBitmapIndex> {
5274 let commits = EwahBitmap::from_positions(self.object_count, &self.commit_positions)?;
5275 let trees = EwahBitmap::from_positions(self.object_count, &self.tree_positions)?;
5276 let blobs = EwahBitmap::from_positions(self.object_count, &self.blob_positions)?;
5277 let tags = EwahBitmap::from_positions(self.object_count, &self.tag_positions)?;
5278
5279 let mut entries = Vec::with_capacity(self.selected.len());
5280 for selected in &self.selected {
5281 let bitmap = EwahBitmap::from_positions(self.object_count, &selected.reachable)?;
5282 entries.push(PackBitmapEntry {
5283 object_position: selected.commit_index_position,
5284 xor_offset: 0,
5285 flags: selected.flags,
5286 bitmap,
5287 });
5288 }
5289
5290 let mut options = PackBitmapIndex::OPTION_FULL_DAG;
5291 if self.name_hash_cache.is_some() {
5292 options |= PackBitmapIndex::OPTION_HASH_CACHE;
5293 }
5294
5295 let placeholder_checksum = ObjectId::null(self.format);
5300 Ok(PackBitmapIndex {
5301 version: 1,
5302 format: self.format,
5303 options,
5304 pack_checksum: self.pack_checksum.clone(),
5305 index_checksum: placeholder_checksum,
5306 type_bitmaps: PackBitmapTypeBitmaps {
5307 commits,
5308 trees,
5309 blobs,
5310 tags,
5311 },
5312 entries,
5313 name_hash_cache: self.name_hash_cache.clone(),
5314 })
5315 }
5316
5317 pub fn write(&self) -> Result<Vec<u8>> {
5320 self.build()?.write()
5321 }
5322}
5323
5324impl PackBitmapIndex {
5325 pub fn write(&self) -> Result<Vec<u8>> {
5339 if self.version != 1 {
5340 return Err(GitError::Unsupported(format!(
5341 "bitmap index version {}",
5342 self.version
5343 )));
5344 }
5345 let known_options = Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE;
5346 if self.options & !known_options != 0 {
5347 return Err(GitError::Unsupported(format!(
5348 "bitmap index options {:#06x}",
5349 self.options & !known_options
5350 )));
5351 }
5352 if self.pack_checksum.format() != self.format {
5353 return Err(GitError::InvalidObjectId(
5354 "bitmap pack checksum format does not match index format".into(),
5355 ));
5356 }
5357 if self.entries.len() > u32::MAX as usize {
5358 return Err(GitError::InvalidFormat(
5359 "too many bitmap index entries".into(),
5360 ));
5361 }
5362 let want_cache = self.options & Self::OPTION_HASH_CACHE != 0;
5363 match (&self.name_hash_cache, want_cache) {
5364 (Some(_), false) => {
5365 return Err(GitError::InvalidFormat(
5366 "name hash cache present without OPTION_HASH_CACHE".into(),
5367 ));
5368 }
5369 (None, true) => {
5370 return Err(GitError::InvalidFormat(
5371 "OPTION_HASH_CACHE set without a name hash cache".into(),
5372 ));
5373 }
5374 _ => {}
5375 }
5376
5377 let mut out = Vec::new();
5378 out.extend_from_slice(b"BITM");
5379 out.extend_from_slice(&self.version.to_be_bytes());
5380 out.extend_from_slice(&self.options.to_be_bytes());
5381 out.extend_from_slice(&(self.entries.len() as u32).to_be_bytes());
5382 out.extend_from_slice(self.pack_checksum.as_bytes());
5383
5384 self.type_bitmaps.commits.append_bytes(&mut out);
5385 self.type_bitmaps.trees.append_bytes(&mut out);
5386 self.type_bitmaps.blobs.append_bytes(&mut out);
5387 self.type_bitmaps.tags.append_bytes(&mut out);
5388
5389 for (idx, entry) in self.entries.iter().enumerate() {
5390 if entry.xor_offset as usize > idx {
5391 return Err(GitError::InvalidFormat(
5392 "bitmap index entry has invalid XOR offset".into(),
5393 ));
5394 }
5395 out.extend_from_slice(&entry.object_position.to_be_bytes());
5396 out.push(entry.xor_offset);
5397 out.push(entry.flags);
5398 entry.bitmap.append_bytes(&mut out);
5399 }
5400
5401 if let Some(cache) = &self.name_hash_cache {
5402 for value in cache {
5403 out.extend_from_slice(&value.to_be_bytes());
5404 }
5405 }
5406
5407 let checksum = sley_core::digest_bytes(self.format, &out)?;
5408 out.extend_from_slice(checksum.as_bytes());
5409 Ok(out)
5410 }
5411}
5412
5413pub fn write_bitmap(
5422 format: ObjectFormat,
5423 pack_checksum: ObjectId,
5424 object_types: &[ObjectType],
5425 commits: &[(u32, u32, Vec<u32>)],
5426 name_hash_cache: Option<Vec<u32>>,
5427) -> Result<Vec<u8>> {
5428 let mut writer = PackBitmapWriter::new(format, pack_checksum, object_types)?;
5429 if let Some(cache) = name_hash_cache {
5430 writer = writer.with_name_hash_cache(cache)?;
5431 }
5432 for (commit_position, commit_index_position, reachable) in commits {
5433 writer.add_commit(*commit_position, *commit_index_position, reachable)?;
5434 }
5435 writer.write()
5436}
5437
5438#[cfg(test)]
5439mod tests {
5440 use super::*;
5441 use flate2::Compression;
5442 use flate2::read::ZlibDecoder;
5443 use flate2::write::ZlibEncoder;
5444 use std::fs;
5445 use std::io::Read;
5446 use std::io::Write;
5447 use std::path::{Path, PathBuf};
5448 use std::process::Command;
5449 use std::time::{SystemTime, UNIX_EPOCH};
5450
5451 fn delta_pack_options(prefer_ofs_delta: bool) -> PackWriteOptions {
5452 PackWriteOptions::new()
5453 .with_prefer_ofs_delta(prefer_ofs_delta)
5454 .with_reorder(false)
5455 }
5456
5457 #[test]
5458 fn parses_single_blob_pack() {
5459 let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
5460 let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
5461 assert_eq!(parsed.version, 2);
5462 assert_eq!(parsed.entries.len(), 1);
5463 let object = &parsed.entries[0].object;
5464 assert_eq!(object.object_type, ObjectType::Blob);
5465 assert_eq!(object.body, b"hello\n");
5466 assert_eq!(
5467 parsed.entries[0].entry.oid.to_hex(),
5468 "ce013625030ba8dba906f756967f9e9ca394464a"
5469 );
5470 }
5471
5472 #[test]
5473 fn parses_single_blob_pack_sha256() {
5474 let pack = single_object_pack(ObjectFormat::Sha256, ObjectType::Blob, b"hello\n");
5475 let parsed =
5476 PackFile::parse(&pack, ObjectFormat::Sha256).expect("test operation should succeed");
5477 assert_eq!(parsed.version, 2);
5478 assert_eq!(parsed.entries.len(), 1);
5479 let object = &parsed.entries[0].object;
5480 assert_eq!(object.object_type, ObjectType::Blob);
5481 assert_eq!(object.body, b"hello\n");
5482 assert_eq!(
5483 parsed.entries[0].entry.oid,
5484 object
5485 .object_id(ObjectFormat::Sha256)
5486 .expect("test operation should succeed")
5487 );
5488 }
5489
5490 #[test]
5491 fn parses_bundle_pack_payload_with_bundle_format() {
5492 let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
5493 let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"bundle\n")
5494 .expect("test operation should succeed");
5495 let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
5496 .into_bytes()
5497 .into_iter()
5498 .chain(pack)
5499 .collect::<Vec<_>>();
5500 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
5501 .expect("test operation should succeed");
5502
5503 let parsed = PackFile::parse_bundle(&bundle).expect("test operation should succeed");
5504 assert_eq!(parsed.entries.len(), 1);
5505 assert_eq!(parsed.entries[0].object.object_type, ObjectType::Blob);
5506 assert_eq!(parsed.entries[0].object.body, b"bundle\n");
5507 }
5508
5509 fn lying_size_blob_pack(format: ObjectFormat, declared_size: u64, real_body: &[u8]) -> Vec<u8> {
5515 let mut pack = Vec::new();
5516 pack.extend_from_slice(b"PACK");
5517 pack.extend_from_slice(&2u32.to_be_bytes());
5518 pack.extend_from_slice(&1u32.to_be_bytes());
5519 write_pack_entry_header_kind(&mut pack, 3, declared_size);
5521 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
5522 encoder
5523 .write_all(real_body)
5524 .expect("test operation should succeed");
5525 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
5526 let checksum =
5527 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
5528 pack.extend_from_slice(checksum.as_bytes());
5529 pack
5530 }
5531
5532 #[test]
5545 fn rejects_decompression_bomb_header_without_oom() {
5546 for &declared in &[u64::MAX, 100 * 1024 * 1024 * 1024, u64::from(u32::MAX) * 4] {
5547 let pack = lying_size_blob_pack(ObjectFormat::Sha1, declared, b"tiny\n");
5548 let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
5549 let result = handle.join();
5550 assert!(
5552 result.is_ok(),
5553 "parsing a bomb header (declared={declared}) panicked instead of erroring cleanly"
5554 );
5555 let parse_result = result.expect("parse thread should not panic on a bomb header");
5557 assert!(
5558 parse_result.is_err(),
5559 "bomb header (declared={declared}) should be rejected as invalid"
5560 );
5561 }
5562 }
5563
    /// Builds a two-entry version-2 pack: a blob `"hello"` followed by a
    /// delta against it whose header declares `declared_result_size` as the
    /// result size, although the instructions produce `"hello world"`.
    ///
    /// `delta_kind` selects whether the delta refers to its base by offset
    /// or by object id.
    fn lying_result_size_delta_pack(
        format: ObjectFormat,
        declared_result_size: u64,
        delta_kind: DeltaKind,
    ) -> Vec<u8> {
        let base = b"hello";
        let result = b"hello world";
        let mut delta = Vec::new();
        // Delta header: base size, then the (deliberately wrong) result size.
        write_delta_varint(&mut delta, base.len() as u64);
        write_delta_varint(&mut delta, declared_result_size);
        let suffix = &result[base.len()..];
        // Per git's pack-format spec, 0x90 is a copy opcode with one size
        // byte and an implicit offset of 0: copy the whole base.
        delta.push(0x90);
        delta.push(base.len() as u8);
        // Insert opcode: a length byte followed by that many literal bytes.
        delta.push(suffix.len() as u8);
        delta.extend_from_slice(suffix);

        // Pack header: signature, version 2, two entries.
        let mut pack = Vec::new();
        pack.extend_from_slice(b"PACK");
        pack.extend_from_slice(&2u32.to_be_bytes());
        pack.extend_from_slice(&2u32.to_be_bytes());

        // First entry: the base blob, zlib-compressed.
        let base_offset = pack.len();
        write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
        encoder
            .write_all(base)
            .expect("test operation should succeed");
        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));

        // Second entry: the delta, written with kind 6 for an offset delta
        // and kind 7 for a ref delta.
        let delta_offset = pack.len();
        write_pack_entry_header_kind(
            &mut pack,
            match delta_kind {
                DeltaKind::Offset => 6,
                DeltaKind::Ref => 7,
            },
            delta.len() as u64,
        );
        // Base reference: distance back to the base entry, or the base's id.
        match delta_kind {
            DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
            DeltaKind::Ref => {
                let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
                    .expect("test operation should succeed");
                pack.extend_from_slice(base_oid.as_bytes());
            }
        }
        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
        encoder
            .write_all(&delta)
            .expect("test operation should succeed");
        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));

        // Trailer: digest of everything written so far.
        let checksum =
            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
        pack.extend_from_slice(checksum.as_bytes());
        pack
    }
5630
/// Parses packs whose delta header declares an absurd result size and checks
/// that the parser reports an error rather than panicking.
///
/// Each parse runs on its own thread so that a panic is observed through
/// `join` instead of unwinding through the test itself.
#[test]
fn rejects_delta_result_size_bomb_without_oom() {
    const ONE_TIB: u64 = 1024 * 1024 * 1024 * 1024;

    for declared in [u64::MAX, ONE_TIB] {
        for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
            let pack = lying_result_size_delta_pack(ObjectFormat::Sha1, declared, delta_kind);
            let join_result = std::thread::spawn(move || PackFile::parse_sha1(&pack)).join();

            // First requirement: the parser thread terminated normally.
            assert!(
                join_result.is_ok(),
                "delta bomb (declared={declared}, kind={delta_kind:?}) panicked/aborted \
                 instead of erroring cleanly"
            );

            // Second requirement: the lying size was detected and rejected.
            let parse_result =
                join_result.expect("parse thread should not panic on a delta bomb");
            assert!(
                parse_result.is_err(),
                "delta bomb (declared={declared}, kind={delta_kind:?}) should be rejected \
                 as invalid (result.len() != declared)"
            );
        }
    }
}
5663
/// A well-formed delta must still resolve to the expected result for both
/// the ref-delta and the offset-delta encodings.
#[test]
fn applies_legitimate_delta_after_result_size_bound() {
    let (base, result) = (b"hello", b"hello world");

    for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
        let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, delta_kind);
        let parsed = PackFile::parse_sha1(&pack).expect("legitimate delta should resolve");

        // Entry 0 is the base object, entry 1 the object rebuilt from the delta.
        assert_eq!(parsed.entries.len(), 2);
        assert_eq!(parsed.entries[0].object.body, base);
        assert_eq!(parsed.entries[1].object.body, result);
    }
}
5679
/// Table-driven check of `bounded_inflate_reserve`: each row lists the two
/// arguments and the reservation the function is expected to return.
#[test]
fn bounded_inflate_reserve_caps_attacker_declared_size() {
    let cases = [
        // A huge first argument is limited by the second one (10 * 1032).
        ((u64::MAX as usize, 10), 10 * 1032),
        // When both arguments are huge, the global ceiling applies.
        ((usize::MAX, usize::MAX), MAX_INFLATE_RESERVE),
        // A modest first argument is returned unchanged.
        ((1000, 500), 1000),
        // Zero inputs still yield a small non-zero reservation.
        ((0, 0), 64),
    ];

    for ((first_arg, second_arg), expected) in cases {
        assert_eq!(bounded_inflate_reserve(first_arg, second_arg), expected);
    }
}
5696
/// A bundle whose header announces SHA-256 but whose pack payload was built
/// for SHA-1 must be rejected by `PackFile::parse_bundle`.
#[test]
fn rejects_bundle_pack_payload_with_wrong_object_format() {
    let payload = b"bundle\n";
    let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, payload);
    let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha256, "blob", payload)
        .expect("test operation should succeed");

    // Bundle = textual header followed directly by the raw pack bytes.
    let mut bundle_bytes =
        format!("# v3 git bundle\n@object-format=sha256\n{oid} refs/heads/main\n\n").into_bytes();
    bundle_bytes.extend(pack);

    let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
        .expect("test operation should succeed");

    assert!(PackFile::parse_bundle(&bundle).is_err());
}
5713
5714 fn assert_pack_index_view_matches_owned(index: &[u8], format: ObjectFormat) {
5715 let owned = PackIndex::parse(index, format).expect("test operation should succeed");
5716 let view = PackIndexView::parse(index, format).expect("test operation should succeed");
5717 let owned_view =
5718 PackIndexViewData::parse(Arc::from(index.to_vec().into_boxed_slice()), format)
5719 .expect("test operation should succeed");
5720
5721 assert_eq!(view.version, owned.version);
5722 assert_eq!(view.count, owned.entries.len());
5723 assert_eq!(view.count(), owned.entries.len());
5724 assert_eq!(view.fanout(), &owned.fanout);
5725 assert_eq!(view.pack_checksum, owned.pack_checksum);
5726 assert_eq!(view.index_checksum, owned.index_checksum);
5727 assert_eq!(owned_view.version, owned.version);
5728 assert_eq!(owned_view.count(), owned.entries.len());
5729 assert_eq!(owned_view.fanout(), &owned.fanout);
5730 assert_eq!(owned_view.pack_checksum, owned.pack_checksum);
5731 assert_eq!(owned_view.index_checksum, owned.index_checksum);
5732 for entry in &owned.entries {
5733 let owned_found = owned
5734 .find(&entry.oid)
5735 .expect("test operation should succeed");
5736 let expected = Some(PackIndexLookup {
5737 crc32: owned_found.crc32,
5738 offset: owned_found.offset,
5739 });
5740 assert_eq!(view.find(&entry.oid), expected);
5741 assert_eq!(owned_view.find(&entry.oid), expected);
5742 }
5743 }
5744
/// Writes a single undeltified SHA-1 blob, then parses the produced pack and
/// index back and checks that they describe the same object.
#[test]
fn writes_pack_and_index_that_round_trip() {
    let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
    let oid = object
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
        .expect("test operation should succeed");
    let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
    let index =
        PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");

    assert_eq!(pack.entries[0].object, object);
    assert_eq!(index.pack_checksum, pack.checksum);

    // The only object is expected at byte offset 12 of the pack.
    let located = index.find(&oid).expect("test operation should succeed");
    assert_eq!(located.offset, 12);
}
5766
/// Generates an eight-blob SHA-1 pack, checks that the index views agree
/// with the owned index, and confirms that an unknown object id is not found.
#[test]
fn pack_index_view_matches_owned_index_for_generated_sha1_pack() {
    let format = ObjectFormat::Sha1;
    let objects: Vec<_> = (0..8)
        .map(|idx| {
            let body = format!("borrowed pack index view sha1 object {idx}\n").into_bytes();
            EncodedObject::new(ObjectType::Blob, body)
        })
        .collect();
    let written =
        PackFile::write_packed(&objects, format).expect("test operation should succeed");

    assert_pack_index_view_matches_owned(&written.index, format);

    // An id that was never packed must yield no lookup result.
    let missing =
        sley_core::object_id_for_bytes(format, "blob", b"not present in borrowed index\n")
            .expect("test operation should succeed");
    let view =
        PackIndexView::parse_v2_sha1(&written.index).expect("test operation should succeed");
    assert_eq!(view.find(&missing), None);
}
5792
/// SHA-256 counterpart of the single-object round trip: the written pack and
/// index must parse back, carry SHA-256 checksums, and locate the object.
#[test]
fn writes_sha256_pack_and_index_that_round_trip() {
    let format = ObjectFormat::Sha256;
    let object = EncodedObject::new(ObjectType::Blob, b"hello sha256\n".to_vec());
    let oid = object
        .object_id(format)
        .expect("test operation should succeed");

    let written = PackFile::write_undeltified(std::slice::from_ref(&object), format)
        .expect("test operation should succeed");
    let pack = PackFile::parse(&written.pack, format).expect("test operation should succeed");
    let index = PackIndex::parse(&written.index, format).expect("test operation should succeed");

    assert_eq!(pack.entries[0].object, object);
    assert_eq!(index.pack_checksum, pack.checksum);
    // Both trailing checksums must be tagged with the SHA-256 format.
    assert_eq!(index.pack_checksum.format(), ObjectFormat::Sha256);
    assert_eq!(index.index_checksum.format(), ObjectFormat::Sha256);

    // The only object is expected at byte offset 12 of the pack.
    let located = index.find(&oid).expect("test operation should succeed");
    assert_eq!(located.offset, 12);
}
5818
/// Generates a four-blob undeltified SHA-256 pack and checks that the index
/// views agree with the owned index parsed from the same bytes.
#[test]
fn pack_index_view_matches_owned_index_for_generated_sha256_pack() {
    let format = ObjectFormat::Sha256;
    let objects: Vec<_> = (0..4)
        .map(|idx| {
            let body = format!("borrowed pack index view sha256 object {idx}\n").into_bytes();
            EncodedObject::new(ObjectType::Blob, body)
        })
        .collect();
    let written =
        PackFile::write_undeltified(&objects, format).expect("test operation should succeed");

    assert_pack_index_view_matches_owned(&written.index, format);
}
5834
/// Builds an index directly from already-written SHA-256 pack bytes and
/// checks it matches what the pack writer reported.
#[test]
fn indexes_existing_sha256_pack_bytes() {
    let format = ObjectFormat::Sha256;
    let object = EncodedObject::new(ObjectType::Blob, b"index raw sha256 pack\n".to_vec());
    let written = PackFile::write_undeltified(std::slice::from_ref(&object), format)
        .expect("test operation should succeed");

    let reindexed = PackIndex::write_v2_for_pack(&written.pack, format)
        .expect("test operation should succeed");
    let reparsed =
        PackIndex::parse(&reindexed.index, format).expect("test operation should succeed");

    // The writer's summary of the freshly built index.
    assert_eq!(reindexed.pack_checksum, written.checksum);
    assert_eq!(reindexed.entries, written.entries);
    // The same facts after parsing the index bytes back.
    assert_eq!(reparsed.pack_checksum, written.checksum);
    assert_eq!(reparsed.entries, written.entries);
}
5852
/// Indexes the raw bytes of a two-object delta pack and checks that the
/// rebuilt index reports the same checksum, entries, offset and CRC as the
/// pack writer did.
#[test]
fn indexes_existing_delta_pack_bytes() {
    let format = ObjectFormat::Sha1;
    let (base, changed) = similar_blob_objects();
    let changed_oid = changed
        .object_id(format)
        .expect("test operation should succeed");
    let options = delta_pack_options(true);
    let written = PackFile::write_packed_with_options(&[base, changed], format, &options)
        .expect("test operation should succeed");

    let indexed = PackIndex::write_v2_for_pack_sha1(&written.pack)
        .expect("test operation should succeed");
    let index =
        PackIndex::parse_v2_sha1(&indexed.index).expect("test operation should succeed");

    assert_eq!(indexed.pack_checksum, written.checksum);
    assert_eq!(indexed.entries, written.entries);

    // The second written entry is the one looked up by `changed_oid`.
    let expected = &written.entries[1];
    let located_offset = index
        .find(&changed_oid)
        .expect("test operation should succeed")
        .offset;
    assert_eq!(located_offset, expected.offset);
    let located_crc32 = index
        .find(&changed_oid)
        .expect("test operation should succeed")
        .crc32;
    assert_eq!(located_crc32, expected.crc32);
}
5889
/// Writes a two-object pack with `delta_pack_options(false)`, checks the
/// second entry is stored as a ref delta, and verifies that the pack and
/// index parse back to the original objects.
#[test]
fn writes_ref_delta_pack_and_index_that_round_trip() {
    let format = ObjectFormat::Sha1;
    let (base, changed) = similar_blob_objects();
    let options = delta_pack_options(false);
    let written =
        PackFile::write_packed_with_options(&[base.clone(), changed.clone()], format, &options)
            .expect("test operation should succeed");

    // Inspect the raw header of the second entry to confirm its encoding.
    let mut cursor = written.entries[1].offset as usize;
    let second_header =
        parse_entry_header(&written.pack, &mut cursor).expect("test operation should succeed");
    assert_eq!(second_header.kind, PackObjectKind::RefDelta);

    let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
    let index =
        PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
    let changed_oid = changed
        .object_id(format)
        .expect("test operation should succeed");

    assert_eq!(pack.entries[0].object, base);
    assert_eq!(pack.entries[1].object, changed);
    assert_eq!(index.pack_checksum, pack.checksum);

    let located = index
        .find(&changed_oid)
        .expect("test operation should succeed");
    assert_eq!(located.offset, written.entries[1].offset);
}
5922
/// Reads every entry of an offset-delta pack individually by its offset and
/// checks the result equals the object produced by a full parse of the pack.
#[test]
fn read_object_at_matches_full_parse_for_ofs_delta_pack() {
    let (base, changed) = similar_blob_objects();
    let options = delta_pack_options(true);
    // `changed` is not used after the pack is written, so it is moved into the
    // input array instead of being cloned.
    let written =
        PackFile::write_packed_with_options(&[base, changed], ObjectFormat::Sha1, &options)
            .expect("test operation should succeed");

    // Guard: the second entry must really be stored as an offset delta,
    // otherwise this test would not exercise the delta path.
    let mut second = written.entries[1].offset as usize;
    assert_eq!(
        parse_entry_header(&written.pack, &mut second)
            .expect("test operation should succeed")
            .kind,
        PackObjectKind::OfsDelta
    );

    let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
    for po in &parsed.entries {
        // The external-base resolver always answers "not found": every base
        // needed here lives inside the pack itself.
        let got = read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |_| {
            Ok(None)
        })
        .expect("test operation should succeed");
        assert_eq!(*got, po.object, "offset {}", po.entry.offset);
    }
}
5952
5953 #[derive(Default)]
5956 struct MapHeaderTypeCache(HashMap<u64, (ObjectType, u64)>);
5957
5958 impl HeaderTypeCache for MapHeaderTypeCache {
5959 fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
5960 self.0.get(&pack_offset).copied()
5961 }
5962 fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
5963 self.0.insert(pack_offset, header);
5964 }
5965 }
5966
/// Compares three ways of reading an entry header from an offset-delta pack:
/// uncached, through an initially empty cache, and through the populated
/// cache. All three must report the object's type and body length.
#[test]
fn read_object_header_at_cached_matches_uncached_cold_and_warm_for_ofs_delta() {
    let format = ObjectFormat::Sha1;
    let (base, changed) = similar_blob_objects();
    let options = delta_pack_options(true);
    let written = PackFile::write_packed_with_options(&[base, changed], format, &options)
        .expect("test operation should succeed");

    // Guard: the second entry has to be an offset delta for this test to matter.
    let mut cursor = written.entries[1].offset as usize;
    let second_kind = parse_entry_header(&written.pack, &mut cursor)
        .expect("test operation should succeed")
        .kind;
    assert_eq!(second_kind, PackObjectKind::OfsDelta);

    let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
    let mut cache = MapHeaderTypeCache::default();

    // Pass 1: uncached result is the reference; the cold cached read must match it.
    for po in &parsed.entries {
        let offset = po.entry.offset;
        let uncached = read_object_header_at(&written.pack, offset, format, |_| Ok(None))
            .expect("test operation should succeed");
        assert_eq!(
            uncached,
            (po.object.object_type, po.object.body.len() as u64),
            "uncached header at offset {}",
            po.entry.offset
        );

        let cold =
            read_object_header_at_with_cache(&written.pack, offset, format, |_| Ok(None), &mut cache)
                .expect("test operation should succeed");
        assert_eq!(cold, uncached, "cold cache at offset {}", po.entry.offset);
    }

    // Pass 2: the resolver now panics if called, so these reads must not need it.
    for po in &parsed.entries {
        let warm = read_object_header_at_with_cache(
            &written.pack,
            po.entry.offset,
            format,
            |_| panic!("warm cache must not re-walk the chain"),
            &mut cache,
        )
        .expect("test operation should succeed");
        assert_eq!(
            warm,
            (po.object.object_type, po.object.body.len() as u64),
            "warm cache at offset {}",
            po.entry.offset
        );
    }
}
6028
/// Reads every entry of a ref-delta pack individually by its offset and
/// checks the result equals the object produced by a full parse. Bases are
/// supplied by object id from a map built out of the fully parsed pack.
#[test]
fn read_object_at_matches_full_parse_for_ref_delta_pack() {
    let (base, changed) = similar_blob_objects();
    let options = delta_pack_options(false);
    // `changed` is not used after the pack is written, so it is moved into the
    // input array instead of being cloned.
    let written =
        PackFile::write_packed_with_options(&[base, changed], ObjectFormat::Sha1, &options)
            .expect("test operation should succeed");
    let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");

    // Lookup table used by the resolver closure to answer base requests by id.
    let by_oid: HashMap<ObjectId, Arc<EncodedObject>> = parsed
        .entries
        .iter()
        .map(|po| (po.entry.oid, Arc::new(po.object.clone())))
        .collect();

    for po in &parsed.entries {
        let got = read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |oid| {
            Ok(by_oid.get(oid).cloned())
        })
        .expect("test operation should succeed");
        assert_eq!(*got, po.object);
    }
}
6054
6055 #[derive(Default)]
6059 struct CountingDeltaCache {
6060 map: std::cell::RefCell<HashMap<u64, Arc<EncodedObject>>>,
6061 hits: std::cell::Cell<usize>,
6062 inserts: std::cell::Cell<usize>,
6063 }
6064
6065 impl PackDeltaCache for CountingDeltaCache {
6066 fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
6067 let hit = self.map.borrow().get(&offset).cloned();
6068 if hit.is_some() {
6069 self.hits.set(self.hits.get() + 1);
6070 }
6071 hit
6072 }
6073 fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
6074 self.inserts.set(self.inserts.get() + 1);
6075 self.map.borrow_mut().insert(offset, object);
6076 }
6077 }
6078
/// Reads every entry of a pack of eight similar blobs through a shared delta
/// cache, twice, and checks that the objects match a full parse and that the
/// cache served at least one hit.
#[test]
fn read_object_at_with_cache_matches_uncached_and_reuses_bases() {
    // Eight 4 KiB bodies that differ only in a short trailing line.
    let objects: Vec<_> = (0..8u32)
        .map(|idx| {
            let mut body = vec![b'x'; 4096];
            body.extend_from_slice(format!("\nvariant {idx}\n").as_bytes());
            EncodedObject::new(ObjectType::Blob, body)
        })
        .collect();

    let options = delta_pack_options(true);
    let written = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
        .expect("test operation should succeed");
    let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");

    let cache = CountingDeltaCache::default();
    // Two full passes over the pack share the same cache instance.
    for _pass in 0..2 {
        for po in &parsed.entries {
            let got = read_object_at_with_cache_arc(
                &written.pack,
                po.entry.offset,
                ObjectFormat::Sha1,
                |_| Ok(None),
                &cache,
            )
            .expect("test operation should succeed");
            assert_eq!(*got, po.object, "offset {}", po.entry.offset);
        }
    }

    let hit_count = cache.hits.get();
    assert!(hit_count > 0, "cache never served a warm object");
}
6114
/// Writes a two-object pack with `delta_pack_options(true)`, checks the
/// second entry is stored as an offset delta, and verifies that the pack and
/// index parse back to the original objects.
#[test]
fn writes_ofs_delta_pack_and_index_that_round_trip() {
    let format = ObjectFormat::Sha1;
    let (base, changed) = similar_blob_objects();
    let options = delta_pack_options(true);
    let written =
        PackFile::write_packed_with_options(&[base.clone(), changed.clone()], format, &options)
            .expect("test operation should succeed");

    // Inspect the raw header of the second entry to confirm its encoding.
    let mut cursor = written.entries[1].offset as usize;
    let second_header =
        parse_entry_header(&written.pack, &mut cursor).expect("test operation should succeed");
    assert_eq!(second_header.kind, PackObjectKind::OfsDelta);

    let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
    let index =
        PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
    let changed_oid = changed
        .object_id(format)
        .expect("test operation should succeed");

    assert_eq!(pack.entries[0].object, base);
    assert_eq!(pack.entries[1].object, changed);
    assert_eq!(index.pack_checksum, pack.checksum);

    let located = index
        .find(&changed_oid)
        .expect("test operation should succeed");
    assert_eq!(located.offset, written.entries[1].offset);
}
6147
/// Parses a hand-built two-object pack whose second entry is an offset delta
/// and checks the resolved body and object id.
#[test]
fn resolves_ofs_delta_pack_entry() {
    let format = ObjectFormat::Sha1;
    let (base, result) = (b"hello", b"hello world");

    let pack = two_object_delta_pack(format, base, result, DeltaKind::Offset);
    let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");

    assert_eq!(parsed.entries.len(), 2);
    assert_eq!(parsed.entries[0].object.body, base);
    assert_eq!(parsed.entries[1].object.body, result);

    // The delta entry must be identified by the hash of the resolved blob.
    let expected_oid = sley_core::object_id_for_bytes(format, "blob", result)
        .expect("test operation should succeed");
    assert_eq!(parsed.entries[1].entry.oid, expected_oid);
}
6163
/// Parses a hand-built two-object pack whose second entry is a ref delta and
/// checks the resolved body and object id.
#[test]
fn resolves_ref_delta_pack_entry() {
    let format = ObjectFormat::Sha1;
    let (base, result) = (b"hello", b"hello world");

    let pack = two_object_delta_pack(format, base, result, DeltaKind::Ref);
    let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");

    assert_eq!(parsed.entries.len(), 2);
    assert_eq!(parsed.entries[0].object.body, base);
    assert_eq!(parsed.entries[1].object.body, result);

    // The delta entry must be identified by the hash of the resolved blob.
    let expected_oid = sley_core::object_id_for_bytes(format, "blob", result)
        .expect("test operation should succeed");
    assert_eq!(parsed.entries[1].entry.oid, expected_oid);
}
6179
/// A thin pack (a ref delta whose base is not in the pack) must fail a plain
/// parse but succeed through `parse_thin` when the resolver supplies the base.
#[test]
fn resolves_thin_ref_delta_pack_entry_with_external_base() {
    let format = ObjectFormat::Sha1;
    let (base, result) = (b"hello", b"hello world");
    let pack = thin_ref_delta_pack(format, base, result);

    // Without access to the external base the pack cannot be resolved.
    assert!(PackFile::parse_sha1(&pack).is_err());

    let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
        .expect("test operation should succeed");
    // The resolver knows exactly one object: the base blob.
    let parsed = PackFile::parse_thin(&pack, format, |oid| {
        Ok((oid == &base_oid).then(|| EncodedObject::new(ObjectType::Blob, base.to_vec())))
    })
    .expect("test operation should succeed");

    assert_eq!(parsed.entries.len(), 1);
    assert_eq!(parsed.entries[0].object.body, result);

    let expected_oid = sley_core::object_id_for_bytes(format, "blob", result)
        .expect("test operation should succeed");
    assert_eq!(parsed.entries[0].entry.oid, expected_oid);
}
6205
/// Flipping one bit in the last byte of a pack must make parsing fail.
#[test]
fn rejects_bad_pack_checksum() {
    let mut pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");

    // Corrupt the final byte of the pack in place.
    let final_byte = pack.last_mut().expect("test operation should succeed");
    *final_byte ^= 1;

    assert!(PackFile::parse_sha1(&pack).is_err());
}
6213
/// Building an index from pack bytes whose last byte was corrupted must fail.
#[test]
fn raw_pack_index_rejects_bad_pack_checksum() {
    let mut pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");

    // Corrupt the final byte of the pack in place.
    let final_byte = pack.last_mut().expect("test operation should succeed");
    *final_byte ^= 1;

    assert!(PackIndex::write_v2_for_pack_sha1(&pack).is_err());
}
6221
/// `PackIndex::write_v2` must refuse an entry list that contains the same
/// object id twice, even when CRC and offset differ.
#[test]
fn pack_index_writer_rejects_duplicate_object_ids() {
    let format = ObjectFormat::Sha1;
    let oid = sley_core::object_id_for_bytes(format, "blob", b"same\n")
        .expect("test operation should succeed");
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");

    // Two entries sharing one id: (crc32, offset) = (1, 12) and (2, 24).
    let entries: Vec<_> = [(1, 12), (2, 24)]
        .into_iter()
        .map(|(crc32, offset)| PackIndexEntry { oid, crc32, offset })
        .collect();

    assert!(PackIndex::write_v2(format, &entries, &pack_checksum).is_err());
}
6242
/// Parses a hand-built single-entry v2 index and checks version, checksum,
/// entry count, and the offset/CRC returned for the one object.
#[test]
fn parses_single_entry_pack_index() {
    let format = ObjectFormat::Sha1;
    let oid = ObjectId::from_hex(format, "ce013625030ba8dba906f756967f9e9ca394464a")
        .expect("test operation should succeed");
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");

    let index = single_entry_index(format, oid, 0x1234_5678, 12, pack_checksum.clone());
    let parsed = PackIndex::parse_v2_sha1(&index).expect("test operation should succeed");

    assert_eq!(parsed.version, 2);
    assert_eq!(parsed.pack_checksum, pack_checksum);
    assert_eq!(parsed.entries.len(), 1);

    let located_offset = parsed
        .find(&oid)
        .expect("test operation should succeed")
        .offset;
    assert_eq!(located_offset, 12);
    let located_crc32 = parsed
        .find(&oid)
        .expect("test operation should succeed")
        .crc32;
    assert_eq!(located_crc32, 0x1234_5678);

    // The view types must read the same bytes identically.
    assert_pack_index_view_matches_owned(&index, format);
}
6279
/// Parses a hand-built single-entry v1 index. The test expects version 1,
/// the stored offset to round-trip, and the reported CRC to be zero.
#[test]
fn parses_single_entry_pack_index_v1() {
    let format = ObjectFormat::Sha1;
    let oid = ObjectId::from_hex(format, "ce013625030ba8dba906f756967f9e9ca394464a")
        .expect("test operation should succeed");
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");

    let index = single_entry_index_v1(format, oid, 0x1234_5678, pack_checksum.clone());
    let parsed = PackIndex::parse(&index, format).expect("test operation should succeed");

    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.pack_checksum, pack_checksum);
    assert_eq!(parsed.entries.len(), 1);

    let located_offset = parsed
        .find(&oid)
        .expect("test operation should succeed")
        .offset;
    assert_eq!(located_offset, 0x1234_5678);
    let located_crc32 = parsed
        .find(&oid)
        .expect("test operation should succeed")
        .crc32;
    assert_eq!(located_crc32, 0);

    // The view types must read the same bytes identically.
    assert_pack_index_view_matches_owned(&index, format);
}
6312
/// A v1 index whose last byte was corrupted must fail to parse.
#[test]
fn rejects_bad_pack_index_v1_checksum() {
    let format = ObjectFormat::Sha1;
    let oid = ObjectId::from_hex(format, "ce013625030ba8dba906f756967f9e9ca394464a")
        .expect("test operation should succeed");
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let mut index = single_entry_index_v1(format, oid, 12, pack_checksum);

    // Corrupt the final byte of the index in place.
    let final_byte = index.last_mut().expect("test operation should succeed");
    *final_byte ^= 1;

    assert!(PackIndex::parse(&index, format).is_err());
}
6327
/// Writes a v2 index whose entries sit at offsets 0x8000_0000 and
/// 0x1_0000_0042 and checks that the views report those offsets and CRCs.
#[test]
fn pack_index_view_reads_v2_large_offsets() {
    let format = ObjectFormat::Sha1;
    let first = sley_core::object_id_for_bytes(format, "blob", b"large offset a\n")
        .expect("test operation should succeed");
    let second = sley_core::object_id_for_bytes(format, "blob", b"large offset b\n")
        .expect("test operation should succeed");
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");

    let entries = vec![
        PackIndexEntry {
            oid: first,
            crc32: 0x1111_2222,
            offset: 0x8000_0000,
        },
        PackIndexEntry {
            oid: second,
            crc32: 0x3333_4444,
            offset: 0x1_0000_0042,
        },
    ];
    let index = PackIndex::write_v2(format, &entries, &pack_checksum)
        .expect("test operation should succeed");

    assert_pack_index_view_matches_owned(&index, format);

    // Each written entry must come back unchanged from the borrowed view.
    let view = PackIndexView::parse(&index, format).expect("test operation should succeed");
    for PackIndexEntry { oid, crc32, offset } in entries {
        assert_eq!(view.find(&oid), Some(PackIndexLookup { crc32, offset }));
    }
}
6365
/// With the last index byte corrupted, the default `PackIndexView::parse`
/// must fail, while the `*_without_checksum` parsers still accept the bytes
/// and find the entry.
#[test]
fn pack_index_view_default_parse_checks_index_checksum() {
    let format = ObjectFormat::Sha1;
    let oid = ObjectId::from_hex(format, "ce013625030ba8dba906f756967f9e9ca394464a")
        .expect("test operation should succeed");
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let mut index = single_entry_index(format, oid, 0x1234_5678, 12, pack_checksum);

    // Corrupt the final byte of the index in place.
    let final_byte = index.last_mut().expect("test operation should succeed");
    *final_byte ^= 1;

    assert!(PackIndexView::parse(&index, format).is_err());

    let view = PackIndexView::parse_without_checksum(&index, format)
        .expect("test operation should succeed");
    let trusted_view = PackIndexViewData::parse_trusted_without_checksum(
        Arc::from(index.clone().into_boxed_slice()),
        format,
    )
    .expect("test operation should succeed");

    // Both lenient parsers must still resolve the single entry.
    assert_eq!(
        view.find(&oid),
        Some(PackIndexLookup {
            crc32: 0x1234_5678,
            offset: 12,
        })
    );
    assert_eq!(
        trusted_view.find(&oid),
        Some(PackIndexLookup {
            crc32: 0x1234_5678,
            offset: 12,
        })
    );
}
6402
/// Writes a three-position reverse index, parses it back, checks every
/// parsed field, and confirms that re-writing yields identical bytes.
#[test]
fn parses_pack_reverse_index() {
    let format = ObjectFormat::Sha1;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let reverse_index = PackReverseIndex::write(format, &[2, 0, 1], &pack_checksum)
        .expect("test operation should succeed");

    let parsed = PackReverseIndex::parse(&reverse_index, format, 3)
        .expect("test operation should succeed");
    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.format, ObjectFormat::Sha1);
    assert_eq!(parsed.positions, [2, 0, 1]);
    assert_eq!(parsed.pack_checksum, pack_checksum);

    // Serialising the parsed form again must reproduce the original bytes.
    let rewritten = PackReverseIndex::write(format, &parsed.positions, &parsed.pack_checksum)
        .expect("test operation should succeed");
    assert_eq!(rewritten, reverse_index);
}
6421
/// A reverse index whose last byte was corrupted must fail to parse.
#[test]
fn rejects_bad_pack_reverse_index_checksum() {
    let format = ObjectFormat::Sha1;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let mut reverse_index = PackReverseIndex::write(format, &[0], &pack_checksum)
        .expect("test operation should succeed");

    // Corrupt the final byte of the file in place.
    let final_byte = reverse_index
        .last_mut()
        .expect("test operation should succeed");
    *final_byte ^= 1;

    assert!(PackReverseIndex::parse(&reverse_index, format, 1).is_err());
}
6432
/// Position lists with a duplicate or an out-of-range value must be rejected
/// by both the reverse-index parser and the reverse-index writer.
#[test]
fn rejects_bad_pack_reverse_index_positions() {
    let format = ObjectFormat::Sha1;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");

    // Parser side: files built by the raw test helper.
    let duplicate = pack_reverse_index(format, &[0, 0], pack_checksum.clone());
    let out_of_range = pack_reverse_index(format, &[0, 2], pack_checksum.clone());
    for malformed in [&duplicate, &out_of_range] {
        assert!(PackReverseIndex::parse(malformed, format, 2).is_err());
    }

    // Writer side: the same position lists must not be serialisable.
    assert!(PackReverseIndex::write(format, &[0, 0], &pack_checksum).is_err());
    assert!(PackReverseIndex::write(format, &[0, 2], &pack_checksum).is_err());
}
6446
/// Writes a three-entry mtimes file, parses it back, checks every parsed
/// field, and confirms that re-writing yields identical bytes.
#[test]
fn parses_pack_mtimes() {
    let format = ObjectFormat::Sha1;
    let stamps = [1, 1_700_000_000, u32::MAX];
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let mtimes =
        PackMtimes::write(format, &stamps, &pack_checksum).expect("test operation should succeed");

    let parsed = PackMtimes::parse(&mtimes, format, 3).expect("test operation should succeed");
    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.format, ObjectFormat::Sha1);
    assert_eq!(parsed.mtimes, stamps.to_vec());
    assert_eq!(parsed.pack_checksum, pack_checksum);

    // Serialising the parsed form again must reproduce the original bytes.
    let rewritten = PackMtimes::write(format, &parsed.mtimes, &parsed.pack_checksum)
        .expect("test operation should succeed");
    assert_eq!(rewritten, mtimes);
}
6469
/// An mtimes file whose last byte was corrupted must fail to parse.
#[test]
fn rejects_bad_pack_mtimes_checksum() {
    let format = ObjectFormat::Sha1;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let mut mtimes =
        PackMtimes::write(format, &[1], &pack_checksum).expect("test operation should succeed");

    // Corrupt the final byte of the file in place.
    let final_byte = mtimes.last_mut().expect("test operation should succeed");
    *final_byte ^= 1;

    assert!(PackMtimes::parse(&mtimes, format, 1).is_err());
}
6480
/// Structurally wrong mtimes files must be rejected: one with more entries
/// than the expected object count, and one with header byte 11 altered but a
/// recomputed (hence valid) trailing checksum.
#[test]
fn rejects_bad_pack_mtimes_shape() {
    let format = ObjectFormat::Sha1;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");

    // Two entries on disk, but the caller expects only one object.
    let too_many = pack_mtimes(format, &[1, 2], pack_checksum.clone());
    assert!(PackMtimes::parse(&too_many, format, 1).is_err());

    // NOTE(review): byte 11 is presumably the hash-id field of the header,
    // going by the variable name `wrong_hash` — confirm against the writer.
    let mut wrong_hash = pack_mtimes(format, &[1], pack_checksum);
    wrong_hash[11] = 2;

    // Re-seal the file so that only the altered header byte can cause the failure.
    let checksum_offset = wrong_hash.len() - ObjectFormat::Sha1.raw_len();
    let (covered, trailer) = wrong_hash.split_at_mut(checksum_offset);
    let checksum =
        sley_core::digest_bytes(format, &*covered).expect("test operation should succeed");
    trailer.copy_from_slice(checksum.as_bytes());

    assert!(PackMtimes::parse(&wrong_hash, format, 1).is_err());
}
6496
/// Parses a hand-built multi-pack index with two packs and two objects and
/// checks the header fields, per-object lookups, absent optional sections,
/// and the position and length of the first two chunks.
#[test]
fn parses_multi_pack_index_header_and_chunk_lookup() {
    let format = ObjectFormat::Sha1;
    let first = sley_core::object_id_for_bytes(format, "blob", b"first object\n")
        .expect("test operation should succeed");
    let second = sley_core::object_id_for_bytes(format, "blob", b"second object\n")
        .expect("test operation should succeed");

    // Each tuple is (object id, pack int id, offset).
    let chunks = midx_chunks_with_pack_names(
        format,
        b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
        &[(first.clone(), 0, 12), (second.clone(), 1, 0x1_0000_0000)],
    );
    let midx = multi_pack_index(format, 2, 2, &chunks);
    let parsed = MultiPackIndex::parse(&midx, format).expect("test operation should succeed");

    // Header-level facts.
    assert_eq!(parsed.version, 2);
    assert_eq!(parsed.format, ObjectFormat::Sha1);
    assert_eq!(parsed.pack_count, 2);
    assert_eq!(parsed.pack_names, vec!["pack-a.idx", "pack-b.idx"]);
    assert_eq!(parsed.object_count, 2);
    assert_eq!(parsed.objects.len(), 2);

    // Per-object lookups.
    let first_hit = parsed.find(&first).expect("test operation should succeed");
    assert_eq!(first_hit.pack_int_id, 0);
    assert_eq!(first_hit.offset, 12);
    let second_hit = parsed.find(&second).expect("test operation should succeed");
    assert_eq!(second_hit.pack_int_id, 1);
    assert_eq!(second_hit.offset, 0x1_0000_0000);

    // Optional sections were not supplied.
    assert_eq!(parsed.reverse_index, None);
    assert_eq!(parsed.bitmapped_packs, None);

    // Chunk table layout.
    assert_eq!(parsed.chunks.len(), 5);
    let pack_names_chunk = &parsed.chunks[0];
    assert_eq!(pack_names_chunk.id, *b"PNAM");
    assert_eq!(pack_names_chunk.offset, 84);
    assert_eq!(pack_names_chunk.len, 24);
    let fanout_chunk = &parsed.chunks[1];
    assert_eq!(fanout_chunk.id, *b"OIDF");
    assert_eq!(fanout_chunk.offset, 108);
    assert_eq!(fanout_chunk.len, 1024);
}
6555
/// Uses `MultiPackIndexOidLookup` over raw multi-pack-index bytes and checks
/// membership, pack name and offset for two present objects, plus the
/// negative result for an absent one.
#[test]
fn raw_multi_pack_index_lookup_finds_pack_and_offset() {
    let format = ObjectFormat::Sha1;
    let first = sley_core::object_id_for_bytes(format, "blob", b"first object\n")
        .expect("test operation should succeed");
    let second = sley_core::object_id_for_bytes(format, "blob", b"second object\n")
        .expect("test operation should succeed");
    let missing = sley_core::object_id_for_bytes(format, "blob", b"missing\n")
        .expect("test operation should succeed");

    // Each tuple is (object id, pack int id, offset).
    let chunks = midx_chunks_with_pack_names(
        format,
        b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
        &[(first.clone(), 0, 12), (second.clone(), 1, 0x1_0000_0000)],
    );
    let midx = Arc::new(multi_pack_index(format, 2, 2, &chunks));
    let lookup =
        MultiPackIndexOidLookup::parse(midx, format).expect("test operation should succeed");

    // Membership checks.
    assert!(lookup.contains(&first));
    assert!(lookup.contains(&second));
    assert!(!lookup.contains(&missing));

    // First object: pack 0 ("pack-a.idx"), offset 12.
    let first_entry = lookup
        .find(&first)
        .expect("test operation should succeed")
        .expect("object should be present");
    assert_eq!(
        lookup.pack_name(first_entry.pack_int_id),
        Some("pack-a.idx")
    );
    assert_eq!(first_entry.offset, 12);

    // Second object: pack 1 ("pack-b.idx"), offset 0x1_0000_0000.
    let second_entry = lookup
        .find(&second)
        .expect("test operation should succeed")
        .expect("object should be present");
    assert_eq!(
        lookup.pack_name(second_entry.pack_int_id),
        Some("pack-b.idx")
    );
    assert_eq!(second_entry.offset, 0x1_0000_0000);

    // The absent object resolves to `None` without an error.
    let absent = lookup
        .find(&missing)
        .expect("test operation should succeed");
    assert!(absent.is_none());
}
6603
/// Flips one bit in the final byte of a multi-pack-index and expects parsing to fail.
#[test]
fn rejects_bad_multi_pack_index_checksum() {
    let format = ObjectFormat::Sha1;
    let chunks = midx_chunks_with_pack_names(format, Vec::new(), &[]);
    let mut corrupted = multi_pack_index(format, 1, 0, &chunks);

    let tail = corrupted.len() - 1;
    corrupted[tail] ^= 1;

    let outcome = MultiPackIndex::parse(&corrupted, format);
    assert!(outcome.is_err());
}
6612
/// Mutates individual header/lookup bytes of an otherwise valid multi-pack-index, recomputes the
/// trailing checksum after each mutation, and expects the parser to reject every variant.
#[test]
fn rejects_bad_multi_pack_index_shape() {
    /// Rewrites the trailing SHA-1 checksum so it matches the bytes that precede it.
    fn reseal(bytes: &mut [u8]) {
        let body_len = bytes.len() - ObjectFormat::Sha1.raw_len();
        let digest = sley_core::digest_bytes(ObjectFormat::Sha1, &bytes[..body_len])
            .expect("test operation should succeed");
        bytes[body_len..].copy_from_slice(digest.as_bytes());
    }

    let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);

    // Byte 5 overwritten with 2.
    let mut wrong_hash = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
    wrong_hash[5] = 2;
    reseal(&mut wrong_hash);
    assert!(MultiPackIndex::parse(&wrong_hash, ObjectFormat::Sha1).is_err());

    // Byte 12 overwritten with `B`.
    let mut missing_terminator = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
    missing_terminator[12] = b'B';
    reseal(&mut missing_terminator);
    assert!(MultiPackIndex::parse(&missing_terminator, ObjectFormat::Sha1).is_err());

    // Bytes 16..24 replaced with a big-endian zero.
    let fresh_chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
    let mut bad_offset = multi_pack_index(ObjectFormat::Sha1, 2, 0, &fresh_chunks);
    bad_offset[16..24].copy_from_slice(&0u64.to_be_bytes());
    reseal(&mut bad_offset);
    assert!(MultiPackIndex::parse(&bad_offset, ObjectFormat::Sha1).is_err());
}
6646
/// Exercises pack-name validation: a missing chunk list, too few names, non-zero padding, and
/// unsorted names (rejected for version 1, accepted and preserved in order for version 2).
#[test]
fn rejects_bad_multi_pack_index_pack_names() {
    let format = ObjectFormat::Sha1;
    // Builds an index whose chunks come from `midx_chunks_with_pack_names` with no objects.
    let with_names = |version, pack_count, names: &[u8]| {
        multi_pack_index(
            format,
            version,
            pack_count,
            &midx_chunks_with_pack_names(format, names.to_vec(), &[]),
        )
    };

    let missing = multi_pack_index(format, 2, 1, &[]);
    assert!(MultiPackIndex::parse(&missing, format).is_err());

    let too_few = with_names(2, 2, b"pack-a.idx\0");
    assert!(MultiPackIndex::parse(&too_few, format).is_err());

    let bad_padding = with_names(2, 1, b"pack-a.idx\0xxxx");
    assert!(MultiPackIndex::parse(&bad_padding, format).is_err());

    let unsorted_v1 = with_names(1, 2, b"pack-b.idx\0pack-a.idx\0");
    assert!(MultiPackIndex::parse(&unsorted_v1, format).is_err());

    let unsorted_v2 = with_names(2, 2, b"pack-b.idx\0pack-a.idx\0");
    let accepted =
        MultiPackIndex::parse(&unsorted_v2, format).expect("test operation should succeed");
    assert_eq!(accepted.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
}
6694
/// Feeds the parser version-2, single-pack indexes whose object tables are malformed in
/// different ways and expects each one to be rejected.
#[test]
fn rejects_bad_multi_pack_index_object_tables() {
    let format = ObjectFormat::Sha1;
    let hex_oid = |hex: &str| {
        ObjectId::from_hex(format, hex).expect("test operation should succeed")
    };
    let oid_a = hex_oid("1111111111111111111111111111111111111111");
    let oid_b = hex_oid("2222222222222222222222222222222222222222");

    // Every case below is serialized as a version-2 index with one pack.
    let parse_fails = |chunks: &[([u8; 4], Vec<u8>)]| {
        MultiPackIndex::parse(&multi_pack_index(format, 2, 1, chunks), format).is_err()
    };

    // Only a PNAM chunk is present.
    assert!(parse_fails(&[(*b"PNAM", b"pack-a.idx\0\0".to_vec())]));

    // All-zero OIDF chunk alongside one OIDL entry.
    let bad_fanout = vec![
        (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
        (*b"OIDF", vec![0; 256 * 4]),
        (*b"OIDL", oid_a.as_bytes().to_vec()),
        (*b"OOFF", midx_ooff_entries(&[(0, 12)], &mut Vec::new())),
    ];
    assert!(parse_fails(&bad_fanout));

    // OIDL lists `oid_b` before `oid_a`.
    let fanout = midx_oid_fanout(&[oid_a.clone(), oid_b.clone()]);
    let mut oid_lookup = Vec::new();
    for oid in [&oid_b, &oid_a] {
        oid_lookup.extend_from_slice(oid.as_bytes());
    }
    let unsorted = vec![
        (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
        (*b"OIDF", fanout),
        (*b"OIDL", oid_lookup),
        (
            *b"OOFF",
            midx_ooff_entries(&[(0, 12), (0, 24)], &mut Vec::new()),
        ),
    ];
    assert!(parse_fails(&unsorted));

    // The object names pack id 1 although only one pack name is listed.
    let bad_pack = midx_chunks_with_pack_names(
        format,
        b"pack-a.idx\0\0".to_vec(),
        &[(oid_a.clone(), 1, 12)],
    );
    assert!(parse_fails(&bad_pack));

    // Offset 0x1_0000_0000 is written to OOFF, but no LOFF chunk is emitted.
    let mut large_offsets = Vec::new();
    let missing_loff = vec![
        (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
        (*b"OIDF", midx_oid_fanout(std::slice::from_ref(&oid_a))),
        (*b"OIDL", oid_a.as_bytes().to_vec()),
        (
            *b"OOFF",
            midx_ooff_entries(&[(0, 0x1_0000_0000)], &mut large_offsets),
        ),
    ];
    assert!(parse_fails(&missing_loff));

    // LOFF chunk that is a single byte long.
    let mut bad_loff = midx_chunks_with_pack_names(format, b"pack-a.idx\0\0".to_vec(), &[]);
    bad_loff.push((*b"LOFF", vec![0]));
    assert!(parse_fails(&bad_loff));
}
6770
/// Appends RIDX and BTMP chunks to a two-pack index and checks that the parsed reverse index and
/// bitmapped-pack table match what was written.
#[test]
fn parses_multi_pack_index_bitmap_chunks() {
    let format = ObjectFormat::Sha1;
    let first = sley_core::object_id_for_bytes(format, "blob", b"first object\n")
        .expect("test operation should succeed");
    let second = sley_core::object_id_for_bytes(format, "blob", b"second object\n")
        .expect("test operation should succeed");

    let mut chunks = midx_chunks_with_pack_names(
        format,
        b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
        &[(first, 0, 12), (second, 1, 24)],
    );
    chunks.extend([
        (*b"RIDX", midx_u32_table(&[1, 0])),
        (*b"BTMP", midx_bitmap_packs(&[(0, 1), (1, 1)])),
    ]);
    let bytes = multi_pack_index(format, 2, 2, &chunks);

    let parsed = MultiPackIndex::parse(&bytes, format).expect("test operation should succeed");

    let expected_packs = vec![
        MultiPackBitmapPack {
            bitmap_pos: 0,
            bitmap_nr: 1,
        },
        MultiPackBitmapPack {
            bitmap_pos: 1,
            bitmap_nr: 1,
        },
    ];
    assert_eq!(parsed.reverse_index, Some(vec![1, 0]));
    assert_eq!(parsed.bitmapped_packs, Some(expected_packs));
}
6803
/// Writes a version-2 multi-pack-index with `MultiPackIndex::write`, parses it back, and checks
/// pack names, object count, per-object pack/offset, and the presence of a LOFF chunk.
#[test]
fn writes_multi_pack_index_that_round_trips() {
    let format = ObjectFormat::Sha1;
    let first = sley_core::object_id_for_bytes(format, "blob", b"first object\n")
        .expect("test operation should succeed");
    let second = sley_core::object_id_for_bytes(format, "blob", b"second object\n")
        .expect("test operation should succeed");

    let entries = [
        MultiPackIndexEntry {
            oid: second.clone(),
            pack_int_id: 0,
            offset: 0x1_0000_0000,
            force_large_offset: false,
        },
        MultiPackIndexEntry {
            oid: first.clone(),
            pack_int_id: 1,
            offset: 12,
            force_large_offset: false,
        },
    ];
    let bytes = MultiPackIndex::write(
        format,
        2,
        &["pack-b.idx".into(), "pack-a.idx".into()],
        &entries,
    )
    .expect("test operation should succeed");

    let parsed = MultiPackIndex::parse(&bytes, format).expect("test operation should succeed");
    assert_eq!(parsed.version, 2);
    assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
    assert_eq!(parsed.object_count, 2);

    let first_entry = parsed
        .find(&first)
        .expect("test operation should succeed");
    assert_eq!(first_entry.pack_int_id, 1);
    assert_eq!(first_entry.offset, 12);

    let second_entry = parsed
        .find(&second)
        .expect("test operation should succeed");
    assert_eq!(second_entry.pack_int_id, 0);
    assert_eq!(second_entry.offset, 0x1_0000_0000);

    let has_large_offsets = parsed.chunks.iter().any(|chunk| chunk.id == *b"LOFF");
    assert!(has_large_offsets);
}
6866
/// Calls `MultiPackIndex::write` with a series of inputs that are each expected to be refused.
#[test]
fn write_multi_pack_index_rejects_invalid_inputs() {
    let format = ObjectFormat::Sha1;
    let oid = sley_core::object_id_for_bytes(format, "blob", b"object\n")
        .expect("test operation should succeed");
    let entry = |pack_int_id, offset| MultiPackIndexEntry {
        oid,
        pack_int_id,
        offset,
        force_large_offset: false,
    };

    // Version 3.
    let bad_version = MultiPackIndex::write(format, 3, &["pack-a.idx".into()], &[]);
    assert!(bad_version.is_err());

    // Version 1 with pack names in descending order.
    let unsorted_v1 = MultiPackIndex::write(
        format,
        1,
        &["pack-b.idx".into(), "pack-a.idx".into()],
        &[],
    );
    assert!(unsorted_v1.is_err());

    // Pack name containing a `/`.
    let slash_in_name = MultiPackIndex::write(format, 2, &["pack/a.idx".into()], &[]);
    assert!(slash_in_name.is_err());

    // Entry pointing at pack id 1 while only one pack name is supplied.
    let unknown_pack = MultiPackIndex::write(format, 2, &["pack-a.idx".into()], &[entry(1, 12)]);
    assert!(unknown_pack.is_err());

    // The same object id listed twice.
    let duplicate_oid = MultiPackIndex::write(
        format,
        2,
        &["pack-a.idx".into()],
        &[entry(0, 12), entry(0, 24)],
    );
    assert!(duplicate_oid.is_err());
}
6919
/// Appends malformed RIDX/BTMP chunks to otherwise valid indexes and expects parse failures.
#[test]
fn rejects_bad_multi_pack_index_bitmap_chunks() {
    let format = ObjectFormat::Sha1;
    let hex_oid = |hex: &str| {
        ObjectId::from_hex(format, hex).expect("test operation should succeed")
    };
    let oid_a = hex_oid("1111111111111111111111111111111111111111");
    let oid_b = hex_oid("2222222222222222222222222222222222222222");

    // RIDX table that lists position 0 twice.
    let mut chunks = midx_chunks_with_pack_names(
        format,
        b"pack-a.idx\0\0".to_vec(),
        &[(oid_a.clone(), 0, 12), (oid_b.clone(), 0, 24)],
    );
    chunks.push((*b"RIDX", midx_u32_table(&[0, 0])));
    let duplicate_ridx = multi_pack_index(format, 2, 1, &chunks);
    assert!(MultiPackIndex::parse(&duplicate_ridx, format).is_err());

    // BTMP table with one row although the index declares two packs.
    let mut chunks = midx_chunks_with_pack_names(
        format,
        b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
        &[(oid_a.clone(), 0, 12), (oid_b.clone(), 1, 24)],
    );
    chunks.push((*b"BTMP", midx_bitmap_packs(&[(0, 1)])));
    let short_btmp = multi_pack_index(format, 2, 2, &chunks);
    assert!(MultiPackIndex::parse(&short_btmp, format).is_err());

    // BTMP row (1, 2) for an index that holds two objects.
    let mut chunks = midx_chunks_with_pack_names(
        format,
        b"pack-a.idx\0\0".to_vec(),
        &[(oid_a, 0, 12), (oid_b, 0, 24)],
    );
    chunks.push((*b"BTMP", midx_bitmap_packs(&[(1, 2)])));
    let out_of_range_btmp = multi_pack_index(format, 2, 1, &chunks);
    assert!(MultiPackIndex::parse(&out_of_range_btmp, format).is_err());
}
6960
/// Parses a three-object SHA-1 bitmap index that carries a name-hash cache and checks the header
/// fields, type bitmap sizes, the single entry, and the cache contents.
#[test]
fn parses_pack_bitmap_index_with_hash_cache() {
    let format = ObjectFormat::Sha1;
    let options = PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let bytes = pack_bitmap_index(
        format,
        3,
        options,
        &pack_checksum,
        &[(2, 0, 1, &[0b101])],
        Some(&[0x1111_1111, 0x2222_2222, 0x3333_3333]),
    );

    let parsed =
        PackBitmapIndex::parse(&bytes, format, 3).expect("test operation should succeed");

    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.format, format);
    assert_eq!(parsed.options, options);
    assert_eq!(parsed.pack_checksum, pack_checksum);
    assert_eq!(parsed.type_bitmaps.commits.bit_size, 3);
    assert_eq!(parsed.type_bitmaps.trees.bit_size, 3);
    assert_eq!(parsed.entries.len(), 1);

    let entry = parsed
        .entry_for_index_position(2)
        .expect("test operation should succeed");
    assert_eq!(entry.xor_offset, 0);
    assert_eq!(entry.flags, 1);
    assert_eq!(entry.bitmap.words, ewah_literal_words(&[0b101]));

    let expected_cache = Some(vec![0x1111_1111, 0x2222_2222, 0x3333_3333]);
    assert_eq!(parsed.name_hash_cache, expected_cache);
}
6997
/// Parses a two-object SHA-256 bitmap index without a name-hash cache.
#[test]
fn parses_pack_bitmap_index_sha256() {
    let format = ObjectFormat::Sha256;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let bytes = pack_bitmap_index(
        format,
        2,
        PackBitmapIndex::OPTION_FULL_DAG,
        &pack_checksum,
        &[(0, 0, 0, &[0b11])],
        None,
    );

    let parsed =
        PackBitmapIndex::parse(&bytes, format, 2).expect("test operation should succeed");

    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.format, format);
    assert_eq!(parsed.pack_checksum, pack_checksum);
    assert_eq!(parsed.index_checksum.format(), format);
    assert_eq!(parsed.entries[0].object_position, 0);
    assert_eq!(parsed.name_hash_cache, None);
}
7020
/// Builds a two-commit repository with the `git` executable, repacks it with bitmaps
/// (`repack -adb`), and checks that the resulting `.bitmap` file parses and agrees with the
/// `.idx` file on the pack checksum.
///
/// The scratch directory is removed by a drop guard, so it is cleaned up even when an assertion
/// or `expect` in the body panics.
#[test]
fn parses_upstream_git_written_pack_bitmap_index() {
    /// Best-effort removal of a directory tree when the guard goes out of scope.
    struct RemoveDirOnDrop<'a>(&'a std::path::Path);

    impl Drop for RemoveDirOnDrop<'_> {
        fn drop(&mut self) {
            // Cleanup failures are deliberately ignored, exactly as the test always did.
            let _ = std::fs::remove_dir_all(self.0);
        }
    }

    let root = unique_temp_dir("git-pack-bitmap-upstream");
    fs::create_dir_all(&root).expect("test operation should succeed");
    let _cleanup = RemoveDirOnDrop(std::path::Path::new(&root));

    run_git_success(&root, &["init", "-q", "-b", "main"]);
    for message in ["one", "two"] {
        run_git_success(
            &root,
            &[
                "-c",
                "user.name=Example User",
                "-c",
                "user.email=example@example.invalid",
                "commit",
                "--allow-empty",
                "-q",
                "-m",
                message,
            ],
        );
    }
    run_git_success(&root, &["repack", "-adb"]);

    let pack_dir = root.join(".git").join("objects").join("pack");
    let idx_path = single_path_with_extension(&pack_dir, "idx");
    let bitmap_path = single_path_with_extension(&pack_dir, "bitmap");

    let index = PackIndex::parse(
        &fs::read(idx_path).expect("test operation should succeed"),
        ObjectFormat::Sha1,
    )
    .expect("test operation should succeed");
    let bitmap = PackBitmapIndex::parse(
        &fs::read(bitmap_path).expect("test operation should succeed"),
        ObjectFormat::Sha1,
        index.entries.len(),
    )
    .expect("test operation should succeed");

    assert_eq!(bitmap.pack_checksum, index.pack_checksum);
    assert!(!bitmap.entries.is_empty());
}
7075
/// Corrupts single header bytes (and finally the trailing checksum) of a valid bitmap index and
/// expects `PackBitmapIndex::parse` to fail each time.
#[test]
fn rejects_bad_pack_bitmap_index_header_and_checksum() {
    let format = ObjectFormat::Sha1;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let pristine = pack_bitmap_index(
        format,
        1,
        PackBitmapIndex::OPTION_FULL_DAG,
        &pack_checksum,
        &[(0, 0, 0, &[1])],
        None,
    );
    // Copies the valid index and overwrites one byte of the copy.
    let with_byte = |position: usize, value: u8| {
        let mut bytes = pristine.clone();
        bytes[position] = value;
        bytes
    };

    // Byte 0 changed; the checksum is left stale on purpose.
    let bad_signature = with_byte(0, b'X');
    assert!(PackBitmapIndex::parse(&bad_signature, format, 1).is_err());

    // Byte 5 set to 2, checksum refreshed.
    let mut bad_version = with_byte(5, 2);
    refresh_trailing_checksum(format, &mut bad_version);
    assert!(PackBitmapIndex::parse(&bad_version, format, 1).is_err());

    // Byte 7 set to 0x20, checksum refreshed.
    let mut bad_option = with_byte(7, 0x20);
    refresh_trailing_checksum(format, &mut bad_option);
    assert!(PackBitmapIndex::parse(&bad_option, format, 1).is_err());

    // Lowest bit of the final byte flipped.
    let mut bad_checksum = pristine;
    let tail = bad_checksum.len() - 1;
    bad_checksum[tail] ^= 1;
    assert!(PackBitmapIndex::parse(&bad_checksum, format, 1).is_err());
}
7108
/// Expects parse failures for a truncated bitmap index, an entry whose object position is 2 in a
/// two-object index, and a first entry with a non-zero XOR offset.
#[test]
fn rejects_bad_pack_bitmap_index_ewah_and_entries() {
    let format = ObjectFormat::Sha1;
    let options = PackBitmapIndex::OPTION_FULL_DAG;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");

    // Drop the trailing checksum plus one more byte, then re-seal what is left.
    let mut truncated = pack_bitmap_index(
        format,
        2,
        options,
        &pack_checksum,
        &[(0, 0, 0, &[0b01]), (1, 1, 0, &[0b11])],
        None,
    );
    let shortened_len = truncated.len() - format.raw_len() - 1;
    truncated.truncate(shortened_len);
    refresh_trailing_checksum(format, &mut truncated);
    assert!(PackBitmapIndex::parse(&truncated, format, 2).is_err());

    // Checked both as generated and again after the checksum has been refreshed.
    let mut out_of_range_position = pack_bitmap_index(
        format,
        2,
        options,
        &pack_checksum,
        &[(2, 0, 0, &[0b01])],
        None,
    );
    let as_generated = PackBitmapIndex::parse(&out_of_range_position, format, 2);
    assert!(as_generated.is_err());
    refresh_trailing_checksum(format, &mut out_of_range_position);
    let after_refresh = PackBitmapIndex::parse(&out_of_range_position, format, 2);
    assert!(after_refresh.is_err());

    let invalid_xor = pack_bitmap_index(
        format,
        2,
        options,
        &pack_checksum,
        &[(0, 1, 0, &[0b01])],
        None,
    );
    assert!(PackBitmapIndex::parse(&invalid_xor, format, 2).is_err());
}
7149
/// Parses a one-entry SHA-256 pack index and checks version, checksums, and the entry's offset
/// and CRC, then cross-checks the borrowed view against the owned parse.
#[test]
fn parses_single_entry_pack_index_sha256() {
    let format = ObjectFormat::Sha256;
    let oid = sley_core::object_id_for_bytes(format, "blob", b"hello sha256\n")
        .expect("test operation should succeed");
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let index = single_entry_index(format, oid, 0x1234_5678, 12, pack_checksum.clone());

    let parsed = PackIndex::parse(&index, format).expect("test operation should succeed");
    assert_eq!(parsed.version, 2);
    assert_eq!(parsed.pack_checksum, pack_checksum);
    assert_eq!(parsed.entries.len(), 1);

    let entry = parsed
        .find(&oid)
        .expect("test operation should succeed");
    assert_eq!(entry.offset, 12);
    assert_eq!(entry.crc32, 0x1234_5678);

    assert_eq!(parsed.index_checksum.format(), format);
    assert_pack_index_view_matches_owned(&index, format);
}
7185
/// SHA-1 instantiation of the shared deltify-and-round-trip check.
#[test]
fn write_packed_deltifies_similar_blobs_and_round_trips_sha1() {
    let format = ObjectFormat::Sha1;
    write_packed_deltifies_similar_blobs_and_round_trips(format);
}
7190
/// SHA-256 instantiation of the shared deltify-and-round-trip check.
#[test]
fn write_packed_deltifies_similar_blobs_and_round_trips_sha256() {
    let format = ObjectFormat::Sha256;
    write_packed_deltifies_similar_blobs_and_round_trips(format);
}
7195
/// Passing the same blob twice to `PackFile::write_packed` must produce an error.
#[test]
fn write_packed_rejects_duplicate_objects() {
    let blob = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
    let duplicated = [blob.clone(), blob];
    let outcome = PackFile::write_packed(&duplicated, ObjectFormat::Sha1);
    assert!(outcome.is_err());
}
7201
/// `write_packed_with_known_ids` must reject a repeated id as well as an id computed for a
/// different object format than the one requested.
#[test]
fn write_packed_with_known_ids_validates_ids_before_trusting_them() {
    let blob = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
    let sha1 = blob
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let sha256 = blob
        .object_id(ObjectFormat::Sha256)
        .expect("test operation should succeed");

    // The same (id, object) pair supplied twice.
    let duplicate = [&sha1, &sha1].map(|oid| PackInput { oid, object: &blob });
    let duplicate_outcome = PackFile::write_packed_with_known_ids(&duplicate, ObjectFormat::Sha1);
    assert!(duplicate_outcome.is_err());

    // A SHA-256 id handed to a SHA-1 pack write.
    let wrong_format = [PackInput {
        oid: &sha256,
        object: &blob,
    }];
    let wrong_format_outcome =
        PackFile::write_packed_with_known_ids(&wrong_format, ObjectFormat::Sha1);
    assert!(wrong_format_outcome.is_err());
}
7229
7230 fn write_packed_deltifies_similar_blobs_and_round_trips(format: ObjectFormat) {
7231 let objects = similar_blob_family(8);
7232 let packed =
7233 PackFile::write_packed(&objects, format).expect("test operation should succeed");
7234 let undeltified =
7235 PackFile::write_undeltified(&objects, format).expect("test operation should succeed");
7236
7237 assert!(
7240 packed.pack.len() < undeltified.pack.len(),
7241 "expected delta pack ({}) smaller than undeltified pack ({})",
7242 packed.pack.len(),
7243 undeltified.pack.len()
7244 );
7245
7246 let kinds = pack_entry_kinds(&packed.pack, format);
7248 let delta_count = kinds
7249 .iter()
7250 .filter(|kind| matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta))
7251 .count();
7252 assert!(
7253 delta_count >= 1,
7254 "expected at least one delta entry, found kinds {kinds:?}"
7255 );
7256
7257 let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
7259 assert_eq!(parsed.entries.len(), objects.len());
7260 for object in &objects {
7261 let oid = object
7262 .object_id(format)
7263 .expect("test operation should succeed");
7264 let found = parsed
7265 .entries
7266 .iter()
7267 .find(|entry| entry.entry.oid == oid)
7268 .unwrap_or_else(|| panic!("object {oid} missing from parsed pack"));
7269 assert_eq!(&found.object, object, "object {oid} did not round-trip");
7270 }
7271
7272 let index = PackIndex::parse(&packed.index, format).expect("test operation should succeed");
7274 assert_eq!(index.pack_checksum, packed.checksum);
7275 for object in &objects {
7276 let oid = object
7277 .object_id(format)
7278 .expect("test operation should succeed");
7279 assert!(index.find(&oid).is_some(), "index missing {oid}");
7280 }
7281 }
7282
/// With default options the writer must use ofs-delta entries, never ref-delta, and the result
/// must parse as a self-contained pack.
#[test]
fn write_packed_emits_ofs_delta_by_default() {
    let format = ObjectFormat::Sha1;
    let objects = similar_blob_family(6);
    let packed = PackFile::write_packed(&objects, format).expect("test operation should succeed");

    let kinds = pack_entry_kinds(&packed.pack, format);
    let uses_ofs = kinds.iter().any(|kind| *kind == PackObjectKind::OfsDelta);
    let uses_ref = kinds.iter().any(|kind| *kind == PackObjectKind::RefDelta);
    assert!(
        uses_ofs,
        "expected an ofs-delta entry by default, found {kinds:?}"
    );
    assert!(
        !uses_ref,
        "default self-contained pack must not use ref-delta, found {kinds:?}"
    );

    let reparsed = PackFile::parse(&packed.pack, format);
    assert!(reparsed.is_ok());
}
7300
/// With `prefer_ofs_delta` disabled the writer must use ref-delta entries only, and the pack
/// must still parse with every object present.
#[test]
fn write_packed_can_emit_ref_delta() {
    let format = ObjectFormat::Sha1;
    let objects = similar_blob_family(6);
    let options = PackWriteOptions::new().with_prefer_ofs_delta(false);
    let packed = PackFile::write_packed_with_options(&objects, format, &options)
        .expect("test operation should succeed");

    let kinds = pack_entry_kinds(&packed.pack, format);
    let uses_ref = kinds.iter().any(|kind| *kind == PackObjectKind::RefDelta);
    let uses_ofs = kinds.iter().any(|kind| *kind == PackObjectKind::OfsDelta);
    assert!(uses_ref, "expected a ref-delta entry, found {kinds:?}");
    assert!(
        !uses_ofs,
        "ref-delta mode must not emit ofs-delta, found {kinds:?}"
    );

    let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
    assert_eq!(parsed.entries.len(), objects.len());
}
7323
/// For several depth limits, packs a 20-object incremental chain with a window of 20 and checks
/// that no delta chain exceeds the limit and that every object still round-trips.
#[test]
fn write_packed_bounds_delta_chain_depth() {
    let format = ObjectFormat::Sha1;
    let objects = incremental_blob_chain(20);

    for max_depth in [1usize, 2, 5] {
        let options = PackWriteOptions::new()
            .with_window(20)
            .with_depth(max_depth);
        let packed = PackFile::write_packed_with_options(&objects, format, &options)
            .expect("test operation should succeed");

        // An empty depth list counts as depth 0.
        let observed = pack_entry_depths(&packed.pack, format)
            .into_iter()
            .max()
            .unwrap_or(0);
        assert!(
            observed <= max_depth,
            "max chain depth {observed} exceeded bound {max_depth}"
        );

        let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
        for object in &objects {
            let oid = object
                .object_id(format)
                .expect("test operation should succeed");
            let mut matching = parsed.entries.iter().filter(|entry| entry.entry.oid == oid);
            let found = matching.next().expect("test operation should succeed");
            assert_eq!(&found.object, object);
        }
    }
}
7362
/// A depth limit of 0 must leave no ofs-delta or ref-delta entries in the pack.
#[test]
fn write_packed_depth_zero_stores_everything_undeltified() {
    let format = ObjectFormat::Sha1;
    let objects = similar_blob_family(5);
    let options = PackWriteOptions::new().with_depth(0);
    let packed = PackFile::write_packed_with_options(&objects, format, &options)
        .expect("test operation should succeed");

    let kinds = pack_entry_kinds(&packed.pack, format);
    let any_delta = kinds
        .iter()
        .any(|kind| matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta));
    assert!(!any_delta, "depth 0 must disable deltas, found {kinds:?}");
}
7377
/// SHA-1 instantiation of the shared thin-pack round-trip check.
#[test]
fn write_thin_uses_external_base_and_round_trips_sha1() {
    let format = ObjectFormat::Sha1;
    write_thin_uses_external_base_and_round_trips(format);
}
7382
/// SHA-256 instantiation of the shared thin-pack round-trip check.
#[test]
fn write_thin_uses_external_base_and_round_trips_sha256() {
    let format = ObjectFormat::Sha256;
    write_thin_uses_external_base_and_round_trips(format);
}
7387
7388 fn write_thin_uses_external_base_and_round_trips(format: ObjectFormat) {
7389 let base = blob_with_marker("EXTERNAL-BASE");
7392 let target = blob_with_marker("EXTERNAL-TARGET");
7393 let base_oid = base
7394 .object_id(format)
7395 .expect("test operation should succeed");
7396
7397 let mut external = HashMap::new();
7398 external.insert(base_oid, base.clone());
7399 let packed = PackFile::write_thin(std::slice::from_ref(&target), format, external)
7400 .expect("test operation should succeed");
7401
7402 let kinds = pack_entry_kinds(&packed.pack, format);
7404 assert_eq!(kinds, vec![PackObjectKind::RefDelta]);
7405
7406 let mut offset = 12usize;
7408 let header =
7409 parse_entry_header(&packed.pack, &mut offset).expect("test operation should succeed");
7410 assert_eq!(header.kind, PackObjectKind::RefDelta);
7411 let referenced =
7412 ObjectId::from_raw(format, &packed.pack[offset..offset + format.raw_len()])
7413 .expect("test operation should succeed");
7414 assert_eq!(referenced, base_oid);
7415
7416 assert!(PackFile::parse(&packed.pack, format).is_err());
7418
7419 let parsed = PackFile::parse_thin(&packed.pack, format, |oid| {
7421 if oid == &base_oid {
7422 Ok(Some(base.clone()))
7423 } else {
7424 Ok(None)
7425 }
7426 })
7427 .expect("test operation should succeed");
7428 assert_eq!(parsed.entries.len(), 1);
7429 assert_eq!(parsed.entries[0].object, target);
7430 }
7431
/// Packs a blob, an empty tree, and a commit-typed object, then checks that every object id is
/// present after parsing the pack back.
#[test]
fn write_packed_preserves_distinct_objects_with_no_similarity() {
    let format = ObjectFormat::Sha1;
    let objects = vec![
        EncodedObject::new(ObjectType::Blob, b"alpha distinct\n".to_vec()),
        EncodedObject::new(ObjectType::Tree, Vec::<u8>::new()),
        EncodedObject::new(ObjectType::Commit, b"tree 0000\n".to_vec()),
    ];

    let packed = PackFile::write_packed(&objects, format).expect("test operation should succeed");
    let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");

    assert_eq!(parsed.entries.len(), objects.len());
    for object in &objects {
        let oid = object
            .object_id(format)
            .expect("test operation should succeed");
        let present = parsed.entries.iter().any(|entry| entry.entry.oid == oid);
        assert!(present);
    }
}
7453
7454 fn similar_blob_family(count: usize) -> Vec<EncodedObject> {
7458 let mut common_head = Vec::new();
7459 for _ in 0..200 {
7460 common_head.extend_from_slice(b"shared header line for delta testing\n");
7461 }
7462 let mut common_tail = Vec::new();
7463 for _ in 0..200 {
7464 common_tail.extend_from_slice(b"shared trailer line for delta testing\n");
7465 }
7466 (0..count)
7467 .map(|idx| {
7468 let mut body = common_head.clone();
7469 body.extend_from_slice(format!("UNIQUE MIDDLE MARKER NUMBER {idx}\n").as_bytes());
7470 body.extend_from_slice(&common_tail);
7471 EncodedObject::new(ObjectType::Blob, body)
7472 })
7473 .collect()
7474 }
7475
7476 fn incremental_blob_chain(count: usize) -> Vec<EncodedObject> {
7479 let mut body = Vec::new();
7480 for _ in 0..100 {
7481 body.extend_from_slice(b"baseline content shared across the whole chain\n");
7482 }
7483 let mut objects = Vec::with_capacity(count);
7484 for idx in 0..count {
7485 body.extend_from_slice(format!("appended unique line {idx}\n").as_bytes());
7486 objects.push(EncodedObject::new(ObjectType::Blob, body.clone()));
7487 }
7488 objects
7489 }
7490
7491 fn blob_with_marker(marker: &str) -> EncodedObject {
7492 let mut body = Vec::new();
7493 for _ in 0..150 {
7494 body.extend_from_slice(b"common body shared between base and target\n");
7495 }
7496 body.extend_from_slice(marker.as_bytes());
7497 body.push(b'\n');
7498 for _ in 0..150 {
7499 body.extend_from_slice(b"more common body shared between objects\n");
7500 }
7501 EncodedObject::new(ObjectType::Blob, body)
7502 }
7503
7504 fn pack_entry_kinds(pack: &[u8], format: ObjectFormat) -> Vec<PackObjectKind> {
7506 pack_entry_descriptors(pack, format)
7507 .into_iter()
7508 .map(|descriptor| descriptor.kind)
7509 .collect()
7510 }
7511
7512 fn pack_entry_depths(pack: &[u8], format: ObjectFormat) -> Vec<usize> {
7516 let descriptors = pack_entry_descriptors(pack, format);
7517 let mut depth_by_offset: HashMap<u64, usize> = HashMap::new();
7518 let mut depths = Vec::with_capacity(descriptors.len());
7519 for descriptor in &descriptors {
7520 let depth = match &descriptor.base {
7521 EntryBase::None => 0,
7522 EntryBase::Offset(base_offset) => {
7523 depth_by_offset.get(base_offset).copied().unwrap_or(0) + 1
7524 }
7525 EntryBase::Ref => 1,
7529 };
7530 depth_by_offset.insert(descriptor.offset, depth);
7531 depths.push(depth);
7532 }
7533 depths
7534 }
7535
7536 struct EntryDescriptor {
7537 offset: u64,
7538 kind: PackObjectKind,
7539 base: EntryBase,
7540 }
7541
/// How a pack entry refers to its delta base.
enum EntryBase {
    /// The entry is not a delta.
    None,
    /// The base is named by object id (ref-delta); the id itself is not kept.
    Ref,
    /// The base is the entry at this pack offset (ofs-delta).
    Offset(u64),
}
7547
7548 fn pack_entry_descriptors(pack: &[u8], format: ObjectFormat) -> Vec<EntryDescriptor> {
7549 let trailer_offset = pack.len() - format.raw_len();
7550 let count = u32_be(&pack[8..12]) as usize;
7551 let mut offset = 12usize;
7552 let mut descriptors = Vec::with_capacity(count);
7553 for _ in 0..count {
7554 let entry_offset = offset as u64;
7555 let header =
7556 parse_entry_header(pack, &mut offset).expect("test operation should succeed");
7557 let base = match header.kind {
7558 PackObjectKind::OfsDelta => {
7559 let base_offset = parse_ofs_delta_base_offset(pack, &mut offset, entry_offset)
7560 .expect("test operation should succeed");
7561 EntryBase::Offset(base_offset)
7562 }
7563 PackObjectKind::RefDelta => {
7564 offset += format.raw_len();
7565 EntryBase::Ref
7566 }
7567 _ => EntryBase::None,
7568 };
7569 let mut decoder = ZlibDecoder::new(&pack[offset..trailer_offset]);
7570 let mut body = Vec::new();
7571 decoder
7572 .read_to_end(&mut body)
7573 .expect("test operation should succeed");
7574 offset += decoder.total_in() as usize;
7575 descriptors.push(EntryDescriptor {
7576 offset: entry_offset,
7577 kind: header.kind,
7578 base,
7579 });
7580 }
7581 descriptors
7582 }
7583
7584 fn similar_blob_objects() -> (EncodedObject, EncodedObject) {
7585 let mut base = Vec::new();
7586 for _ in 0..300 {
7587 base.extend_from_slice(b"common payload\n");
7588 }
7589 base.extend_from_slice(b"base\n");
7590 let mut changed = Vec::new();
7591 for _ in 0..300 {
7592 changed.extend_from_slice(b"common payload\n");
7593 }
7594 changed.extend_from_slice(b"changed\n");
7595 (
7596 EncodedObject::new(ObjectType::Blob, base),
7597 EncodedObject::new(ObjectType::Blob, changed),
7598 )
7599 }
7600
7601 fn single_object_pack(format: ObjectFormat, object_type: ObjectType, body: &[u8]) -> Vec<u8> {
7602 let mut pack = Vec::new();
7603 pack.extend_from_slice(b"PACK");
7604 pack.extend_from_slice(&2u32.to_be_bytes());
7605 pack.extend_from_slice(&1u32.to_be_bytes());
7606 write_entry_header(&mut pack, object_type, body.len() as u64);
7607 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7608 encoder
7609 .write_all(body)
7610 .expect("test operation should succeed");
7611 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7612 let checksum =
7613 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7614 pack.extend_from_slice(checksum.as_bytes());
7615 pack
7616 }
7617
/// Selects which delta entry type `two_object_delta_pack` emits.
#[derive(Debug, Clone, Copy)]
enum DeltaKind {
    /// Emit an entry with type code 6 that names its base by relative offset.
    Offset,
    /// Emit an entry with type code 7 that names its base by object id.
    Ref,
}
7623
7624 fn two_object_delta_pack(
7625 format: ObjectFormat,
7626 base: &[u8],
7627 result: &[u8],
7628 delta_kind: DeltaKind,
7629 ) -> Vec<u8> {
7630 let mut pack = Vec::new();
7631 pack.extend_from_slice(b"PACK");
7632 pack.extend_from_slice(&2u32.to_be_bytes());
7633 pack.extend_from_slice(&2u32.to_be_bytes());
7634
7635 let base_offset = pack.len();
7636 write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
7637 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7638 encoder
7639 .write_all(base)
7640 .expect("test operation should succeed");
7641 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7642
7643 let delta = append_suffix_delta(base, result);
7644 let delta_offset = pack.len();
7645 write_pack_entry_header_kind(
7646 &mut pack,
7647 match delta_kind {
7648 DeltaKind::Offset => 6,
7649 DeltaKind::Ref => 7,
7650 },
7651 delta.len() as u64,
7652 );
7653 match delta_kind {
7654 DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
7655 DeltaKind::Ref => {
7656 let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
7657 .expect("test operation should succeed");
7658 pack.extend_from_slice(base_oid.as_bytes());
7659 }
7660 }
7661 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7662 encoder
7663 .write_all(&delta)
7664 .expect("test operation should succeed");
7665 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7666
7667 let checksum =
7668 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7669 pack.extend_from_slice(checksum.as_bytes());
7670 pack
7671 }
7672
7673 fn thin_ref_delta_pack(format: ObjectFormat, base: &[u8], result: &[u8]) -> Vec<u8> {
7674 let mut pack = Vec::new();
7675 pack.extend_from_slice(b"PACK");
7676 pack.extend_from_slice(&2u32.to_be_bytes());
7677 pack.extend_from_slice(&1u32.to_be_bytes());
7678
7679 let delta = append_suffix_delta(base, result);
7680 write_pack_entry_header_kind(&mut pack, 7, delta.len() as u64);
7681 let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
7682 .expect("test operation should succeed");
7683 pack.extend_from_slice(base_oid.as_bytes());
7684 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7685 encoder
7686 .write_all(&delta)
7687 .expect("test operation should succeed");
7688 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7689
7690 let checksum =
7691 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7692 pack.extend_from_slice(checksum.as_bytes());
7693 pack
7694 }
7695
/// Returns a path under the system temp directory named
/// `sley-{name}-{pid}-{nanoseconds since the Unix epoch}`.
///
/// Only the path is computed; nothing is created on disk.
fn unique_temp_dir(name: &str) -> PathBuf {
    let since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("test operation should succeed");
    let nanos = since_epoch.as_nanos();
    let pid = std::process::id();
    let mut dir = std::env::temp_dir();
    dir.push(format!("sley-{name}-{pid}-{nanos}"));
    dir
}
7703
/// Runs `git` with `args` in `cwd` and panics unless it exits successfully.
///
/// The panic message includes the exit code plus captured stdout and stderr.
fn run_git_success(cwd: &Path, args: &[&str]) {
    let mut git = Command::new("git");
    git.current_dir(cwd).args(args);
    let output = match git.output() {
        Ok(output) => output,
        Err(err) => panic!("failed to run git {args:?}: {err}"),
    };
    if output.status.success() {
        return;
    }
    panic!(
        "git {args:?} failed with status {:?}\nstdout:\n{}\nstderr:\n{}",
        output.status.code(),
        String::from_utf8_lossy(&output.stdout),
        String::from_utf8_lossy(&output.stderr)
    );
}
7718
/// Returns the one entry of `dir` whose extension equals `extension`.
///
/// Panics if the directory cannot be read or if the number of matching
/// entries is not exactly one.
fn single_path_with_extension(dir: &Path, extension: &str) -> PathBuf {
    let mut matches = Vec::new();
    for entry in fs::read_dir(dir).expect("test operation should succeed") {
        let candidate = entry.expect("test operation should succeed").path();
        let has_extension =
            candidate.extension().and_then(|ext| ext.to_str()) == Some(extension);
        if has_extension {
            matches.push(candidate);
        }
    }
    assert_eq!(matches.len(), 1, "expected one .{extension} file");
    matches.remove(0)
}
7728
7729 fn pack_bitmap_index(
7730 format: ObjectFormat,
7731 object_count: u32,
7732 options: u16,
7733 pack_checksum: &ObjectId,
7734 entries: &[(u32, u8, u8, &[u64])],
7735 name_hash_cache: Option<&[u32]>,
7736 ) -> Vec<u8> {
7737 let mut out = Vec::new();
7738 out.extend_from_slice(b"BITM");
7739 out.extend_from_slice(&1u16.to_be_bytes());
7740 out.extend_from_slice(&options.to_be_bytes());
7741 out.extend_from_slice(&(entries.len() as u32).to_be_bytes());
7742 out.extend_from_slice(pack_checksum.as_bytes());
7743 write_test_ewah(&mut out, object_count, &[0b001]);
7744 write_test_ewah(&mut out, object_count, &[0b010]);
7745 write_test_ewah(&mut out, object_count, &[0b100]);
7746 write_test_ewah(&mut out, object_count, &[0]);
7747 for (position, xor_offset, flags, words) in entries {
7748 out.extend_from_slice(&position.to_be_bytes());
7749 out.push(*xor_offset);
7750 out.push(*flags);
7751 write_test_ewah(&mut out, object_count, words);
7752 }
7753 if let Some(cache) = name_hash_cache {
7754 for value in cache {
7755 out.extend_from_slice(&value.to_be_bytes());
7756 }
7757 }
7758 let checksum =
7759 sley_core::digest_bytes(format, &out).expect("test operation should succeed");
7760 out.extend_from_slice(checksum.as_bytes());
7761 out
7762 }
7763
7764 fn write_test_ewah(out: &mut Vec<u8>, bit_size: u32, literals: &[u64]) {
7765 out.extend_from_slice(&bit_size.to_be_bytes());
7766 let words = ewah_literal_words(literals);
7767 out.extend_from_slice(&(words.len() as u32).to_be_bytes());
7768 for word in words {
7769 out.extend_from_slice(&word.to_be_bytes());
7770 }
7771 out.extend_from_slice(&0u32.to_be_bytes());
7772 }
7773
/// Returns one header word whose bits from 33 upward hold `literals.len()`,
/// followed by the literal words themselves.
fn ewah_literal_words(literals: &[u64]) -> Vec<u64> {
    let header = (literals.len() as u64) << 33;
    std::iter::once(header)
        .chain(literals.iter().copied())
        .collect()
}
7780
7781 fn refresh_trailing_checksum(format: ObjectFormat, bytes: &mut [u8]) {
7782 let checksum_offset = bytes.len() - format.raw_len();
7783 let checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])
7784 .expect("test operation should succeed");
7785 bytes[checksum_offset..].copy_from_slice(checksum.as_bytes());
7786 }
7787
7788 fn append_suffix_delta(base: &[u8], result: &[u8]) -> Vec<u8> {
7789 assert!(result.starts_with(base));
7790 let suffix = &result[base.len()..];
7791 assert!(base.len() < 0x10000);
7792 assert!(suffix.len() < 0x80);
7793 let mut delta = Vec::new();
7794 write_delta_varint(&mut delta, base.len() as u64);
7795 write_delta_varint(&mut delta, result.len() as u64);
7796 delta.push(0x90);
7797 delta.push(base.len() as u8);
7798 delta.push(suffix.len() as u8);
7799 delta.extend_from_slice(suffix);
7800 delta
7801 }
7802
/// Appends `value` to `out` as a little-endian base-128 varint: seven bits
/// per byte, least significant group first, with bit 0x80 set on every byte
/// except the last.
fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
    while value >= 0x80 {
        out.push(((value as u8) & 0x7f) | 0x80);
        value >>= 7;
    }
    // The final group always fits in seven bits and carries no continuation bit.
    out.push(value as u8);
}
7816
/// Appends a pack entry header for a raw `type_code` and inflated `size`.
///
/// The first byte holds the type code in bits 4 and up and the low four size
/// bits. The remaining size bits follow seven per byte, least significant
/// group first. Bit 0x80 marks every byte that is followed by another.
fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, mut size: u64) {
    let first = (type_code << 4) | ((size as u8) & 0x0f);
    size >>= 4;
    out.push(if size == 0 { first } else { first | 0x80 });
    while size != 0 {
        let group = (size as u8) & 0x7f;
        size >>= 7;
        out.push(if size == 0 { group } else { group | 0x80 });
    }
}
7833
/// Appends the ofs-delta base offset `relative` (distance back from the delta
/// entry to its base) using the pack format's offset encoding.
///
/// Groups of seven bits are written most significant first, with bit 0x80 set
/// on every byte except the last. Each continuation group stores its value
/// minus one. Offsets below 0x80 still encode as a single byte.
fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: usize) {
    let mut remaining = relative;
    // Groups are produced least significant first, then emitted in reverse.
    let mut groups = vec![(remaining & 0x7f) as u8];
    remaining >>= 7;
    while remaining != 0 {
        // Continuation groups are biased by one.
        remaining -= 1;
        groups.push(0x80 | (remaining & 0x7f) as u8);
        remaining >>= 7;
    }
    out.extend(groups.iter().rev());
}
7838
7839 fn single_entry_index(
7840 format: ObjectFormat,
7841 oid: ObjectId,
7842 crc32: u32,
7843 offset: u32,
7844 pack_checksum: ObjectId,
7845 ) -> Vec<u8> {
7846 let mut index = Vec::new();
7847 index.extend_from_slice(&[0xff, b't', b'O', b'c']);
7848 index.extend_from_slice(&2u32.to_be_bytes());
7849 for idx in 0..256 {
7850 let count = if idx >= usize::from(oid.as_bytes()[0]) {
7851 1u32
7852 } else {
7853 0u32
7854 };
7855 index.extend_from_slice(&count.to_be_bytes());
7856 }
7857 index.extend_from_slice(oid.as_bytes());
7858 index.extend_from_slice(&crc32.to_be_bytes());
7859 index.extend_from_slice(&offset.to_be_bytes());
7860 index.extend_from_slice(pack_checksum.as_bytes());
7861 let checksum =
7862 sley_core::digest_bytes(format, &index).expect("test operation should succeed");
7863 index.extend_from_slice(checksum.as_bytes());
7864 index
7865 }
7866
7867 fn single_entry_index_v1(
7868 format: ObjectFormat,
7869 oid: ObjectId,
7870 offset: u32,
7871 pack_checksum: ObjectId,
7872 ) -> Vec<u8> {
7873 let mut index = Vec::new();
7874 for idx in 0..256 {
7875 let count = if idx >= usize::from(oid.as_bytes()[0]) {
7876 1u32
7877 } else {
7878 0u32
7879 };
7880 index.extend_from_slice(&count.to_be_bytes());
7881 }
7882 index.extend_from_slice(&offset.to_be_bytes());
7883 index.extend_from_slice(oid.as_bytes());
7884 index.extend_from_slice(pack_checksum.as_bytes());
7885 let checksum =
7886 sley_core::digest_bytes(format, &index).expect("test operation should succeed");
7887 index.extend_from_slice(checksum.as_bytes());
7888 index
7889 }
7890
7891 fn pack_reverse_index(
7892 format: ObjectFormat,
7893 positions: &[u32],
7894 pack_checksum: ObjectId,
7895 ) -> Vec<u8> {
7896 let mut reverse_index = Vec::new();
7897 reverse_index.extend_from_slice(b"RIDX");
7898 reverse_index.extend_from_slice(&1u32.to_be_bytes());
7899 reverse_index.extend_from_slice(&hash_function_id(format).to_be_bytes());
7900 for position in positions {
7901 reverse_index.extend_from_slice(&position.to_be_bytes());
7902 }
7903 reverse_index.extend_from_slice(pack_checksum.as_bytes());
7904 let checksum =
7905 sley_core::digest_bytes(format, &reverse_index).expect("test operation should succeed");
7906 reverse_index.extend_from_slice(checksum.as_bytes());
7907 reverse_index
7908 }
7909
7910 fn pack_mtimes(format: ObjectFormat, mtimes: &[u32], pack_checksum: ObjectId) -> Vec<u8> {
7911 let mut out = Vec::new();
7912 out.extend_from_slice(b"MTME");
7913 out.extend_from_slice(&1u32.to_be_bytes());
7914 out.extend_from_slice(&hash_function_id(format).to_be_bytes());
7915 for mtime in mtimes {
7916 out.extend_from_slice(&mtime.to_be_bytes());
7917 }
7918 out.extend_from_slice(pack_checksum.as_bytes());
7919 let checksum =
7920 sley_core::digest_bytes(format, &out).expect("test operation should succeed");
7921 out.extend_from_slice(checksum.as_bytes());
7922 out
7923 }
7924
7925 fn midx_chunks_with_pack_names(
7926 _format: ObjectFormat,
7927 pack_names: Vec<u8>,
7928 entries: &[(ObjectId, u32, u64)],
7929 ) -> Vec<([u8; 4], Vec<u8>)> {
7930 let mut entries = entries.to_vec();
7931 entries.sort_by(|left, right| left.0.as_bytes().cmp(right.0.as_bytes()));
7932 let object_ids: Vec<ObjectId> = entries.iter().map(|entry| entry.0).collect();
7933 let mut large_offsets = Vec::new();
7934 let mut chunks = vec![
7935 (*b"PNAM", pack_names),
7936 (*b"OIDF", midx_oid_fanout(&object_ids)),
7937 (*b"OIDL", midx_oid_lookup(&object_ids)),
7938 (
7939 *b"OOFF",
7940 midx_ooff_entries(
7941 &entries
7942 .iter()
7943 .map(|(_oid, pack_int_id, offset)| (*pack_int_id, *offset))
7944 .collect::<Vec<_>>(),
7945 &mut large_offsets,
7946 ),
7947 ),
7948 ];
7949 if !large_offsets.is_empty() {
7950 chunks.push((*b"LOFF", large_offsets));
7951 }
7952 chunks
7953 }
7954
7955 fn midx_oid_fanout(object_ids: &[ObjectId]) -> Vec<u8> {
7956 let mut counts = [0u32; 256];
7957 for oid in object_ids {
7958 counts[oid.as_bytes()[0] as usize] += 1;
7959 }
7960 let mut running = 0u32;
7961 let mut out = Vec::new();
7962 for count in counts {
7963 running += count;
7964 out.extend_from_slice(&running.to_be_bytes());
7965 }
7966 out
7967 }
7968
7969 fn midx_oid_lookup(object_ids: &[ObjectId]) -> Vec<u8> {
7970 let mut out = Vec::new();
7971 for oid in object_ids {
7972 out.extend_from_slice(oid.as_bytes());
7973 }
7974 out
7975 }
7976
/// Encodes `(pack id, offset)` pairs as eight-byte rows: a big-endian pack id
/// followed by a big-endian 32-bit offset word.
///
/// Offsets below `0x8000_0000` are stored directly. Larger offsets are
/// appended to `large_offsets` as big-endian `u64` values, and the row stores
/// `0x8000_0000 | slot`, where `slot` is the value's index in that table.
fn midx_ooff_entries(entries: &[(u32, u64)], large_offsets: &mut Vec<u8>) -> Vec<u8> {
    let mut rows = Vec::new();
    for &(pack_int_id, offset) in entries {
        let offset_word = if offset < 0x8000_0000 {
            offset as u32
        } else {
            let slot = (large_offsets.len() / 8) as u32;
            large_offsets.extend_from_slice(&offset.to_be_bytes());
            0x8000_0000 | slot
        };
        rows.extend_from_slice(&pack_int_id.to_be_bytes());
        rows.extend_from_slice(&offset_word.to_be_bytes());
    }
    rows
}
7991
/// Serialises `values` as consecutive big-endian `u32` words.
fn midx_u32_table(values: &[u32]) -> Vec<u8> {
    values
        .iter()
        .flat_map(|value| value.to_be_bytes())
        .collect()
}
7999
/// Serialises each `(bitmap_pos, bitmap_nr)` pair as two consecutive
/// big-endian `u32` words.
fn midx_bitmap_packs(entries: &[(u32, u32)]) -> Vec<u8> {
    let mut table = Vec::with_capacity(entries.len() * 8);
    for &(bitmap_pos, bitmap_nr) in entries {
        for word in [bitmap_pos, bitmap_nr] {
            table.extend_from_slice(&word.to_be_bytes());
        }
    }
    table
}
8008
8009 fn multi_pack_index(
8010 format: ObjectFormat,
8011 version: u8,
8012 pack_count: u32,
8013 chunks: &[([u8; 4], Vec<u8>)],
8014 ) -> Vec<u8> {
8015 let lookup_len = (chunks.len() + 1) * 12;
8016 let mut out = Vec::new();
8017 out.extend_from_slice(b"MIDX");
8018 out.push(version);
8019 out.push(hash_function_id(format) as u8);
8020 out.push(chunks.len() as u8);
8021 out.push(0);
8022 out.extend_from_slice(&pack_count.to_be_bytes());
8023 let mut chunk_offset = (12 + lookup_len) as u64;
8024 for (id, data) in chunks {
8025 out.extend_from_slice(id);
8026 out.extend_from_slice(&chunk_offset.to_be_bytes());
8027 chunk_offset += data.len() as u64;
8028 }
8029 out.extend_from_slice(&[0, 0, 0, 0]);
8030 out.extend_from_slice(&chunk_offset.to_be_bytes());
8031 for (_id, data) in chunks {
8032 out.extend_from_slice(data);
8033 }
8034 let checksum =
8035 sley_core::digest_bytes(format, &out).expect("test operation should succeed");
8036 out.extend_from_slice(checksum.as_bytes());
8037 out
8038 }
8039
8040 fn pack_checksum_sha1() -> ObjectId {
8043 sley_core::digest_bytes(ObjectFormat::Sha1, b"pack").expect("test operation should succeed")
8044 }
8045
8046 fn parse_ewah_bytes(bytes: &[u8]) -> EwahBitmap {
8047 let mut offset = 0usize;
8050 let checksum_offset = bytes.len();
8051 parse_bitmap_ewah(bytes, &mut offset, checksum_offset, 0)
8052 .expect("test operation should succeed")
8053 }
8054
/// A single literal word must encode to the same words the test helper
/// `ewah_literal_words` produces.
#[test]
fn ewah_encodes_single_literal_word_matching_helper() {
    let literal = [0b101u64];
    let bitmap = EwahBitmap::from_words(64, &literal).expect("test operation should succeed");
    let expected_words = ewah_literal_words(&literal);
    assert_eq!(bitmap.words, expected_words);
    assert_eq!(bitmap.rlw_position, 0);
    assert_eq!(bitmap.bit_size, 64);
}
8065
/// Serialised EWAH bytes must be big-endian: bit size, word count, the two
/// words, then a trailing zero `u32`.
#[test]
fn ewah_byte_layout_is_big_endian() {
    let literal = 0x0102_0304_0506_0708u64;
    let bitmap = EwahBitmap::from_words(64, &[literal]).expect("test operation should succeed");
    let serialised = bitmap.to_bytes();

    let expected = [
        &64u32.to_be_bytes()[..],
        &2u32.to_be_bytes()[..],
        &(1u64 << 33).to_be_bytes()[..],
        &literal.to_be_bytes()[..],
        &0u32.to_be_bytes()[..],
    ]
    .concat();
    assert_eq!(serialised, expected);
}
8079
/// An empty bitmap must serialise to twelve zero bytes and reparse to an
/// equal bitmap with no set positions.
#[test]
fn ewah_empty_bitmap_serialises_like_git() {
    let empty = EwahBitmap::empty();
    let serialised = empty.to_bytes();
    assert_eq!(serialised, vec![0u8; 12]);

    let reparsed = parse_ewah_bytes(&serialised);
    assert_eq!(reparsed, empty);
    let positions = reparsed
        .to_positions()
        .expect("test operation should succeed");
    assert!(positions.is_empty());
}
8096
/// Three all-zero words followed by one literal must collapse into a single
/// run-length word plus the literal.
#[test]
fn ewah_compresses_clean_zero_run() {
    let input = [0, 0, 0, 0b1];
    let bitmap = EwahBitmap::from_words(256, &input).expect("test operation should succeed");
    assert_eq!(bitmap.words.len(), 2, "expected one RLW plus one literal");

    let rlw = bitmap.words[0];
    let run_bit = rlw & 1;
    let run_length = (rlw >> 1) & 0xffff_ffff;
    let literal_count = rlw >> 33;
    assert_eq!(run_bit, 0, "run bit should be zero");
    assert_eq!(run_length, 3, "run length should be 3");
    assert_eq!(literal_count, 1, "literal length should be 1");
    assert_eq!(bitmap.words[1], 0b1);
}
8110
/// Three all-ones words must collapse into a single run-length word with the
/// run bit set and no literals.
#[test]
fn ewah_compresses_clean_ones_run() {
    let input = [u64::MAX; 3];
    let bitmap = EwahBitmap::from_words(192, &input).expect("test operation should succeed");
    assert_eq!(bitmap.words.len(), 1);

    let rlw = bitmap.words[0];
    let run_bit = rlw & 1;
    let run_length = (rlw >> 1) & 0xffff_ffff;
    let literal_count = rlw >> 33;
    assert_eq!(run_bit, 1, "run bit should be one");
    assert_eq!(run_length, 3, "run length should be 3");
    assert_eq!(literal_count, 0, "no literals");
}
8122
/// A mix of zero runs, literals and one-runs must decode to the original
/// words.
#[test]
fn ewah_run_then_literal_then_run_roundtrips() {
    let original = vec![0, 0, 0xdead_beef, u64::MAX, u64::MAX, 0, 0xabc];
    let bit_size = (original.len() * 64) as u32;
    let bitmap =
        EwahBitmap::from_words(bit_size, &original).expect("test operation should succeed");
    let decoded = bitmap.to_words().expect("test operation should succeed");
    assert_eq!(decoded, original);
}
8133
/// With a bit size of 1, the all-zero words after the first must not survive
/// a round trip.
#[test]
fn ewah_drops_trailing_clean_zero_words() {
    let padded = vec![0b1, 0, 0, 0];
    let bitmap = EwahBitmap::from_words(1, &padded).expect("test operation should succeed");
    assert_eq!(bitmap.bit_size, 1);
    let decoded = bitmap.to_words().expect("test operation should succeed");
    assert_eq!(decoded, vec![0b1]);
}
8147
/// Positions fed to `from_positions` must come back from `to_positions`
/// (compared after sorting).
#[test]
fn ewah_from_positions_roundtrips_via_positions() {
    let positions = [0u32, 1, 63, 64, 65, 200, 511];
    let bitmap =
        EwahBitmap::from_positions(512, &positions).expect("test operation should succeed");
    let mut roundtripped = bitmap
        .to_positions()
        .expect("test operation should succeed");
    roundtripped.sort_unstable();
    assert_eq!(roundtripped, positions);
}
8157
/// Duplicate, unordered input positions must decode as a sorted,
/// duplicate-free list.
#[test]
fn ewah_from_positions_dedupes_and_orders() {
    let noisy = [100, 5, 100, 5, 5];
    let bitmap = EwahBitmap::from_positions(128, &noisy).expect("test operation should succeed");
    let decoded = bitmap
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(decoded, vec![5, 100]);
}
8167
/// A zero run longer than `0xffff_ffff` words must be split across two
/// run-length words, with the RLW position pointing at the second.
#[test]
fn ewah_huge_zero_run_spans_multiple_rlws() {
    let mut builder = EwahBuilder::new(0);
    builder.add_empty_words(false, 0xffff_ffff);
    builder.add_empty_words(false, 5);
    let bitmap = builder.finish().expect("test operation should succeed");

    assert_eq!(bitmap.words.len(), 2, "run split across two RLWs");
    let first_rlw = bitmap.words[0];
    assert_eq!((first_rlw >> 1) & 0xffff_ffff, 0xffff_ffff);
    let second_rlw = bitmap.words[1];
    assert_eq!(second_rlw & 1, 0);
    assert_eq!((second_rlw >> 1) & 0xffff_ffff, 5);
    assert_eq!(bitmap.rlw_position, 1);
}
8184
/// A bit size of 65 cannot be backed by a single 64-bit word and must be
/// rejected.
#[test]
fn ewah_from_words_rejects_oversized_bit_size() {
    let outcome = EwahBitmap::from_words(65, &[0]);
    assert!(outcome.is_err());
}
8190
/// Position 64 lies outside a 64-bit bitmap and must be rejected.
#[test]
fn ewah_from_positions_rejects_out_of_range() {
    let outcome = EwahBitmap::from_positions(64, &[64]);
    assert!(outcome.is_err());
}
8195
/// Serialising a bitmap and parsing the bytes back must give an equal bitmap
/// that decodes to the original words.
#[test]
fn ewah_serialised_bytes_reparse_to_equal_bitmap() {
    let original = vec![0, u64::MAX, 0x1234_5678_9abc_def0, 0, 0, 0xff];
    let bit_size = (original.len() * 64) as u32;
    let bitmap =
        EwahBitmap::from_words(bit_size, &original).expect("test operation should succeed");

    let serialised = bitmap.to_bytes();
    let reparsed = parse_ewah_bytes(&serialised);
    assert_eq!(reparsed, bitmap);
    let decoded = reparsed.to_words().expect("test operation should succeed");
    assert_eq!(decoded, original);
}
8211
/// Writes a SHA-1 bitmap for a commit/tree/blob pack with one selected commit
/// and checks every parsed field: header, per-type bitmaps, the commit entry,
/// and the absent name-hash cache.
#[test]
fn pack_bitmap_index_write_parse_roundtrip_sha1() {
    let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
    let selections = [(0u32, 0u32, vec![1u32, 2u32])];
    let serialised = write_bitmap(
        ObjectFormat::Sha1,
        pack_checksum_sha1(),
        &object_types,
        &selections,
        None,
    )
    .expect("test operation should succeed");
    assert_eq!(&serialised[..4], b"BITM");

    let parsed = PackBitmapIndex::parse(&serialised, ObjectFormat::Sha1, 3)
        .expect("test operation should succeed");
    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.options, PackBitmapIndex::OPTION_FULL_DAG);
    assert_eq!(parsed.pack_checksum, pack_checksum_sha1());

    // Each object type occupies its own position in the per-type bitmaps.
    let type_bitmaps = &parsed.type_bitmaps;
    let commits = type_bitmaps
        .commits
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(commits, vec![0]);
    let trees = type_bitmaps
        .trees
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(trees, vec![1]);
    let blobs = type_bitmaps
        .blobs
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(blobs, vec![2]);
    let tags = type_bitmaps
        .tags
        .to_positions()
        .expect("test operation should succeed");
    assert!(tags.is_empty());

    assert_eq!(parsed.entries.len(), 1);
    let entry = parsed
        .entry_for_index_position(0)
        .expect("test operation should succeed");
    assert_eq!(entry.xor_offset, 0);
    assert_eq!(entry.flags, 0);
    let reachable = entry
        .bitmap
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(reachable, vec![0, 1, 2]);
    assert_eq!(parsed.name_hash_cache, None);
}
8278
/// Same round trip under SHA-256: the parsed index must carry the SHA-256
/// format, pack checksum and index checksum, plus the commit's bitmap.
#[test]
fn pack_bitmap_index_write_parse_roundtrip_sha256() {
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
        .expect("test operation should succeed");
    let object_types = [ObjectType::Commit, ObjectType::Tree];
    let selections = [(0u32, 0u32, vec![1u32])];
    let serialised = write_bitmap(
        ObjectFormat::Sha256,
        pack_checksum.clone(),
        &object_types,
        &selections,
        None,
    )
    .expect("test operation should succeed");

    let parsed = PackBitmapIndex::parse(&serialised, ObjectFormat::Sha256, 2)
        .expect("test operation should succeed");
    assert_eq!(parsed.format, ObjectFormat::Sha256);
    assert_eq!(parsed.pack_checksum, pack_checksum);
    assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
    let reachable = parsed.entries[0]
        .bitmap
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(reachable, vec![0, 1]);
}
8305
/// Supplying a name-hash cache must set the hash-cache option bit and
/// round-trip the cache values unchanged.
#[test]
fn pack_bitmap_index_write_includes_name_hash_cache() {
    let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
    let cache = vec![0x1111_1111u32, 0x2222_2222, 0x3333_3333];
    let selections = [(0u32, 0u32, vec![1u32, 2u32])];
    let serialised = write_bitmap(
        ObjectFormat::Sha1,
        pack_checksum_sha1(),
        &object_types,
        &selections,
        Some(cache.clone()),
    )
    .expect("test operation should succeed");

    let parsed = PackBitmapIndex::parse(&serialised, ObjectFormat::Sha1, 3)
        .expect("test operation should succeed");
    let expected_options = PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE;
    assert_eq!(parsed.options, expected_options);
    assert_eq!(parsed.name_hash_cache, Some(cache));
}
8326
/// Two commits added to one writer must produce two entries, each holding the
/// commit's own position plus the positions it was given.
#[test]
fn pack_bitmap_writer_supports_multiple_commits() {
    let object_types = [
        ObjectType::Commit,
        ObjectType::Commit,
        ObjectType::Tree,
        ObjectType::Blob,
    ];
    let mut writer =
        PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
            .expect("test operation should succeed");
    writer
        .add_commit(0, 0, &[2, 3])
        .expect("test operation should succeed");
    writer
        .add_commit(1, 1, &[2])
        .expect("test operation should succeed");
    let serialised = writer.write().expect("test operation should succeed");

    let parsed = PackBitmapIndex::parse(&serialised, ObjectFormat::Sha1, 4)
        .expect("test operation should succeed");
    assert_eq!(parsed.entries.len(), 2);
    let commits = parsed
        .type_bitmaps
        .commits
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(commits, vec![0, 1]);

    let first_entry = parsed
        .entry_for_index_position(0)
        .expect("test operation should succeed");
    let first_reachable = first_entry
        .bitmap
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(first_reachable, vec![0, 2, 3]);

    let second_entry = parsed
        .entry_for_index_position(1)
        .expect("test operation should succeed");
    let second_reachable = second_entry
        .bitmap
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(second_reachable, vec![1, 2]);
}
8377
/// A freshly built index has an all-zero index checksum; after `write`, the
/// reparsed index must carry a non-zero one.
#[test]
fn pack_bitmap_index_recomputes_checksum_on_write() {
    let object_types = [ObjectType::Commit, ObjectType::Blob];
    let writer = PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
        .expect("test operation should succeed");
    let mut index = writer.build().expect("test operation should succeed");
    assert_eq!(index.index_checksum.as_bytes(), [0u8; 20]);

    // Swap in a hand-built entry before serialising.
    let bitmap = EwahBitmap::from_positions(2, &[0, 1]).expect("test operation should succeed");
    index.entries.clear();
    index.entries.push(PackBitmapEntry {
        object_position: 0,
        xor_offset: 0,
        flags: 0,
        bitmap,
    });

    let serialised = index.write().expect("test operation should succeed");
    let parsed = PackBitmapIndex::parse(&serialised, ObjectFormat::Sha1, 2)
        .expect("test operation should succeed");
    assert_ne!(parsed.index_checksum.as_bytes(), [0u8; 20]);
}
8401
/// `add_commit` must fail for each of four invalid argument combinations
/// against a two-object (commit, blob) pack.
#[test]
fn pack_bitmap_writer_rejects_non_commit_selection() {
    let object_types = [ObjectType::Commit, ObjectType::Blob];
    let mut writer =
        PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
            .expect("test operation should succeed");

    // Position 1 holds a blob, not a commit.
    let non_commit = writer.add_commit(1, 1, &[]);
    assert!(non_commit.is_err());
    // First argument beyond the two known objects.
    let first_out_of_range = writer.add_commit(5, 5, &[]);
    assert!(first_out_of_range.is_err());
    // Second argument beyond the two known objects.
    let second_out_of_range = writer.add_commit(0, 5, &[]);
    assert!(second_out_of_range.is_err());
    // Listed position beyond the two known objects.
    let listed_out_of_range = writer.add_commit(0, 0, &[9]);
    assert!(listed_out_of_range.is_err());
}
8417
/// A SHA-256 pack checksum must be rejected by a writer created for SHA-1.
#[test]
fn pack_bitmap_writer_rejects_checksum_format_mismatch() {
    let sha256_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
        .expect("test operation should succeed");
    let outcome =
        PackBitmapWriter::new(ObjectFormat::Sha1, sha256_checksum, &[ObjectType::Commit]);
    assert!(outcome.is_err());
}
8427
/// A two-value name-hash cache must be rejected for a one-object pack.
#[test]
fn pack_bitmap_writer_rejects_bad_name_hash_cache_len() {
    let object_types = [ObjectType::Commit];
    let writer = PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
        .expect("test operation should succeed");
    let outcome = writer.with_name_hash_cache(vec![1, 2]);
    assert!(outcome.is_err());
}
8438
/// `write` must fail when the hash-cache option bit and the presence of a
/// name-hash cache disagree, in either direction.
#[test]
fn pack_bitmap_index_write_rejects_inconsistent_cache_flag() {
    let object_types = [ObjectType::Commit];
    let writer = PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
        .expect("test operation should succeed");
    let mut index = writer.build().expect("test operation should succeed");

    // Option bit set without a cache.
    index.options |= PackBitmapIndex::OPTION_HASH_CACHE;
    let flag_without_cache = index.write();
    assert!(flag_without_cache.is_err());

    // Cache present without the option bit.
    index.options = PackBitmapIndex::OPTION_FULL_DAG;
    index.name_hash_cache = Some(vec![0]);
    let cache_without_flag = index.write();
    assert!(cache_without_flag.is_err());
}
8457
/// Creates a real repository with `git`, repacks it with `repack -adb`, then
/// writes a bitmap for the resulting pack via `write_bitmap` and checks that
/// `PackBitmapIndex::parse` reads it back with the expected checksum and a
/// single entry covering every object.
///
/// Requires a `git` executable on `PATH`.
#[test]
fn write_bitmap_roundtrips_through_upstream_git_parser() {
    /// Deletes the scratch repository when dropped, so cleanup also happens
    /// when an assertion or `expect` below panics and unwinds.
    struct ScratchDir(PathBuf);

    impl Drop for ScratchDir {
        fn drop(&mut self) {
            // Best effort: a failed cleanup must not mask the test outcome.
            let _ = fs::remove_dir_all(&self.0);
        }
    }

    let scratch = ScratchDir(unique_temp_dir("git-pack-bitmap-writer"));
    let root = scratch.0.as_path();
    fs::create_dir_all(root).expect("test operation should succeed");

    run_git_success(root, &["init", "-q", "-b", "main"]);
    run_git_success(
        root,
        &[
            "-c",
            "user.name=Example User",
            "-c",
            "user.email=example@example.invalid",
            "commit",
            "--allow-empty",
            "-q",
            "-m",
            "one",
        ],
    );
    run_git_success(root, &["repack", "-adb"]);

    let pack_dir = root.join(".git").join("objects").join("pack");
    let idx_path = single_path_with_extension(&pack_dir, "idx");
    let index = PackIndex::parse(
        &fs::read(idx_path).expect("test operation should succeed"),
        ObjectFormat::Sha1,
    )
    .expect("test operation should succeed");
    let pack_path = single_path_with_extension(&pack_dir, "pack");
    let pack = PackFile::parse_sha1(&fs::read(pack_path).expect("test operation should succeed"))
        .expect("test operation should succeed");

    // An object's position is the rank of its offset among all pack offsets.
    let mut offsets: Vec<u64> = index.entries.iter().map(|entry| entry.offset).collect();
    offsets.sort_unstable();
    let position_of = |offset: u64| -> u32 {
        offsets
            .iter()
            .position(|value| *value == offset)
            .expect("test operation should succeed") as u32
    };

    // Objects missing from the parsed pack keep the `Blob` placeholder.
    let mut object_types = vec![ObjectType::Blob; index.entries.len()];
    for entry in &index.entries {
        let position = position_of(entry.offset) as usize;
        if let Some(parsed) = pack
            .entries
            .iter()
            .find(|po| po.entry.offset == entry.offset)
        {
            object_types[position] = parsed.object.object_type;
        }
    }

    let commit_position = object_types
        .iter()
        .position(|ty| *ty == ObjectType::Commit)
        .expect("test operation should succeed") as u32;
    let commit_index_position = index
        .entries
        .iter()
        .position(|entry| position_of(entry.offset) == commit_position)
        .expect("test operation should succeed") as u32;
    let reachable: Vec<u32> = (0..index.entries.len() as u32).collect();

    let bytes = write_bitmap(
        ObjectFormat::Sha1,
        index.pack_checksum.clone(),
        &object_types,
        &[(commit_position, commit_index_position, reachable)],
        None,
    )
    .expect("test operation should succeed");
    let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, index.entries.len())
        .expect("test operation should succeed");
    assert_eq!(parsed.pack_checksum, index.pack_checksum);
    assert_eq!(parsed.entries.len(), 1);
    assert_eq!(
        parsed.entries[0]
            .bitmap
            .to_positions()
            .expect("test operation should succeed")
            .len(),
        index.entries.len()
    );
}
8552}