1#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::{Compress, Compression, FlushCompress, Status};
6use sley_core::{GitError, ObjectFormat, ObjectId, Result};
7use sley_formats::Bundle;
8use sley_object::{EncodedObject, ObjectType};
9use std::borrow::Borrow;
10use std::cell::RefCell;
11use std::collections::{HashMap, HashSet};
12use std::fmt;
13use std::ops::Range;
14use std::sync::Arc;
15
/// Bookkeeping for one object stored in a pack: its id, sizes and position.
///
/// The field descriptions reflect how `PackFile::parse_with_base` fills the struct for
/// non-delta entries; how resolved delta entries populate it is decided by
/// `resolve_pack_entries`, which is outside this view.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackEntry {
    /// Object id of the stored object.
    pub oid: ObjectId,
    /// Number of bytes the parser consumed from the pack while inflating the entry body.
    pub compressed_size: u64,
    /// Size declared in the entry header.
    pub uncompressed_size: u64,
    /// Byte offset of the entry header, measured from the start of the pack.
    pub offset: u64,
}
23
/// Value [`PackWriteOptions::new`] assigns to [`PackWriteOptions::window`].
pub const DEFAULT_PACK_WINDOW: usize = 10;

/// Value [`PackWriteOptions::new`] assigns to [`PackWriteOptions::depth`].
pub const DEFAULT_PACK_DEPTH: usize = 50;

// NOTE(review): the two constants below are not referenced in this part of the file.
// Judging by their names they gate multi-threaded payload compression (a minimum object
// count and a thread cap) — confirm at the use site.
const PACK_PARALLEL_COMPRESSION_MIN_OBJECTS: usize = 64;

const PACK_PARALLEL_COMPRESSION_MAX_THREADS: usize = 4;
47
/// Tunables handed to the pack writer (`PackFile::write_packed_with_options` and friends).
///
/// `window`, `depth`, `thin_bases` and `reorder` are consumed by `plan_pack_deltas`, which is
/// outside this view, so their descriptions below are hedged.
#[derive(Debug, Clone)]
pub struct PackWriteOptions {
    /// Delta search window; defaults to [`DEFAULT_PACK_WINDOW`].
    /// NOTE(review): presumably the number of candidate bases considered per object — confirm.
    pub window: usize,
    /// Delta chain depth limit; defaults to [`DEFAULT_PACK_DEPTH`].
    /// `PackFile::write_undeltified` sets this to `0`.
    pub depth: usize,
    /// When `true`, in-pack deltas are written as entry kind 6 followed by the distance back to
    /// the base entry; when `false`, as kind 7 followed by the base object id.
    pub prefer_ofs_delta: bool,
    /// Objects outside the pack that deltas may be built against. Every key must use the pack's
    /// object format. Deltas planned against an external base are written as kind 7 entries
    /// naming the base id.
    pub thin_bases: HashMap<ObjectId, EncodedObject>,
    /// Defaults to `true`; `PackFile::write_undeltified` sets it to `false`.
    /// NOTE(review): presumably allows the planner to change the emission order — confirm.
    pub reorder: bool,
}
76
77impl Default for PackWriteOptions {
78 fn default() -> Self {
79 Self::new()
80 }
81}
82
83impl PackWriteOptions {
84 pub fn new() -> Self {
88 Self {
89 window: DEFAULT_PACK_WINDOW,
90 depth: DEFAULT_PACK_DEPTH,
91 prefer_ofs_delta: true,
92 thin_bases: HashMap::new(),
93 reorder: true,
94 }
95 }
96
97 pub fn with_window(mut self, window: usize) -> Self {
99 self.window = window;
100 self
101 }
102
103 pub fn with_depth(mut self, depth: usize) -> Self {
105 self.depth = depth;
106 self
107 }
108
109 pub fn with_prefer_ofs_delta(mut self, prefer_ofs_delta: bool) -> Self {
112 self.prefer_ofs_delta = prefer_ofs_delta;
113 self
114 }
115
116 pub fn with_thin_bases(mut self, thin_bases: HashMap<ObjectId, EncodedObject>) -> Self {
118 self.thin_bases = thin_bases;
119 self
120 }
121
122 pub fn with_reorder(mut self, reorder: bool) -> Self {
125 self.reorder = reorder;
126 self
127 }
128}
129
/// Switches describing how a repack should be carried out.
///
/// NOTE(review): nothing in this part of the file reads these fields; the descriptions are
/// inferred from the field names only — confirm against the consumer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepackPolicy {
    /// Presumably: also write bitmap indexes.
    pub write_bitmaps: bool,
    /// Presumably: place unreachable objects in cruft packs.
    pub cruft_packs: bool,
    /// Presumably: factor for geometric repacking, `None` when disabled.
    pub geometric_factor: Option<u8>,
}
136
/// A fully parsed pack: every entry inflated and every delta resolved.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackFile {
    /// Pack format version read from the header; the parser accepts 2 and 3.
    pub version: u32,
    /// Resolved objects as returned by `resolve_pack_entries`.
    pub entries: Vec<PackObject>,
    /// Digest of all pack bytes before the trailer, verified against the trailer on parse.
    pub checksum: ObjectId,
}
143
/// One object from a pack together with its pack bookkeeping.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackObject {
    /// Id, sizes and offset of the entry inside the pack.
    pub entry: PackEntry,
    /// The decoded object (type and body).
    pub object: EncodedObject,
}
149
/// Per-object statistics produced by `PackFile::verify_pack_stats`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackVerifyStat {
    /// Id of the resolved object.
    pub oid: ObjectId,
    /// Type of the resolved object.
    pub object_type: ObjectType,
    /// Size declared in the on-disk entry header (for a delta entry this is the header size of
    /// the delta entry itself, not of the resolved object).
    pub size: u64,
    /// Distance from this entry's offset to the next entry's offset, or to the trailer for the
    /// last entry.
    pub size_in_pack: u64,
    /// Byte offset of the entry inside the pack.
    pub offset: u64,
    /// `0` for non-delta entries; otherwise the base's depth plus one, or `1` when the base
    /// cannot be located inside the pack.
    pub delta_depth: u32,
    /// For offset deltas, the id of the object at the base offset (if one is known); for ref
    /// deltas, the id named by the entry; `None` for non-delta entries.
    pub base_oid: Option<ObjectId>,
}
171
/// Result of `PackFile::verify_pack_stats`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackVerifyStats {
    /// One record per pack entry, sorted by ascending pack offset.
    pub objects: Vec<PackVerifyStat>,
    /// Checksum of the verified pack.
    pub checksum: ObjectId,
}
179
/// A pack together with the index that was built for it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackWrite {
    /// Complete pack bytes, including the trailing checksum.
    pub pack: Vec<u8>,
    /// Serialized pack index bytes.
    pub index: Vec<u8>,
    /// Checksum of the pack (the value stored in the pack trailer).
    pub checksum: ObjectId,
    /// Index entries; the pack writer pushes them in the order the objects were emitted.
    pub entries: Vec<PackIndexEntry>,
}
187
/// Borrowed object plus its already-known id, for the `write_packed_with_known_ids*` entry
/// points. Those functions check only that the id's format matches the pack format; they do not
/// recompute the id from the object.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PackInput<'a> {
    /// Id the caller asserts for `object`.
    pub oid: &'a ObjectId,
    /// The object to store.
    pub object: &'a EncodedObject,
}
193
/// Output of `PackIndex::write_v2_for_pack`, as consumed by `PackFile::index_pack`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexBuild {
    /// Serialized index bytes.
    pub index: Vec<u8>,
    /// Checksum of the pack the index was built for.
    pub pack_checksum: ObjectId,
    /// Entries recorded in the index.
    pub entries: Vec<PackIndexEntry>,
}
200
/// Owned, fully materialised pack index.
///
/// NOTE(review): the parser for this type is outside this view; the field descriptions assume
/// the same meaning as the identically named fields of `PackIndexView` — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndex {
    /// Index format version.
    pub version: u32,
    /// 256-entry fanout table.
    pub fanout: [u32; 256],
    /// All index entries.
    pub entries: Vec<PackIndexEntry>,
    /// Checksum of the pack this index describes.
    pub pack_checksum: ObjectId,
    /// Checksum of the index itself.
    pub index_checksum: ObjectId,
}
209
/// Borrowing view over serialized pack index bytes (version 1 or 2) that looks entries up
/// directly in the underlying buffer instead of materialising them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexView<'a> {
    /// `2` when the buffer starts with the v2 magic, `1` otherwise.
    pub version: u32,
    /// Number of entries, taken from the last fanout slot.
    pub count: usize,
    /// Fanout table; `find` treats `fanout[b]` as the exclusive end of the bucket for first
    /// byte `b` and `fanout[b - 1]` as its start.
    pub fanout: [u32; 256],
    /// Pack checksum read from the first half of the trailer.
    pub pack_checksum: ObjectId,
    /// Index checksum read from the last `hash_len` bytes.
    pub index_checksum: ObjectId,
    /// The whole index buffer.
    bytes: &'a [u8],
    /// Object format the index was parsed with.
    format: ObjectFormat,
    /// Byte ranges of the tables inside `bytes`.
    tables: PackIndexViewTables,
}
221
/// Something that can lend out the raw bytes of a pack index and be shared across threads.
pub trait PackIndexByteSource: fmt::Debug + Send + Sync {
    /// Returns the full byte buffer.
    fn as_bytes(&self) -> &[u8];
}
225
226impl<T> PackIndexByteSource for T
227where
228 T: AsRef<[u8]> + fmt::Debug + Send + Sync + ?Sized,
229{
230 fn as_bytes(&self) -> &[u8] {
231 self.as_ref()
232 }
233}
234
/// Newtype around shared index bytes so an `Arc<[u8]>` can be used as a
/// [`PackIndexByteSource`] trait object.
#[derive(Debug)]
struct SharedIndexBytes(Arc<[u8]>);
237
238impl PackIndexByteSource for SharedIndexBytes {
239 fn as_bytes(&self) -> &[u8] {
240 self.0.as_ref()
241 }
242}
243
/// Pack index view that holds its bytes through a shared [`PackIndexByteSource`] instead of a
/// borrow.
///
/// NOTE(review): the parser that fills this struct is outside this view; the public fields are
/// assumed to carry the same meaning as those of `PackIndexView` — confirm.
#[derive(Debug, Clone)]
pub struct PackIndexViewData {
    /// Index format version.
    pub version: u32,
    /// Number of entries in the index.
    pub count: usize,
    /// 256-entry fanout table.
    pub fanout: [u32; 256],
    /// Checksum of the pack this index describes.
    pub pack_checksum: ObjectId,
    /// Checksum of the index itself.
    pub index_checksum: ObjectId,
    /// Shared owner of the index bytes.
    bytes: Arc<dyn PackIndexByteSource>,
    /// Object format the index was parsed with.
    format: ObjectFormat,
    /// Byte ranges of the tables inside the buffer.
    tables: PackIndexViewTables,
}
255
/// One record of a pack index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexEntry {
    /// Id of the indexed object.
    pub oid: ObjectId,
    /// CRC-32 of the entry as stored in the pack; the pack writer computes it over the entry
    /// header plus the compressed payload.
    pub crc32: u32,
    /// Byte offset of the entry inside the pack.
    pub offset: u64,
}
262
/// Result of looking an object id up in a pack index view.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PackIndexLookup {
    /// CRC-32 stored for the entry; `PackIndexView` reports `0` for version 1 indexes.
    pub crc32: u32,
    /// Byte offset of the entry inside the pack.
    pub offset: u64,
}
268
/// Byte ranges of the lookup tables inside a serialized pack index.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PackIndexViewTables {
    /// Version 1 layout: a single table of fixed-size records.
    V1 {
        /// Records of `4 + hash_len` bytes: a 4-byte big-endian offset followed by the id.
        entry_table: Range<usize>,
    },
    /// Version 2 layout: separate parallel tables.
    V2 {
        /// Object ids, `hash_len` bytes each.
        oid_table: Range<usize>,
        /// CRC-32 values, 4 bytes each.
        crc_table: Range<usize>,
        /// 4-byte offset words; a set top bit marks an entry that uses the large offset table.
        small_offset_table: Range<usize>,
        /// 8-byte records, one per small-offset word with the top bit set.
        large_offset_table: Range<usize>,
    },
}
281
/// Parsed reverse index of a pack.
///
/// NOTE(review): the parser is outside this view; field descriptions are inferred from the
/// field names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackReverseIndex {
    /// Format version of the reverse index.
    pub version: u32,
    /// Object format of the checksums.
    pub format: ObjectFormat,
    /// Presumably index positions listed in pack order — confirm.
    pub positions: Vec<u32>,
    /// Checksum of the associated pack.
    pub pack_checksum: ObjectId,
    /// Checksum of the reverse index itself.
    pub index_checksum: ObjectId,
}
290
/// Parsed per-object modification-time table of a pack.
///
/// NOTE(review): the parser is outside this view; field descriptions are inferred from the
/// field names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackMtimes {
    /// Format version of the table.
    pub version: u32,
    /// Object format of the checksums.
    pub format: ObjectFormat,
    /// Presumably one modification time per object — confirm ordering and unit.
    pub mtimes: Vec<u32>,
    /// Checksum of the associated pack.
    pub pack_checksum: ObjectId,
    /// Checksum of the table itself.
    pub index_checksum: ObjectId,
}
299
/// Parsed pack bitmap index.
///
/// NOTE(review): the parser is outside this view; field descriptions are inferred from the
/// field names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapIndex {
    /// Format version of the bitmap index.
    pub version: u16,
    /// Object format of the checksums.
    pub format: ObjectFormat,
    /// Raw option flags from the header.
    pub options: u16,
    /// Checksum of the associated pack.
    pub pack_checksum: ObjectId,
    /// Checksum of the bitmap index itself.
    pub index_checksum: ObjectId,
    /// One bitmap per object type.
    pub type_bitmaps: PackBitmapTypeBitmaps,
    /// Stored bitmap entries.
    pub entries: Vec<PackBitmapEntry>,
    /// Optional name-hash cache, when present in the file.
    pub name_hash_cache: Option<Vec<u32>>,
}
311
/// The four per-type bitmaps of a pack bitmap index, one for each object type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapTypeBitmaps {
    /// Bitmap for commits.
    pub commits: EwahBitmap,
    /// Bitmap for trees.
    pub trees: EwahBitmap,
    /// Bitmap for blobs.
    pub blobs: EwahBitmap,
    /// Bitmap for tags.
    pub tags: EwahBitmap,
}
319
/// One stored bitmap of a pack bitmap index.
///
/// NOTE(review): the parser is outside this view; field descriptions are inferred from the
/// field names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapEntry {
    /// Position of the object this bitmap belongs to.
    pub object_position: u32,
    /// Presumably how many entries back the XOR base bitmap lies, `0` for none — confirm.
    pub xor_offset: u8,
    /// Raw per-entry flags.
    pub flags: u8,
    /// The stored bitmap.
    pub bitmap: EwahBitmap,
}
333
/// A compressed bitmap kept in its serialized word form.
///
/// NOTE(review): encoder/decoder are outside this view; field descriptions are inferred from
/// the type and field names (EWAH encoding) — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EwahBitmap {
    /// Number of bits the bitmap represents.
    pub bit_size: u32,
    /// Compressed 64-bit words.
    pub words: Vec<u64>,
    /// Presumably the position of the last running-length word inside `words` — confirm.
    pub rlw_position: u32,
}
340
/// Parsed multi-pack index.
///
/// NOTE(review): the parser is outside this view; field descriptions are inferred from the
/// field names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndex {
    /// Format version.
    pub version: u8,
    /// Object format of the stored ids.
    pub format: ObjectFormat,
    /// Number of packs covered.
    pub pack_count: u32,
    /// Names of the covered packs.
    pub pack_names: Vec<String>,
    /// Number of objects covered.
    pub object_count: u32,
    /// 256-entry fanout table.
    pub fanout: [u32; 256],
    /// All object records.
    pub objects: Vec<MultiPackIndexEntry>,
    /// Optional reverse index, when present.
    pub reverse_index: Option<Vec<u32>>,
    /// Optional per-pack bitmap ranges, when present.
    pub bitmapped_packs: Option<Vec<MultiPackBitmapPack>>,
    /// Chunk table entries.
    pub chunks: Vec<MultiPackIndexChunk>,
    /// Checksum of the multi-pack index.
    pub checksum: ObjectId,
}
355
/// Lookup structure over multi-pack index bytes held through a shared byte source.
///
/// NOTE(review): the code that builds and queries this type is outside this view; field
/// descriptions are inferred from the field names — confirm.
#[derive(Debug, Clone)]
pub struct MultiPackIndexOidLookup {
    /// Object format of the stored ids.
    format: ObjectFormat,
    /// Number of packs covered.
    pack_count: u32,
    /// Names of the covered packs.
    pack_names: Vec<String>,
    /// 256-entry fanout table.
    fanout: [u32; 256],
    /// Number of objects covered.
    object_count: usize,
    /// Presumably the byte offset of the object id table inside `bytes`.
    oid_lookup_offset: usize,
    /// Presumably the byte offset of the object offsets table inside `bytes`.
    object_offsets_offset: usize,
    /// Presumably the byte offset of the large offsets table, when there is one.
    large_offsets_offset: Option<usize>,
    /// Presumably the length of the large offsets table.
    large_offsets_len: usize,
    /// Shared owner of the multi-pack index bytes.
    bytes: Arc<dyn PackIndexByteSource>,
}
369
/// One object record of a multi-pack index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndexEntry {
    /// Id of the object.
    pub oid: ObjectId,
    /// Identifier of the pack holding the object.
    /// NOTE(review): presumably an index into `MultiPackIndex::pack_names` — confirm.
    pub pack_int_id: u32,
    /// Byte offset of the object inside that pack.
    pub offset: u64,
}
376
/// Bitmap range information for one pack of a multi-pack index.
///
/// NOTE(review): not used in this part of the file; meanings inferred from names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackBitmapPack {
    /// Presumably the first bitmap position belonging to the pack.
    pub bitmap_pos: u32,
    /// Presumably the number of bitmap positions belonging to the pack.
    pub bitmap_nr: u32,
}
382
/// One chunk-table entry of a multi-pack index.
///
/// NOTE(review): not used in this part of the file; meanings inferred from names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndexChunk {
    /// Four-byte chunk identifier.
    pub id: [u8; 4],
    /// Byte offset of the chunk.
    pub offset: u64,
    /// Length of the chunk in bytes.
    pub len: u64,
}
389
/// Kind of a pack entry as decoded from its entry header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PackObjectKind {
    /// A whole commit object.
    Commit,
    /// A whole tree object.
    Tree,
    /// A whole blob object.
    Blob,
    /// A whole tag object.
    Tag,
    /// A delta whose base is addressed by pack offset (the writer emits these as kind 6).
    OfsDelta,
    /// A delta whose base is addressed by object id (the writer emits these as kind 7).
    RefDelta,
}
399
/// Intermediate result of the first parsing pass over a pack, before deltas are resolved.
#[derive(Debug, Clone, PartialEq, Eq)]
enum ParsedPackEntry {
    /// A non-delta entry that is already a complete object.
    Resolved(PackObject),
    /// A delta entry that still has to be applied to its base.
    Delta {
        /// Where the base object is found.
        base: DeltaBase,
        /// Number of compressed bytes consumed from the pack for the delta body.
        compressed_size: u64,
        /// Size declared in the entry header (the inflated delta length).
        delta_size: u64,
        /// Byte offset of the entry header inside the pack.
        offset: u64,
        /// The inflated delta instructions.
        delta: Vec<u8>,
    },
}
411
/// How a delta entry names its base object.
#[derive(Debug, Clone, PartialEq, Eq)]
enum DeltaBase {
    /// Pack offset of the base entry, as returned by `parse_ofs_delta_base_offset`;
    /// `verify_pack_stats` uses it directly as a key into its per-offset maps.
    Offset(u64),
    /// Object id of the base.
    Ref(ObjectId),
}
417
/// What `verify_pack_stats` records about each entry exactly as it is stored in the pack.
struct OnDiskEntry {
    /// Byte offset of the entry header inside the pack.
    offset: u64,
    /// Delta base reference, or `None` for a non-delta entry.
    base: Option<DeltaBase>,
    /// Size declared in the entry header.
    stream_size: u64,
}
426
427impl PackFile {
428 pub fn parse_sha1(bytes: &[u8]) -> Result<Self> {
429 Self::parse(bytes, ObjectFormat::Sha1)
430 }
431
432 pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
433 Self::parse_with_base(bytes, format, |_| Ok(None))
434 }
435
436 pub fn parse_bundle(bundle: &Bundle) -> Result<Self> {
437 Self::parse(&bundle.pack, bundle.format)
438 }
439
440 pub fn index_pack(bytes: &[u8], format: ObjectFormat) -> Result<PackWrite> {
441 let PackIndexBuild {
442 index,
443 pack_checksum,
444 entries,
445 } = PackIndex::write_v2_for_pack(bytes, format)?;
446 Ok(PackWrite {
447 pack: bytes.to_vec(),
448 index,
449 checksum: pack_checksum,
450 entries,
451 })
452 }
453
454 pub fn parse_thin<F>(bytes: &[u8], format: ObjectFormat, external_base: F) -> Result<Self>
455 where
456 F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
457 {
458 Self::parse_with_base(bytes, format, external_base)
459 }
460
461 fn parse_with_base<F>(bytes: &[u8], format: ObjectFormat, mut external_base: F) -> Result<Self>
462 where
463 F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
464 {
465 let trailer_len = format.raw_len();
466 if bytes.len() < 12 + trailer_len {
467 return Err(GitError::InvalidFormat("pack file too short".into()));
468 }
469 let trailer_offset = bytes.len() - trailer_len;
470 let checksum = sley_core::digest_bytes(format, &bytes[..trailer_offset])?;
471 let expected = ObjectId::from_raw(format, &bytes[trailer_offset..])?;
472 if checksum != expected {
473 return Err(GitError::InvalidFormat(format!(
474 "pack checksum mismatch: expected {expected}, got {checksum}"
475 )));
476 }
477
478 if &bytes[..4] != b"PACK" {
479 return Err(GitError::InvalidFormat("missing PACK signature".into()));
480 }
481 let version = u32_be(&bytes[4..8]);
482 if version != 2 && version != 3 {
483 return Err(GitError::Unsupported(format!("pack version {version}")));
484 }
485 let count = u32_be(&bytes[8..12]) as usize;
486 let mut offset = 12usize;
487 let mut entries = Vec::with_capacity(count);
488 for _ in 0..count {
489 let entry_offset = offset;
490 let header = parse_entry_header(bytes, &mut offset)?;
491 let base =
492 match header.kind {
493 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
494 parse_ofs_delta_base_offset(bytes, &mut offset, entry_offset as u64)?,
495 )),
496 PackObjectKind::RefDelta => {
497 let hash_len = format.raw_len();
498 if offset + hash_len > trailer_offset {
499 return Err(GitError::InvalidFormat(
500 "truncated ref-delta base object id".into(),
501 ));
502 }
503 let oid = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
504 offset += hash_len;
505 Some(DeltaBase::Ref(oid))
506 }
507 _ => None,
508 };
509 let mut body = Vec::new();
510 let consumed = inflate_into(
511 &bytes[offset..trailer_offset],
512 &mut body,
513 header.size.min(usize::MAX as u64) as usize,
514 )?;
515 if body.len() as u64 != header.size {
516 return Err(GitError::InvalidObject(format!(
517 "pack object declared {} bytes, decoded {}",
518 header.size,
519 body.len()
520 )));
521 }
522 if consumed == 0 {
523 return Err(GitError::InvalidFormat(
524 "empty compressed pack entry".into(),
525 ));
526 }
527 offset = offset
528 .checked_add(consumed)
529 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
530 if offset > trailer_offset {
531 return Err(GitError::InvalidFormat(
532 "pack entry extends past checksum".into(),
533 ));
534 }
535 if let Some(base) = base {
536 entries.push(ParsedPackEntry::Delta {
537 base,
538 compressed_size: consumed as u64,
539 delta_size: header.size,
540 offset: entry_offset as u64,
541 delta: body,
542 });
543 } else {
544 let object_type = match header.kind {
545 PackObjectKind::Commit => ObjectType::Commit,
546 PackObjectKind::Tree => ObjectType::Tree,
547 PackObjectKind::Blob => ObjectType::Blob,
548 PackObjectKind::Tag => ObjectType::Tag,
549 PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
550 };
551 let object = EncodedObject::new(object_type, body);
552 let oid = object.object_id(format)?;
553 entries.push(ParsedPackEntry::Resolved(PackObject {
554 entry: PackEntry {
555 oid,
556 compressed_size: consumed as u64,
557 uncompressed_size: header.size,
558 offset: entry_offset as u64,
559 },
560 object,
561 }));
562 }
563 }
564 if offset != trailer_offset {
565 return Err(GitError::InvalidFormat(format!(
566 "pack has {} trailing bytes before checksum",
567 trailer_offset - offset
568 )));
569 }
570 Ok(Self {
571 version,
572 entries: resolve_pack_entries(entries, format, &mut external_base)?,
573 checksum,
574 })
575 }
576
577 pub fn verify_pack_stats(bytes: &[u8], format: ObjectFormat) -> Result<PackVerifyStats> {
588 let pack = Self::parse(bytes, format)?;
592
593 let trailer_len = format.raw_len();
597 let trailer_offset = bytes.len() - trailer_len;
598 let count = u32_be(&bytes[8..12]) as usize;
599 let mut offset = 12usize;
600 let mut on_disk: Vec<OnDiskEntry> = Vec::with_capacity(count);
606 for _ in 0..count {
607 let entry_offset = offset as u64;
608 let header = parse_entry_header(bytes, &mut offset)?;
609 let stream_size = header.size;
610 let base = match header.kind {
611 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
612 parse_ofs_delta_base_offset(bytes, &mut offset, entry_offset)?,
613 )),
614 PackObjectKind::RefDelta => {
615 let hash_len = format.raw_len();
616 if offset + hash_len > trailer_offset {
617 return Err(GitError::InvalidFormat(
618 "truncated ref-delta base object id".into(),
619 ));
620 }
621 let oid = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
622 offset += hash_len;
623 Some(DeltaBase::Ref(oid))
624 }
625 _ => None,
626 };
627 let mut body = Vec::new();
629 let consumed = inflate_into(
630 &bytes[offset..trailer_offset],
631 &mut body,
632 header.size.min(usize::MAX as u64) as usize,
633 )?;
634 offset = offset
635 .checked_add(consumed)
636 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
637 on_disk.push(OnDiskEntry {
638 offset: entry_offset,
639 base,
640 stream_size,
641 });
642 }
643
644 let mut resolved_by_offset: HashMap<u64, &PackObject> =
646 HashMap::with_capacity(pack.entries.len());
647 for object in &pack.entries {
648 resolved_by_offset.insert(object.entry.offset, object);
649 }
650 let mut oid_by_offset: HashMap<u64, ObjectId> = HashMap::with_capacity(on_disk.len());
652 for entry in &on_disk {
653 if let Some(object) = resolved_by_offset.get(&entry.offset) {
654 oid_by_offset.insert(entry.offset, object.entry.oid);
655 }
656 }
657 let mut index_by_offset: HashMap<u64, usize> = HashMap::with_capacity(on_disk.len());
659 for (idx, entry) in on_disk.iter().enumerate() {
660 index_by_offset.insert(entry.offset, idx);
661 }
662
663 let mut sorted_offsets: Vec<u64> = on_disk.iter().map(|entry| entry.offset).collect();
666 sorted_offsets.sort_unstable();
667 let mut next_offset: HashMap<u64, u64> = HashMap::with_capacity(sorted_offsets.len());
668 for window in sorted_offsets.windows(2) {
669 next_offset.insert(window[0], window[1]);
670 }
671 if let Some(last) = sorted_offsets.last() {
672 next_offset.insert(*last, trailer_offset as u64);
673 }
674
675 let mut depth = vec![None; on_disk.len()];
681 fn resolve_depth(
682 idx: usize,
683 on_disk: &[OnDiskEntry],
684 index_by_offset: &HashMap<u64, usize>,
685 offset_of_oid: &HashMap<ObjectId, u64>,
686 depth: &mut [Option<u32>],
687 ) -> u32 {
688 if let Some(d) = depth[idx] {
689 return d;
690 }
691 let computed = match &on_disk[idx].base {
692 None => 0,
693 Some(base) => {
694 let base_idx = match base {
695 DeltaBase::Offset(off) => index_by_offset.get(off).copied(),
696 DeltaBase::Ref(oid) => offset_of_oid
697 .get(oid)
698 .and_then(|off| index_by_offset.get(off).copied()),
699 };
700 match base_idx {
701 Some(bi) => {
702 resolve_depth(bi, on_disk, index_by_offset, offset_of_oid, depth) + 1
703 }
704 None => 1,
706 }
707 }
708 };
709 depth[idx] = Some(computed);
710 computed
711 }
712 let mut offset_of_oid: HashMap<ObjectId, u64> = HashMap::with_capacity(oid_by_offset.len());
713 for (off, oid) in &oid_by_offset {
714 offset_of_oid.insert(*oid, *off);
715 }
716 for idx in 0..on_disk.len() {
717 resolve_depth(
718 idx,
719 &on_disk,
720 &index_by_offset,
721 &offset_of_oid,
722 &mut depth,
723 );
724 }
725
726 let mut stats = Vec::with_capacity(on_disk.len());
727 for (idx, entry) in on_disk.iter().enumerate() {
728 let off = entry.offset;
729 let object = resolved_by_offset.get(&off).ok_or_else(|| {
730 GitError::InvalidFormat("pack offset missing from resolved set".into())
731 })?;
732 let size_in_pack = next_offset
733 .get(&off)
734 .copied()
735 .unwrap_or(trailer_offset as u64)
736 .saturating_sub(off);
737 let base_oid = match &entry.base {
738 None => None,
739 Some(DeltaBase::Offset(base_off)) => oid_by_offset.get(base_off).copied(),
740 Some(DeltaBase::Ref(oid)) => Some(*oid),
741 };
742 stats.push(PackVerifyStat {
743 oid: object.entry.oid,
744 object_type: object.object.object_type,
745 size: entry.stream_size,
748 size_in_pack,
749 offset: off,
750 delta_depth: depth[idx].unwrap_or(0),
751 base_oid,
752 });
753 }
754 stats.sort_by_key(|stat| stat.offset);
756
757 Ok(PackVerifyStats {
758 objects: stats,
759 checksum: pack.checksum,
760 })
761 }
762
763 pub fn write_undeltified_sha1<T>(objects: &[T]) -> Result<PackWrite>
764 where
765 T: Borrow<EncodedObject>,
766 {
767 Self::write_undeltified(objects, ObjectFormat::Sha1)
768 }
769
770 pub fn write_undeltified<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
776 where
777 T: Borrow<EncodedObject>,
778 {
779 let options = PackWriteOptions::new().with_depth(0).with_reorder(false);
780 Self::write_packed_impl(objects, format, &options)
781 }
782
783 pub fn write_packed<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
792 where
793 T: Borrow<EncodedObject>,
794 {
795 Self::write_packed_with_options(objects, format, &PackWriteOptions::new())
796 }
797
798 pub fn write_packed_with_options<T>(
802 objects: &[T],
803 format: ObjectFormat,
804 options: &PackWriteOptions,
805 ) -> Result<PackWrite>
806 where
807 T: Borrow<EncodedObject>,
808 {
809 Self::write_packed_impl(objects, format, options)
810 }
811
812 pub fn write_packed_with_known_ids(
821 inputs: &[PackInput<'_>],
822 format: ObjectFormat,
823 ) -> Result<PackWrite> {
824 Self::write_packed_with_known_ids_and_options(inputs, format, &PackWriteOptions::new())
825 }
826
827 pub fn write_packed_with_known_ids_and_options(
830 inputs: &[PackInput<'_>],
831 format: ObjectFormat,
832 options: &PackWriteOptions,
833 ) -> Result<PackWrite> {
834 if inputs.len() > u32::MAX as usize {
835 return Err(GitError::InvalidFormat("too many pack objects".into()));
836 }
837 let mut objects = Vec::with_capacity(inputs.len());
838 let mut object_ids = Vec::with_capacity(inputs.len());
839 for input in inputs {
840 if input.oid.format() != format {
841 return Err(GitError::InvalidObjectId(format!(
842 "pack object id {} uses {}, pack uses {}",
843 input.oid,
844 input.oid.format().name(),
845 format.name()
846 )));
847 }
848 objects.push(input.object);
849 object_ids.push(*input.oid);
850 }
851 Self::write_packed_from_parts(objects, object_ids, format, options)
852 }
853
854 pub fn write_thin<T>(
863 objects: &[T],
864 format: ObjectFormat,
865 external_bases: HashMap<ObjectId, EncodedObject>,
866 ) -> Result<PackWrite>
867 where
868 T: Borrow<EncodedObject>,
869 {
870 let options = PackWriteOptions::new().with_thin_bases(external_bases);
871 Self::write_packed_impl(objects, format, &options)
872 }
873
874 fn write_packed_impl<T>(
875 objects: &[T],
876 format: ObjectFormat,
877 options: &PackWriteOptions,
878 ) -> Result<PackWrite>
879 where
880 T: Borrow<EncodedObject>,
881 {
882 if objects.len() > u32::MAX as usize {
883 return Err(GitError::InvalidFormat("too many pack objects".into()));
884 }
885 let objects: Vec<&EncodedObject> = objects.iter().map(Borrow::borrow).collect();
886
887 let mut object_ids: Vec<ObjectId> = Vec::with_capacity(objects.len());
890 for object in &objects {
891 object_ids.push(object.object_id(format)?);
892 }
893 Self::write_packed_from_parts(objects, object_ids, format, options)
894 }
895
896 fn write_packed_from_parts(
897 objects: Vec<&EncodedObject>,
898 object_ids: Vec<ObjectId>,
899 format: ObjectFormat,
900 options: &PackWriteOptions,
901 ) -> Result<PackWrite> {
902 let mut seen = HashSet::with_capacity(object_ids.len());
903 for oid in &object_ids {
904 if !seen.insert(oid) {
905 return Err(GitError::InvalidFormat(format!(
906 "pack contains duplicate object id {oid}"
907 )));
908 }
909 }
910
911 for oid in options.thin_bases.keys() {
913 if oid.format() != format {
914 return Err(GitError::InvalidObjectId(
915 "thin pack base object id format does not match pack format".into(),
916 ));
917 }
918 }
919
920 let (plan, order) = plan_pack_deltas(&objects, &object_ids, options)?;
926
927 let mut pack = Vec::new();
928 pack.extend_from_slice(b"PACK");
929 pack.extend_from_slice(&2u32.to_be_bytes());
930 pack.extend_from_slice(&(objects.len() as u32).to_be_bytes());
931
932 let mut index_entries = Vec::with_capacity(objects.len());
933 let mut written_offsets: Vec<Option<u64>> = vec![None; objects.len()];
936
937 let compressed_payloads = compress_planned_payloads(&objects, &plan, &order)?;
938
939 for (order_pos, &idx) in order.iter().enumerate() {
940 let offset = pack.len() as u64;
941 let mut entry_bytes = Vec::new();
942 match &plan[idx].base {
943 PlannedBase::None => {
944 write_entry_header(
945 &mut entry_bytes,
946 objects[idx].object_type,
947 objects[idx].body.len() as u64,
948 );
949 }
950 PlannedBase::InPack { base_idx, delta } => {
951 let base_offset = written_offsets[*base_idx].ok_or_else(|| {
952 GitError::InvalidFormat(
953 "in-pack delta base emitted after dependent object".into(),
954 )
955 })?;
956 if options.prefer_ofs_delta {
957 write_pack_entry_header_kind(&mut entry_bytes, 6, delta.len() as u64);
958 let relative = offset.checked_sub(base_offset).ok_or_else(|| {
959 GitError::InvalidFormat("ofs-delta base offset is after delta".into())
960 })?;
961 write_ofs_delta_offset(&mut entry_bytes, relative)?;
962 } else {
963 write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
964 entry_bytes.extend_from_slice(object_ids[*base_idx].as_bytes());
965 }
966 }
967 PlannedBase::External { base_oid, delta } => {
968 write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
969 entry_bytes.extend_from_slice(base_oid.as_bytes());
970 }
971 }
972 entry_bytes.extend_from_slice(&compressed_payloads[order_pos]);
973 let crc32 = crc32fast::hash(&entry_bytes);
974 pack.extend_from_slice(&entry_bytes);
975 written_offsets[idx] = Some(offset);
976 index_entries.push(PackIndexEntry {
977 oid: object_ids[idx].clone(),
978 crc32,
979 offset,
980 });
981 }
982
983 let checksum = sley_core::digest_bytes(format, &pack)?;
984 pack.extend_from_slice(checksum.as_bytes());
985 let index = PackIndex::write_v2(format, &index_entries, &checksum)?;
986 Ok(PackWrite {
987 pack,
988 index,
989 checksum,
990 entries: index_entries,
991 })
992 }
993}
994
995impl<'a> PackIndexView<'a> {
996 pub fn parse_v2_sha1(bytes: &'a [u8]) -> Result<Self> {
997 Self::parse(bytes, ObjectFormat::Sha1)
998 }
999
1000 pub fn parse(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
1001 Self::parse_impl(bytes, format, true, true)
1002 }
1003
1004 pub fn parse_without_checksum(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
1008 Self::parse_impl(bytes, format, false, true)
1009 }
1010
1011 pub fn parse_trusted_without_checksum(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
1018 Self::parse_impl(bytes, format, false, false)
1019 }
1020
1021 pub fn count(&self) -> usize {
1022 self.count
1023 }
1024
1025 pub fn fanout(&self) -> &[u32; 256] {
1026 &self.fanout
1027 }
1028
1029 pub fn find(&self, oid: &ObjectId) -> Option<PackIndexLookup> {
1030 if oid.format() != self.format {
1031 return None;
1032 }
1033 let bucket = usize::from(oid.as_bytes()[0]);
1034 let mut start = if bucket == 0 {
1035 0
1036 } else {
1037 self.fanout[bucket - 1] as usize
1038 };
1039 let mut end = self.fanout[bucket] as usize;
1040 let target = oid.as_bytes();
1041
1042 while start < end {
1043 let mid = start + (end - start) / 2;
1044 match self.oid_bytes_at(mid).cmp(target) {
1045 std::cmp::Ordering::Less => start = mid + 1,
1046 std::cmp::Ordering::Equal => return self.lookup_at(mid),
1047 std::cmp::Ordering::Greater => end = mid,
1048 }
1049 }
1050 None
1051 }
1052
1053 fn parse_impl(
1054 bytes: &'a [u8],
1055 format: ObjectFormat,
1056 verify_checksum: bool,
1057 validate_entries: bool,
1058 ) -> Result<Self> {
1059 let hash_len = format.raw_len();
1060 if bytes.len() < 4 {
1061 return Err(GitError::InvalidFormat("pack index too short".into()));
1062 }
1063 if bytes[..4] != [0xff, b't', b'O', b'c'] {
1064 return Self::parse_v1_impl(bytes, format, verify_checksum, validate_entries);
1065 }
1066 if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
1067 return Err(GitError::InvalidFormat("pack index too short".into()));
1068 }
1069 let version = u32_be(&bytes[4..8]);
1070 if version != 2 {
1071 return Err(GitError::Unsupported(format!(
1072 "pack index version {version}"
1073 )));
1074 }
1075 let index_checksum_offset = bytes.len() - hash_len;
1076 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1077 if verify_checksum {
1078 let actual_index_checksum =
1079 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1080 if actual_index_checksum != index_checksum {
1081 return Err(GitError::InvalidFormat(format!(
1082 "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1083 )));
1084 }
1085 }
1086
1087 let mut offset = 8usize;
1088 let fanout = read_pack_index_fanout(bytes, &mut offset)?;
1089 let count = fanout[255] as usize;
1090 let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
1091 offset = oid_table.end;
1092 let crc_table = checked_range(offset, count, 4, bytes.len())?;
1093 offset = crc_table.end;
1094 let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
1095 offset = small_offset_table.end;
1096
1097 let large_offset_count = (0..count)
1098 .filter(|idx| {
1099 let start = small_offset_table.start + idx * 4;
1100 u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
1101 })
1102 .count();
1103 let large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
1104 offset = large_offset_table.end;
1105
1106 let expected_trailer_offset = bytes.len() - hash_len * 2;
1107 if offset != expected_trailer_offset {
1108 return Err(GitError::InvalidFormat(format!(
1109 "pack index has {} unexpected bytes before trailer",
1110 expected_trailer_offset.saturating_sub(offset)
1111 )));
1112 }
1113 let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1114
1115 let view = Self {
1116 version,
1117 count,
1118 fanout,
1119 pack_checksum,
1120 index_checksum,
1121 bytes,
1122 format,
1123 tables: PackIndexViewTables::V2 {
1124 oid_table,
1125 crc_table,
1126 small_offset_table,
1127 large_offset_table,
1128 },
1129 };
1130 if validate_entries {
1131 view.validate_v2_entries()?;
1132 }
1133 Ok(view)
1134 }
1135
1136 fn parse_v1_impl(
1137 bytes: &'a [u8],
1138 format: ObjectFormat,
1139 verify_checksum: bool,
1140 validate_entries: bool,
1141 ) -> Result<Self> {
1142 let hash_len = format.raw_len();
1143 if bytes.len() < 256 * 4 + 2 * hash_len {
1144 return Err(GitError::InvalidFormat("pack index too short".into()));
1145 }
1146 let index_checksum_offset = bytes.len() - hash_len;
1147 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1148 if verify_checksum {
1149 let actual_index_checksum =
1150 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1151 if actual_index_checksum != index_checksum {
1152 return Err(GitError::InvalidFormat(format!(
1153 "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1154 )));
1155 }
1156 }
1157
1158 let mut offset = 0usize;
1159 let fanout = read_pack_index_fanout(bytes, &mut offset)?;
1160 let count = fanout[255] as usize;
1161 let entry_len = hash_len
1162 .checked_add(4)
1163 .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1164 let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
1165 offset = entry_table.end;
1166 let expected_trailer_offset = bytes.len() - hash_len * 2;
1167 if offset != expected_trailer_offset {
1168 return Err(GitError::InvalidFormat(format!(
1169 "pack index has {} unexpected bytes before trailer",
1170 expected_trailer_offset.saturating_sub(offset)
1171 )));
1172 }
1173 let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1174
1175 let view = Self {
1176 version: 1,
1177 count,
1178 fanout,
1179 pack_checksum,
1180 index_checksum,
1181 bytes,
1182 format,
1183 tables: PackIndexViewTables::V1 { entry_table },
1184 };
1185 if validate_entries {
1186 view.validate_v1_entries()?;
1187 }
1188 Ok(view)
1189 }
1190
1191 fn validate_v2_entries(&self) -> Result<()> {
1192 let PackIndexViewTables::V2 {
1193 oid_table,
1194 small_offset_table,
1195 large_offset_table,
1196 ..
1197 } = &self.tables
1198 else {
1199 unreachable!("v2 validation only runs for v2 views");
1200 };
1201 let oid_table = self.slice(oid_table.clone());
1202 let small_offset_table = self.slice(small_offset_table.clone());
1203 let large_offset_table = self.slice(large_offset_table.clone());
1204 let hash_len = self.format.raw_len();
1205 for idx in 0..self.count {
1206 let oid_start = idx * hash_len;
1207 let oid_bytes = &oid_table[oid_start..oid_start + hash_len];
1208 if idx > 0 && oid_bytes <= &oid_table[oid_start - hash_len..oid_start] {
1209 return Err(GitError::InvalidFormat(
1210 "pack index object ids are not strictly ascending".into(),
1211 ));
1212 }
1213 validate_pack_index_oid_fanout(idx, oid_bytes, &self.fanout)?;
1214
1215 let offset_start = idx * 4;
1216 let raw_offset = u32_be(&small_offset_table[offset_start..offset_start + 4]);
1217 pack_index_v2_offset(raw_offset, large_offset_table)?;
1218 }
1219 Ok(())
1220 }
1221
1222 fn validate_v1_entries(&self) -> Result<()> {
1223 let PackIndexViewTables::V1 { entry_table } = &self.tables else {
1224 unreachable!("v1 validation only runs for v1 views");
1225 };
1226 let entry_table = self.slice(entry_table.clone());
1227 let hash_len = self.format.raw_len();
1228 let entry_len = hash_len
1229 .checked_add(4)
1230 .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1231 for idx in 0..self.count {
1232 let start = idx * entry_len;
1233 let oid_start = start + 4;
1234 let oid_bytes = &entry_table[oid_start..start + entry_len];
1235 if idx > 0 {
1236 let previous_oid_start = oid_start - entry_len;
1237 let previous_oid = &entry_table[previous_oid_start..previous_oid_start + hash_len];
1238 if previous_oid >= oid_bytes {
1239 return Err(GitError::InvalidFormat(
1240 "pack index object ids are not strictly sorted".into(),
1241 ));
1242 }
1243 }
1244 validate_pack_index_oid_fanout(idx, oid_bytes, &self.fanout)?;
1245 }
1246 Ok(())
1247 }
1248
1249 fn oid_bytes_at(&self, idx: usize) -> &'a [u8] {
1250 let hash_len = self.format.raw_len();
1251 match &self.tables {
1252 PackIndexViewTables::V1 { entry_table } => {
1253 let entry_table = self.slice(entry_table.clone());
1254 let entry_len = hash_len + 4;
1255 let start = idx * entry_len + 4;
1256 &entry_table[start..start + hash_len]
1257 }
1258 PackIndexViewTables::V2 { oid_table, .. } => {
1259 let oid_table = self.slice(oid_table.clone());
1260 let start = idx * hash_len;
1261 &oid_table[start..start + hash_len]
1262 }
1263 }
1264 }
1265
1266 fn lookup_at(&self, idx: usize) -> Option<PackIndexLookup> {
1267 if idx >= self.count {
1268 return None;
1269 }
1270 let hash_len = self.format.raw_len();
1271 match &self.tables {
1272 PackIndexViewTables::V1 { entry_table } => {
1273 let entry_table = self.slice(entry_table.clone());
1274 let entry_len = hash_len + 4;
1275 let start = idx * entry_len;
1276 Some(PackIndexLookup {
1277 crc32: 0,
1278 offset: u64::from(u32_be(&entry_table[start..start + 4])),
1279 })
1280 }
1281 PackIndexViewTables::V2 {
1282 crc_table,
1283 small_offset_table,
1284 large_offset_table,
1285 ..
1286 } => {
1287 let crc_table = self.slice(crc_table.clone());
1288 let small_offset_table = self.slice(small_offset_table.clone());
1289 let large_offset_table = self.slice(large_offset_table.clone());
1290 let crc_start = idx * 4;
1291 let raw_offset = u32_be(&small_offset_table[crc_start..crc_start + 4]);
1292 Some(PackIndexLookup {
1293 crc32: u32_be(&crc_table[crc_start..crc_start + 4]),
1294 offset: pack_index_v2_offset(raw_offset, large_offset_table).ok()?,
1295 })
1296 }
1297 }
1298 }
1299
1300 fn slice(&self, range: Range<usize>) -> &'a [u8] {
1301 &self.bytes[range]
1302 }
1303}
1304
1305impl PackIndexViewData {
1306 pub fn parse(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1307 Self::parse_source(Arc::new(SharedIndexBytes(bytes)), format)
1308 }
1309
1310 pub fn parse_without_checksum(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1314 Self::parse_source_without_checksum(Arc::new(SharedIndexBytes(bytes)), format)
1315 }
1316
1317 pub fn parse_trusted_without_checksum(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1320 Self::parse_trusted_source_without_checksum(Arc::new(SharedIndexBytes(bytes)), format)
1321 }
1322
1323 pub fn parse_source(bytes: Arc<dyn PackIndexByteSource>, format: ObjectFormat) -> Result<Self> {
1324 Self::parse_impl(bytes, format, true, true)
1325 }
1326
1327 pub fn parse_source_without_checksum(
1328 bytes: Arc<dyn PackIndexByteSource>,
1329 format: ObjectFormat,
1330 ) -> Result<Self> {
1331 Self::parse_impl(bytes, format, false, true)
1332 }
1333
1334 pub fn parse_trusted_source_without_checksum(
1335 bytes: Arc<dyn PackIndexByteSource>,
1336 format: ObjectFormat,
1337 ) -> Result<Self> {
1338 Self::parse_impl(bytes, format, false, false)
1339 }
1340
1341 pub fn count(&self) -> usize {
1342 self.count
1343 }
1344
1345 pub fn fanout(&self) -> &[u32; 256] {
1346 &self.fanout
1347 }
1348
1349 pub fn find(&self, oid: &ObjectId) -> Option<PackIndexLookup> {
1350 self.as_view().find(oid)
1351 }
1352
1353 pub fn as_view(&self) -> PackIndexView<'_> {
1354 PackIndexView {
1355 version: self.version,
1356 count: self.count,
1357 fanout: self.fanout,
1358 pack_checksum: self.pack_checksum,
1359 index_checksum: self.index_checksum,
1360 bytes: self.bytes.as_bytes(),
1361 format: self.format,
1362 tables: self.tables.clone(),
1363 }
1364 }
1365
1366 fn parse_impl(
1367 bytes: Arc<dyn PackIndexByteSource>,
1368 format: ObjectFormat,
1369 verify_checksum: bool,
1370 validate_entries: bool,
1371 ) -> Result<Self> {
1372 let (version, count, fanout, pack_checksum, index_checksum, tables) = {
1373 let view = PackIndexView::parse_impl(
1374 bytes.as_bytes(),
1375 format,
1376 verify_checksum,
1377 validate_entries,
1378 )?;
1379 (
1380 view.version,
1381 view.count,
1382 view.fanout,
1383 view.pack_checksum,
1384 view.index_checksum,
1385 view.tables,
1386 )
1387 };
1388 Ok(Self {
1389 version,
1390 count,
1391 fanout,
1392 pack_checksum,
1393 index_checksum,
1394 bytes,
1395 format,
1396 tables,
1397 })
1398 }
1399}
1400
1401impl PackIndex {
1402 pub fn write_v2_for_pack_sha1(pack_bytes: &[u8]) -> Result<PackIndexBuild> {
1403 Self::write_v2_for_pack(pack_bytes, ObjectFormat::Sha1)
1404 }
1405
1406 pub fn write_v2_for_pack(pack_bytes: &[u8], format: ObjectFormat) -> Result<PackIndexBuild> {
1407 let trailer_len = format.raw_len();
1408 if pack_bytes.len() < 12 + trailer_len {
1409 return Err(GitError::InvalidFormat("pack file too short".into()));
1410 }
1411 let trailer_offset = pack_bytes.len() - trailer_len;
1412 let pack_checksum = sley_core::digest_bytes(format, &pack_bytes[..trailer_offset])?;
1413 let expected = ObjectId::from_raw(format, &pack_bytes[trailer_offset..])?;
1414 if pack_checksum != expected {
1415 return Err(GitError::InvalidFormat(format!(
1416 "pack checksum mismatch: expected {expected}, got {pack_checksum}"
1417 )));
1418 }
1419
1420 if &pack_bytes[..4] != b"PACK" {
1421 return Err(GitError::InvalidFormat("missing PACK signature".into()));
1422 }
1423 let version = u32_be(&pack_bytes[4..8]);
1424 if version != 2 && version != 3 {
1425 return Err(GitError::Unsupported(format!("pack version {version}")));
1426 }
1427 let count = u32_be(&pack_bytes[8..12]) as usize;
1428 let mut offset = 12usize;
1429 let mut parsed_entries = Vec::with_capacity(count);
1430 let mut raw_entries = Vec::with_capacity(count);
1431 for _ in 0..count {
1432 let entry_offset = offset;
1433 let header = parse_entry_header(pack_bytes, &mut offset)?;
1434 let base = match header.kind {
1435 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
1436 pack_bytes,
1437 &mut offset,
1438 entry_offset as u64,
1439 )?)),
1440 PackObjectKind::RefDelta => {
1441 let hash_len = format.raw_len();
1442 if offset + hash_len > trailer_offset {
1443 return Err(GitError::InvalidFormat(
1444 "truncated ref-delta base object id".into(),
1445 ));
1446 }
1447 let oid = ObjectId::from_raw(format, &pack_bytes[offset..offset + hash_len])?;
1448 offset += hash_len;
1449 Some(DeltaBase::Ref(oid))
1450 }
1451 _ => None,
1452 };
1453 let mut body = Vec::new();
1454 let consumed = inflate_into(
1455 &pack_bytes[offset..trailer_offset],
1456 &mut body,
1457 header.size.min(usize::MAX as u64) as usize,
1458 )?;
1459 if body.len() as u64 != header.size {
1460 return Err(GitError::InvalidObject(format!(
1461 "pack object declared {} bytes, decoded {}",
1462 header.size,
1463 body.len()
1464 )));
1465 }
1466 if consumed == 0 {
1467 return Err(GitError::InvalidFormat(
1468 "empty compressed pack entry".into(),
1469 ));
1470 }
1471 offset = offset
1472 .checked_add(consumed)
1473 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
1474 if offset > trailer_offset {
1475 return Err(GitError::InvalidFormat(
1476 "pack entry extends past checksum".into(),
1477 ));
1478 }
1479 raw_entries.push((
1480 entry_offset as u64,
1481 crc32fast::hash(&pack_bytes[entry_offset..offset]),
1482 ));
1483 if let Some(base) = base {
1484 parsed_entries.push(ParsedPackEntry::Delta {
1485 base,
1486 compressed_size: consumed as u64,
1487 delta_size: header.size,
1488 offset: entry_offset as u64,
1489 delta: body,
1490 });
1491 } else {
1492 let object_type = match header.kind {
1493 PackObjectKind::Commit => ObjectType::Commit,
1494 PackObjectKind::Tree => ObjectType::Tree,
1495 PackObjectKind::Blob => ObjectType::Blob,
1496 PackObjectKind::Tag => ObjectType::Tag,
1497 PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
1498 };
1499 let object = EncodedObject::new(object_type, body);
1500 let oid = object.object_id(format)?;
1501 parsed_entries.push(ParsedPackEntry::Resolved(PackObject {
1502 entry: PackEntry {
1503 oid,
1504 compressed_size: consumed as u64,
1505 uncompressed_size: header.size,
1506 offset: entry_offset as u64,
1507 },
1508 object,
1509 }));
1510 }
1511 }
1512 if offset != trailer_offset {
1513 return Err(GitError::InvalidFormat(format!(
1514 "pack has {} trailing bytes before checksum",
1515 trailer_offset - offset
1516 )));
1517 }
1518
1519 let resolved = resolve_pack_entries(parsed_entries, format, &mut |_| Ok(None))?;
1520 let entries = resolved
1521 .iter()
1522 .zip(raw_entries)
1523 .map(|(object, (offset, crc32))| PackIndexEntry {
1524 oid: object.entry.oid,
1525 crc32,
1526 offset,
1527 })
1528 .collect::<Vec<_>>();
1529 let index = PackIndex::write_v2(format, &entries, &pack_checksum)?;
1530 Ok(PackIndexBuild {
1531 index,
1532 pack_checksum,
1533 entries,
1534 })
1535 }
1536
1537 pub fn parse_v2_sha1(bytes: &[u8]) -> Result<Self> {
1538 Self::parse(bytes, ObjectFormat::Sha1)
1539 }
1540
1541 pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
1542 let hash_len = format.raw_len();
1543 if bytes.len() < 4 {
1544 return Err(GitError::InvalidFormat("pack index too short".into()));
1545 }
1546 if bytes[..4] != [0xff, b't', b'O', b'c'] {
1547 return Self::parse_v1(bytes, format);
1548 }
1549 if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
1550 return Err(GitError::InvalidFormat("pack index too short".into()));
1551 }
1552 let version = u32_be(&bytes[4..8]);
1553 if version != 2 {
1554 return Err(GitError::Unsupported(format!(
1555 "pack index version {version}"
1556 )));
1557 }
1558 let index_checksum_offset = bytes.len() - hash_len;
1559 let actual_index_checksum =
1560 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1561 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1562 if actual_index_checksum != index_checksum {
1563 return Err(GitError::InvalidFormat(format!(
1564 "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1565 )));
1566 }
1567
1568 let mut offset = 8usize;
1569 let mut fanout = [0u32; 256];
1570 let mut previous = 0u32;
1571 for slot in &mut fanout {
1572 *slot = u32_be(&bytes[offset..offset + 4]);
1573 if *slot < previous {
1574 return Err(GitError::InvalidFormat(
1575 "pack index fanout is not monotonic".into(),
1576 ));
1577 }
1578 previous = *slot;
1579 offset += 4;
1580 }
1581 let count = fanout[255] as usize;
1582 let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
1583 offset = oid_table.end;
1584 let crc_table = checked_range(offset, count, 4, bytes.len())?;
1585 offset = crc_table.end;
1586 let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
1587 offset = small_offset_table.end;
1588
1589 let large_offset_count = (0..count)
1590 .filter(|idx| {
1591 let start = small_offset_table.start + idx * 4;
1592 u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
1593 })
1594 .count();
1595 let large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
1596 offset = large_offset_table.end;
1597
1598 let expected_trailer_offset = bytes.len() - hash_len * 2;
1599 if offset != expected_trailer_offset {
1600 return Err(GitError::InvalidFormat(format!(
1601 "pack index has {} unexpected bytes before trailer",
1602 expected_trailer_offset.saturating_sub(offset)
1603 )));
1604 }
1605 let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1606
1607 let mut entries = Vec::with_capacity(count);
1608 for idx in 0..count {
1609 let oid_start = oid_table.start + idx * hash_len;
1610 let crc_start = crc_table.start + idx * 4;
1611 let offset_start = small_offset_table.start + idx * 4;
1612 let oid_bytes = &bytes[oid_start..oid_start + hash_len];
1613 if idx > 0 && oid_bytes <= &bytes[oid_start - hash_len..oid_start] {
1617 return Err(GitError::InvalidFormat(
1618 "pack index object ids are not strictly ascending".into(),
1619 ));
1620 }
1621 let expected_min = if oid_bytes[0] == 0 {
1622 0
1623 } else {
1624 fanout[usize::from(oid_bytes[0] - 1)]
1625 };
1626 if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
1627 return Err(GitError::InvalidFormat(
1628 "pack index object id is outside its fanout bucket".into(),
1629 ));
1630 }
1631 let raw_offset = u32_be(&bytes[offset_start..offset_start + 4]);
1632 let offset = if raw_offset & 0x8000_0000 == 0 {
1633 u64::from(raw_offset)
1634 } else {
1635 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
1636 let large_start = large_offset_table.start + large_idx * 8;
1637 if large_idx >= large_offset_count {
1638 return Err(GitError::InvalidFormat(
1639 "pack index large offset points past table".into(),
1640 ));
1641 }
1642 u64_be(&bytes[large_start..large_start + 8])
1643 };
1644 entries.push(PackIndexEntry {
1645 oid: ObjectId::from_raw(format, oid_bytes)?,
1646 crc32: u32_be(&bytes[crc_start..crc_start + 4]),
1647 offset,
1648 });
1649 }
1650 Ok(Self {
1651 version,
1652 fanout,
1653 entries,
1654 pack_checksum,
1655 index_checksum,
1656 })
1657 }
1658
1659 fn parse_v1(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
1660 let hash_len = format.raw_len();
1661 if bytes.len() < 256 * 4 + 2 * hash_len {
1662 return Err(GitError::InvalidFormat("pack index too short".into()));
1663 }
1664 let index_checksum_offset = bytes.len() - hash_len;
1665 let actual_index_checksum =
1666 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1667 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1668 if actual_index_checksum != index_checksum {
1669 return Err(GitError::InvalidFormat(format!(
1670 "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1671 )));
1672 }
1673
1674 let mut offset = 0usize;
1675 let mut fanout = [0u32; 256];
1676 let mut previous = 0u32;
1677 for slot in &mut fanout {
1678 *slot = u32_be(&bytes[offset..offset + 4]);
1679 if *slot < previous {
1680 return Err(GitError::InvalidFormat(
1681 "pack index fanout is not monotonic".into(),
1682 ));
1683 }
1684 previous = *slot;
1685 offset += 4;
1686 }
1687 let count = fanout[255] as usize;
1688 let entry_len = hash_len
1689 .checked_add(4)
1690 .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1691 let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
1692 offset = entry_table.end;
1693 let expected_trailer_offset = bytes.len() - hash_len * 2;
1694 if offset != expected_trailer_offset {
1695 return Err(GitError::InvalidFormat(format!(
1696 "pack index has {} unexpected bytes before trailer",
1697 expected_trailer_offset.saturating_sub(offset)
1698 )));
1699 }
1700 let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1701
1702 let mut entries = Vec::with_capacity(count);
1703 let mut previous_oid: Option<ObjectId> = None;
1704 for idx in 0..count {
1705 let start = entry_table.start + idx * entry_len;
1706 let oid = ObjectId::from_raw(format, &bytes[start + 4..start + entry_len])?;
1707 if let Some(previous) = &previous_oid
1708 && previous.as_bytes() >= oid.as_bytes()
1709 {
1710 return Err(GitError::InvalidFormat(
1711 "pack index object ids are not strictly sorted".into(),
1712 ));
1713 }
1714 previous_oid = Some(oid);
1715 entries.push(PackIndexEntry {
1716 oid,
1717 crc32: 0,
1718 offset: u64::from(u32_be(&bytes[start..start + 4])),
1719 });
1720 }
1721 Ok(Self {
1722 version: 1,
1723 fanout,
1724 entries,
1725 pack_checksum,
1726 index_checksum,
1727 })
1728 }
1729
1730 pub fn find(&self, oid: &ObjectId) -> Option<&PackIndexEntry> {
1731 self.entries
1732 .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
1733 .ok()
1734 .map(|idx| &self.entries[idx])
1735 }
1736
1737 pub fn write_v2_sha1(entries: &[PackIndexEntry], pack_checksum: &ObjectId) -> Result<Vec<u8>> {
1738 Self::write_v2(ObjectFormat::Sha1, entries, pack_checksum)
1739 }
1740
1741 pub fn write_v2(
1742 format: ObjectFormat,
1743 entries: &[PackIndexEntry],
1744 pack_checksum: &ObjectId,
1745 ) -> Result<Vec<u8>> {
1746 if pack_checksum.format() != format {
1747 return Err(GitError::InvalidObjectId(
1748 "pack checksum format does not match index format".into(),
1749 ));
1750 }
1751 let mut entries = entries.iter().collect::<Vec<_>>();
1752 entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
1753 for pair in entries.windows(2) {
1754 if pair[0].oid.as_bytes() == pair[1].oid.as_bytes() {
1755 return Err(GitError::InvalidFormat(format!(
1756 "pack index contains duplicate object id {}",
1757 pair[0].oid
1758 )));
1759 }
1760 }
1761 let mut fanout = [0u32; 256];
1762 for entry in &entries {
1763 if entry.oid.format() != format {
1764 return Err(GitError::InvalidObjectId(
1765 "pack index entry format does not match index format".into(),
1766 ));
1767 }
1768 let first = entry.oid.as_bytes()[0] as usize;
1769 fanout[first] = fanout[first]
1770 .checked_add(1)
1771 .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1772 }
1773 let mut running = 0u32;
1774 for slot in &mut fanout {
1775 running = running
1776 .checked_add(*slot)
1777 .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1778 *slot = running;
1779 }
1780
1781 let mut index = Vec::new();
1782 index.extend_from_slice(&[0xff, b't', b'O', b'c']);
1783 index.extend_from_slice(&2u32.to_be_bytes());
1784 for count in fanout {
1785 index.extend_from_slice(&count.to_be_bytes());
1786 }
1787 for entry in &entries {
1788 index.extend_from_slice(entry.oid.as_bytes());
1789 }
1790 for entry in &entries {
1791 index.extend_from_slice(&entry.crc32.to_be_bytes());
1792 }
1793
1794 let mut large_offsets = Vec::new();
1795 for entry in &entries {
1796 if entry.offset < 0x8000_0000 {
1797 index.extend_from_slice(&(entry.offset as u32).to_be_bytes());
1798 } else {
1799 if large_offsets.len() > 0x7fff_ffff {
1800 return Err(GitError::InvalidFormat(
1801 "too many large pack offsets".into(),
1802 ));
1803 }
1804 let large_idx = large_offsets.len() as u32;
1805 index.extend_from_slice(&(0x8000_0000 | large_idx).to_be_bytes());
1806 large_offsets.push(entry.offset);
1807 }
1808 }
1809 for offset in large_offsets {
1810 index.extend_from_slice(&offset.to_be_bytes());
1811 }
1812 index.extend_from_slice(pack_checksum.as_bytes());
1813 let index_checksum = sley_core::digest_bytes(format, &index)?;
1814 index.extend_from_slice(index_checksum.as_bytes());
1815 Ok(index)
1816 }
1817
1818 pub fn write_v1(
1824 format: ObjectFormat,
1825 entries: &[PackIndexEntry],
1826 pack_checksum: &ObjectId,
1827 ) -> Result<Vec<u8>> {
1828 if pack_checksum.format() != format {
1829 return Err(GitError::InvalidObjectId(
1830 "pack checksum format does not match index format".into(),
1831 ));
1832 }
1833 let mut entries = entries.iter().collect::<Vec<_>>();
1834 entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
1835 for pair in entries.windows(2) {
1836 if pair[0].oid.as_bytes() == pair[1].oid.as_bytes() {
1837 return Err(GitError::InvalidFormat(format!(
1838 "pack index contains duplicate object id {}",
1839 pair[0].oid
1840 )));
1841 }
1842 }
1843 let mut fanout = [0u32; 256];
1844 for entry in &entries {
1845 if entry.oid.format() != format {
1846 return Err(GitError::InvalidObjectId(
1847 "pack index entry format does not match index format".into(),
1848 ));
1849 }
1850 if entry.offset > 0xffff_ffff {
1851 return Err(GitError::InvalidFormat(
1852 "pack offset too large for a version-1 index".into(),
1853 ));
1854 }
1855 let first = entry.oid.as_bytes()[0] as usize;
1856 fanout[first] = fanout[first]
1857 .checked_add(1)
1858 .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1859 }
1860 let mut running = 0u32;
1861 for slot in &mut fanout {
1862 running = running
1863 .checked_add(*slot)
1864 .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1865 *slot = running;
1866 }
1867
1868 let mut index = Vec::new();
1869 for count in fanout {
1870 index.extend_from_slice(&count.to_be_bytes());
1871 }
1872 for entry in &entries {
1873 index.extend_from_slice(&(entry.offset as u32).to_be_bytes());
1874 index.extend_from_slice(entry.oid.as_bytes());
1875 }
1876 index.extend_from_slice(pack_checksum.as_bytes());
1877 let index_checksum = sley_core::digest_bytes(format, &index)?;
1878 index.extend_from_slice(index_checksum.as_bytes());
1879 Ok(index)
1880 }
1881}
1882
1883pub fn pack_order_index_positions(entries: &[PackIndexEntry]) -> Vec<u32> {
1888 let mut oid_sorted: Vec<usize> = (0..entries.len()).collect();
1889 oid_sorted.sort_by(|&a, &b| entries[a].oid.as_bytes().cmp(entries[b].oid.as_bytes()));
1890 let mut index_position = vec![0u32; entries.len()];
1891 for (position, &entry) in oid_sorted.iter().enumerate() {
1892 index_position[entry] = position as u32;
1893 }
1894 let mut by_offset: Vec<usize> = (0..entries.len()).collect();
1895 by_offset.sort_by_key(|&entry| entries[entry].offset);
1896 by_offset
1897 .into_iter()
1898 .map(|entry| index_position[entry])
1899 .collect()
1900}
1901
1902impl PackReverseIndex {
1903 pub fn write(
1904 format: ObjectFormat,
1905 positions: &[u32],
1906 pack_checksum: &ObjectId,
1907 ) -> Result<Vec<u8>> {
1908 if pack_checksum.format() != format {
1909 return Err(GitError::InvalidObjectId(
1910 "pack checksum format does not match reverse index format".into(),
1911 ));
1912 }
1913 validate_position_permutation(positions)?;
1914
1915 let mut out = Vec::new();
1916 out.extend_from_slice(b"RIDX");
1917 out.extend_from_slice(&1u32.to_be_bytes());
1918 out.extend_from_slice(&hash_function_id(format).to_be_bytes());
1919 for position in positions {
1920 out.extend_from_slice(&position.to_be_bytes());
1921 }
1922 out.extend_from_slice(pack_checksum.as_bytes());
1923 let checksum = sley_core::digest_bytes(format, &out)?;
1924 out.extend_from_slice(checksum.as_bytes());
1925 Ok(out)
1926 }
1927
1928 pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
1929 let hash_len = format.raw_len();
1930 let table_len = object_count
1931 .checked_mul(4)
1932 .ok_or_else(|| GitError::InvalidFormat("reverse index table overflow".into()))?;
1933 let min_len = 12usize
1934 .checked_add(table_len)
1935 .and_then(|len| len.checked_add(hash_len * 2))
1936 .ok_or_else(|| GitError::InvalidFormat("reverse index length overflow".into()))?;
1937 if bytes.len() < min_len {
1938 return Err(GitError::InvalidFormat("reverse index too short".into()));
1939 }
1940 if bytes.len() != min_len {
1941 return Err(GitError::InvalidFormat(format!(
1942 "reverse index has {} trailing bytes",
1943 bytes.len() - min_len
1944 )));
1945 }
1946 if &bytes[..4] != b"RIDX" {
1947 return Err(GitError::InvalidFormat(
1948 "missing reverse index signature".into(),
1949 ));
1950 }
1951 let version = u32_be(&bytes[4..8]);
1952 if version != 1 {
1953 return Err(GitError::Unsupported(format!(
1954 "reverse index version {version}"
1955 )));
1956 }
1957 let hash_id = u32_be(&bytes[8..12]);
1958 if hash_id != hash_function_id(format) {
1959 return Err(GitError::InvalidFormat(format!(
1960 "reverse index hash id {hash_id} does not match {}",
1961 format.name()
1962 )));
1963 }
1964
1965 let index_checksum_offset = bytes.len() - hash_len;
1966 let actual_index_checksum =
1967 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1968 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1969 if actual_index_checksum != index_checksum {
1970 return Err(GitError::InvalidFormat(format!(
1971 "reverse index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1972 )));
1973 }
1974
1975 let pack_checksum_offset = index_checksum_offset - hash_len;
1976 let pack_checksum =
1977 ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
1978 let mut positions = Vec::with_capacity(object_count);
1979 let mut offset = 12usize;
1980 for _ in 0..object_count {
1981 let position = u32_be(&bytes[offset..offset + 4]);
1982 positions.push(position);
1983 offset += 4;
1984 }
1985 validate_position_permutation(&positions)?;
1986
1987 Ok(Self {
1988 version,
1989 format,
1990 positions,
1991 pack_checksum,
1992 index_checksum,
1993 })
1994 }
1995}
1996
1997impl PackMtimes {
1998 pub fn write(
1999 format: ObjectFormat,
2000 mtimes: &[u32],
2001 pack_checksum: &ObjectId,
2002 ) -> Result<Vec<u8>> {
2003 if pack_checksum.format() != format {
2004 return Err(GitError::InvalidObjectId(
2005 "pack checksum format does not match mtimes format".into(),
2006 ));
2007 }
2008
2009 let mut out = Vec::new();
2010 out.extend_from_slice(b"MTME");
2011 out.extend_from_slice(&1u32.to_be_bytes());
2012 out.extend_from_slice(&hash_function_id(format).to_be_bytes());
2013 for mtime in mtimes {
2014 out.extend_from_slice(&mtime.to_be_bytes());
2015 }
2016 out.extend_from_slice(pack_checksum.as_bytes());
2017 let checksum = sley_core::digest_bytes(format, &out)?;
2018 out.extend_from_slice(checksum.as_bytes());
2019 Ok(out)
2020 }
2021
2022 pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
2023 let hash_len = format.raw_len();
2024 let table_len = object_count
2025 .checked_mul(4)
2026 .ok_or_else(|| GitError::InvalidFormat("mtimes table overflow".into()))?;
2027 let expected_len = 12usize
2028 .checked_add(table_len)
2029 .and_then(|len| len.checked_add(hash_len * 2))
2030 .ok_or_else(|| GitError::InvalidFormat("mtimes length overflow".into()))?;
2031 if bytes.len() < expected_len {
2032 return Err(GitError::InvalidFormat("mtimes file too short".into()));
2033 }
2034 if bytes.len() != expected_len {
2035 return Err(GitError::InvalidFormat(format!(
2036 "mtimes file has {} trailing bytes",
2037 bytes.len() - expected_len
2038 )));
2039 }
2040 if &bytes[..4] != b"MTME" {
2041 return Err(GitError::InvalidFormat("missing mtimes signature".into()));
2042 }
2043 let version = u32_be(&bytes[4..8]);
2044 if version != 1 {
2045 return Err(GitError::Unsupported(format!("mtimes version {version}")));
2046 }
2047 let hash_id = u32_be(&bytes[8..12]);
2048 if hash_id != hash_function_id(format) {
2049 return Err(GitError::InvalidFormat(format!(
2050 "mtimes hash id {hash_id} does not match {}",
2051 format.name()
2052 )));
2053 }
2054
2055 let index_checksum_offset = bytes.len() - hash_len;
2056 let actual_index_checksum =
2057 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
2058 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
2059 if actual_index_checksum != index_checksum {
2060 return Err(GitError::InvalidFormat(format!(
2061 "mtimes checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2062 )));
2063 }
2064
2065 let pack_checksum_offset = index_checksum_offset - hash_len;
2066 let pack_checksum =
2067 ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
2068 let mut mtimes = Vec::with_capacity(object_count);
2069 let mut offset = 12usize;
2070 for _ in 0..object_count {
2071 mtimes.push(u32_be(&bytes[offset..offset + 4]));
2072 offset += 4;
2073 }
2074
2075 Ok(Self {
2076 version,
2077 format,
2078 mtimes,
2079 pack_checksum,
2080 index_checksum,
2081 })
2082 }
2083}
2084
2085impl PackBitmapIndex {
2086 pub const OPTION_FULL_DAG: u16 = 0x0001;
2087 pub const OPTION_HASH_CACHE: u16 = 0x0004;
2088
2089 pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
2090 let hash_len = format.raw_len();
2091 let min_len = 12usize
2092 .checked_add(hash_len * 2)
2093 .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
2094 if bytes.len() < min_len {
2095 return Err(GitError::InvalidFormat("bitmap index too short".into()));
2096 }
2097 if &bytes[..4] != b"BITM" {
2098 return Err(GitError::InvalidFormat(
2099 "missing bitmap index signature".into(),
2100 ));
2101 }
2102 let version = u16_be(&bytes[4..6]);
2103 if version != 1 {
2104 return Err(GitError::Unsupported(format!(
2105 "bitmap index version {version}"
2106 )));
2107 }
2108 let options = u16_be(&bytes[6..8]);
2109 let known_options = Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE;
2110 if options & !known_options != 0 {
2111 return Err(GitError::Unsupported(format!(
2112 "bitmap index options {:#06x}",
2113 options & !known_options
2114 )));
2115 }
2116 let entry_count = u32_be(&bytes[8..12]) as usize;
2117 let checksum_offset = bytes.len() - hash_len;
2118 let actual_index_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
2119 let index_checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
2120 if actual_index_checksum != index_checksum {
2121 return Err(GitError::InvalidFormat(format!(
2122 "bitmap index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2123 )));
2124 }
2125
2126 let pack_checksum_end = 12usize
2127 .checked_add(hash_len)
2128 .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
2129 let pack_checksum = ObjectId::from_raw(format, &bytes[12..pack_checksum_end])?;
2130 let mut offset = pack_checksum_end;
2131 let commits = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2132 let trees = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2133 let blobs = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2134 let tags = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2135
2136 let mut entries = Vec::with_capacity(entry_count);
2137 for idx in 0..entry_count {
2138 if checksum_offset.saturating_sub(offset) < 6 {
2139 return Err(GitError::InvalidFormat(
2140 "truncated bitmap index entry".into(),
2141 ));
2142 }
2143 let object_position = u32_be(&bytes[offset..offset + 4]);
2144 offset += 4;
2145 if object_position as usize >= object_count {
2146 return Err(GitError::InvalidFormat(
2147 "bitmap index entry points past object table".into(),
2148 ));
2149 }
2150 let xor_offset = bytes[offset];
2151 offset += 1;
2152 if xor_offset as usize > idx || xor_offset > 160 {
2153 return Err(GitError::InvalidFormat(
2154 "bitmap index entry has invalid XOR offset".into(),
2155 ));
2156 }
2157 let flags = bytes[offset];
2158 offset += 1;
2159 let bitmap = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2160 entries.push(PackBitmapEntry {
2161 object_position,
2162 xor_offset,
2163 flags,
2164 bitmap,
2165 });
2166 }
2167
2168 let name_hash_cache = if options & Self::OPTION_HASH_CACHE != 0 {
2169 let cache_len = object_count
2170 .checked_mul(4)
2171 .ok_or_else(|| GitError::InvalidFormat("bitmap hash cache overflow".into()))?;
2172 if checksum_offset.saturating_sub(offset) < cache_len {
2173 return Err(GitError::InvalidFormat(
2174 "truncated bitmap hash cache".into(),
2175 ));
2176 }
2177 let mut cache = Vec::with_capacity(object_count);
2178 for _ in 0..object_count {
2179 cache.push(u32_be(&bytes[offset..offset + 4]));
2180 offset += 4;
2181 }
2182 Some(cache)
2183 } else {
2184 None
2185 };
2186
2187 if offset != checksum_offset {
2188 return Err(GitError::InvalidFormat(format!(
2189 "bitmap index has {} trailing bytes",
2190 checksum_offset - offset
2191 )));
2192 }
2193
2194 Ok(Self {
2195 version,
2196 format,
2197 options,
2198 pack_checksum,
2199 index_checksum,
2200 type_bitmaps: PackBitmapTypeBitmaps {
2201 commits,
2202 trees,
2203 blobs,
2204 tags,
2205 },
2206 entries,
2207 name_hash_cache,
2208 })
2209 }
2210
2211 pub fn entry_for_index_position(&self, position: u32) -> Option<&PackBitmapEntry> {
2214 self.entries
2215 .iter()
2216 .find(|entry| entry.object_position == position)
2217 }
2218}
2219
2220fn parse_bitmap_ewah(
2221 bytes: &[u8],
2222 offset: &mut usize,
2223 checksum_offset: usize,
2224 _object_count: usize,
2225) -> Result<EwahBitmap> {
2226 if checksum_offset.saturating_sub(*offset) < 12 {
2227 return Err(GitError::InvalidFormat("truncated EWAH bitmap".into()));
2228 }
2229 let bit_size = u32_be(&bytes[*offset..*offset + 4]);
2230 *offset += 4;
2231 let word_count = u32_be(&bytes[*offset..*offset + 4]) as usize;
2232 *offset += 4;
2233 let words_len = word_count
2234 .checked_mul(8)
2235 .ok_or_else(|| GitError::InvalidFormat("EWAH word table overflow".into()))?;
2236 if checksum_offset.saturating_sub(*offset) < words_len + 4 {
2237 return Err(GitError::InvalidFormat("truncated EWAH word table".into()));
2238 }
2239 let mut words = Vec::with_capacity(word_count);
2240 for _ in 0..word_count {
2241 words.push(u64_be(&bytes[*offset..*offset + 8]));
2242 *offset += 8;
2243 }
2244 let rlw_position = u32_be(&bytes[*offset..*offset + 4]);
2245 *offset += 4;
2246 validate_ewah_words(bit_size, &words, rlw_position)?;
2247 Ok(EwahBitmap {
2248 bit_size,
2249 words,
2250 rlw_position,
2251 })
2252}
2253
2254fn validate_ewah_words(bit_size: u32, words: &[u64], rlw_position: u32) -> Result<()> {
2255 if words.is_empty() {
2256 if rlw_position != 0 || bit_size != 0 {
2257 return Err(GitError::InvalidFormat(
2258 "EWAH bitmap has invalid empty RLW".into(),
2259 ));
2260 }
2261 return Ok(());
2262 }
2263 if rlw_position as usize >= words.len() {
2264 return Err(GitError::InvalidFormat(
2265 "EWAH RLW position points past word table".into(),
2266 ));
2267 }
2268 let mut word_idx = 0usize;
2269 let mut decoded_words = 0u64;
2270 while word_idx < words.len() {
2271 let rlw = words[word_idx];
2272 let run_words = (rlw >> 1) & 0xffff_ffff;
2273 let literal_words = (rlw >> 33) as usize;
2274 word_idx += 1;
2275 word_idx = word_idx
2276 .checked_add(literal_words)
2277 .ok_or_else(|| GitError::InvalidFormat("EWAH literal word overflow".into()))?;
2278 if word_idx > words.len() {
2279 return Err(GitError::InvalidFormat(
2280 "EWAH literal words extend past word table".into(),
2281 ));
2282 }
2283 decoded_words = decoded_words
2284 .checked_add(run_words)
2285 .and_then(|value| value.checked_add(literal_words as u64))
2286 .ok_or_else(|| GitError::InvalidFormat("EWAH decoded size overflow".into()))?;
2287 }
2288 let decoded_bits = decoded_words
2289 .checked_mul(64)
2290 .ok_or_else(|| GitError::InvalidFormat("EWAH decoded bit size overflow".into()))?;
2291 if decoded_bits < u64::from(bit_size) {
2292 return Err(GitError::InvalidFormat(
2293 "EWAH bitmap decodes fewer bits than declared".into(),
2294 ));
2295 }
2296 Ok(())
2297}
2298
impl MultiPackIndex {
    /// Serializes a multi-pack-index without a reverse-index (`RIDX`) chunk.
    ///
    /// Equivalent to [`Self::write_with_reverse_index`] with
    /// `preferred_pack` set to `None`.
    pub fn write(
        format: ObjectFormat,
        version: u8,
        pack_names: &[String],
        objects: &[MultiPackIndexEntry],
    ) -> Result<Vec<u8>> {
        Self::write_with_reverse_index(format, version, pack_names, objects, None)
    }

    /// Serializes a multi-pack-index, optionally with a `RIDX` chunk.
    ///
    /// Always emits the `PNAM`, `OIDF`, `OIDL` and `OOFF` chunks. An `LOFF`
    /// chunk is added when at least one offset had to be spilled into the
    /// large-offset table, and a `RIDX` chunk is added when `preferred_pack`
    /// is `Some`. `objects` may be in any order; a sorted copy of references
    /// is used for output.
    ///
    /// # Errors
    ///
    /// - `GitError::InvalidFormat` if `preferred_pack` is out of range, there
    ///   are more than `u32::MAX` packs or objects, a pack name is rejected by
    ///   `validate_midx_pack_names`, version 1 pack names are not sorted, two
    ///   objects share an id, or an object references a pack past the end of
    ///   `pack_names`.
    /// - `GitError::Unsupported` if `version` is neither 1 nor 2.
    /// - `GitError::InvalidObjectId` if an object id's format differs from
    ///   `format`.
    pub fn write_with_reverse_index(
        format: ObjectFormat,
        version: u8,
        pack_names: &[String],
        objects: &[MultiPackIndexEntry],
        preferred_pack: Option<u32>,
    ) -> Result<Vec<u8>> {
        if let Some(preferred) = preferred_pack
            && preferred as usize >= pack_names.len()
        {
            return Err(GitError::InvalidFormat(format!(
                "preferred pack {preferred} out of range for {} packs",
                pack_names.len()
            )));
        }
        if version != 1 && version != 2 {
            return Err(GitError::Unsupported(format!(
                "multi-pack-index version {version}"
            )));
        }
        // Pack and object counts are written as / indexed by u32 below.
        if pack_names.len() > u32::MAX as usize {
            return Err(GitError::InvalidFormat(
                "too many multi-pack-index packs".into(),
            ));
        }
        if objects.len() > u32::MAX as usize {
            return Err(GitError::InvalidFormat(
                "too many multi-pack-index objects".into(),
            ));
        }
        validate_midx_pack_names(pack_names)?;
        // Only version 1 requires the caller to pass names in sorted order.
        if version == 1 && pack_names.windows(2).any(|pair| pair[0] > pair[1]) {
            return Err(GitError::InvalidFormat(
                "multi-pack-index v1 pack names must be sorted".into(),
            ));
        }

        // Sort references by raw object id; after sorting, duplicates are
        // adjacent, so one pass with `previous_oid` detects them.
        let mut objects = objects.iter().collect::<Vec<_>>();
        objects.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
        let mut previous_oid: Option<&ObjectId> = None;
        for object in &objects {
            if object.oid.format() != format {
                return Err(GitError::InvalidObjectId(
                    "multi-pack-index object format does not match index format".into(),
                ));
            }
            if let Some(previous) = previous_oid
                && previous.as_bytes() == object.oid.as_bytes()
            {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index contains duplicate object ids".into(),
                ));
            }
            if object.pack_int_id as usize >= pack_names.len() {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index object points past pack table".into(),
                ));
            }
            previous_oid = Some(&object.oid);
        }

        // `large_offsets` is filled as a side effect of writing the OOFF chunk.
        let mut large_offsets = Vec::new();
        let mut chunks = vec![
            (*b"PNAM", write_midx_pack_names(pack_names)),
            (*b"OIDF", write_midx_oid_fanout(&objects)?),
            (*b"OIDL", write_midx_oid_lookup(&objects)),
            (
                *b"OOFF",
                write_midx_object_offsets(&objects, &mut large_offsets)?,
            ),
        ];
        if !large_offsets.is_empty() {
            chunks.push((*b"LOFF", large_offsets));
        }
        if let Some(preferred) = preferred_pack {
            // `pseudo` holds positions into the oid-sorted `objects` list.
            // The stable sort orders them with the preferred pack's objects
            // first, then by pack id, then by offset within the pack.
            let mut pseudo: Vec<u32> = (0..objects.len() as u32).collect();
            pseudo.sort_by_key(|&midx_pos| {
                let object = objects[midx_pos as usize];
                (
                    object.pack_int_id != preferred,
                    object.pack_int_id,
                    object.offset,
                )
            });
            let mut ridx = Vec::with_capacity(pseudo.len() * 4);
            for midx_pos in pseudo {
                ridx.extend_from_slice(&midx_pos.to_be_bytes());
            }
            chunks.push((*b"RIDX", ridx));
        }
        write_multi_pack_index_chunks(format, version, pack_names.len() as u32, &chunks)
    }

    /// Parses and fully validates a serialized multi-pack-index.
    ///
    /// Header layout as read here: bytes 0..4 are the `MIDX` signature, byte 4
    /// the version, byte 5 the hash function id, byte 6 the chunk count,
    /// byte 7 the base-MIDX count, bytes 8..12 the big-endian pack count. The
    /// chunk lookup table follows (12 bytes per entry plus a terminator), then
    /// the chunk data, then a trailing checksum of `format.raw_len()` bytes.
    ///
    /// The trailing checksum is verified against a digest of everything
    /// before it, and that happens before the chunk table is walked.
    ///
    /// # Errors
    ///
    /// - `GitError::InvalidFormat` for a short file, bad signature, hash id
    ///   mismatch, checksum mismatch, or a malformed chunk table; errors from
    ///   the per-chunk `parse_midx_*` helpers are propagated.
    /// - `GitError::Unsupported` for a version other than 1 or 2, or a
    ///   non-zero base-MIDX count.
    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
        let hash_len = format.raw_len();
        // Minimum: 12-byte header + one 12-byte lookup entry + checksum.
        if bytes.len() < 12 + 12 + hash_len {
            return Err(GitError::InvalidFormat(
                "multi-pack-index file too short".into(),
            ));
        }
        if &bytes[..4] != b"MIDX" {
            return Err(GitError::InvalidFormat(
                "missing multi-pack-index signature".into(),
            ));
        }
        let version = bytes[4];
        if version != 1 && version != 2 {
            return Err(GitError::Unsupported(format!(
                "multi-pack-index version {version}"
            )));
        }
        let hash_id = bytes[5];
        if u32::from(hash_id) != hash_function_id(format) {
            return Err(GitError::InvalidFormat(format!(
                "multi-pack-index hash id {hash_id} does not match {}",
                format.name()
            )));
        }
        let chunk_count = bytes[6] as usize;
        let base_midx_count = bytes[7];
        if base_midx_count != 0 {
            return Err(GitError::Unsupported(format!(
                "multi-pack-index base count {base_midx_count}"
            )));
        }
        let pack_count = u32_be(&bytes[8..12]);
        // The lookup table has one extra entry: the terminator.
        let lookup_len = (chunk_count + 1)
            .checked_mul(12)
            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
        let data_start = 12usize
            .checked_add(lookup_len)
            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
        let checksum_offset = bytes.len() - hash_len;
        if data_start > checksum_offset {
            return Err(GitError::InvalidFormat(
                "truncated multi-pack-index chunk lookup".into(),
            ));
        }

        let actual_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
        let checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
        if actual_checksum != checksum {
            return Err(GitError::InvalidFormat(format!(
                "multi-pack-index checksum mismatch: expected {checksum}, got {actual_checksum}"
            )));
        }

        // Read every lookup entry, including the terminator, as
        // (4-byte id, 8-byte big-endian absolute file offset).
        let mut entries = Vec::with_capacity(chunk_count + 1);
        let mut offset = 12usize;
        for _ in 0..=chunk_count {
            let id = [
                bytes[offset],
                bytes[offset + 1],
                bytes[offset + 2],
                bytes[offset + 3],
            ];
            let chunk_offset = u64_be(&bytes[offset + 4..offset + 12]);
            entries.push((id, chunk_offset));
            offset += 12;
        }
        let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
            return Err(GitError::InvalidFormat(
                "multi-pack-index chunk lookup is empty".into(),
            ));
        };
        if terminator_id != [0, 0, 0, 0] {
            return Err(GitError::InvalidFormat(
                "multi-pack-index chunk lookup missing terminator".into(),
            ));
        }
        if terminator_offset != checksum_offset as u64 {
            return Err(GitError::InvalidFormat(
                "multi-pack-index terminator does not point at checksum".into(),
            ));
        }

        // A chunk's length is the distance to the next entry's offset, so
        // walk adjacent pairs; the terminator closes the last chunk.
        let mut chunks = Vec::with_capacity(chunk_count);
        let mut previous_offset = data_start as u64;
        for pair in entries.windows(2) {
            let (id, chunk_offset) = pair[0];
            let (_next_id, next_offset) = pair[1];
            if id == [0, 0, 0, 0] {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index chunk id is zero before terminator".into(),
                ));
            }
            if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index chunk offsets are not monotonic".into(),
                ));
            }
            if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index chunk length is invalid".into(),
                ));
            }
            chunks.push(MultiPackIndexChunk {
                id,
                offset: chunk_offset,
                len: next_offset - chunk_offset,
            });
            previous_offset = chunk_offset;
        }

        let pack_names = parse_midx_pack_names(bytes, &chunks, pack_count as usize, version)?;
        let (fanout, object_count) = parse_midx_oid_fanout(bytes, &chunks)?;
        let object_ids = parse_midx_object_ids(bytes, &chunks, format, object_count, &fanout)?;
        let objects = parse_midx_object_offsets(bytes, &chunks, object_ids, pack_count)?;
        let reverse_index = parse_midx_reverse_index(bytes, &chunks, object_count)?;
        let bitmapped_packs =
            parse_midx_bitmapped_packs(bytes, &chunks, pack_count as usize, object_count)?;

        Ok(Self {
            version,
            format,
            pack_count,
            pack_names,
            object_count: object_count as u32,
            fanout,
            objects,
            reverse_index,
            bitmapped_packs,
            chunks,
            checksum,
        })
    }

    /// Binary-searches the parsed object table for `oid`, comparing raw id
    /// bytes. Returns `None` when the id is not present.
    pub fn find(&self, oid: &ObjectId) -> Option<&MultiPackIndexEntry> {
        self.objects
            .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
            .ok()
            .map(|idx| &self.objects[idx])
    }
}
2553
2554impl MultiPackIndexOidLookup {
2555 pub fn parse(bytes: Arc<dyn PackIndexByteSource>, format: ObjectFormat) -> Result<Self> {
2556 let raw = bytes.as_bytes();
2557 let hash_len = format.raw_len();
2558 if raw.len() < 12 + 12 + hash_len {
2559 return Err(GitError::InvalidFormat(
2560 "multi-pack-index file too short".into(),
2561 ));
2562 }
2563 if &raw[..4] != b"MIDX" {
2564 return Err(GitError::InvalidFormat(
2565 "missing multi-pack-index signature".into(),
2566 ));
2567 }
2568 let version = raw[4];
2569 if version != 1 && version != 2 {
2570 return Err(GitError::Unsupported(format!(
2571 "multi-pack-index version {version}"
2572 )));
2573 }
2574 let hash_id = raw[5];
2575 if u32::from(hash_id) != hash_function_id(format) {
2576 return Err(GitError::InvalidFormat(format!(
2577 "multi-pack-index hash id {hash_id} does not match {}",
2578 format.name()
2579 )));
2580 }
2581 let chunk_count = raw[6] as usize;
2582 let base_midx_count = raw[7];
2583 if base_midx_count != 0 {
2584 return Err(GitError::Unsupported(format!(
2585 "multi-pack-index base count {base_midx_count}"
2586 )));
2587 }
2588 let pack_count = u32_be(&raw[8..12]);
2589 let lookup_len = (chunk_count + 1)
2590 .checked_mul(12)
2591 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2592 let data_start = 12usize
2593 .checked_add(lookup_len)
2594 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2595 let checksum_offset = raw.len() - hash_len;
2596 if data_start > checksum_offset {
2597 return Err(GitError::InvalidFormat(
2598 "truncated multi-pack-index chunk lookup".into(),
2599 ));
2600 }
2601
2602 let mut entries = Vec::with_capacity(chunk_count + 1);
2603 let mut offset = 12usize;
2604 for _ in 0..=chunk_count {
2605 let id = [
2606 raw[offset],
2607 raw[offset + 1],
2608 raw[offset + 2],
2609 raw[offset + 3],
2610 ];
2611 let chunk_offset = u64_be(&raw[offset + 4..offset + 12]);
2612 entries.push((id, chunk_offset));
2613 offset += 12;
2614 }
2615 let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
2616 return Err(GitError::InvalidFormat(
2617 "multi-pack-index chunk lookup is empty".into(),
2618 ));
2619 };
2620 if terminator_id != [0, 0, 0, 0] {
2621 return Err(GitError::InvalidFormat(
2622 "multi-pack-index chunk lookup missing terminator".into(),
2623 ));
2624 }
2625 if terminator_offset != checksum_offset as u64 {
2626 return Err(GitError::InvalidFormat(
2627 "multi-pack-index terminator does not point at checksum".into(),
2628 ));
2629 }
2630
2631 let mut chunks = Vec::with_capacity(chunk_count);
2632 let mut previous_offset = data_start as u64;
2633 for pair in entries.windows(2) {
2634 let (id, chunk_offset) = pair[0];
2635 let (_next_id, next_offset) = pair[1];
2636 if id == [0, 0, 0, 0] {
2637 return Err(GitError::InvalidFormat(
2638 "multi-pack-index chunk id is zero before terminator".into(),
2639 ));
2640 }
2641 if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
2642 return Err(GitError::InvalidFormat(
2643 "multi-pack-index chunk offsets are not monotonic".into(),
2644 ));
2645 }
2646 if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
2647 return Err(GitError::InvalidFormat(
2648 "multi-pack-index chunk length is invalid".into(),
2649 ));
2650 }
2651 chunks.push(MultiPackIndexChunk {
2652 id,
2653 offset: chunk_offset,
2654 len: next_offset - chunk_offset,
2655 });
2656 previous_offset = chunk_offset;
2657 }
2658
2659 let pack_names = parse_midx_pack_names(raw, &chunks, pack_count as usize, version)?;
2660 let (fanout, object_count) = parse_midx_oid_fanout(raw, &chunks)?;
2661 let oid_lookup = midx_chunk_data(raw, &chunks, *b"OIDL", true)?
2662 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
2663 let expected_len = object_count.checked_mul(hash_len).ok_or_else(|| {
2664 GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into())
2665 })?;
2666 if oid_lookup.len() != expected_len {
2667 return Err(GitError::InvalidFormat(
2668 "multi-pack-index OIDL chunk has invalid length".into(),
2669 ));
2670 }
2671 let object_offsets = midx_chunk_data(raw, &chunks, *b"OOFF", true)?
2672 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
2673 let expected_offsets_len = object_count.checked_mul(8).ok_or_else(|| {
2674 GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into())
2675 })?;
2676 if object_offsets.len() != expected_offsets_len {
2677 return Err(GitError::InvalidFormat(
2678 "multi-pack-index OOFF chunk has invalid length".into(),
2679 ));
2680 }
2681 let large_offsets = midx_chunk_data(raw, &chunks, *b"LOFF", false)?;
2682 if let Some(large_offsets) = large_offsets
2683 && large_offsets.len() % 8 != 0
2684 {
2685 return Err(GitError::InvalidFormat(
2686 "multi-pack-index LOFF chunk has invalid length".into(),
2687 ));
2688 }
2689 let oid_lookup_offset = oid_lookup.as_ptr() as usize - raw.as_ptr() as usize;
2690 let object_offsets_offset = object_offsets.as_ptr() as usize - raw.as_ptr() as usize;
2691 let (large_offsets_offset, large_offsets_len) = match large_offsets {
2692 Some(large_offsets) => (
2693 Some(large_offsets.as_ptr() as usize - raw.as_ptr() as usize),
2694 large_offsets.len(),
2695 ),
2696 None => (None, 0),
2697 };
2698 Ok(Self {
2699 format,
2700 pack_count,
2701 pack_names,
2702 fanout,
2703 object_count,
2704 oid_lookup_offset,
2705 object_offsets_offset,
2706 large_offsets_offset,
2707 large_offsets_len,
2708 bytes,
2709 })
2710 }
2711
2712 pub fn contains(&self, oid: &ObjectId) -> bool {
2713 self.find_position(oid).is_some()
2714 }
2715
2716 pub fn find(&self, oid: &ObjectId) -> Result<Option<MultiPackIndexEntry>> {
2717 let Some(position) = self.find_position(oid) else {
2718 return Ok(None);
2719 };
2720 let bytes = self.bytes.as_bytes();
2721 let hash_len = self.format.raw_len();
2722 let oid_start = self
2723 .oid_lookup_offset
2724 .checked_add(position * hash_len)
2725 .ok_or_else(|| {
2726 GitError::InvalidFormat("multi-pack-index OIDL offset overflow".into())
2727 })?;
2728 let oid = ObjectId::from_raw(self.format, &bytes[oid_start..oid_start + hash_len])?;
2729 let offset_start = self
2730 .object_offsets_offset
2731 .checked_add(position * 8)
2732 .ok_or_else(|| {
2733 GitError::InvalidFormat("multi-pack-index OOFF offset overflow".into())
2734 })?;
2735 let data = &bytes[offset_start..offset_start + 8];
2736 let pack_int_id = u32_be(&data[..4]);
2737 if pack_int_id >= self.pack_count {
2738 return Err(GitError::InvalidFormat(
2739 "multi-pack-index object points past pack table".into(),
2740 ));
2741 }
2742 let raw_offset = u32_be(&data[4..8]);
2743 let offset = if raw_offset & 0x8000_0000 == 0 {
2744 u64::from(raw_offset)
2745 } else {
2746 let Some(large_offsets_offset) = self.large_offsets_offset else {
2747 return Err(GitError::InvalidFormat(
2748 "multi-pack-index large offset missing LOFF chunk".into(),
2749 ));
2750 };
2751 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
2752 let large_start = large_idx.checked_mul(8).ok_or_else(|| {
2753 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
2754 })?;
2755 let large_end = large_start.checked_add(8).ok_or_else(|| {
2756 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
2757 })?;
2758 if large_end > self.large_offsets_len {
2759 return Err(GitError::InvalidFormat(
2760 "multi-pack-index large offset points past LOFF chunk".into(),
2761 ));
2762 }
2763 let start = large_offsets_offset + large_start;
2764 u64_be(&bytes[start..start + 8])
2765 };
2766 Ok(Some(MultiPackIndexEntry {
2767 oid,
2768 pack_int_id,
2769 offset,
2770 }))
2771 }
2772
2773 pub fn pack_name(&self, pack_int_id: u32) -> Option<&str> {
2774 self.pack_names
2775 .get(pack_int_id as usize)
2776 .map(String::as_str)
2777 }
2778
2779 fn find_position(&self, oid: &ObjectId) -> Option<usize> {
2780 if oid.format() != self.format || self.object_count == 0 {
2781 return None;
2782 }
2783 let first = oid.as_bytes()[0] as usize;
2784 let start = if first == 0 {
2785 0
2786 } else {
2787 self.fanout[first - 1] as usize
2788 };
2789 let end = self.fanout[first] as usize;
2790 if start >= end || end > self.object_count {
2791 return None;
2792 }
2793 let hash_len = self.format.raw_len();
2794 let table_start = self.oid_lookup_offset;
2795 let table_end = table_start + self.object_count * hash_len;
2796 let bytes = self.bytes.as_bytes();
2797 let table = &bytes[table_start..table_end];
2798 let needle = oid.as_bytes();
2799 let mut low = start;
2800 let mut high = end;
2801 while low < high {
2802 let mid = low + (high - low) / 2;
2803 let raw = &table[mid * hash_len..(mid + 1) * hash_len];
2804 match raw.cmp(needle) {
2805 std::cmp::Ordering::Less => low = mid + 1,
2806 std::cmp::Ordering::Equal => return Some(mid),
2807 std::cmp::Ordering::Greater => high = mid,
2808 }
2809 }
2810 None
2811 }
2812}
2813
2814fn validate_midx_pack_names(pack_names: &[String]) -> Result<()> {
2815 for name in pack_names {
2816 if name.is_empty() {
2817 return Err(GitError::InvalidFormat(
2818 "multi-pack-index pack name is empty".into(),
2819 ));
2820 }
2821 if name
2822 .bytes()
2823 .any(|byte| byte == 0 || matches!(byte, b'/' | b'\\'))
2824 {
2825 return Err(GitError::InvalidFormat(
2826 "multi-pack-index pack name contains an invalid byte".into(),
2827 ));
2828 }
2829 }
2830 Ok(())
2831}
2832
/// Builds the `PNAM` chunk body: every pack name followed by a NUL byte,
/// then zero padding so the total length is a multiple of four.
///
/// An empty `pack_names` slice yields an empty chunk.
fn write_midx_pack_names(pack_names: &[String]) -> Vec<u8> {
    let mut chunk = Vec::new();
    for pack_name in pack_names {
        chunk.extend(pack_name.bytes());
        chunk.push(b'\0');
    }
    // Round the length up to the next 4-byte boundary.
    let padding = (4 - chunk.len() % 4) % 4;
    chunk.resize(chunk.len() + padding, 0);
    chunk
}
2844
2845fn write_midx_oid_fanout(objects: &[&MultiPackIndexEntry]) -> Result<Vec<u8>> {
2846 let mut counts = [0u32; 256];
2847 for object in objects {
2848 let first = object.oid.as_bytes()[0] as usize;
2849 counts[first] = counts[first]
2850 .checked_add(1)
2851 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
2852 }
2853 let mut running = 0u32;
2854 let mut out = Vec::with_capacity(256 * 4);
2855 for count in counts {
2856 running = running
2857 .checked_add(count)
2858 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
2859 out.extend_from_slice(&running.to_be_bytes());
2860 }
2861 Ok(out)
2862}
2863
2864fn write_midx_oid_lookup(objects: &[&MultiPackIndexEntry]) -> Vec<u8> {
2865 let mut out = Vec::new();
2866 for object in objects {
2867 out.extend_from_slice(object.oid.as_bytes());
2868 }
2869 out
2870}
2871
2872fn write_midx_object_offsets(
2873 objects: &[&MultiPackIndexEntry],
2874 large_offsets: &mut Vec<u8>,
2875) -> Result<Vec<u8>> {
2876 let mut out = Vec::new();
2877 for object in objects {
2878 out.extend_from_slice(&object.pack_int_id.to_be_bytes());
2879 if object.offset < 0x8000_0000 {
2880 out.extend_from_slice(&(object.offset as u32).to_be_bytes());
2881 } else {
2882 let large_idx = large_offsets.len() / 8;
2883 if large_idx > 0x7fff_ffff {
2884 return Err(GitError::InvalidFormat(
2885 "too many multi-pack-index large offsets".into(),
2886 ));
2887 }
2888 out.extend_from_slice(&(0x8000_0000 | large_idx as u32).to_be_bytes());
2889 large_offsets.extend_from_slice(&object.offset.to_be_bytes());
2890 }
2891 }
2892 Ok(out)
2893}
2894
2895fn write_multi_pack_index_chunks(
2896 format: ObjectFormat,
2897 version: u8,
2898 pack_count: u32,
2899 chunks: &[([u8; 4], Vec<u8>)],
2900) -> Result<Vec<u8>> {
2901 if chunks.len() > u8::MAX as usize {
2902 return Err(GitError::InvalidFormat(
2903 "too many multi-pack-index chunks".into(),
2904 ));
2905 }
2906 let lookup_len = (chunks.len() + 1)
2907 .checked_mul(12)
2908 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2909 let mut out = Vec::new();
2910 out.extend_from_slice(b"MIDX");
2911 out.push(version);
2912 out.push(hash_function_id(format) as u8);
2913 out.push(chunks.len() as u8);
2914 out.push(0);
2915 out.extend_from_slice(&pack_count.to_be_bytes());
2916 let mut chunk_offset = (12usize)
2917 .checked_add(lookup_len)
2918 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?
2919 as u64;
2920 for (id, data) in chunks {
2921 out.extend_from_slice(id);
2922 out.extend_from_slice(&chunk_offset.to_be_bytes());
2923 chunk_offset = chunk_offset
2924 .checked_add(data.len() as u64)
2925 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index size overflow".into()))?;
2926 }
2927 out.extend_from_slice(&[0, 0, 0, 0]);
2928 out.extend_from_slice(&chunk_offset.to_be_bytes());
2929 for (_id, data) in chunks {
2930 out.extend_from_slice(data);
2931 }
2932 let checksum = sley_core::digest_bytes(format, &out)?;
2933 out.extend_from_slice(checksum.as_bytes());
2934 Ok(out)
2935}
2936
/// Fields decoded from the start of a pack entry by `parse_entry_header`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct EntryHeader {
    /// Entry kind from the header's type bits (full object or delta).
    kind: PackObjectKind,
    /// Declared size of the entry's inflated body. Readers compare the
    /// inflated length against this value; for delta entries that body is
    /// the delta stream, not the resolved object.
    size: u64,
}
2942
/// Cache of fully decoded pack objects, keyed by their offset in the pack.
///
/// `read_object_at_inner` calls [`PackDeltaCache::get`] before decoding an
/// entry and [`PackDeltaCache::insert`] after decoding one, including delta
/// bases reached recursively. Both methods take `&self`, so an implementation
/// that actually stores objects needs interior mutability.
pub trait PackDeltaCache {
    /// Returns the cached object that was decoded from `offset`, if any.
    fn get(&self, offset: u64) -> Option<Arc<EncodedObject>>;
    /// Records `object` as the decoded result for the entry at `offset`.
    fn insert(&self, offset: u64, object: Arc<EncodedObject>);
}
2962
/// [`PackDeltaCache`] that never stores anything; used by the entry points
/// that do not take a caller-supplied cache.
struct NoopDeltaCache;

impl PackDeltaCache for NoopDeltaCache {
    /// Always a miss.
    fn get(&self, _offset: u64) -> Option<Arc<EncodedObject>> {
        None
    }
    /// Discards the object.
    fn insert(&self, _offset: u64, _object: Arc<EncodedObject>) {}
}
2973
thread_local! {
    // One decompressor per thread, shared by `inflate_into` and
    // `inflate_prefix`; both call `reset(true)` before use, so no state leaks
    // between streams. The `true` argument selects zlib-wrapped input
    // (see the flate2 `Decompress::new` documentation).
    static INFLATE: RefCell<flate2::Decompress> = RefCell::new(flate2::Decompress::new(true));
}
2982
/// Output bytes assumed possible per compressed input byte when sizing the
/// initial inflate buffer.
const MAX_INFLATE_EXPANSION: usize = 1032;

/// Hard ceiling, in bytes, on the up-front inflate reservation (64 MiB).
const MAX_INFLATE_RESERVE: usize = 64 * 1024 * 1024;

/// Chooses how many bytes to reserve before inflating a stream.
///
/// The declared `size_hint` is trusted only up to what `compressed_len`
/// input bytes could plausibly expand to, and the result is kept within
/// `64..=MAX_INFLATE_RESERVE`. This keeps a bogus size header from forcing a
/// huge allocation; the inflate loop grows the buffer later if needed.
fn bounded_inflate_reserve(size_hint: usize, compressed_len: usize) -> usize {
    const MIN_RESERVE: usize = 64;
    let plausible_output = compressed_len.saturating_mul(MAX_INFLATE_EXPANSION);
    let wanted = if size_hint < plausible_output {
        size_hint
    } else {
        plausible_output
    };
    wanted.max(MIN_RESERVE).min(MAX_INFLATE_RESERVE)
}
3014
3015fn inflate_into(compressed: &[u8], out: &mut Vec<u8>, size_hint: usize) -> Result<usize> {
3024 INFLATE.with(|cell| {
3025 let mut decompress = cell.borrow_mut();
3026 decompress.reset(true);
3027 out.reserve(bounded_inflate_reserve(size_hint, compressed.len()));
3028 let mut input = compressed;
3029 let mut consumed_total = 0usize;
3030 loop {
3031 if out.len() == out.capacity() {
3034 out.reserve(out.len().max(64));
3035 }
3036 let before_in = decompress.total_in();
3037 let before_out = decompress.total_out();
3038 let status = decompress
3039 .decompress_vec(input, out, flate2::FlushDecompress::None)
3040 .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
3041 let consumed = (decompress.total_in() - before_in) as usize;
3042 let produced = decompress.total_out() - before_out;
3043 input = &input[consumed..];
3044 consumed_total += consumed;
3045 match status {
3046 flate2::Status::StreamEnd => return Ok(consumed_total),
3047 _ if consumed == 0 && produced == 0 => {
3048 return Err(GitError::InvalidObject("truncated zlib stream".into()));
3049 }
3050 _ => {}
3051 }
3052 }
3053 })
3054}
3055
3056fn inflate_prefix(compressed: &[u8], max_out: usize, out: &mut Vec<u8>) -> Result<()> {
3060 INFLATE.with(|cell| {
3061 let mut decompress = cell.borrow_mut();
3062 decompress.reset(true);
3063 out.reserve(max_out.max(16));
3064 let mut input = compressed;
3065 while out.len() < max_out {
3066 if out.len() == out.capacity() {
3067 out.reserve(out.len().max(16));
3068 }
3069 let before_in = decompress.total_in();
3070 let before_out = decompress.total_out();
3071 let status = decompress
3072 .decompress_vec(input, out, flate2::FlushDecompress::None)
3073 .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
3074 let consumed = (decompress.total_in() - before_in) as usize;
3075 let produced = decompress.total_out() - before_out;
3076 input = &input[consumed..];
3077 if status == flate2::Status::StreamEnd || (consumed == 0 && produced == 0) {
3078 break;
3079 }
3080 }
3081 Ok(())
3082 })
3083}
3084
/// Decodes the object stored at `offset` in `pack_bytes`, resolving delta
/// chains, without any delta-base cache.
///
/// `resolve_ref_base` is called with the base id of a ref-delta entry and
/// must return that base object, or `Ok(None)` if it is unknown.
///
/// Delegates to [`read_object_at_with_cache_arc`] with a cache that never
/// stores anything.
pub fn read_object_at_arc<F>(
    pack_bytes: &[u8],
    offset: u64,
    format: ObjectFormat,
    resolve_ref_base: F,
) -> Result<Arc<EncodedObject>>
where
    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
{
    read_object_at_with_cache_arc(
        pack_bytes,
        offset,
        format,
        resolve_ref_base,
        &NoopDeltaCache,
    )
}
3109
/// Decodes the object stored at `offset` in `pack_bytes`, resolving delta
/// chains and using `cache` for already-decoded entries.
///
/// `cache` is consulted for `offset` and for every ofs-delta base visited,
/// and each decoded entry is inserted into it. `resolve_ref_base` supplies
/// ref-delta bases by object id.
pub fn read_object_at_with_cache_arc<F, C>(
    pack_bytes: &[u8],
    offset: u64,
    format: ObjectFormat,
    mut resolve_ref_base: F,
    cache: &C,
) -> Result<Arc<EncodedObject>>
where
    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
    C: PackDeltaCache + ?Sized,
{
    // The recursive worker takes the resolver by `&mut` so it can be shared
    // across recursion levels.
    read_object_at_inner(pack_bytes, offset, format, &mut resolve_ref_base, cache)
}
3131
3132fn read_object_at_inner<F, C>(
3133 pack_bytes: &[u8],
3134 offset: u64,
3135 format: ObjectFormat,
3136 resolve_ref_base: &mut F,
3137 cache: &C,
3138) -> Result<Arc<EncodedObject>>
3139where
3140 F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
3141 C: PackDeltaCache + ?Sized,
3142{
3143 if let Some(object) = cache.get(offset) {
3146 return Ok(object);
3147 }
3148 let trailer_offset = pack_bytes
3149 .len()
3150 .checked_sub(format.raw_len())
3151 .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
3152 let mut cursor = usize::try_from(offset)
3153 .ok()
3154 .filter(|&value| value < trailer_offset)
3155 .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
3156 let header = parse_entry_header(pack_bytes, &mut cursor)?;
3157 let base = match header.kind {
3158 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
3159 pack_bytes,
3160 &mut cursor,
3161 offset,
3162 )?)),
3163 PackObjectKind::RefDelta => {
3164 let hash_len = format.raw_len();
3165 if cursor + hash_len > trailer_offset {
3166 return Err(GitError::InvalidFormat(
3167 "truncated ref-delta base object id".into(),
3168 ));
3169 }
3170 let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
3171 cursor += hash_len;
3172 Some(DeltaBase::Ref(oid))
3173 }
3174 _ => None,
3175 };
3176 let mut body = Vec::new();
3177 inflate_into(
3178 &pack_bytes[cursor..trailer_offset],
3179 &mut body,
3180 header.size.min(usize::MAX as u64) as usize,
3181 )?;
3182 if body.len() as u64 != header.size {
3183 return Err(GitError::InvalidObject(format!(
3184 "pack object declared {} bytes, decoded {}",
3185 header.size,
3186 body.len()
3187 )));
3188 }
3189 let object = match base {
3190 None => {
3191 let object_type = match header.kind {
3192 PackObjectKind::Commit => ObjectType::Commit,
3193 PackObjectKind::Tree => ObjectType::Tree,
3194 PackObjectKind::Blob => ObjectType::Blob,
3195 PackObjectKind::Tag => ObjectType::Tag,
3196 PackObjectKind::OfsDelta | PackObjectKind::RefDelta => {
3197 return Err(GitError::InvalidFormat(
3198 "delta pack entry decoded without a base".into(),
3199 ));
3200 }
3201 };
3202 Arc::new(EncodedObject::new(object_type, body))
3203 }
3204 Some(DeltaBase::Offset(base_offset)) => {
3205 let base =
3206 read_object_at_inner(pack_bytes, base_offset, format, resolve_ref_base, cache)?;
3207 let resolved = apply_pack_delta(&base.body, &body)?;
3208 Arc::new(EncodedObject::new(base.object_type, resolved))
3209 }
3210 Some(DeltaBase::Ref(base_oid)) => {
3211 let base = resolve_ref_base(&base_oid)?
3212 .ok_or_else(|| GitError::not_found(format!("ref-delta base object {base_oid}")))?;
3213 let resolved = apply_pack_delta(&base.body, &body)?;
3214 Arc::new(EncodedObject::new(base.object_type, resolved))
3215 }
3216 };
3217 cache.insert(offset, Arc::clone(&object));
3221 Ok(object)
3222}
3223
/// Returns the resolved object type and size of the entry at `offset`
/// without caching intermediate results.
///
/// For delta entries the type comes from the delta base and the size from
/// the delta stream's header. `resolve_ref_base_type` is called with the
/// base id of a ref-delta entry and must return that base's type, or
/// `Ok(None)` if it is unknown.
pub fn read_object_header_at<F>(
    pack_bytes: &[u8],
    offset: u64,
    format: ObjectFormat,
    mut resolve_ref_base_type: F,
) -> Result<(ObjectType, u64)>
where
    F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
{
    read_object_header_at_inner(
        pack_bytes,
        offset,
        format,
        &mut resolve_ref_base_type,
        &mut NoopHeaderTypeCache,
    )
}
3250
/// Cache of resolved `(object type, size)` headers, keyed by pack offset.
///
/// The header readers consult it for the requested offset and for ofs-delta
/// base offsets, and store every header they resolve.
pub trait HeaderTypeCache {
    /// Returns the resolved header recorded for `pack_offset`, if any.
    fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)>;
    /// Records `header` as the resolved header for `pack_offset`.
    fn put(&mut self, pack_offset: u64, header: (ObjectType, u64));
}
3273
/// [`HeaderTypeCache`] that never stores anything; used by
/// `read_object_header_at`.
struct NoopHeaderTypeCache;

impl HeaderTypeCache for NoopHeaderTypeCache {
    /// Always a miss.
    fn get(&self, _pack_offset: u64) -> Option<(ObjectType, u64)> {
        None
    }
    /// Discards the header.
    fn put(&mut self, _pack_offset: u64, _header: (ObjectType, u64)) {}
}
3282
3283pub fn read_object_header_at_with_cache<F, C>(
3289 pack_bytes: &[u8],
3290 offset: u64,
3291 format: ObjectFormat,
3292 mut resolve_ref_base_type: F,
3293 type_cache: &mut C,
3294) -> Result<(ObjectType, u64)>
3295where
3296 F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
3297 C: HeaderTypeCache + ?Sized,
3298{
3299 if let Some(header) = type_cache.get(offset) {
3300 return Ok(header);
3301 }
3302 read_object_header_at_inner(
3303 pack_bytes,
3304 offset,
3305 format,
3306 &mut resolve_ref_base_type,
3307 type_cache,
3308 )
3309}
3310
3311fn read_object_header_at_inner<F, C>(
3312 pack_bytes: &[u8],
3313 offset: u64,
3314 format: ObjectFormat,
3315 resolve_ref_base_type: &mut F,
3316 type_cache: &mut C,
3317) -> Result<(ObjectType, u64)>
3318where
3319 F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
3320 C: HeaderTypeCache + ?Sized,
3321{
3322 let trailer_offset = pack_bytes
3323 .len()
3324 .checked_sub(format.raw_len())
3325 .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
3326 let mut cursor = usize::try_from(offset)
3327 .ok()
3328 .filter(|&value| value < trailer_offset)
3329 .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
3330 let header = parse_entry_header(pack_bytes, &mut cursor)?;
3331 let resolved = match header.kind {
3332 PackObjectKind::Commit => (ObjectType::Commit, header.size),
3333 PackObjectKind::Tree => (ObjectType::Tree, header.size),
3334 PackObjectKind::Blob => (ObjectType::Blob, header.size),
3335 PackObjectKind::Tag => (ObjectType::Tag, header.size),
3336 PackObjectKind::OfsDelta => {
3337 let base_offset = parse_ofs_delta_base_offset(pack_bytes, &mut cursor, offset)?;
3338 let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
3339 let base_type = match type_cache.get(base_offset) {
3342 Some((base_type, _)) => base_type,
3343 None => {
3344 let (base_type, _) = read_object_header_at_inner(
3345 pack_bytes,
3346 base_offset,
3347 format,
3348 resolve_ref_base_type,
3349 type_cache,
3350 )?;
3351 base_type
3352 }
3353 };
3354 (base_type, size)
3355 }
3356 PackObjectKind::RefDelta => {
3357 let hash_len = format.raw_len();
3358 if cursor + hash_len > trailer_offset {
3359 return Err(GitError::InvalidFormat(
3360 "truncated ref-delta base object id".into(),
3361 ));
3362 }
3363 let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
3364 cursor += hash_len;
3365 let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
3366 let base_type = resolve_ref_base_type(&oid)?
3367 .ok_or_else(|| GitError::not_found(format!("ref-delta base object {oid}")))?;
3368 (base_type, size)
3369 }
3370 };
3371 type_cache.put(offset, resolved);
3374 Ok(resolved)
3375}
3376
3377const DELTA_HEADER_PREFIX_LEN: usize = 32;
3381
3382fn delta_result_size_from_stream(compressed: &[u8]) -> Result<u64> {
3385 let mut prefix = Vec::new();
3386 inflate_prefix(compressed, DELTA_HEADER_PREFIX_LEN, &mut prefix)?;
3387 decoded_delta_result_size(&prefix)
3388}
3389
3390fn parse_entry_header(bytes: &[u8], offset: &mut usize) -> Result<EntryHeader> {
3391 let first = next_byte(bytes, offset)?;
3392 let mut size = u64::from(first & 0x0f);
3393 let kind = match (first >> 4) & 0x07 {
3394 1 => PackObjectKind::Commit,
3395 2 => PackObjectKind::Tree,
3396 3 => PackObjectKind::Blob,
3397 4 => PackObjectKind::Tag,
3398 6 => PackObjectKind::OfsDelta,
3399 7 => PackObjectKind::RefDelta,
3400 other => {
3401 return Err(GitError::InvalidFormat(format!(
3402 "invalid pack object type {other}"
3403 )));
3404 }
3405 };
3406 let mut shift = 4;
3407 let mut byte = first;
3408 while byte & 0x80 != 0 {
3409 byte = next_byte(bytes, offset)?;
3410 let part = u64::from(byte & 0x7f);
3411 size = size
3412 .checked_add(
3413 part.checked_shl(shift)
3414 .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?,
3415 )
3416 .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?;
3417 shift += 7;
3418 }
3419 Ok(EntryHeader { kind, size })
3420}
3421
3422fn parse_ofs_delta_base_offset(bytes: &[u8], offset: &mut usize, entry_offset: u64) -> Result<u64> {
3423 let mut byte = next_byte(bytes, offset)?;
3424 let mut relative = u64::from(byte & 0x7f);
3425 while byte & 0x80 != 0 {
3426 byte = next_byte(bytes, offset)?;
3427 relative = relative
3428 .checked_add(1)
3429 .and_then(|value| value.checked_shl(7))
3430 .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
3431 .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
3432 }
3433 entry_offset
3434 .checked_sub(relative)
3435 .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
3436}
3437
3438fn resolve_pack_entries<F>(
3439 parsed: Vec<ParsedPackEntry>,
3440 format: ObjectFormat,
3441 external_base: &mut F,
3442) -> Result<Vec<PackObject>>
3443where
3444 F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
3445{
3446 let mut offset_to_index = HashMap::with_capacity(parsed.len());
3447 for (idx, entry) in parsed.iter().enumerate() {
3448 offset_to_index.insert(parsed_entry_offset(entry), idx);
3449 }
3450
3451 let mut resolved = vec![None; parsed.len()];
3452 let mut oid_to_index = HashMap::new();
3453 let mut unresolved = 0usize;
3454 for (idx, entry) in parsed.iter().enumerate() {
3455 match entry {
3456 ParsedPackEntry::Resolved(object) => {
3457 oid_to_index.insert(object.entry.oid, idx);
3458 resolved[idx] = Some(object.clone());
3459 }
3460 ParsedPackEntry::Delta { .. } => unresolved += 1,
3461 }
3462 }
3463
3464 while unresolved != 0 {
3465 let mut progress = false;
3466 for idx in 0..parsed.len() {
3467 if resolved[idx].is_some() {
3468 continue;
3469 }
3470 let ParsedPackEntry::Delta {
3471 base,
3472 compressed_size,
3473 delta_size,
3474 offset,
3475 delta,
3476 } = &parsed[idx]
3477 else {
3478 continue;
3479 };
3480 let Some(base_object) = delta_base_object(
3481 base,
3482 &offset_to_index,
3483 &oid_to_index,
3484 &resolved,
3485 external_base,
3486 )?
3487 else {
3488 continue;
3489 };
3490 let body = apply_pack_delta(base_object.body(), delta)?;
3491 let object = EncodedObject::new(base_object.object_type(), body);
3492 let oid = object.object_id(format)?;
3493 let pack_object = PackObject {
3494 entry: PackEntry {
3495 oid,
3496 compressed_size: *compressed_size,
3497 uncompressed_size: object.body.len() as u64,
3498 offset: *offset,
3499 },
3500 object,
3501 };
3502 if pack_object.entry.uncompressed_size != decoded_delta_result_size(delta)? {
3503 return Err(GitError::InvalidObject(
3504 "resolved delta size does not match delta header".into(),
3505 ));
3506 }
3507 if *delta_size != delta.len() as u64 {
3508 return Err(GitError::InvalidObject(format!(
3509 "pack delta declared {delta_size} bytes, decoded {}",
3510 delta.len()
3511 )));
3512 }
3513 oid_to_index.insert(oid, idx);
3514 resolved[idx] = Some(pack_object);
3515 unresolved -= 1;
3516 progress = true;
3517 }
3518 if !progress {
3519 return Err(GitError::Unsupported("unresolved delta base".into()));
3520 }
3521 }
3522
3523 resolved
3524 .into_iter()
3525 .map(|entry| entry.ok_or_else(|| GitError::InvalidFormat("unresolved pack entry".into())))
3526 .collect()
3527}
3528
3529fn parsed_entry_offset(entry: &ParsedPackEntry) -> u64 {
3530 match entry {
3531 ParsedPackEntry::Resolved(object) => object.entry.offset,
3532 ParsedPackEntry::Delta { offset, .. } => *offset,
3533 }
3534}
3535
/// A delta base that is either borrowed from elsewhere or owned outright.
enum DeltaBaseObject<'a> {
    /// Base borrowed for `'a`.
    Borrowed(&'a EncodedObject),
    /// Base owned by this value.
    Owned(EncodedObject),
}
3540
3541impl DeltaBaseObject<'_> {
3542 fn object_type(&self) -> ObjectType {
3543 match self {
3544 Self::Borrowed(object) => object.object_type,
3545 Self::Owned(object) => object.object_type,
3546 }
3547 }
3548
3549 fn body(&self) -> &[u8] {
3550 match self {
3551 Self::Borrowed(object) => &object.body,
3552 Self::Owned(object) => &object.body,
3553 }
3554 }
3555}
3556
3557fn delta_base_object<'a, F>(
3558 base: &DeltaBase,
3559 offset_to_index: &HashMap<u64, usize>,
3560 oid_to_index: &HashMap<ObjectId, usize>,
3561 resolved: &'a [Option<PackObject>],
3562 external_base: &mut F,
3563) -> Result<Option<DeltaBaseObject<'a>>>
3564where
3565 F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
3566{
3567 match base {
3568 DeltaBase::Offset(offset) => {
3569 let Some(index) = offset_to_index.get(offset).copied() else {
3570 return Err(GitError::InvalidFormat(format!(
3571 "ofs-delta base offset {offset} not found"
3572 )));
3573 };
3574 Ok(resolved[index]
3575 .as_ref()
3576 .map(|object| DeltaBaseObject::Borrowed(&object.object)))
3577 }
3578 DeltaBase::Ref(oid) => {
3579 if let Some(index) = oid_to_index.get(oid).copied() {
3580 return Ok(resolved[index]
3581 .as_ref()
3582 .map(|object| DeltaBaseObject::Borrowed(&object.object)));
3583 }
3584 external_base(oid).map(|object| object.map(DeltaBaseObject::Owned))
3585 }
3586 }
3587}
3588
3589fn apply_pack_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
3590 let mut cursor = 0usize;
3591 let base_size = read_delta_varint(delta, &mut cursor)?;
3592 if base_size != base.len() as u64 {
3593 return Err(GitError::InvalidObject(format!(
3594 "delta base size mismatch: expected {base_size}, got {}",
3595 base.len()
3596 )));
3597 }
3598 let result_size = read_delta_varint(delta, &mut cursor)?;
3599 let result_size_hint = usize::try_from(result_size).unwrap_or(usize::MAX);
3608 let mut result = Vec::with_capacity(bounded_inflate_reserve(result_size_hint, delta.len()));
3609 while cursor < delta.len() {
3610 let command = delta[cursor];
3611 cursor += 1;
3612 if command & 0x80 != 0 {
3613 let copy_offset =
3614 read_delta_copy_value(delta, &mut cursor, command, &[0x01, 0x02, 0x04, 0x08])?;
3615 let mut copy_size =
3616 read_delta_copy_value(delta, &mut cursor, command, &[0x10, 0x20, 0x40])?;
3617 if copy_size == 0 {
3618 copy_size = 0x10000;
3619 }
3620 let start = usize::try_from(copy_offset)
3621 .map_err(|_| GitError::InvalidObject("delta copy offset overflows usize".into()))?;
3622 let len = usize::try_from(copy_size)
3623 .map_err(|_| GitError::InvalidObject("delta copy size overflows usize".into()))?;
3624 let end = start
3625 .checked_add(len)
3626 .ok_or_else(|| GitError::InvalidObject("delta copy range overflow".into()))?;
3627 let Some(slice) = base.get(start..end) else {
3628 return Err(GitError::InvalidObject(
3629 "delta copy range exceeds base object".into(),
3630 ));
3631 };
3632 result.extend_from_slice(slice);
3633 } else if command != 0 {
3634 let len = usize::from(command);
3635 let end = cursor
3636 .checked_add(len)
3637 .ok_or_else(|| GitError::InvalidObject("delta insert range overflow".into()))?;
3638 let Some(slice) = delta.get(cursor..end) else {
3639 return Err(GitError::InvalidObject(
3640 "delta insert range exceeds delta data".into(),
3641 ));
3642 };
3643 result.extend_from_slice(slice);
3644 cursor = end;
3645 } else {
3646 return Err(GitError::InvalidObject(
3647 "delta contains reserved zero command".into(),
3648 ));
3649 }
3650 }
3651 if result.len() as u64 != result_size {
3652 return Err(GitError::InvalidObject(format!(
3653 "delta result size mismatch: expected {result_size}, got {}",
3654 result.len()
3655 )));
3656 }
3657 Ok(result)
3658}
3659
3660fn decoded_delta_result_size(delta: &[u8]) -> Result<u64> {
3661 let mut cursor = 0usize;
3662 let _ = read_delta_varint(delta, &mut cursor)?;
3663 read_delta_varint(delta, &mut cursor)
3664}
3665
/// Length in bytes of the blocks hashed by the delta index; also the minimum
/// match length that is emitted as a copy command.
const DELTA_BLOCK_SIZE: usize = 16;

/// Distance in bytes between consecutive indexed anchors of a base object.
const DELTA_INDEX_STRIDE: usize = DELTA_BLOCK_SIZE;

/// Number of low hash bits used to select a bucket in the delta index.
const DELTA_BUCKET_BITS: usize = 12;
/// Number of buckets in the delta index.
const DELTA_BUCKET_COUNT: usize = 1 << DELTA_BUCKET_BITS;
/// Mask that reduces a block hash to a bucket number.
const DELTA_BUCKET_MASK: usize = DELTA_BUCKET_COUNT - 1;
3681
/// Hash index over fixed-size blocks of a base object, used to find copyable
/// ranges when encoding a delta against that base.
struct DeltaIndex<'a> {
    /// The base object body being indexed.
    base: &'a [u8],
    /// Indexed blocks, grouped so that each bucket's blocks are contiguous.
    blocks: Vec<DeltaBlock>,
    /// Bucket boundaries: bucket `b` owns `blocks[buckets[b]..buckets[b + 1]]`.
    buckets: Vec<usize>,
}
3693
/// One indexed block of a base object.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct DeltaBlock {
    /// `block_hash` of the block's bytes.
    hash: u32,
    /// Byte offset of the block within the base.
    offset: usize,
}
3699
3700impl<'a> DeltaIndex<'a> {
3701 fn new(base: &'a [u8]) -> Self {
3702 let mut buckets = vec![0usize; DELTA_BUCKET_COUNT + 1];
3703 let mut anchors = Vec::with_capacity(delta_anchor_count(base.len()));
3704 for_each_delta_anchor(base.len(), |offset| {
3705 let hash = block_hash(&base[offset..offset + DELTA_BLOCK_SIZE]);
3706 buckets[delta_bucket(hash) + 1] += 1;
3707 anchors.push(DeltaBlock { hash, offset });
3708 });
3709 for idx in 1..buckets.len() {
3710 buckets[idx] += buckets[idx - 1];
3711 }
3712
3713 let mut next_offsets = buckets[..DELTA_BUCKET_COUNT].to_vec();
3714 let mut blocks = vec![DeltaBlock { hash: 0, offset: 0 }; anchors.len()];
3715 for anchor in anchors {
3716 let bucket = delta_bucket(anchor.hash);
3717 let next = &mut next_offsets[bucket];
3718 blocks[*next] = anchor;
3719 *next += 1;
3720 }
3721
3722 Self {
3723 base,
3724 blocks,
3725 buckets,
3726 }
3727 }
3728
3729 fn candidate_blocks(&self, hash: u32) -> impl Iterator<Item = &DeltaBlock> {
3730 let bucket = delta_bucket(hash);
3731 let start = self.buckets[bucket];
3732 let end = self.buckets[bucket + 1];
3733 self.blocks[start..end]
3734 .iter()
3735 .filter(move |block| block.hash == hash)
3736 }
3737
3738 fn has_hash(&self, hash: u32) -> bool {
3739 self.candidate_blocks(hash).next().is_some()
3740 }
3741
3742 fn has_shared_anchor(&self, target: &[u8]) -> bool {
3743 if target.len() < DELTA_BLOCK_SIZE || self.blocks.is_empty() {
3744 return false;
3745 }
3746 let last = target.len() - DELTA_BLOCK_SIZE;
3747 for offset in (0..=last).step_by(DELTA_INDEX_STRIDE) {
3748 let hash = block_hash(&target[offset..offset + DELTA_BLOCK_SIZE]);
3749 if self.has_hash(hash) {
3750 return true;
3751 }
3752 }
3753 if !last.is_multiple_of(DELTA_INDEX_STRIDE) {
3754 let hash = block_hash(&target[last..last + DELTA_BLOCK_SIZE]);
3755 if self.has_hash(hash) {
3756 return true;
3757 }
3758 }
3759 false
3760 }
3761
3762 fn delta(&self, target: &[u8]) -> Option<Vec<u8>> {
3764 if !self.has_shared_anchor(target) {
3765 return None;
3766 }
3767 let base = self.base;
3768 let mut delta = Vec::new();
3769 write_delta_varint(&mut delta, base.len() as u64);
3770 write_delta_varint(&mut delta, target.len() as u64);
3771
3772 let mut pending_insert_start = 0usize;
3773 let mut pos = 0usize;
3774 while pos < target.len() {
3775 let mut best_len = 0usize;
3776 let mut best_offset = 0usize;
3777 if pos + DELTA_BLOCK_SIZE <= target.len() {
3778 let hash = block_hash(&target[pos..pos + DELTA_BLOCK_SIZE]);
3779 for candidate in self.candidate_blocks(hash).take(DELTA_MAX_CHAIN) {
3780 let candidate = candidate.offset;
3783 let max_len = (base.len() - candidate).min(target.len() - pos);
3784 let mut len = 0usize;
3785 while len < max_len && base[candidate + len] == target[pos + len] {
3786 len += 1;
3787 }
3788 if len > best_len {
3789 best_len = len;
3790 best_offset = candidate;
3791 }
3792 }
3793 }
3794
3795 if best_len >= DELTA_BLOCK_SIZE {
3796 if pending_insert_start < pos {
3797 write_delta_insert(&mut delta, &target[pending_insert_start..pos]);
3798 }
3799 write_delta_copy(&mut delta, best_offset as u64, best_len as u64);
3800 pos += best_len;
3801 pending_insert_start = pos;
3802 } else {
3803 pos += 1;
3804 }
3805 }
3806 if pending_insert_start < target.len() {
3807 write_delta_insert(&mut delta, &target[pending_insert_start..]);
3808 }
3809 Some(delta)
3810 }
3811}
3812
3813fn for_each_delta_anchor(mut len: usize, mut visit: impl FnMut(usize)) {
3814 if len < DELTA_BLOCK_SIZE {
3815 return;
3816 }
3817 len -= DELTA_BLOCK_SIZE;
3818 for offset in (0..=len).step_by(DELTA_INDEX_STRIDE) {
3819 visit(offset);
3820 }
3821 if !len.is_multiple_of(DELTA_INDEX_STRIDE) {
3822 visit(len);
3823 }
3824}
3825
3826fn delta_anchor_count(len: usize) -> usize {
3827 if len < DELTA_BLOCK_SIZE {
3828 return 0;
3829 }
3830 let last = len - DELTA_BLOCK_SIZE;
3831 (last / DELTA_INDEX_STRIDE) + 1 + usize::from(!last.is_multiple_of(DELTA_INDEX_STRIDE))
3832}
3833
3834fn delta_bucket(hash: u32) -> usize {
3835 (hash as usize) & DELTA_BUCKET_MASK
3836}
3837
/// Upper bound on the number of same-hash candidate blocks examined per
/// target position while encoding a delta.
const DELTA_MAX_CHAIN: usize = 64;
3841
/// Hashes `block` by folding each byte into a 32-bit state: the state is
/// multiplied (wrapping) by `0x0100_0193` and then XORed with the byte.
/// An empty block hashes to zero.
fn block_hash(block: &[u8]) -> u32 {
    block.iter().fold(0u32, |state, &byte| {
        state.wrapping_mul(0x0100_0193) ^ u32::from(byte)
    })
}
3854
/// How a planned pack entry is stored.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PlannedBase {
    /// Stored whole; the payload is the object's own body.
    None,
    /// Stored as `delta` against the object at index `base_idx` of the same
    /// object list.
    InPack { base_idx: usize, delta: Vec<u8> },
    /// Stored as `delta` against an object identified by `base_oid` that is
    /// not among the objects being written.
    External { base_oid: ObjectId, delta: Vec<u8> },
}
3867
/// Storage decision for one object of a pack being written.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PlannedEntry {
    /// Whether the object is written whole or as a delta, and against what.
    base: PlannedBase,
}
3872
3873fn compress_planned_payloads(
3874 objects: &[&EncodedObject],
3875 plan: &[PlannedEntry],
3876 order: &[usize],
3877) -> Result<Vec<Vec<u8>>> {
3878 if order.is_empty() {
3879 return Ok(Vec::new());
3880 }
3881
3882 let worker_count = std::thread::available_parallelism()
3883 .map(|threads| threads.get())
3884 .unwrap_or(1)
3885 .min(PACK_PARALLEL_COMPRESSION_MAX_THREADS)
3886 .min(order.len());
3887 if worker_count <= 1 || order.len() < PACK_PARALLEL_COMPRESSION_MIN_OBJECTS {
3888 let mut payloads = Vec::with_capacity(order.len());
3889 for &idx in order {
3890 payloads.push(compressed_payload(planned_payload(objects, plan, idx))?);
3891 }
3892 return Ok(payloads);
3893 }
3894
3895 let chunk_len = order.len().div_ceil(worker_count);
3896 let mut payloads: Vec<Vec<u8>> = std::iter::repeat_with(Vec::new).take(order.len()).collect();
3897 std::thread::scope(|scope| {
3898 let mut handles = Vec::new();
3899 for (chunk_idx, chunk) in order.chunks(chunk_len).enumerate() {
3900 let chunk_start = chunk_idx * chunk_len;
3901 handles.push(scope.spawn(move || -> Result<Vec<(usize, Vec<u8>)>> {
3902 let mut chunk_payloads = Vec::with_capacity(chunk.len());
3903 for (offset, &idx) in chunk.iter().enumerate() {
3904 chunk_payloads.push((
3905 chunk_start + offset,
3906 compressed_payload(planned_payload(objects, plan, idx))?,
3907 ));
3908 }
3909 Ok(chunk_payloads)
3910 }));
3911 }
3912
3913 let mut first_error = None;
3914 for handle in handles {
3915 match handle.join() {
3916 Ok(Ok(chunk_payloads)) => {
3917 if first_error.is_none() {
3918 for (pos, payload) in chunk_payloads {
3919 payloads[pos] = payload;
3920 }
3921 }
3922 }
3923 Ok(Err(err)) => {
3924 first_error.get_or_insert(err);
3925 }
3926 Err(_) => {
3927 first_error.get_or_insert_with(|| {
3928 GitError::InvalidObject("pack compression worker panicked".into())
3929 });
3930 }
3931 }
3932 }
3933
3934 match first_error {
3935 Some(err) => Err(err),
3936 None => Ok(()),
3937 }
3938 })?;
3939 Ok(payloads)
3940}
3941
3942fn planned_payload<'a>(
3943 objects: &'a [&'a EncodedObject],
3944 plan: &'a [PlannedEntry],
3945 idx: usize,
3946) -> &'a [u8] {
3947 match &plan[idx].base {
3948 PlannedBase::None => &objects[idx].body,
3949 PlannedBase::InPack { delta, .. } | PlannedBase::External { delta, .. } => delta,
3950 }
3951}
3952
3953fn compressed_payload(body: &[u8]) -> Result<Vec<u8>> {
3954 let mut out = Vec::new();
3955 write_compressed_payload(&mut out, body)?;
3956 Ok(out)
3957}
3958
/// Upper bound on the number of thin-pack bases tried as delta bases for each
/// object while planning a pack.
const DELTA_MAX_EXTERNAL_BASES: usize = 64;
3962
/// One candidate base held in the sliding delta window.
struct DeltaWindowEntry<'a> {
    /// Index of the object in the list being packed.
    idx: usize,
    /// Delta index built over that object's body.
    index: DeltaIndex<'a>,
}
3967
3968fn delta_type_rank(object_type: ObjectType) -> u8 {
3971 match object_type {
3972 ObjectType::Commit => 0,
3973 ObjectType::Tree => 1,
3974 ObjectType::Blob => 2,
3975 ObjectType::Tag => 3,
3976 }
3977}
3978
3979fn plan_pack_deltas(
4009 objects: &[&EncodedObject],
4010 object_ids: &[ObjectId],
4011 options: &PackWriteOptions,
4012) -> Result<(Vec<PlannedEntry>, Vec<usize>)> {
4013 let count = objects.len();
4014 let mut plan: Vec<PlannedEntry> = (0..count)
4015 .map(|_| PlannedEntry {
4016 base: PlannedBase::None,
4017 })
4018 .collect();
4019
4020 let mut order: Vec<usize> = (0..count).collect();
4024 if options.reorder && options.depth > 0 {
4025 order.sort_by(|&left, &right| {
4026 delta_type_rank(objects[left].object_type)
4027 .cmp(&delta_type_rank(objects[right].object_type))
4028 .then_with(|| objects[right].body.len().cmp(&objects[left].body.len()))
4029 .then_with(|| {
4030 object_ids[left]
4031 .as_bytes()
4032 .cmp(object_ids[right].as_bytes())
4033 })
4034 });
4035 }
4036
4037 if options.depth == 0 {
4038 return Ok((plan, order));
4039 }
4040
4041 let mut external_indexes: Vec<(ObjectId, ObjectType, DeltaIndex<'_>)> =
4044 Vec::with_capacity(options.thin_bases.len());
4045 for (oid, object) in &options.thin_bases {
4046 external_indexes.push((*oid, object.object_type, DeltaIndex::new(&object.body)));
4047 }
4048
4049 let mut depth = vec![0usize; count];
4052 let mut window: std::collections::VecDeque<DeltaWindowEntry<'_>> =
4054 std::collections::VecDeque::new();
4055
4056 for &idx in &order {
4057 let target = &objects[idx].body;
4058 let target_type = objects[idx].object_type;
4059
4060 let mut best_delta: Option<Vec<u8>> = None;
4061 let mut best_base = PlannedBase::None;
4062
4063 for base_entry in window.iter().rev() {
4065 let base_idx = base_entry.idx;
4066 if objects[base_idx].object_type != target_type {
4067 continue;
4068 }
4069 if depth[base_idx] + 1 > options.depth {
4072 continue;
4073 }
4074 let Some(delta) = base_entry.index.delta(target) else {
4075 continue;
4076 };
4077 if !delta_is_acceptable(&delta, target.len()) {
4078 continue;
4079 }
4080 if best_delta
4081 .as_ref()
4082 .is_none_or(|current| delta.len() < current.len())
4083 {
4084 best_delta = Some(delta);
4085 best_base = PlannedBase::InPack {
4086 base_idx,
4087 delta: Vec::new(),
4088 };
4089 }
4090 }
4091
4092 for (base_oid, base_type, base_index) in
4095 external_indexes.iter().take(DELTA_MAX_EXTERNAL_BASES)
4096 {
4097 if *base_type != target_type {
4098 continue;
4099 }
4100 let Some(delta) = base_index.delta(target) else {
4101 continue;
4102 };
4103 if !delta_is_acceptable(&delta, target.len()) {
4104 continue;
4105 }
4106 if best_delta
4107 .as_ref()
4108 .is_none_or(|current| delta.len() < current.len())
4109 {
4110 best_delta = Some(delta);
4111 best_base = PlannedBase::External {
4112 base_oid: *base_oid,
4113 delta: Vec::new(),
4114 };
4115 }
4116 }
4117
4118 if let Some(delta) = best_delta {
4119 match best_base {
4120 PlannedBase::InPack { base_idx, .. } => {
4121 depth[idx] = depth[base_idx] + 1;
4122 plan[idx].base = PlannedBase::InPack { base_idx, delta };
4123 }
4124 PlannedBase::External { base_oid, .. } => {
4125 depth[idx] = 1;
4126 plan[idx].base = PlannedBase::External { base_oid, delta };
4127 }
4128 PlannedBase::None => {}
4129 }
4130 }
4131
4132 window.push_back(DeltaWindowEntry {
4134 idx,
4135 index: DeltaIndex::new(&objects[idx].body),
4136 });
4137 while window.len() > options.window {
4138 window.pop_front();
4139 }
4140 }
4141
4142 Ok((plan, order))
4143}
4144
/// A delta is worth using only if it is non-empty and strictly smaller than
/// the object it encodes.
fn delta_is_acceptable(delta: &[u8], target_len: usize) -> bool {
    (1..target_len).contains(&delta.len())
}
4152
/// Appends `value` to `out` as a little-endian base-128 varint: seven bits
/// per byte, least significant group first, high bit set on every byte
/// except the last.
fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
    while value >= 0x80 {
        out.push((value as u8 & 0x7f) | 0x80);
        value >>= 7;
    }
    out.push(value as u8);
}
4166
/// Appends delta copy commands that copy `size` bytes starting at `offset`
/// of the base.
///
/// The range is split into pieces of at most 0x10000 bytes. Each command
/// byte has the high bit set, bits 0..=3 flag which of the four low offset
/// bytes follow, and bits 4..=6 flag which of the three size bytes follow;
/// zero bytes are omitted. A full 0x10000-byte piece is encoded with size 0.
/// Nothing is written when `size` is zero.
fn write_delta_copy(out: &mut Vec<u8>, mut offset: u64, mut size: u64) {
    const MAX_COPY: u64 = 0x10000;
    while size > 0 {
        let piece = size.min(MAX_COPY);
        let encoded_size = if piece == MAX_COPY { 0 } else { piece };

        // Reserve the command byte, then append the non-zero operand bytes
        // and patch the flags in afterwards.
        let command_at = out.len();
        out.push(0x80);
        let offset_bytes = offset.to_le_bytes();
        let size_bytes = encoded_size.to_le_bytes();
        let mut command = 0x80u8;
        for (bit, &byte) in offset_bytes[..4]
            .iter()
            .chain(&size_bytes[..3])
            .enumerate()
        {
            if byte != 0 {
                command |= 1 << bit;
                out.push(byte);
            }
        }
        out[command_at] = command;

        offset += piece;
        size -= piece;
    }
}
4196
/// Appends delta insert commands carrying `bytes` as literal data.
///
/// The data is split into pieces of at most 0x7f bytes, each preceded by a
/// length byte. Nothing is written for empty input.
fn write_delta_insert(out: &mut Vec<u8>, bytes: &[u8]) {
    for piece in bytes.chunks(0x7f) {
        out.push(piece.len() as u8);
        out.extend_from_slice(piece);
    }
}
4205
4206fn read_delta_varint(delta: &[u8], cursor: &mut usize) -> Result<u64> {
4207 let mut value = 0u64;
4208 let mut shift = 0u32;
4209 loop {
4210 let Some(byte) = delta.get(*cursor).copied() else {
4211 return Err(GitError::InvalidObject("truncated delta size".into()));
4212 };
4213 *cursor += 1;
4214 value = value
4215 .checked_add(
4216 u64::from(byte & 0x7f)
4217 .checked_shl(shift)
4218 .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?,
4219 )
4220 .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
4221 if byte & 0x80 == 0 {
4222 return Ok(value);
4223 }
4224 shift = shift
4225 .checked_add(7)
4226 .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
4227 }
4228}
4229
4230fn read_delta_copy_value(
4231 delta: &[u8],
4232 cursor: &mut usize,
4233 command: u8,
4234 masks: &[u8],
4235) -> Result<u64> {
4236 let mut value = 0u64;
4237 for (shift, mask) in masks.iter().enumerate() {
4238 if command & mask != 0 {
4239 let Some(byte) = delta.get(*cursor).copied() else {
4240 return Err(GitError::InvalidObject(
4241 "truncated delta copy command".into(),
4242 ));
4243 };
4244 *cursor += 1;
4245 value |= u64::from(byte) << (shift * 8);
4246 }
4247 }
4248 Ok(value)
4249}
4250
thread_local! {
    /// Per-thread compressor reused across pack entries. It is created with
    /// the default compression level and zlib framing enabled.
    static DEFLATE: RefCell<Compress> = RefCell::new(Compress::new(Compression::default(), true));
}
4254
4255fn write_compressed_payload(out: &mut Vec<u8>, body: &[u8]) -> Result<()> {
4256 DEFLATE.with(|cell| {
4257 let mut compressor = cell.borrow_mut();
4258 compressor.reset();
4259 out.reserve(zlib_compress_bound(body.len()));
4260 let status = compressor
4261 .compress_vec(body, out, FlushCompress::Finish)
4262 .map_err(|err| GitError::InvalidObject(format!("zlib compression failed: {err}")))?;
4263 if status != Status::StreamEnd || compressor.total_in() != body.len() as u64 {
4264 return Err(GitError::InvalidObject(
4265 "zlib compression did not finish pack entry".into(),
4266 ));
4267 }
4268 Ok(())
4269 })
4270}
4271
/// Upper bound used when reserving space for the compressed form of `len`
/// input bytes: `len + len/4096 + len/16384 + len/2^25 + 13`, saturating at
/// `usize::MAX`.
fn zlib_compress_bound(len: usize) -> usize {
    [len >> 12, len >> 14, len >> 25, 13]
        .into_iter()
        .fold(len, usize::saturating_add)
}
4278
4279fn write_entry_header(out: &mut Vec<u8>, object_type: ObjectType, size: u64) {
4280 let type_code = match object_type {
4281 ObjectType::Commit => 1,
4282 ObjectType::Tree => 2,
4283 ObjectType::Blob => 3,
4284 ObjectType::Tag => 4,
4285 };
4286 write_pack_entry_header_kind(out, type_code, size);
4287}
4288
/// Appends a pack entry header with an explicit type code.
///
/// The first byte holds `type_code` in bits 4..=6 and the low four bits of
/// `size`; the remaining size bits follow seven at a time, least significant
/// group first. Every byte except the last has its high bit set.
fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, mut size: u64) {
    // `pending` is always the byte that still has to be written; it gets the
    // continuation bit only once we know more size bits follow.
    let mut pending = (type_code << 4) | (size as u8 & 0x0f);
    size >>= 4;
    while size != 0 {
        out.push(pending | 0x80);
        pending = size as u8 & 0x7f;
        size >>= 7;
    }
    out.push(pending);
}
4305
4306fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: u64) -> Result<()> {
4307 if relative == 0 {
4308 return Err(GitError::InvalidFormat(
4309 "ofs-delta relative offset cannot be zero".into(),
4310 ));
4311 }
4312 let mut value = relative;
4313 let mut bytes = vec![(value & 0x7f) as u8];
4314 value >>= 7;
4315 while value != 0 {
4316 value -= 1;
4317 bytes.push(((value & 0x7f) as u8) | 0x80);
4318 value >>= 7;
4319 }
4320 bytes.reverse();
4321 out.extend_from_slice(&bytes);
4322 Ok(())
4323}
4324
4325fn next_byte(bytes: &[u8], offset: &mut usize) -> Result<u8> {
4326 let Some(byte) = bytes.get(*offset).copied() else {
4327 return Err(GitError::InvalidFormat(
4328 "truncated pack entry header".into(),
4329 ));
4330 };
4331 *offset += 1;
4332 Ok(byte)
4333}
4334
/// Decodes the first two bytes of `bytes` as a big-endian `u16`.
///
/// # Panics
///
/// Panics if `bytes` holds fewer than two bytes.
fn u16_be(bytes: &[u8]) -> u16 {
    let high = u16::from(bytes[0]);
    let low = u16::from(bytes[1]);
    (high << 8) | low
}
4338
/// Decodes the first four bytes of `bytes` as a big-endian `u32`.
///
/// # Panics
///
/// Panics if `bytes` holds fewer than four bytes.
fn u32_be(bytes: &[u8]) -> u32 {
    let mut value = 0u32;
    for position in 0..4 {
        value = (value << 8) | u32::from(bytes[position]);
    }
    value
}
4342
/// Decodes the first eight bytes of `bytes` as a big-endian `u64`.
///
/// # Panics
///
/// Panics if `bytes` holds fewer than eight bytes.
fn u64_be(bytes: &[u8]) -> u64 {
    let mut value = 0u64;
    for position in 0..8 {
        value = (value << 8) | u64::from(bytes[position]);
    }
    value
}
4348
4349fn read_pack_index_fanout(bytes: &[u8], offset: &mut usize) -> Result<[u32; 256]> {
4350 let mut fanout = [0u32; 256];
4351 let mut previous = 0u32;
4352 for slot in &mut fanout {
4353 *slot = u32_be(&bytes[*offset..*offset + 4]);
4354 if *slot < previous {
4355 return Err(GitError::InvalidFormat(
4356 "pack index fanout is not monotonic".into(),
4357 ));
4358 }
4359 previous = *slot;
4360 *offset += 4;
4361 }
4362 Ok(fanout)
4363}
4364
4365fn validate_pack_index_oid_fanout(idx: usize, oid_bytes: &[u8], fanout: &[u32; 256]) -> Result<()> {
4366 let expected_min = if oid_bytes[0] == 0 {
4367 0
4368 } else {
4369 fanout[usize::from(oid_bytes[0] - 1)]
4370 };
4371 if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
4372 return Err(GitError::InvalidFormat(
4373 "pack index object id is outside its fanout bucket".into(),
4374 ));
4375 }
4376 Ok(())
4377}
4378
4379fn pack_index_v2_offset(raw_offset: u32, large_offset_table: &[u8]) -> Result<u64> {
4380 if raw_offset & 0x8000_0000 == 0 {
4381 return Ok(u64::from(raw_offset));
4382 }
4383 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
4384 let large_start = large_idx
4385 .checked_mul(8)
4386 .ok_or_else(|| GitError::InvalidFormat("pack index large offset overflow".into()))?;
4387 let large_end = large_start
4388 .checked_add(8)
4389 .ok_or_else(|| GitError::InvalidFormat("pack index large offset overflow".into()))?;
4390 if large_end > large_offset_table.len() {
4391 return Err(GitError::InvalidFormat(
4392 "pack index large offset points past table".into(),
4393 ));
4394 }
4395 Ok(u64_be(&large_offset_table[large_start..large_end]))
4396}
4397
4398fn checked_range(
4399 start: usize,
4400 count: usize,
4401 width: usize,
4402 total: usize,
4403) -> Result<std::ops::Range<usize>> {
4404 let len = count
4405 .checked_mul(width)
4406 .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
4407 let end = start
4408 .checked_add(len)
4409 .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
4410 if end > total {
4411 return Err(GitError::InvalidFormat("truncated pack index table".into()));
4412 }
4413 Ok(start..end)
4414}
4415
4416fn validate_position_permutation(positions: &[u32]) -> Result<()> {
4417 let mut seen = vec![false; positions.len()];
4418 for position in positions {
4419 let idx = *position as usize;
4420 if idx >= positions.len() {
4421 return Err(GitError::InvalidFormat(
4422 "reverse index position points past object table".into(),
4423 ));
4424 }
4425 if seen[idx] {
4426 return Err(GitError::InvalidFormat(
4427 "reverse index position is duplicated".into(),
4428 ));
4429 }
4430 seen[idx] = true;
4431 }
4432 Ok(())
4433}
4434
4435fn parse_midx_pack_names(
4436 bytes: &[u8],
4437 chunks: &[MultiPackIndexChunk],
4438 pack_count: usize,
4439 version: u8,
4440) -> Result<Vec<String>> {
4441 let data = midx_chunk_data(bytes, chunks, *b"PNAM", true)?
4442 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing PNAM chunk".into()))?;
4443 let mut names = Vec::with_capacity(pack_count);
4444 let mut offset = 0usize;
4445 while names.len() < pack_count {
4446 let Some(relative_end) = data[offset..].iter().position(|byte| *byte == 0) else {
4447 return Err(GitError::InvalidFormat(
4448 "multi-pack-index PNAM entry is unterminated".into(),
4449 ));
4450 };
4451 let name_bytes = &data[offset..offset + relative_end];
4452 if name_bytes.is_empty() {
4453 return Err(GitError::InvalidFormat(
4454 "multi-pack-index PNAM entry is empty".into(),
4455 ));
4456 }
4457 let name = std::str::from_utf8(name_bytes)
4458 .map_err(|err| GitError::InvalidFormat(err.to_string()))?;
4459 if name.bytes().any(|byte| matches!(byte, b'/' | b'\\')) {
4460 return Err(GitError::InvalidFormat(
4461 "multi-pack-index PNAM entry contains a path separator".into(),
4462 ));
4463 }
4464 names.push(name.to_string());
4465 offset += relative_end + 1;
4466 }
4467 let padding = &data[offset..];
4468 if padding.len() > 3 || padding.iter().any(|byte| *byte != 0) {
4469 return Err(GitError::InvalidFormat(
4470 "multi-pack-index PNAM padding is invalid".into(),
4471 ));
4472 }
4473 if version == 1 && names.windows(2).any(|pair| pair[0] > pair[1]) {
4474 return Err(GitError::InvalidFormat(
4475 "multi-pack-index v1 PNAM entries are not sorted".into(),
4476 ));
4477 }
4478 Ok(names)
4479}
4480
4481fn parse_midx_oid_fanout(
4482 bytes: &[u8],
4483 chunks: &[MultiPackIndexChunk],
4484) -> Result<([u32; 256], usize)> {
4485 let data = midx_chunk_data(bytes, chunks, *b"OIDF", true)?
4486 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDF chunk".into()))?;
4487 if data.len() != 256 * 4 {
4488 return Err(GitError::InvalidFormat(
4489 "multi-pack-index OIDF chunk has invalid length".into(),
4490 ));
4491 }
4492 let mut fanout = [0u32; 256];
4493 let mut previous = 0u32;
4494 for (idx, slot) in fanout.iter_mut().enumerate() {
4495 let start = idx * 4;
4496 *slot = u32_be(&data[start..start + 4]);
4497 if *slot < previous {
4498 return Err(GitError::InvalidFormat(
4499 "multi-pack-index OIDF fanout is not monotonic".into(),
4500 ));
4501 }
4502 previous = *slot;
4503 }
4504 Ok((fanout, fanout[255] as usize))
4505}
4506
4507fn parse_midx_object_ids(
4508 bytes: &[u8],
4509 chunks: &[MultiPackIndexChunk],
4510 format: ObjectFormat,
4511 object_count: usize,
4512 fanout: &[u32; 256],
4513) -> Result<Vec<ObjectId>> {
4514 let data = midx_chunk_data(bytes, chunks, *b"OIDL", true)?
4515 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
4516 let expected_len = object_count
4517 .checked_mul(format.raw_len())
4518 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into()))?;
4519 if data.len() != expected_len {
4520 return Err(GitError::InvalidFormat(
4521 "multi-pack-index OIDL chunk has invalid length".into(),
4522 ));
4523 }
4524
4525 let mut ids = Vec::with_capacity(object_count);
4526 let mut counts = [0u32; 256];
4527 let mut previous_oid: Option<ObjectId> = None;
4528 for idx in 0..object_count {
4529 let start = idx * format.raw_len();
4530 let oid = ObjectId::from_raw(format, &data[start..start + format.raw_len()])?;
4531 if let Some(previous) = &previous_oid
4532 && previous.as_bytes() >= oid.as_bytes()
4533 {
4534 return Err(GitError::InvalidFormat(
4535 "multi-pack-index OIDL object ids are not strictly sorted".into(),
4536 ));
4537 }
4538 counts[oid.as_bytes()[0] as usize] = counts[oid.as_bytes()[0] as usize]
4539 .checked_add(1)
4540 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
4541 previous_oid = Some(oid);
4542 ids.push(oid);
4543 }
4544
4545 let mut running = 0u32;
4546 for (idx, count) in counts.iter().enumerate() {
4547 running = running
4548 .checked_add(*count)
4549 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
4550 if fanout[idx] != running {
4551 return Err(GitError::InvalidFormat(
4552 "multi-pack-index OIDF fanout does not match OIDL".into(),
4553 ));
4554 }
4555 }
4556 Ok(ids)
4557}
4558
4559fn parse_midx_object_offsets(
4560 bytes: &[u8],
4561 chunks: &[MultiPackIndexChunk],
4562 object_ids: Vec<ObjectId>,
4563 pack_count: u32,
4564) -> Result<Vec<MultiPackIndexEntry>> {
4565 let data = midx_chunk_data(bytes, chunks, *b"OOFF", true)?
4566 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
4567 let expected_len = object_ids
4568 .len()
4569 .checked_mul(8)
4570 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into()))?;
4571 if data.len() != expected_len {
4572 return Err(GitError::InvalidFormat(
4573 "multi-pack-index OOFF chunk has invalid length".into(),
4574 ));
4575 }
4576 let large_offsets = midx_chunk_data(bytes, chunks, *b"LOFF", false)?;
4577 if let Some(large_offsets) = large_offsets
4578 && large_offsets.len() % 8 != 0
4579 {
4580 return Err(GitError::InvalidFormat(
4581 "multi-pack-index LOFF chunk has invalid length".into(),
4582 ));
4583 }
4584
4585 let mut entries = Vec::with_capacity(object_ids.len());
4586 for (idx, oid) in object_ids.into_iter().enumerate() {
4587 let start = idx * 8;
4588 let pack_int_id = u32_be(&data[start..start + 4]);
4589 if pack_int_id >= pack_count {
4590 return Err(GitError::InvalidFormat(
4591 "multi-pack-index object points past pack table".into(),
4592 ));
4593 }
4594 let raw_offset = u32_be(&data[start + 4..start + 8]);
4595 let offset = if raw_offset & 0x8000_0000 == 0 {
4596 u64::from(raw_offset)
4597 } else {
4598 let Some(large_offsets) = large_offsets else {
4599 return Err(GitError::InvalidFormat(
4600 "multi-pack-index large offset missing LOFF chunk".into(),
4601 ));
4602 };
4603 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
4604 let large_start = large_idx.checked_mul(8).ok_or_else(|| {
4605 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
4606 })?;
4607 let large_end = large_start.checked_add(8).ok_or_else(|| {
4608 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
4609 })?;
4610 if large_end > large_offsets.len() {
4611 return Err(GitError::InvalidFormat(
4612 "multi-pack-index large offset points past LOFF chunk".into(),
4613 ));
4614 }
4615 u64_be(&large_offsets[large_start..large_end])
4616 };
4617 entries.push(MultiPackIndexEntry {
4618 oid,
4619 pack_int_id,
4620 offset,
4621 });
4622 }
4623 Ok(entries)
4624}
4625
4626fn parse_midx_reverse_index(
4627 bytes: &[u8],
4628 chunks: &[MultiPackIndexChunk],
4629 object_count: usize,
4630) -> Result<Option<Vec<u32>>> {
4631 let Some(data) = midx_chunk_data(bytes, chunks, *b"RIDX", false)? else {
4632 return Ok(None);
4633 };
4634 let expected_len = object_count
4635 .checked_mul(4)
4636 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index RIDX chunk overflow".into()))?;
4637 if data.len() != expected_len {
4638 return Err(GitError::InvalidFormat(
4639 "multi-pack-index RIDX chunk has invalid length".into(),
4640 ));
4641 }
4642 let mut positions = Vec::with_capacity(object_count);
4643 for idx in 0..object_count {
4644 let start = idx * 4;
4645 positions.push(u32_be(&data[start..start + 4]));
4646 }
4647 validate_position_permutation(&positions)?;
4648 Ok(Some(positions))
4649}
4650
4651fn parse_midx_bitmapped_packs(
4652 bytes: &[u8],
4653 chunks: &[MultiPackIndexChunk],
4654 pack_count: usize,
4655 object_count: usize,
4656) -> Result<Option<Vec<MultiPackBitmapPack>>> {
4657 let Some(data) = midx_chunk_data(bytes, chunks, *b"BTMP", false)? else {
4658 return Ok(None);
4659 };
4660 let expected_len = pack_count
4661 .checked_mul(8)
4662 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index BTMP chunk overflow".into()))?;
4663 if data.len() != expected_len {
4664 return Err(GitError::InvalidFormat(
4665 "multi-pack-index BTMP chunk has invalid length".into(),
4666 ));
4667 }
4668 let mut entries = Vec::with_capacity(pack_count);
4669 for idx in 0..pack_count {
4670 let start = idx * 8;
4671 let bitmap_pos = u32_be(&data[start..start + 4]);
4672 let bitmap_nr = u32_be(&data[start + 4..start + 8]);
4673 let bitmap_end = u64::from(bitmap_pos)
4674 .checked_add(u64::from(bitmap_nr))
4675 .ok_or_else(|| {
4676 GitError::InvalidFormat("multi-pack-index BTMP range overflow".into())
4677 })?;
4678 if bitmap_end > object_count as u64 {
4679 return Err(GitError::InvalidFormat(
4680 "multi-pack-index BTMP range points past object table".into(),
4681 ));
4682 }
4683 entries.push(MultiPackBitmapPack {
4684 bitmap_pos,
4685 bitmap_nr,
4686 });
4687 }
4688 Ok(Some(entries))
4689}
4690
4691fn midx_chunk_data<'a>(
4692 bytes: &'a [u8],
4693 chunks: &[MultiPackIndexChunk],
4694 id: [u8; 4],
4695 required: bool,
4696) -> Result<Option<&'a [u8]>> {
4697 let Some(chunk) = chunks.iter().find(|chunk| chunk.id == id) else {
4698 if required {
4699 return Err(GitError::InvalidFormat(format!(
4700 "multi-pack-index missing {} chunk",
4701 std::str::from_utf8(&id).unwrap_or("required")
4702 )));
4703 }
4704 return Ok(None);
4705 };
4706 let start = usize::try_from(chunk.offset)
4707 .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk offset overflow".into()))?;
4708 let len = usize::try_from(chunk.len)
4709 .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk length overflow".into()))?;
4710 let end = start
4711 .checked_add(len)
4712 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index chunk range overflow".into()))?;
4713 let Some(data) = bytes.get(start..end) else {
4714 return Err(GitError::InvalidFormat(
4715 "multi-pack-index chunk extends past file".into(),
4716 ));
4717 };
4718 Ok(Some(data))
4719}
4720
4721fn hash_function_id(format: ObjectFormat) -> u32 {
4722 match format {
4723 ObjectFormat::Sha1 => 1,
4724 ObjectFormat::Sha256 => 2,
4725 }
4726}
4727
/// Largest run length one run-length word (RLW) can hold. Also used as the mask for the
/// running-length field once the run bit has been shifted out (`(rlw >> 1) & MASK`).
const EWAH_MAX_RUNNING_LEN: u64 = 0xffff_ffff;

/// Largest literal-word count one RLW can hold; the count occupies the bits above bit 32
/// (`rlw >> 33`).
const EWAH_MAX_LITERAL_LEN: u64 = 0x7fff_ffff;

/// A 64-bit word with every bit set; the fill value of a run whose run bit is 1.
const EWAH_ALL_ONES: u64 = u64::MAX;
4738
4739impl EwahBitmap {
4740 pub fn from_words(bit_size: u32, words: &[u64]) -> Result<Self> {
4754 let required_words = bit_size.div_ceil(64) as usize;
4755 if required_words > words.len() {
4756 return Err(GitError::InvalidFormat(format!(
4757 "EWAH bit_size {bit_size} requires {required_words} words but only {} supplied",
4758 words.len()
4759 )));
4760 }
4761 let significant = &words[..required_words];
4764 let mut builder = EwahBuilder::new(bit_size);
4765 for &word in significant {
4766 if word == 0 {
4767 builder.add_empty_words(false, 1);
4768 } else if word == EWAH_ALL_ONES {
4769 builder.add_empty_words(true, 1);
4770 } else {
4771 builder.add_literal(word);
4772 }
4773 }
4774 builder.finish()
4775 }
4776
4777 pub fn from_positions(bit_size: u32, positions: &[u32]) -> Result<Self> {
4783 let word_count = bit_size.div_ceil(64) as usize;
4784 let mut words = vec![0u64; word_count];
4785 for &position in positions {
4786 if position >= bit_size {
4787 return Err(GitError::InvalidFormat(format!(
4788 "EWAH bit position {position} out of range for bit_size {bit_size}"
4789 )));
4790 }
4791 let word_index = (position / 64) as usize;
4792 let bit_index = position % 64;
4793 words[word_index] |= 1u64 << bit_index;
4794 }
4795 Self::from_words(bit_size, &words)
4796 }
4797
4798 pub fn empty() -> Self {
4801 Self {
4802 bit_size: 0,
4803 words: Vec::new(),
4804 rlw_position: 0,
4805 }
4806 }
4807
4808 pub fn to_words(&self) -> Result<Vec<u64>> {
4814 let mut out = Vec::new();
4815 let mut word_idx = 0usize;
4816 while word_idx < self.words.len() {
4817 let rlw = self.words[word_idx];
4818 let run_bit = rlw & 1;
4819 let run_words = (rlw >> 1) & EWAH_MAX_RUNNING_LEN;
4820 let literal_words = (rlw >> 33) as usize;
4821 word_idx += 1;
4822 let fill = if run_bit == 1 { EWAH_ALL_ONES } else { 0 };
4823 for _ in 0..run_words {
4824 out.push(fill);
4825 }
4826 let literal_end = word_idx
4827 .checked_add(literal_words)
4828 .filter(|end| *end <= self.words.len())
4829 .ok_or_else(|| {
4830 GitError::InvalidFormat("EWAH literal words extend past word table".into())
4831 })?;
4832 out.extend_from_slice(&self.words[word_idx..literal_end]);
4833 word_idx = literal_end;
4834 }
4835 let required_words = (self.bit_size as usize).div_ceil(64);
4836 if out.len() < required_words {
4837 out.resize(required_words, 0);
4838 }
4839 out.truncate(required_words);
4840 Ok(out)
4841 }
4842
4843 pub fn to_positions(&self) -> Result<Vec<u32>> {
4845 let words = self.to_words()?;
4846 let mut positions = Vec::new();
4847 for (word_index, word) in words.iter().enumerate() {
4848 let mut remaining = *word;
4849 while remaining != 0 {
4850 let bit = remaining.trailing_zeros();
4851 let position = (word_index as u64) * 64 + u64::from(bit);
4852 if position < u64::from(self.bit_size) {
4853 positions.push(position as u32);
4855 }
4856 remaining &= remaining - 1;
4857 }
4858 }
4859 Ok(positions)
4860 }
4861
4862 pub fn to_bytes(&self) -> Vec<u8> {
4866 let mut out = Vec::with_capacity(12 + self.words.len() * 8);
4867 self.append_bytes(&mut out);
4868 out
4869 }
4870
4871 fn append_bytes(&self, out: &mut Vec<u8>) {
4872 out.extend_from_slice(&self.bit_size.to_be_bytes());
4873 out.extend_from_slice(&(self.words.len() as u32).to_be_bytes());
4874 for word in &self.words {
4875 out.extend_from_slice(&word.to_be_bytes());
4876 }
4877 out.extend_from_slice(&self.rlw_position.to_be_bytes());
4878 }
4879}
4880
4881struct EwahBuilder {
4889 bit_size: u32,
4890 words: Vec<u64>,
4891 rlw_position: usize,
4892}
4893
4894impl EwahBuilder {
4895 fn new(bit_size: u32) -> Self {
4896 Self {
4898 bit_size,
4899 words: vec![0u64],
4900 rlw_position: 0,
4901 }
4902 }
4903
4904 fn rlw(&self) -> u64 {
4905 self.words[self.rlw_position]
4906 }
4907
4908 fn set_rlw(&mut self, value: u64) {
4909 self.words[self.rlw_position] = value;
4910 }
4911
4912 fn rlw_running_len(&self) -> u64 {
4913 (self.rlw() >> 1) & EWAH_MAX_RUNNING_LEN
4914 }
4915
4916 fn rlw_running_bit(&self) -> bool {
4917 self.rlw() & 1 == 1
4918 }
4919
4920 fn rlw_literal_len(&self) -> u64 {
4921 self.rlw() >> 33
4922 }
4923
4924 fn set_running_bit(&mut self, bit: bool) {
4925 let mut value = self.rlw();
4926 value &= !1;
4927 value |= u64::from(bit);
4928 self.set_rlw(value);
4929 }
4930
4931 fn set_running_len(&mut self, len: u64) {
4932 let mut value = self.rlw();
4933 value &= !(EWAH_MAX_RUNNING_LEN << 1);
4934 value |= (len & EWAH_MAX_RUNNING_LEN) << 1;
4935 self.set_rlw(value);
4936 }
4937
4938 fn set_literal_len(&mut self, len: u64) {
4939 let mut value = self.rlw();
4940 value &= (1u64 << 33) - 1;
4941 value |= (len & EWAH_MAX_LITERAL_LEN) << 33;
4942 self.set_rlw(value);
4943 }
4944
4945 fn push_rlw(&mut self) {
4947 self.rlw_position = self.words.len();
4948 self.words.push(0);
4949 }
4950
4951 fn add_empty_words(&mut self, value: bool, mut number: u64) {
4959 while number > 0 {
4960 let can_extend = self.rlw_literal_len() == 0
4964 && (self.rlw_running_len() == 0 || self.rlw_running_bit() == value)
4965 && self.rlw_running_len() < EWAH_MAX_RUNNING_LEN;
4966 if !can_extend {
4967 self.push_rlw();
4968 }
4969 if self.rlw_running_len() == 0 {
4970 self.set_running_bit(value);
4971 }
4972 let available = EWAH_MAX_RUNNING_LEN - self.rlw_running_len();
4973 let take = available.min(number);
4974 self.set_running_len(self.rlw_running_len() + take);
4975 number -= take;
4976 }
4977 }
4978
4979 fn add_literal(&mut self, word: u64) {
4982 if self.rlw_literal_len() >= EWAH_MAX_LITERAL_LEN {
4983 self.push_rlw();
4984 }
4985 let literal_len = self.rlw_literal_len();
4986 self.set_literal_len(literal_len + 1);
4987 self.words.push(word);
4988 }
4989
4990 fn finish(self) -> Result<EwahBitmap> {
4991 let rlw_position = u32::try_from(self.rlw_position)
4992 .map_err(|_| GitError::InvalidFormat("EWAH RLW position overflow".into()))?;
4993 if self.words.len() > u32::MAX as usize {
4994 return Err(GitError::InvalidFormat("EWAH word table overflow".into()));
4995 }
4996 Ok(EwahBitmap {
4997 bit_size: self.bit_size,
4998 words: self.words,
4999 rlw_position,
5000 })
5001 }
5002}
5003
/// Collects per-object type information and selected commits for one pack, and turns them
/// into a `PackBitmapIndex` (`build`) or its serialized bytes (`write`).
#[derive(Debug, Clone)]
pub struct PackBitmapWriter {
    /// Object format of the index being built; the pack checksum must use the same format.
    format: ObjectFormat,
    /// Checksum of the pack, copied into the built index.
    pack_checksum: ObjectId,
    /// Number of objects described (length of the `object_types` slice given to `new`).
    object_count: u32,
    /// Positions whose object type is `Commit`, in ascending order.
    commit_positions: Vec<u32>,
    /// Positions whose object type is `Tree`, in ascending order.
    tree_positions: Vec<u32>,
    /// Positions whose object type is `Blob`, in ascending order.
    blob_positions: Vec<u32>,
    /// Positions whose object type is `Tag`, in ascending order.
    tag_positions: Vec<u32>,
    /// Optional per-object name-hash values; when set, has exactly `object_count` entries.
    name_hash_cache: Option<Vec<u32>>,
    /// Commits registered through `add_commit`, in insertion order.
    selected: Vec<SelectedCommit>,
}
5028
/// One commit registered with `PackBitmapWriter::add_commit`.
#[derive(Debug, Clone)]
struct SelectedCommit {
    /// Value written as the entry's `object_position` when the index is built.
    /// NOTE(review): presumably the commit's position in the pack index order — confirm.
    commit_index_position: u32,
    /// Flags byte written with the entry; `add_commit` always stores `FLAG_NONE`.
    flags: u8,
    /// Bit positions set in this commit's bitmap; `add_commit` appends the commit's own
    /// position to the caller-supplied list.
    reachable: Vec<u32>,
}
5038
5039impl PackBitmapWriter {
5040 pub const FLAG_NONE: u8 = 0;
5044
5045 pub fn new(
5052 format: ObjectFormat,
5053 pack_checksum: ObjectId,
5054 object_types: &[ObjectType],
5055 ) -> Result<Self> {
5056 if object_types.len() > u32::MAX as usize {
5057 return Err(GitError::InvalidFormat(
5058 "too many objects for a pack bitmap".into(),
5059 ));
5060 }
5061 if pack_checksum.format() != format {
5062 return Err(GitError::InvalidObjectId(
5063 "pack checksum format does not match bitmap format".into(),
5064 ));
5065 }
5066 let object_count = object_types.len() as u32;
5067 let mut commit_positions = Vec::new();
5068 let mut tree_positions = Vec::new();
5069 let mut blob_positions = Vec::new();
5070 let mut tag_positions = Vec::new();
5071 for (index, object_type) in object_types.iter().enumerate() {
5072 let position = index as u32;
5073 match object_type {
5074 ObjectType::Commit => commit_positions.push(position),
5075 ObjectType::Tree => tree_positions.push(position),
5076 ObjectType::Blob => blob_positions.push(position),
5077 ObjectType::Tag => tag_positions.push(position),
5078 }
5079 }
5080 Ok(Self {
5081 format,
5082 pack_checksum,
5083 object_count,
5084 commit_positions,
5085 tree_positions,
5086 blob_positions,
5087 tag_positions,
5088 name_hash_cache: None,
5089 selected: Vec::new(),
5090 })
5091 }
5092
5093 pub fn with_name_hash_cache(mut self, cache: Vec<u32>) -> Result<Self> {
5099 if cache.len() != self.object_count as usize {
5100 return Err(GitError::InvalidFormat(format!(
5101 "name hash cache has {} entries but pack has {} objects",
5102 cache.len(),
5103 self.object_count
5104 )));
5105 }
5106 self.name_hash_cache = Some(cache);
5107 Ok(self)
5108 }
5109
5110 pub fn add_commit(
5122 &mut self,
5123 commit_position: u32,
5124 commit_index_position: u32,
5125 reachable: &[u32],
5126 ) -> Result<()> {
5127 if commit_position >= self.object_count {
5128 return Err(GitError::InvalidFormat(format!(
5129 "commit position {commit_position} out of range for {} objects",
5130 self.object_count
5131 )));
5132 }
5133 if commit_index_position >= self.object_count {
5134 return Err(GitError::InvalidFormat(format!(
5135 "commit index position {commit_index_position} out of range for {} objects",
5136 self.object_count
5137 )));
5138 }
5139 if !self.commit_positions.contains(&commit_position) {
5140 return Err(GitError::InvalidFormat(format!(
5141 "bitmap commit position {commit_position} is not a commit object"
5142 )));
5143 }
5144 for &position in reachable {
5145 if position >= self.object_count {
5146 return Err(GitError::InvalidFormat(format!(
5147 "reachable position {position} out of range for {} objects",
5148 self.object_count
5149 )));
5150 }
5151 }
5152 let mut reachable = reachable.to_vec();
5153 reachable.push(commit_position);
5154 self.selected.push(SelectedCommit {
5155 commit_index_position,
5156 flags: Self::FLAG_NONE,
5157 reachable,
5158 });
5159 Ok(())
5160 }
5161
5162 pub fn build(&self) -> Result<PackBitmapIndex> {
5169 let commits = EwahBitmap::from_positions(self.object_count, &self.commit_positions)?;
5170 let trees = EwahBitmap::from_positions(self.object_count, &self.tree_positions)?;
5171 let blobs = EwahBitmap::from_positions(self.object_count, &self.blob_positions)?;
5172 let tags = EwahBitmap::from_positions(self.object_count, &self.tag_positions)?;
5173
5174 let mut entries = Vec::with_capacity(self.selected.len());
5175 for selected in &self.selected {
5176 let bitmap = EwahBitmap::from_positions(self.object_count, &selected.reachable)?;
5177 entries.push(PackBitmapEntry {
5178 object_position: selected.commit_index_position,
5179 xor_offset: 0,
5180 flags: selected.flags,
5181 bitmap,
5182 });
5183 }
5184
5185 let mut options = PackBitmapIndex::OPTION_FULL_DAG;
5186 if self.name_hash_cache.is_some() {
5187 options |= PackBitmapIndex::OPTION_HASH_CACHE;
5188 }
5189
5190 let placeholder_checksum = ObjectId::null(self.format);
5195 Ok(PackBitmapIndex {
5196 version: 1,
5197 format: self.format,
5198 options,
5199 pack_checksum: self.pack_checksum.clone(),
5200 index_checksum: placeholder_checksum,
5201 type_bitmaps: PackBitmapTypeBitmaps {
5202 commits,
5203 trees,
5204 blobs,
5205 tags,
5206 },
5207 entries,
5208 name_hash_cache: self.name_hash_cache.clone(),
5209 })
5210 }
5211
5212 pub fn write(&self) -> Result<Vec<u8>> {
5215 self.build()?.write()
5216 }
5217}
5218
5219impl PackBitmapIndex {
5220 pub fn write(&self) -> Result<Vec<u8>> {
5234 if self.version != 1 {
5235 return Err(GitError::Unsupported(format!(
5236 "bitmap index version {}",
5237 self.version
5238 )));
5239 }
5240 let known_options = Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE;
5241 if self.options & !known_options != 0 {
5242 return Err(GitError::Unsupported(format!(
5243 "bitmap index options {:#06x}",
5244 self.options & !known_options
5245 )));
5246 }
5247 if self.pack_checksum.format() != self.format {
5248 return Err(GitError::InvalidObjectId(
5249 "bitmap pack checksum format does not match index format".into(),
5250 ));
5251 }
5252 if self.entries.len() > u32::MAX as usize {
5253 return Err(GitError::InvalidFormat(
5254 "too many bitmap index entries".into(),
5255 ));
5256 }
5257 let want_cache = self.options & Self::OPTION_HASH_CACHE != 0;
5258 match (&self.name_hash_cache, want_cache) {
5259 (Some(_), false) => {
5260 return Err(GitError::InvalidFormat(
5261 "name hash cache present without OPTION_HASH_CACHE".into(),
5262 ));
5263 }
5264 (None, true) => {
5265 return Err(GitError::InvalidFormat(
5266 "OPTION_HASH_CACHE set without a name hash cache".into(),
5267 ));
5268 }
5269 _ => {}
5270 }
5271
5272 let mut out = Vec::new();
5273 out.extend_from_slice(b"BITM");
5274 out.extend_from_slice(&self.version.to_be_bytes());
5275 out.extend_from_slice(&self.options.to_be_bytes());
5276 out.extend_from_slice(&(self.entries.len() as u32).to_be_bytes());
5277 out.extend_from_slice(self.pack_checksum.as_bytes());
5278
5279 self.type_bitmaps.commits.append_bytes(&mut out);
5280 self.type_bitmaps.trees.append_bytes(&mut out);
5281 self.type_bitmaps.blobs.append_bytes(&mut out);
5282 self.type_bitmaps.tags.append_bytes(&mut out);
5283
5284 for (idx, entry) in self.entries.iter().enumerate() {
5285 if entry.xor_offset as usize > idx {
5286 return Err(GitError::InvalidFormat(
5287 "bitmap index entry has invalid XOR offset".into(),
5288 ));
5289 }
5290 out.extend_from_slice(&entry.object_position.to_be_bytes());
5291 out.push(entry.xor_offset);
5292 out.push(entry.flags);
5293 entry.bitmap.append_bytes(&mut out);
5294 }
5295
5296 if let Some(cache) = &self.name_hash_cache {
5297 for value in cache {
5298 out.extend_from_slice(&value.to_be_bytes());
5299 }
5300 }
5301
5302 let checksum = sley_core::digest_bytes(self.format, &out)?;
5303 out.extend_from_slice(checksum.as_bytes());
5304 Ok(out)
5305 }
5306}
5307
5308pub fn write_bitmap(
5317 format: ObjectFormat,
5318 pack_checksum: ObjectId,
5319 object_types: &[ObjectType],
5320 commits: &[(u32, u32, Vec<u32>)],
5321 name_hash_cache: Option<Vec<u32>>,
5322) -> Result<Vec<u8>> {
5323 let mut writer = PackBitmapWriter::new(format, pack_checksum, object_types)?;
5324 if let Some(cache) = name_hash_cache {
5325 writer = writer.with_name_hash_cache(cache)?;
5326 }
5327 for (commit_position, commit_index_position, reachable) in commits {
5328 writer.add_commit(*commit_position, *commit_index_position, reachable)?;
5329 }
5330 writer.write()
5331}
5332
5333#[cfg(test)]
5334mod tests {
5335 use super::*;
5336 use flate2::Compression;
5337 use flate2::read::ZlibDecoder;
5338 use flate2::write::ZlibEncoder;
5339 use std::fs;
5340 use std::io::Read;
5341 use std::io::Write;
5342 use std::path::{Path, PathBuf};
5343 use std::process::Command;
5344 use std::time::{SystemTime, UNIX_EPOCH};
5345
5346 fn delta_pack_options(prefer_ofs_delta: bool) -> PackWriteOptions {
5347 PackWriteOptions::new()
5348 .with_prefer_ofs_delta(prefer_ofs_delta)
5349 .with_reorder(false)
5350 }
5351
5352 #[test]
5353 fn parses_single_blob_pack() {
5354 let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
5355 let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
5356 assert_eq!(parsed.version, 2);
5357 assert_eq!(parsed.entries.len(), 1);
5358 let object = &parsed.entries[0].object;
5359 assert_eq!(object.object_type, ObjectType::Blob);
5360 assert_eq!(object.body, b"hello\n");
5361 assert_eq!(
5362 parsed.entries[0].entry.oid.to_hex(),
5363 "ce013625030ba8dba906f756967f9e9ca394464a"
5364 );
5365 }
5366
5367 #[test]
5368 fn parses_single_blob_pack_sha256() {
5369 let pack = single_object_pack(ObjectFormat::Sha256, ObjectType::Blob, b"hello\n");
5370 let parsed =
5371 PackFile::parse(&pack, ObjectFormat::Sha256).expect("test operation should succeed");
5372 assert_eq!(parsed.version, 2);
5373 assert_eq!(parsed.entries.len(), 1);
5374 let object = &parsed.entries[0].object;
5375 assert_eq!(object.object_type, ObjectType::Blob);
5376 assert_eq!(object.body, b"hello\n");
5377 assert_eq!(
5378 parsed.entries[0].entry.oid,
5379 object
5380 .object_id(ObjectFormat::Sha256)
5381 .expect("test operation should succeed")
5382 );
5383 }
5384
5385 #[test]
5386 fn parses_bundle_pack_payload_with_bundle_format() {
5387 let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
5388 let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"bundle\n")
5389 .expect("test operation should succeed");
5390 let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
5391 .into_bytes()
5392 .into_iter()
5393 .chain(pack)
5394 .collect::<Vec<_>>();
5395 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
5396 .expect("test operation should succeed");
5397
5398 let parsed = PackFile::parse_bundle(&bundle).expect("test operation should succeed");
5399 assert_eq!(parsed.entries.len(), 1);
5400 assert_eq!(parsed.entries[0].object.object_type, ObjectType::Blob);
5401 assert_eq!(parsed.entries[0].object.body, b"bundle\n");
5402 }
5403
5404 fn lying_size_blob_pack(format: ObjectFormat, declared_size: u64, real_body: &[u8]) -> Vec<u8> {
5410 let mut pack = Vec::new();
5411 pack.extend_from_slice(b"PACK");
5412 pack.extend_from_slice(&2u32.to_be_bytes());
5413 pack.extend_from_slice(&1u32.to_be_bytes());
5414 write_pack_entry_header_kind(&mut pack, 3, declared_size);
5416 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
5417 encoder
5418 .write_all(real_body)
5419 .expect("test operation should succeed");
5420 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
5421 let checksum =
5422 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
5423 pack.extend_from_slice(checksum.as_bytes());
5424 pack
5425 }
5426
5427 #[test]
5440 fn rejects_decompression_bomb_header_without_oom() {
5441 for &declared in &[u64::MAX, 100 * 1024 * 1024 * 1024, u64::from(u32::MAX) * 4] {
5442 let pack = lying_size_blob_pack(ObjectFormat::Sha1, declared, b"tiny\n");
5443 let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
5444 let result = handle.join();
5445 assert!(
5447 result.is_ok(),
5448 "parsing a bomb header (declared={declared}) panicked instead of erroring cleanly"
5449 );
5450 let parse_result = result.expect("parse thread should not panic on a bomb header");
5452 assert!(
5453 parse_result.is_err(),
5454 "bomb header (declared={declared}) should be rejected as invalid"
5455 );
5456 }
5457 }
5458
5459 fn lying_result_size_delta_pack(
5466 format: ObjectFormat,
5467 declared_result_size: u64,
5468 delta_kind: DeltaKind,
5469 ) -> Vec<u8> {
5470 let base = b"hello";
5471 let result = b"hello world"; let mut delta = Vec::new();
5475 write_delta_varint(&mut delta, base.len() as u64);
5476 write_delta_varint(&mut delta, declared_result_size);
5477 let suffix = &result[base.len()..];
5479 delta.push(0x90); delta.push(base.len() as u8);
5481 delta.push(suffix.len() as u8);
5482 delta.extend_from_slice(suffix);
5483
5484 let mut pack = Vec::new();
5485 pack.extend_from_slice(b"PACK");
5486 pack.extend_from_slice(&2u32.to_be_bytes());
5487 pack.extend_from_slice(&2u32.to_be_bytes());
5488
5489 let base_offset = pack.len();
5490 write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
5491 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
5492 encoder
5493 .write_all(base)
5494 .expect("test operation should succeed");
5495 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
5496
5497 let delta_offset = pack.len();
5498 write_pack_entry_header_kind(
5499 &mut pack,
5500 match delta_kind {
5501 DeltaKind::Offset => 6,
5502 DeltaKind::Ref => 7,
5503 },
5504 delta.len() as u64,
5505 );
5506 match delta_kind {
5507 DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
5508 DeltaKind::Ref => {
5509 let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
5510 .expect("test operation should succeed");
5511 pack.extend_from_slice(base_oid.as_bytes());
5512 }
5513 }
5514 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
5515 encoder
5516 .write_all(&delta)
5517 .expect("test operation should succeed");
5518 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
5519
5520 let checksum =
5521 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
5522 pack.extend_from_slice(checksum.as_bytes());
5523 pack
5524 }
5525
5526 #[test]
5536 fn rejects_delta_result_size_bomb_without_oom() {
5537 let bombs: &[u64] = &[u64::MAX, 1024 * 1024 * 1024 * 1024];
5538 for &declared in bombs {
5539 for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
5540 let pack = lying_result_size_delta_pack(ObjectFormat::Sha1, declared, delta_kind);
5541 let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
5542 let join_result = handle.join();
5543 assert!(
5544 join_result.is_ok(),
5545 "delta bomb (declared={declared}, kind={delta_kind:?}) panicked/aborted \
5546 instead of erroring cleanly"
5547 );
5548 let parse_result =
5549 join_result.expect("parse thread should not panic on a delta bomb");
5550 assert!(
5551 parse_result.is_err(),
5552 "delta bomb (declared={declared}, kind={delta_kind:?}) should be rejected \
5553 as invalid (result.len() != declared)"
5554 );
5555 }
5556 }
5557 }
5558
5559 #[test]
5563 fn applies_legitimate_delta_after_result_size_bound() {
5564 for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
5565 let base = b"hello";
5566 let result = b"hello world";
5567 let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, delta_kind);
5568 let parsed = PackFile::parse_sha1(&pack).expect("legitimate delta should resolve");
5569 assert_eq!(parsed.entries.len(), 2);
5570 assert_eq!(parsed.entries[0].object.body, base);
5571 assert_eq!(parsed.entries[1].object.body, result);
5572 }
5573 }
5574
5575 #[test]
5576 fn bounded_inflate_reserve_caps_attacker_declared_size() {
5577 assert_eq!(bounded_inflate_reserve(u64::MAX as usize, 10), 10 * 1032);
5579 assert_eq!(
5581 bounded_inflate_reserve(usize::MAX, usize::MAX),
5582 MAX_INFLATE_RESERVE
5583 );
5584 assert_eq!(bounded_inflate_reserve(1000, 500), 1000);
5588 assert_eq!(bounded_inflate_reserve(0, 0), 64);
5590 }
5591
5592 #[test]
5593 fn rejects_bundle_pack_payload_with_wrong_object_format() {
5594 let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
5595 let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha256, "blob", b"bundle\n")
5596 .expect("test operation should succeed");
5597 let bundle_bytes =
5598 format!("# v3 git bundle\n@object-format=sha256\n{oid} refs/heads/main\n\n")
5599 .into_bytes()
5600 .into_iter()
5601 .chain(pack)
5602 .collect::<Vec<_>>();
5603 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
5604 .expect("test operation should succeed");
5605
5606 assert!(PackFile::parse_bundle(&bundle).is_err());
5607 }
5608
5609 fn assert_pack_index_view_matches_owned(index: &[u8], format: ObjectFormat) {
5610 let owned = PackIndex::parse(index, format).expect("test operation should succeed");
5611 let view = PackIndexView::parse(index, format).expect("test operation should succeed");
5612 let owned_view =
5613 PackIndexViewData::parse(Arc::from(index.to_vec().into_boxed_slice()), format)
5614 .expect("test operation should succeed");
5615
5616 assert_eq!(view.version, owned.version);
5617 assert_eq!(view.count, owned.entries.len());
5618 assert_eq!(view.count(), owned.entries.len());
5619 assert_eq!(view.fanout(), &owned.fanout);
5620 assert_eq!(view.pack_checksum, owned.pack_checksum);
5621 assert_eq!(view.index_checksum, owned.index_checksum);
5622 assert_eq!(owned_view.version, owned.version);
5623 assert_eq!(owned_view.count(), owned.entries.len());
5624 assert_eq!(owned_view.fanout(), &owned.fanout);
5625 assert_eq!(owned_view.pack_checksum, owned.pack_checksum);
5626 assert_eq!(owned_view.index_checksum, owned.index_checksum);
5627 for entry in &owned.entries {
5628 let owned_found = owned
5629 .find(&entry.oid)
5630 .expect("test operation should succeed");
5631 let expected = Some(PackIndexLookup {
5632 crc32: owned_found.crc32,
5633 offset: owned_found.offset,
5634 });
5635 assert_eq!(view.find(&entry.oid), expected);
5636 assert_eq!(owned_view.find(&entry.oid), expected);
5637 }
5638 }
5639
/// Writes one blob into an undeltified SHA-1 pack and checks that the pack and
/// its index parse back to the same object, carry the same pack checksum, and
/// record the blob at offset 12.
#[test]
fn writes_pack_and_index_that_round_trip() {
    let blob = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
    let output = PackFile::write_undeltified_sha1(std::slice::from_ref(&blob))
        .expect("test operation should succeed");

    let parsed_pack =
        PackFile::parse_sha1(&output.pack).expect("test operation should succeed");
    let parsed_index =
        PackIndex::parse_v2_sha1(&output.index).expect("test operation should succeed");
    let blob_id = blob
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    assert_eq!(parsed_pack.entries[0].object, blob);
    assert_eq!(parsed_index.pack_checksum, parsed_pack.checksum);

    let located = parsed_index
        .find(&blob_id)
        .expect("test operation should succeed");
    assert_eq!(located.offset, 12);
}
5661
/// Packs eight distinct blobs with `PackFile::write_packed` (SHA-1), checks the
/// resulting index through `assert_pack_index_view_matches_owned`, and confirms
/// that the borrowed view reports `None` for an object id that was never packed.
#[test]
fn pack_index_view_matches_owned_index_for_generated_sha1_pack() {
    let mut blobs = Vec::new();
    for n in 0..8 {
        let text = format!("borrowed pack index view sha1 object {n}\n");
        blobs.push(EncodedObject::new(ObjectType::Blob, text.into_bytes()));
    }
    let output = PackFile::write_packed(&blobs, ObjectFormat::Sha1)
        .expect("test operation should succeed");

    assert_pack_index_view_matches_owned(&output.index, ObjectFormat::Sha1);

    let borrowed =
        PackIndexView::parse_v2_sha1(&output.index).expect("test operation should succeed");
    // An id derived from content that is not among the packed blobs.
    let absent_id = sley_core::object_id_for_bytes(
        ObjectFormat::Sha1,
        "blob",
        b"not present in borrowed index\n",
    )
    .expect("test operation should succeed");
    assert_eq!(borrowed.find(&absent_id), None);
}
5687
/// SHA-256 counterpart of the single-blob round trip: the undeltified pack and
/// its index must parse back to the same object, share the pack checksum,
/// report SHA-256 as the format of both index checksums, and record the blob at
/// offset 12.
#[test]
fn writes_sha256_pack_and_index_that_round_trip() {
    let format = ObjectFormat::Sha256;
    let blob = EncodedObject::new(ObjectType::Blob, b"hello sha256\n".to_vec());
    let output = PackFile::write_undeltified(std::slice::from_ref(&blob), format)
        .expect("test operation should succeed");

    let parsed_pack =
        PackFile::parse(&output.pack, format).expect("test operation should succeed");
    let parsed_index =
        PackIndex::parse(&output.index, format).expect("test operation should succeed");
    let blob_id = blob
        .object_id(format)
        .expect("test operation should succeed");

    assert_eq!(parsed_pack.entries[0].object, blob);
    assert_eq!(parsed_index.pack_checksum, parsed_pack.checksum);
    assert_eq!(parsed_index.pack_checksum.format(), format);
    assert_eq!(parsed_index.index_checksum.format(), format);

    let located = parsed_index
        .find(&blob_id)
        .expect("test operation should succeed");
    assert_eq!(located.offset, 12);
}
5713
/// Packs four distinct blobs into an undeltified SHA-256 pack and checks the
/// resulting index through `assert_pack_index_view_matches_owned`.
#[test]
fn pack_index_view_matches_owned_index_for_generated_sha256_pack() {
    let mut blobs = Vec::new();
    for n in 0..4 {
        let text = format!("borrowed pack index view sha256 object {n}\n");
        blobs.push(EncodedObject::new(ObjectType::Blob, text.into_bytes()));
    }
    let output = PackFile::write_undeltified(&blobs, ObjectFormat::Sha256)
        .expect("test operation should succeed");

    assert_pack_index_view_matches_owned(&output.index, ObjectFormat::Sha256);
}
5729
/// Rebuilds an index from the raw bytes of a SHA-256 pack and checks that both
/// the writer's report and the parsed index bytes agree with what the pack
/// writer originally produced (pack checksum and entry list).
#[test]
fn indexes_existing_sha256_pack_bytes() {
    let format = ObjectFormat::Sha256;
    let blob = EncodedObject::new(ObjectType::Blob, b"index raw sha256 pack\n".to_vec());
    let output = PackFile::write_undeltified(std::slice::from_ref(&blob), format)
        .expect("test operation should succeed");

    let reindexed = PackIndex::write_v2_for_pack(&output.pack, format)
        .expect("test operation should succeed");
    let parsed_index =
        PackIndex::parse(&reindexed.index, format).expect("test operation should succeed");

    // What the re-indexer reports.
    assert_eq!(reindexed.pack_checksum, output.checksum);
    assert_eq!(reindexed.entries, output.entries);
    // What the re-indexer actually serialized.
    assert_eq!(parsed_index.pack_checksum, output.checksum);
    assert_eq!(parsed_index.entries, output.entries);
}
5747
/// Rebuilds an index from the raw bytes of a two-object SHA-1 pack written with
/// `delta_pack_options(true)` and checks it against the pack writer's report:
/// same pack checksum, same entry list, and the changed object's offset and
/// CRC32 equal to those of the writer's `entries[1]`.
#[test]
fn indexes_existing_delta_pack_bytes() {
    let (base, changed) = similar_blob_objects();
    let pack_options = delta_pack_options(true);
    let inputs = [base, changed.clone()];
    let output =
        PackFile::write_packed_with_options(&inputs, ObjectFormat::Sha1, &pack_options)
            .expect("test operation should succeed");

    let reindexed = PackIndex::write_v2_for_pack_sha1(&output.pack)
        .expect("test operation should succeed");
    let parsed_index =
        PackIndex::parse_v2_sha1(&reindexed.index).expect("test operation should succeed");
    let changed_id = changed
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    assert_eq!(reindexed.pack_checksum, output.checksum);
    assert_eq!(reindexed.entries, output.entries);

    // The test expects the changed object to be the writer's second entry.
    let reported = &output.entries[1];
    let found_offset = parsed_index
        .find(&changed_id)
        .expect("test operation should succeed")
        .offset;
    assert_eq!(found_offset, reported.offset);
    let found_crc32 = parsed_index
        .find(&changed_id)
        .expect("test operation should succeed")
        .crc32;
    assert_eq!(found_crc32, reported.crc32);
}
5784
/// Writes two similar blobs with `delta_pack_options(false)`, verifies that the
/// second pack entry's header is a `RefDelta`, and checks that the pack and its
/// index round-trip: both objects are recovered, the checksums match, and the
/// index places the changed object at the offset the writer reported.
#[test]
fn writes_ref_delta_pack_and_index_that_round_trip() {
    let (base, changed) = similar_blob_objects();
    let pack_options = delta_pack_options(false);
    let inputs = [base.clone(), changed.clone()];
    let output =
        PackFile::write_packed_with_options(&inputs, ObjectFormat::Sha1, &pack_options)
            .expect("test operation should succeed");

    // Inspect the raw header of the second entry to confirm the delta encoding.
    let second_entry_offset = output.entries[1].offset;
    let mut cursor = second_entry_offset as usize;
    let second_header = parse_entry_header(&output.pack, &mut cursor)
        .expect("test operation should succeed");
    assert_eq!(second_header.kind, PackObjectKind::RefDelta);

    let parsed_pack =
        PackFile::parse_sha1(&output.pack).expect("test operation should succeed");
    let parsed_index =
        PackIndex::parse_v2_sha1(&output.index).expect("test operation should succeed");
    let changed_id = changed
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    assert_eq!(parsed_pack.entries[0].object, base);
    assert_eq!(parsed_pack.entries[1].object, changed);
    assert_eq!(parsed_index.pack_checksum, parsed_pack.checksum);

    let located = parsed_index
        .find(&changed_id)
        .expect("test operation should succeed");
    assert_eq!(located.offset, second_entry_offset);
}
5817
/// Reads every entry of an ofs-delta pack individually with
/// `read_object_at_arc` and checks that each result equals the object produced
/// by a full `PackFile::parse_sha1` of the same bytes.
///
/// The lookup callback always returns `Ok(None)`, so the test only passes if
/// every object can be resolved without it.
#[test]
fn read_object_at_matches_full_parse_for_ofs_delta_pack() {
    let (base, changed) = similar_blob_objects();
    let options = delta_pack_options(true);
    // `changed` is not used after packing, so it is moved into the input array
    // instead of being cloned.
    let written =
        PackFile::write_packed_with_options(&[base, changed], ObjectFormat::Sha1, &options)
            .expect("test operation should succeed");

    // Precondition: the second entry really is stored as an offset delta.
    let mut second = written.entries[1].offset as usize;
    assert_eq!(
        parse_entry_header(&written.pack, &mut second)
            .expect("test operation should succeed")
            .kind,
        PackObjectKind::OfsDelta
    );

    let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
    for po in &parsed.entries {
        let got =
            read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |_| {
                Ok(None)
            })
            .expect("test operation should succeed");
        assert_eq!(*got, po.object, "offset {}", po.entry.offset);
    }
}
5847
5848 #[derive(Default)]
5851 struct MapHeaderTypeCache(HashMap<u64, (ObjectType, u64)>);
5852
5853 impl HeaderTypeCache for MapHeaderTypeCache {
5854 fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
5855 self.0.get(&pack_offset).copied()
5856 }
5857 fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
5858 self.0.insert(pack_offset, header);
5859 }
5860 }
5861
/// Checks that `read_object_header_at_with_cache` agrees with the uncached
/// `read_object_header_at` for every entry of an ofs-delta pack.
///
/// The first pass runs against an initially empty cache and compares each
/// result with the uncached read, which itself must equal the
/// `(object type, body length)` of the fully parsed object. The second pass
/// reuses the same cache and passes a callback that panics if it is ever
/// invoked.
#[test]
fn read_object_header_at_cached_matches_uncached_cold_and_warm_for_ofs_delta() {
    let (base, changed) = similar_blob_objects();
    let pack_options = delta_pack_options(true);
    let output =
        PackFile::write_packed_with_options(&[base, changed], ObjectFormat::Sha1, &pack_options)
            .expect("test operation should succeed");

    // Precondition: the second entry is stored as an offset delta.
    let mut cursor = output.entries[1].offset as usize;
    let second_header = parse_entry_header(&output.pack, &mut cursor)
        .expect("test operation should succeed");
    assert_eq!(second_header.kind, PackObjectKind::OfsDelta);

    let full_parse =
        PackFile::parse_sha1(&output.pack).expect("test operation should succeed");
    let mut header_cache = MapHeaderTypeCache::default();

    // Cold pass: uncached read, then the cached read that populates the cache.
    for packed in &full_parse.entries {
        let offset = packed.entry.offset;
        let expected = (packed.object.object_type, packed.object.body.len() as u64);

        let uncached = read_object_header_at(&output.pack, offset, ObjectFormat::Sha1, |_| {
            Ok(None)
        })
        .expect("test operation should succeed");
        assert_eq!(uncached, expected, "uncached header at offset {}", offset);

        let cold = read_object_header_at_with_cache(
            &output.pack,
            offset,
            ObjectFormat::Sha1,
            |_| Ok(None),
            &mut header_cache,
        )
        .expect("test operation should succeed");
        assert_eq!(cold, uncached, "cold cache at offset {}", offset);
    }

    // Warm pass: same cache, but the callback must never be reached.
    for packed in &full_parse.entries {
        let offset = packed.entry.offset;
        let expected = (packed.object.object_type, packed.object.body.len() as u64);

        let warm = read_object_header_at_with_cache(
            &output.pack,
            offset,
            ObjectFormat::Sha1,
            |_| panic!("warm cache must not re-walk the chain"),
            &mut header_cache,
        )
        .expect("test operation should succeed");
        assert_eq!(warm, expected, "warm cache at offset {}", offset);
    }
}
5923
/// Reads every entry of a pack written with `delta_pack_options(false)`
/// individually with `read_object_at_arc` and checks that each result equals
/// the object produced by a full `PackFile::parse_sha1` of the same bytes.
///
/// The lookup callback answers from a map of the fully parsed objects keyed by
/// object id.
#[test]
fn read_object_at_matches_full_parse_for_ref_delta_pack() {
    let (base, changed) = similar_blob_objects();
    let options = delta_pack_options(false);
    // `changed` is not used after packing, so it is moved into the input array
    // instead of being cloned.
    let written =
        PackFile::write_packed_with_options(&[base, changed], ObjectFormat::Sha1, &options)
            .expect("test operation should succeed");
    let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");

    // Objects from the full parse, shared through `Arc` so the callback can
    // hand them out cheaply.
    let by_oid: HashMap<ObjectId, Arc<EncodedObject>> = parsed
        .entries
        .iter()
        .map(|po| (po.entry.oid, Arc::new(po.object.clone())))
        .collect();

    for po in &parsed.entries {
        let got =
            read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |oid| {
                Ok(by_oid.get(oid).cloned())
            })
            .expect("test operation should succeed");
        assert_eq!(*got, po.object);
    }
}
5949
5950 #[derive(Default)]
5954 struct CountingDeltaCache {
5955 map: std::cell::RefCell<HashMap<u64, Arc<EncodedObject>>>,
5956 hits: std::cell::Cell<usize>,
5957 inserts: std::cell::Cell<usize>,
5958 }
5959
5960 impl PackDeltaCache for CountingDeltaCache {
5961 fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
5962 let hit = self.map.borrow().get(&offset).cloned();
5963 if hit.is_some() {
5964 self.hits.set(self.hits.get() + 1);
5965 }
5966 hit
5967 }
5968 fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
5969 self.inserts.set(self.inserts.get() + 1);
5970 self.map.borrow_mut().insert(offset, object);
5971 }
5972 }
5973
/// Packs eight blobs that share a 4096-byte `x` prefix and differ only in a
/// short "variant" suffix, then reads every entry twice through one
/// `CountingDeltaCache`.
///
/// Each read must equal the object from a full parse, and the cache must have
/// served at least one hit by the end.
#[test]
fn read_object_at_with_cache_matches_uncached_and_reuses_bases() {
    let blobs: Vec<_> = (0..8u32)
        .map(|variant| {
            let mut body = vec![b'x'; 4096];
            body.extend_from_slice(format!("\nvariant {variant}\n").as_bytes());
            EncodedObject::new(ObjectType::Blob, body)
        })
        .collect();
    let pack_options = delta_pack_options(true);
    let output = PackFile::write_packed_with_options(&blobs, ObjectFormat::Sha1, &pack_options)
        .expect("test operation should succeed");
    let full_parse =
        PackFile::parse_sha1(&output.pack).expect("test operation should succeed");

    let delta_cache = CountingDeltaCache::default();
    // Two passes over the same entries with the same cache instance.
    for _pass in 0..2 {
        for packed in &full_parse.entries {
            let offset = packed.entry.offset;
            let resolved = read_object_at_with_cache_arc(
                &output.pack,
                offset,
                ObjectFormat::Sha1,
                |_| Ok(None),
                &delta_cache,
            )
            .expect("test operation should succeed");
            assert_eq!(*resolved, packed.object, "offset {}", offset);
        }
    }

    let warm_hits = delta_cache.hits.get();
    assert!(warm_hits > 0, "cache never served a warm object");
}
6009
/// Writes two similar blobs with `delta_pack_options(true)`, verifies that the
/// second pack entry's header is an `OfsDelta`, and checks that the pack and its
/// index round-trip: both objects are recovered, the checksums match, and the
/// index places the changed object at the offset the writer reported.
#[test]
fn writes_ofs_delta_pack_and_index_that_round_trip() {
    let (base, changed) = similar_blob_objects();
    let pack_options = delta_pack_options(true);
    let inputs = [base.clone(), changed.clone()];
    let output =
        PackFile::write_packed_with_options(&inputs, ObjectFormat::Sha1, &pack_options)
            .expect("test operation should succeed");

    // Inspect the raw header of the second entry to confirm the delta encoding.
    let second_entry_offset = output.entries[1].offset;
    let mut cursor = second_entry_offset as usize;
    let second_header = parse_entry_header(&output.pack, &mut cursor)
        .expect("test operation should succeed");
    assert_eq!(second_header.kind, PackObjectKind::OfsDelta);

    let parsed_pack =
        PackFile::parse_sha1(&output.pack).expect("test operation should succeed");
    let parsed_index =
        PackIndex::parse_v2_sha1(&output.index).expect("test operation should succeed");
    let changed_id = changed
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    assert_eq!(parsed_pack.entries[0].object, base);
    assert_eq!(parsed_pack.entries[1].object, changed);
    assert_eq!(parsed_index.pack_checksum, parsed_pack.checksum);

    let located = parsed_index
        .find(&changed_id)
        .expect("test operation should succeed");
    assert_eq!(located.offset, second_entry_offset);
}
6042
/// Parses a hand-built two-entry pack whose second entry is created with
/// `DeltaKind::Offset` and checks that it resolves to the expected body and to
/// the blob object id of that body.
#[test]
fn resolves_ofs_delta_pack_entry() {
    let base_body = b"hello";
    let target_body = b"hello world";
    let pack_bytes = two_object_delta_pack(
        ObjectFormat::Sha1,
        base_body,
        target_body,
        DeltaKind::Offset,
    );
    let parsed = PackFile::parse_sha1(&pack_bytes).expect("test operation should succeed");

    assert_eq!(parsed.entries.len(), 2);
    let (base_entry, delta_entry) = (&parsed.entries[0], &parsed.entries[1]);
    assert_eq!(base_entry.object.body, base_body);
    assert_eq!(delta_entry.object.body, target_body);

    let expected_id = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", target_body)
        .expect("test operation should succeed");
    assert_eq!(delta_entry.entry.oid, expected_id);
}
6058
/// Parses a hand-built two-entry pack whose second entry is created with
/// `DeltaKind::Ref` and checks that it resolves to the expected body and to the
/// blob object id of that body.
#[test]
fn resolves_ref_delta_pack_entry() {
    let base_body = b"hello";
    let target_body = b"hello world";
    let pack_bytes =
        two_object_delta_pack(ObjectFormat::Sha1, base_body, target_body, DeltaKind::Ref);
    let parsed = PackFile::parse_sha1(&pack_bytes).expect("test operation should succeed");

    assert_eq!(parsed.entries.len(), 2);
    let (base_entry, delta_entry) = (&parsed.entries[0], &parsed.entries[1]);
    assert_eq!(base_entry.object.body, base_body);
    assert_eq!(delta_entry.object.body, target_body);

    let expected_id = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", target_body)
        .expect("test operation should succeed");
    assert_eq!(delta_entry.entry.oid, expected_id);
}
6074
/// A pack built by `thin_ref_delta_pack` must be rejected by the plain
/// `PackFile::parse_sha1`, but must parse with `PackFile::parse_thin` when the
/// callback supplies the base blob for its object id.
///
/// The thin parse is expected to yield exactly one entry: the resolved result.
#[test]
fn resolves_thin_ref_delta_pack_entry_with_external_base() {
    let base_body = b"hello";
    let target_body = b"hello world";
    let thin_pack = thin_ref_delta_pack(ObjectFormat::Sha1, base_body, target_body);
    assert!(PackFile::parse_sha1(&thin_pack).is_err());

    let base_id = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", base_body)
        .expect("test operation should succeed");
    // Only the base id is answered; every other id is reported as unknown.
    let parsed = PackFile::parse_thin(&thin_pack, ObjectFormat::Sha1, |wanted| {
        Ok((wanted == &base_id)
            .then(|| EncodedObject::new(ObjectType::Blob, base_body.to_vec())))
    })
    .expect("test operation should succeed");

    assert_eq!(parsed.entries.len(), 1);
    let resolved = &parsed.entries[0];
    assert_eq!(resolved.object.body, target_body);

    let expected_id = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", target_body)
        .expect("test operation should succeed");
    assert_eq!(resolved.entry.oid, expected_id);
}
6100
/// Flipping the lowest bit of a pack's final byte must make
/// `PackFile::parse_sha1` fail.
#[test]
fn rejects_bad_pack_checksum() {
    let mut corrupted = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
    let tail_index = corrupted.len() - 1;
    let flipped = corrupted[tail_index] ^ 1;
    corrupted[tail_index] = flipped;

    assert!(PackFile::parse_sha1(&corrupted).is_err());
}
6108
/// Flipping the lowest bit of a pack's final byte must make
/// `PackIndex::write_v2_for_pack_sha1` refuse to index it.
#[test]
fn raw_pack_index_rejects_bad_pack_checksum() {
    let mut corrupted = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
    let tail_index = corrupted.len() - 1;
    let flipped = corrupted[tail_index] ^ 1;
    corrupted[tail_index] = flipped;

    assert!(PackIndex::write_v2_for_pack_sha1(&corrupted).is_err());
}
6116
/// `PackIndex::write_v2` must fail when two entries carry the same object id,
/// even though their CRC32 and offset differ.
#[test]
fn pack_index_writer_rejects_duplicate_object_ids() {
    let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"same\n")
        .expect("test operation should succeed");
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");

    // Same id twice, distinguished only by (crc32, offset).
    let mut entries = Vec::with_capacity(2);
    for (crc32, offset) in [(1, 12), (2, 24)] {
        entries.push(PackIndexEntry { oid, crc32, offset });
    }

    assert!(PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack_checksum).is_err());
}
6137
/// Parses a hand-built single-entry index with `PackIndex::parse_v2_sha1` and
/// checks the version (2), the pack checksum, the entry count, and the offset
/// and CRC32 recorded for the entry. The same bytes are then run through
/// `assert_pack_index_view_matches_owned`.
#[test]
fn parses_single_entry_pack_index() {
    let oid = ObjectId::from_hex(
        ObjectFormat::Sha1,
        "ce013625030ba8dba906f756967f9e9ca394464a",
    )
    .expect("test operation should succeed");
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");
    let index_bytes = single_entry_index(
        ObjectFormat::Sha1,
        oid,
        0x1234_5678,
        12,
        pack_checksum.clone(),
    );

    let parsed =
        PackIndex::parse_v2_sha1(&index_bytes).expect("test operation should succeed");
    assert_eq!(parsed.version, 2);
    assert_eq!(parsed.pack_checksum, pack_checksum);
    assert_eq!(parsed.entries.len(), 1);

    let found_offset = parsed
        .find(&oid)
        .expect("test operation should succeed")
        .offset;
    assert_eq!(found_offset, 12);
    let found_crc32 = parsed
        .find(&oid)
        .expect("test operation should succeed")
        .crc32;
    assert_eq!(found_crc32, 0x1234_5678);

    assert_pack_index_view_matches_owned(&index_bytes, ObjectFormat::Sha1);
}
6174
/// Parses a hand-built single-entry index from `single_entry_index_v1` and
/// checks that it is reported as version 1 with the expected pack checksum, one
/// entry, offset `0x1234_5678`, and a CRC32 of 0. The same bytes are then run
/// through `assert_pack_index_view_matches_owned`.
#[test]
fn parses_single_entry_pack_index_v1() {
    let oid = ObjectId::from_hex(
        ObjectFormat::Sha1,
        "ce013625030ba8dba906f756967f9e9ca394464a",
    )
    .expect("test operation should succeed");
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");
    let index_bytes =
        single_entry_index_v1(ObjectFormat::Sha1, oid, 0x1234_5678, pack_checksum.clone());

    let parsed = PackIndex::parse(&index_bytes, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.pack_checksum, pack_checksum);
    assert_eq!(parsed.entries.len(), 1);

    let found_offset = parsed
        .find(&oid)
        .expect("test operation should succeed")
        .offset;
    assert_eq!(found_offset, 0x1234_5678);
    let found_crc32 = parsed
        .find(&oid)
        .expect("test operation should succeed")
        .crc32;
    assert_eq!(found_crc32, 0);

    assert_pack_index_view_matches_owned(&index_bytes, ObjectFormat::Sha1);
}
6207
/// Flipping the lowest bit of the final byte of an index built by
/// `single_entry_index_v1` must make `PackIndex::parse` fail.
#[test]
fn rejects_bad_pack_index_v1_checksum() {
    let oid = ObjectId::from_hex(
        ObjectFormat::Sha1,
        "ce013625030ba8dba906f756967f9e9ca394464a",
    )
    .expect("test operation should succeed");
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");

    let mut corrupted = single_entry_index_v1(ObjectFormat::Sha1, oid, 12, pack_checksum);
    let tail_index = corrupted.len() - 1;
    let flipped = corrupted[tail_index] ^ 1;
    corrupted[tail_index] = flipped;

    assert!(PackIndex::parse(&corrupted, ObjectFormat::Sha1).is_err());
}
6222
/// Writes an index containing the offsets `0x8000_0000` and `0x1_0000_0042`
/// and checks that `PackIndexView` returns exactly the CRC32 and offset that
/// were written for each object, in addition to the general view/owned
/// agreement check.
#[test]
fn pack_index_view_reads_v2_large_offsets() {
    let first_id =
        sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"large offset a\n")
            .expect("test operation should succeed");
    let second_id =
        sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"large offset b\n")
            .expect("test operation should succeed");
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");

    let low_entry = PackIndexEntry {
        oid: first_id,
        crc32: 0x1111_2222,
        offset: 0x8000_0000,
    };
    let high_entry = PackIndexEntry {
        oid: second_id,
        crc32: 0x3333_4444,
        offset: 0x1_0000_0042,
    };
    let entries = vec![low_entry, high_entry];
    let index_bytes = PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack_checksum)
        .expect("test operation should succeed");

    assert_pack_index_view_matches_owned(&index_bytes, ObjectFormat::Sha1);

    let borrowed = PackIndexView::parse(&index_bytes, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    for PackIndexEntry { oid, crc32, offset } in entries {
        let expected = Some(PackIndexLookup { crc32, offset });
        assert_eq!(borrowed.find(&oid), expected);
    }
}
6260
/// After the final byte of an index is corrupted, the default
/// `PackIndexView::parse` must fail, while `parse_without_checksum` and
/// `PackIndexViewData::parse_trusted_without_checksum` must still parse the
/// bytes and resolve the single entry to its CRC32 and offset.
#[test]
fn pack_index_view_default_parse_checks_index_checksum() {
    let oid = ObjectId::from_hex(
        ObjectFormat::Sha1,
        "ce013625030ba8dba906f756967f9e9ca394464a",
    )
    .expect("test operation should succeed");
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");

    let mut corrupted =
        single_entry_index(ObjectFormat::Sha1, oid, 0x1234_5678, 12, pack_checksum);
    let tail_index = corrupted.len() - 1;
    let flipped = corrupted[tail_index] ^ 1;
    corrupted[tail_index] = flipped;

    assert!(PackIndexView::parse(&corrupted, ObjectFormat::Sha1).is_err());

    let unchecked_view = PackIndexView::parse_without_checksum(&corrupted, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let trusted_view = PackIndexViewData::parse_trusted_without_checksum(
        Arc::from(corrupted.clone().into_boxed_slice()),
        ObjectFormat::Sha1,
    )
    .expect("test operation should succeed");

    // Both checksum-skipping parsers must still see the original entry.
    let expected = Some(PackIndexLookup {
        crc32: 0x1234_5678,
        offset: 12,
    });
    assert_eq!(unchecked_view.find(&oid), expected);
    assert_eq!(trusted_view.find(&oid), expected);
}
6297
/// Writes a reverse index for the positions `[2, 0, 1]`, parses it back, and
/// checks the version, format, positions and pack checksum. Re-writing the
/// parsed data must reproduce the original bytes.
#[test]
fn parses_pack_reverse_index() {
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");
    let serialized = PackReverseIndex::write(ObjectFormat::Sha1, &[2, 0, 1], &pack_checksum)
        .expect("test operation should succeed");

    let parsed = PackReverseIndex::parse(&serialized, ObjectFormat::Sha1, 3)
        .expect("test operation should succeed");
    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.format, ObjectFormat::Sha1);
    assert_eq!(parsed.positions, vec![2, 0, 1]);
    assert_eq!(parsed.pack_checksum, pack_checksum);

    // Serializing the parsed form again must be byte-identical.
    let reserialized =
        PackReverseIndex::write(ObjectFormat::Sha1, &parsed.positions, &parsed.pack_checksum)
            .expect("test operation should succeed");
    assert_eq!(reserialized, serialized);
}
6316
/// Flipping the lowest bit of a reverse index's final byte must make
/// `PackReverseIndex::parse` fail.
#[test]
fn rejects_bad_pack_reverse_index_checksum() {
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");
    let mut corrupted = PackReverseIndex::write(ObjectFormat::Sha1, &[0], &pack_checksum)
        .expect("test operation should succeed");
    let tail_index = corrupted.len() - 1;
    let flipped = corrupted[tail_index] ^ 1;
    corrupted[tail_index] = flipped;

    assert!(PackReverseIndex::parse(&corrupted, ObjectFormat::Sha1, 1).is_err());
}
6327
/// Duplicate positions (`[0, 0]`) and an out-of-range position (`[0, 2]` for
/// two objects) must be rejected both by `PackReverseIndex::parse`, fed with
/// bytes from the `pack_reverse_index` fixture, and by `PackReverseIndex::write`.
#[test]
fn rejects_bad_pack_reverse_index_positions() {
    let format = ObjectFormat::Sha1;

    // Parser side: the fixture helper produces the bytes.
    let fixture_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let duplicate_bytes = pack_reverse_index(format, &[0, 0], fixture_checksum.clone());
    assert!(PackReverseIndex::parse(&duplicate_bytes, format, 2).is_err());
    let out_of_range_bytes = pack_reverse_index(format, &[0, 2], fixture_checksum);
    assert!(PackReverseIndex::parse(&out_of_range_bytes, format, 2).is_err());

    // Writer side: the same position lists must be refused up front.
    let writer_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    assert!(PackReverseIndex::write(format, &[0, 0], &writer_checksum).is_err());
    assert!(PackReverseIndex::write(format, &[0, 2], &writer_checksum).is_err());
}
6341
/// Writes an mtimes file for `[1, 1_700_000_000, u32::MAX]`, parses it back,
/// and checks the version, format, values and pack checksum. Re-writing the
/// parsed data must reproduce the original bytes.
#[test]
fn parses_pack_mtimes() {
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");
    let serialized = PackMtimes::write(
        ObjectFormat::Sha1,
        &[1, 1_700_000_000, u32::MAX],
        &pack_checksum,
    )
    .expect("test operation should succeed");

    let parsed = PackMtimes::parse(&serialized, ObjectFormat::Sha1, 3)
        .expect("test operation should succeed");
    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.format, ObjectFormat::Sha1);
    assert_eq!(parsed.mtimes, vec![1, 1_700_000_000, u32::MAX]);
    assert_eq!(parsed.pack_checksum, pack_checksum);

    // Serializing the parsed form again must be byte-identical.
    let reserialized =
        PackMtimes::write(ObjectFormat::Sha1, &parsed.mtimes, &parsed.pack_checksum)
            .expect("test operation should succeed");
    assert_eq!(reserialized, serialized);
}
6364
/// Flipping the lowest bit of an mtimes file's final byte must make
/// `PackMtimes::parse` fail.
#[test]
fn rejects_bad_pack_mtimes_checksum() {
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");
    let mut corrupted = PackMtimes::write(ObjectFormat::Sha1, &[1], &pack_checksum)
        .expect("test operation should succeed");
    let tail_index = corrupted.len() - 1;
    let flipped = corrupted[tail_index] ^ 1;
    corrupted[tail_index] = flipped;

    assert!(PackMtimes::parse(&corrupted, ObjectFormat::Sha1, 1).is_err());
}
6375
/// `PackMtimes::parse` must reject a file holding two values when one object is
/// expected, and a file whose byte 11 has been changed to 2 even though its
/// trailing checksum was recomputed to match.
#[test]
fn rejects_bad_pack_mtimes_shape() {
    let format = ObjectFormat::Sha1;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");

    // Two mtimes, but the caller claims a single object.
    let two_values = pack_mtimes(format, &[1, 2], pack_checksum.clone());
    assert!(PackMtimes::parse(&two_values, format, 1).is_err());

    // NOTE(review): byte 11 is presumably the hash-function id in the header
    // (the variable was named `wrong_hash`) — confirm against the format.
    let mut patched = pack_mtimes(format, &[1], pack_checksum);
    patched[11] = 2;
    // Recompute the trailer so only the patched byte can cause the rejection.
    let checksum_offset = patched.len() - format.raw_len();
    let (payload, trailer) = patched.split_at_mut(checksum_offset);
    let checksum =
        sley_core::digest_bytes(format, &*payload).expect("test operation should succeed");
    trailer.copy_from_slice(checksum.as_bytes());
    assert!(PackMtimes::parse(&patched, format, 1).is_err());
}
6391
/// Parses a hand-built multi-pack index with two packs and two objects and
/// checks the header fields, the pack names, per-object lookups (pack id and
/// offset, including the offset `0x1_0000_0000`), the absence of the optional
/// reverse-index and bitmapped-packs data, and the id, offset and length of the
/// first two of its five chunks.
#[test]
fn parses_multi_pack_index_header_and_chunk_lookup() {
    let first_id =
        sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
            .expect("test operation should succeed");
    let second_id =
        sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
            .expect("test operation should succeed");
    let chunks = midx_chunks_with_pack_names(
        ObjectFormat::Sha1,
        b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
        &[
            (first_id.clone(), 0, 12),
            (second_id.clone(), 1, 0x1_0000_0000),
        ],
    );
    let midx_bytes = multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks);
    let index = MultiPackIndex::parse(&midx_bytes, ObjectFormat::Sha1)
        .expect("test operation should succeed");

    // Header and pack-name table.
    assert_eq!(index.version, 2);
    assert_eq!(index.format, ObjectFormat::Sha1);
    assert_eq!(index.pack_count, 2);
    assert_eq!(index.pack_names, vec!["pack-a.idx", "pack-b.idx"]);
    assert_eq!(index.object_count, 2);
    assert_eq!(index.objects.len(), 2);

    // Object lookups.
    let first_pack = index
        .find(&first_id)
        .expect("test operation should succeed")
        .pack_int_id;
    assert_eq!(first_pack, 0);
    let first_offset = index
        .find(&first_id)
        .expect("test operation should succeed")
        .offset;
    assert_eq!(first_offset, 12);
    let second_pack = index
        .find(&second_id)
        .expect("test operation should succeed")
        .pack_int_id;
    assert_eq!(second_pack, 1);
    let second_offset = index
        .find(&second_id)
        .expect("test operation should succeed")
        .offset;
    assert_eq!(second_offset, 0x1_0000_0000);

    // Optional data is absent in this fixture.
    assert_eq!(index.reverse_index, None);
    assert_eq!(index.bitmapped_packs, None);

    // Chunk table layout.
    assert_eq!(index.chunks.len(), 5);
    let pack_names_chunk = &index.chunks[0];
    assert_eq!(pack_names_chunk.id, *b"PNAM");
    assert_eq!(pack_names_chunk.offset, 84);
    assert_eq!(pack_names_chunk.len, 24);
    let fanout_chunk = &index.chunks[1];
    assert_eq!(fanout_chunk.id, *b"OIDF");
    assert_eq!(fanout_chunk.offset, 108);
    assert_eq!(fanout_chunk.len, 1024);
}
6450
/// Exercises `MultiPackIndexOidLookup` over a hand-built two-pack index:
/// `contains` must be true for both stored ids and false for an unknown one,
/// `find` must return the pack (resolved to its name via `pack_name`) and the
/// offset of each stored id, and `find` must return `None` for the unknown id.
#[test]
fn raw_multi_pack_index_lookup_finds_pack_and_offset() {
    let first_id =
        sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
            .expect("test operation should succeed");
    let second_id =
        sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
            .expect("test operation should succeed");
    let absent_id = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
        .expect("test operation should succeed");
    let chunks = midx_chunks_with_pack_names(
        ObjectFormat::Sha1,
        b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
        &[
            (first_id.clone(), 0, 12),
            (second_id.clone(), 1, 0x1_0000_0000),
        ],
    );
    let midx_bytes = Arc::new(multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks));
    let lookup = MultiPackIndexOidLookup::parse(midx_bytes, ObjectFormat::Sha1)
        .expect("test operation should succeed");

    assert!(lookup.contains(&first_id));
    assert!(lookup.contains(&second_id));
    assert!(!lookup.contains(&absent_id));

    let first_hit = lookup
        .find(&first_id)
        .expect("test operation should succeed")
        .expect("object should be present");
    let first_pack = lookup.pack_name(first_hit.pack_int_id);
    assert_eq!(first_pack, Some("pack-a.idx"));
    assert_eq!(first_hit.offset, 12);

    let second_hit = lookup
        .find(&second_id)
        .expect("test operation should succeed")
        .expect("object should be present");
    let second_pack = lookup.pack_name(second_hit.pack_int_id);
    assert_eq!(second_pack, Some("pack-b.idx"));
    assert_eq!(second_hit.offset, 0x1_0000_0000);

    let absent_hit = lookup
        .find(&absent_id)
        .expect("test operation should succeed");
    assert!(absent_hit.is_none());
}
6498
/// Flipping the lowest bit of a multi-pack index's final byte must make
/// `MultiPackIndex::parse` fail.
#[test]
fn rejects_bad_multi_pack_index_checksum() {
    let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
    let mut corrupted = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
    let tail_index = corrupted.len() - 1;
    let flipped = corrupted[tail_index] ^ 1;
    corrupted[tail_index] = flipped;

    assert!(MultiPackIndex::parse(&corrupted, ObjectFormat::Sha1).is_err());
}
6507
/// Three structurally invalid multi-pack indexes must be rejected by
/// `MultiPackIndex::parse` even though each one's trailing checksum is
/// recomputed to match the patched bytes:
///
/// 1. byte 5 set to 2,
/// 2. byte 12 set to `b'B'`,
/// 3. bytes 16..24 overwritten with a big-endian zero.
#[test]
fn rejects_bad_multi_pack_index_shape() {
    /// Recomputes the trailing SHA-1 checksum over all bytes that precede it.
    fn reseal(bytes: &mut [u8]) {
        let checksum_offset = bytes.len() - ObjectFormat::Sha1.raw_len();
        let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &bytes[..checksum_offset])
            .expect("test operation should succeed");
        bytes[checksum_offset..].copy_from_slice(checksum.as_bytes());
    }

    let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);

    // NOTE(review): byte 5 is presumably the hash-version field (the variable
    // was named `wrong_hash`) — confirm against the format.
    let mut wrong_hash = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
    wrong_hash[5] = 2;
    reseal(&mut wrong_hash);
    assert!(MultiPackIndex::parse(&wrong_hash, ObjectFormat::Sha1).is_err());

    // NOTE(review): byte 12 presumably falls inside the chunk table, so that
    // the expected terminator is no longer found — confirm.
    let mut missing_terminator = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
    missing_terminator[12] = b'B';
    reseal(&mut missing_terminator);
    assert!(MultiPackIndex::parse(&missing_terminator, ObjectFormat::Sha1).is_err());

    // The third case uses freshly built chunks and a different version argument.
    let fresh_chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
    let mut bad_offset = multi_pack_index(ObjectFormat::Sha1, 2, 0, &fresh_chunks);
    bad_offset[16..24].copy_from_slice(&0u64.to_be_bytes());
    reseal(&mut bad_offset);
    assert!(MultiPackIndex::parse(&bad_offset, ObjectFormat::Sha1).is_err());
}
6541
/// Covers the pack-name handling of `MultiPackIndex::parse`.
///
/// Rejected: an index with no chunks at all, a name table with fewer names than
/// the declared pack count, a name table followed by `xxxx` instead of NUL
/// bytes, and names in the order `pack-b`, `pack-a` when the version argument
/// is 1. Accepted: the same `pack-b`, `pack-a` order when the version argument
/// is 2, with the names returned in stored order.
#[test]
fn rejects_bad_multi_pack_index_pack_names() {
    let format = ObjectFormat::Sha1;

    // No chunks at all.
    let no_chunks = multi_pack_index(format, 2, 1, &[]);
    assert!(MultiPackIndex::parse(&no_chunks, format).is_err());

    // One name for two declared packs.
    let one_name_chunks = midx_chunks_with_pack_names(format, b"pack-a.idx\0".to_vec(), &[]);
    let too_few = multi_pack_index(format, 2, 2, &one_name_chunks);
    assert!(MultiPackIndex::parse(&too_few, format).is_err());

    // Non-NUL bytes after the single name.
    let padded_chunks = midx_chunks_with_pack_names(format, b"pack-a.idx\0xxxx".to_vec(), &[]);
    let bad_padding = multi_pack_index(format, 2, 1, &padded_chunks);
    assert!(MultiPackIndex::parse(&bad_padding, format).is_err());

    // Out-of-order names with version argument 1.
    let unsorted_v1_chunks =
        midx_chunks_with_pack_names(format, b"pack-b.idx\0pack-a.idx\0".to_vec(), &[]);
    let unsorted_v1 = multi_pack_index(format, 1, 2, &unsorted_v1_chunks);
    assert!(MultiPackIndex::parse(&unsorted_v1, format).is_err());

    // The same names with version argument 2 parse, preserving stored order.
    let unsorted_v2_chunks =
        midx_chunks_with_pack_names(format, b"pack-b.idx\0pack-a.idx\0".to_vec(), &[]);
    let unsorted_v2 = multi_pack_index(format, 2, 2, &unsorted_v2_chunks);
    let parsed =
        MultiPackIndex::parse(&unsorted_v2, format).expect("test operation should succeed");
    assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
}
6589
/// Missing or inconsistent object-table chunks (OIDF / OIDL / OOFF / LOFF)
/// must make `MultiPackIndex::parse` fail.
#[test]
fn rejects_bad_multi_pack_index_object_tables() {
    const PACK_NAMES: &[u8] = b"pack-a.idx\0\0";
    let format = ObjectFormat::Sha1;

    let oid_a = ObjectId::from_hex(format, "1111111111111111111111111111111111111111")
        .expect("test operation should succeed");
    let oid_b = ObjectId::from_hex(format, "2222222222222222222222222222222222222222")
        .expect("test operation should succeed");

    // Only PNAM is present; there is no OIDF chunk.
    let pnam_only = [(*b"PNAM", PACK_NAMES.to_vec())];
    let missing_oidf = multi_pack_index(format, 2, 1, &pnam_only);
    assert!(MultiPackIndex::parse(&missing_oidf, format).is_err());

    // All-zero fanout table alongside one OIDL / OOFF entry.
    let zero_fanout_chunks = [
        (*b"PNAM", PACK_NAMES.to_vec()),
        (*b"OIDF", vec![0; 256 * 4]),
        (*b"OIDL", oid_a.as_bytes().to_vec()),
        (*b"OOFF", midx_ooff_entries(&[(0, 12)], &mut Vec::new())),
    ];
    let bad_fanout = multi_pack_index(format, 2, 1, &zero_fanout_chunks);
    assert!(MultiPackIndex::parse(&bad_fanout, format).is_err());

    // OIDL lists oid_b before oid_a.
    let mut swapped_lookup = oid_b.as_bytes().to_vec();
    swapped_lookup.extend_from_slice(oid_a.as_bytes());
    let unsorted_chunks = vec![
        (*b"PNAM", PACK_NAMES.to_vec()),
        (*b"OIDF", midx_oid_fanout(&[oid_a.clone(), oid_b.clone()])),
        (*b"OIDL", swapped_lookup),
        (
            *b"OOFF",
            midx_ooff_entries(&[(0, 12), (0, 24)], &mut Vec::new()),
        ),
    ];
    let unsorted = multi_pack_index(format, 2, 1, &unsorted_chunks);
    assert!(MultiPackIndex::parse(&unsorted, format).is_err());

    // The entry points at pack id 1 although only one pack name is given.
    let stray_pack_chunks =
        midx_chunks_with_pack_names(format, PACK_NAMES.to_vec(), &[(oid_a.clone(), 1, 12)]);
    let bad_pack = multi_pack_index(format, 2, 1, &stray_pack_chunks);
    assert!(MultiPackIndex::parse(&bad_pack, format).is_err());

    // Offset 0x1_0000_0000 is encoded in OOFF but no LOFF chunk is emitted.
    let mut large_offsets = Vec::new();
    let no_loff_chunks = vec![
        (*b"PNAM", PACK_NAMES.to_vec()),
        (*b"OIDF", midx_oid_fanout(std::slice::from_ref(&oid_a))),
        (*b"OIDL", oid_a.as_bytes().to_vec()),
        (
            *b"OOFF",
            midx_ooff_entries(&[(0, 0x1_0000_0000)], &mut large_offsets),
        ),
    ];
    let missing_loff = multi_pack_index(format, 2, 1, &no_loff_chunks);
    assert!(MultiPackIndex::parse(&missing_loff, format).is_err());

    // A LOFF chunk holding a single byte.
    let mut one_byte_loff_chunks =
        midx_chunks_with_pack_names(format, PACK_NAMES.to_vec(), &[]);
    one_byte_loff_chunks.push((*b"LOFF", vec![0]));
    let bad_loff = multi_pack_index(format, 2, 1, &one_byte_loff_chunks);
    assert!(MultiPackIndex::parse(&bad_loff, format).is_err());
}
6665
/// RIDX and BTMP chunks appended to a two-pack index are surfaced through
/// `reverse_index` and `bitmapped_packs`.
#[test]
fn parses_multi_pack_index_bitmap_chunks() {
    let format = ObjectFormat::Sha1;
    let first = sley_core::object_id_for_bytes(format, "blob", b"first object\n")
        .expect("test operation should succeed");
    let second = sley_core::object_id_for_bytes(format, "blob", b"second object\n")
        .expect("test operation should succeed");

    let mut chunks = midx_chunks_with_pack_names(
        format,
        b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
        &[(first, 0, 12), (second, 1, 24)],
    );
    chunks.extend([
        (*b"RIDX", midx_u32_table(&[1, 0])),
        (*b"BTMP", midx_bitmap_packs(&[(0, 1), (1, 1)])),
    ]);

    let encoded = multi_pack_index(format, 2, 2, &chunks);
    let parsed = MultiPackIndex::parse(&encoded, format).expect("test operation should succeed");

    assert_eq!(parsed.reverse_index, Some(vec![1, 0]));

    let expected_packs = vec![
        MultiPackBitmapPack {
            bitmap_pos: 0,
            bitmap_nr: 1,
        },
        MultiPackBitmapPack {
            bitmap_pos: 1,
            bitmap_nr: 1,
        },
    ];
    assert_eq!(parsed.bitmapped_packs, Some(expected_packs));
}
6698
/// Bytes produced by `MultiPackIndex::write` parse back to the same version,
/// pack names and per-object locations, including an offset that needs LOFF.
#[test]
fn writes_multi_pack_index_that_round_trips() {
    let format = ObjectFormat::Sha1;
    let first = sley_core::object_id_for_bytes(format, "blob", b"first object\n")
        .expect("test operation should succeed");
    let second = sley_core::object_id_for_bytes(format, "blob", b"second object\n")
        .expect("test operation should succeed");

    let entries = [
        MultiPackIndexEntry {
            oid: second.clone(),
            pack_int_id: 0,
            offset: 0x1_0000_0000,
        },
        MultiPackIndexEntry {
            oid: first.clone(),
            pack_int_id: 1,
            offset: 12,
        },
    ];
    let bytes = MultiPackIndex::write(
        format,
        2,
        &["pack-b.idx".into(), "pack-a.idx".into()],
        &entries,
    )
    .expect("test operation should succeed");

    let parsed = MultiPackIndex::parse(&bytes, format).expect("test operation should succeed");
    assert_eq!(parsed.version, 2);
    assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
    assert_eq!(parsed.object_count, 2);

    let first_hit = parsed
        .find(&first)
        .expect("test operation should succeed");
    assert_eq!(first_hit.pack_int_id, 1);
    assert_eq!(first_hit.offset, 12);

    let second_hit = parsed
        .find(&second)
        .expect("test operation should succeed");
    assert_eq!(second_hit.pack_int_id, 0);
    assert_eq!(second_hit.offset, 0x1_0000_0000);

    // The offset above does not fit in 32 bits, so a LOFF chunk must exist.
    let has_loff = parsed.chunks.iter().any(|entry| entry.id == *b"LOFF");
    assert!(has_loff);
}
6759
/// `MultiPackIndex::write` must return an error for each malformed input
/// exercised below rather than emitting bytes.
#[test]
fn write_multi_pack_index_rejects_invalid_inputs() {
    let format = ObjectFormat::Sha1;
    let oid = sley_core::object_id_for_bytes(format, "blob", b"object\n")
        .expect("test operation should succeed");

    // Version argument 3.
    let bad_version = MultiPackIndex::write(format, 3, &["pack-a.idx".into()], &[]);
    assert!(bad_version.is_err());

    // Version argument 1 with names listed out of order.
    let unsorted_names = MultiPackIndex::write(
        format,
        1,
        &["pack-b.idx".into(), "pack-a.idx".into()],
        &[],
    );
    assert!(unsorted_names.is_err());

    // A pack name containing a path separator.
    let name_with_slash = MultiPackIndex::write(format, 2, &["pack/a.idx".into()], &[]);
    assert!(name_with_slash.is_err());

    // An entry referring to pack id 1 when only one name is supplied.
    let stray_pack_id = MultiPackIndex::write(
        format,
        2,
        &["pack-a.idx".into()],
        &[MultiPackIndexEntry {
            oid,
            pack_int_id: 1,
            offset: 12,
        }],
    );
    assert!(stray_pack_id.is_err());

    // The same object id listed twice.
    let repeated_oid = MultiPackIndex::write(
        format,
        2,
        &["pack-a.idx".into()],
        &[
            MultiPackIndexEntry {
                oid,
                pack_int_id: 0,
                offset: 12,
            },
            MultiPackIndexEntry {
                oid,
                pack_int_id: 0,
                offset: 24,
            },
        ],
    );
    assert!(repeated_oid.is_err());
}
6809
/// Inconsistent RIDX / BTMP chunks must make `MultiPackIndex::parse` fail.
#[test]
fn rejects_bad_multi_pack_index_bitmap_chunks() {
    let format = ObjectFormat::Sha1;
    let oid_a = ObjectId::from_hex(format, "1111111111111111111111111111111111111111")
        .expect("test operation should succeed");
    let oid_b = ObjectId::from_hex(format, "2222222222222222222222222222222222222222")
        .expect("test operation should succeed");

    // RIDX names position 0 twice.
    let mut repeated_ridx_chunks = midx_chunks_with_pack_names(
        format,
        b"pack-a.idx\0\0".to_vec(),
        &[(oid_a.clone(), 0, 12), (oid_b.clone(), 0, 24)],
    );
    repeated_ridx_chunks.push((*b"RIDX", midx_u32_table(&[0, 0])));
    let duplicate_ridx = multi_pack_index(format, 2, 1, &repeated_ridx_chunks);
    assert!(MultiPackIndex::parse(&duplicate_ridx, format).is_err());

    // Two packs, but BTMP carries a single record.
    let mut single_record_chunks = midx_chunks_with_pack_names(
        format,
        b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
        &[(oid_a.clone(), 0, 12), (oid_b.clone(), 1, 24)],
    );
    single_record_chunks.push((*b"BTMP", midx_bitmap_packs(&[(0, 1)])));
    let short_btmp = multi_pack_index(format, 2, 2, &single_record_chunks);
    assert!(MultiPackIndex::parse(&short_btmp, format).is_err());

    // BTMP record (1, 2) against an index holding only two objects.
    let mut overflowing_chunks = midx_chunks_with_pack_names(
        format,
        b"pack-a.idx\0\0".to_vec(),
        &[(oid_a, 0, 12), (oid_b, 0, 24)],
    );
    overflowing_chunks.push((*b"BTMP", midx_bitmap_packs(&[(1, 2)])));
    let out_of_range_btmp = multi_pack_index(format, 2, 1, &overflowing_chunks);
    assert!(MultiPackIndex::parse(&out_of_range_btmp, format).is_err());
}
6850
/// A SHA-1 bitmap index built with the hash-cache option parses into the
/// expected header fields, single entry and name-hash cache.
#[test]
fn parses_pack_bitmap_index_with_hash_cache() {
    let format = ObjectFormat::Sha1;
    let options = PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");

    let encoded = pack_bitmap_index(
        format,
        3,
        options,
        &pack_checksum,
        &[(2, 0, 1, &[0b101])],
        Some(&[0x1111_1111, 0x2222_2222, 0x3333_3333]),
    );
    let parsed =
        PackBitmapIndex::parse(&encoded, format, 3).expect("test operation should succeed");

    // Header fields.
    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.format, format);
    assert_eq!(parsed.options, options);
    assert_eq!(parsed.pack_checksum, pack_checksum);

    // Type bitmaps are sized to the three objects passed to `parse`.
    assert_eq!(parsed.type_bitmaps.commits.bit_size, 3);
    assert_eq!(parsed.type_bitmaps.trees.bit_size, 3);

    // The one entry, looked up by its index position.
    assert_eq!(parsed.entries.len(), 1);
    let entry = parsed
        .entry_for_index_position(2)
        .expect("test operation should succeed");
    assert_eq!(entry.xor_offset, 0);
    assert_eq!(entry.flags, 1);
    assert_eq!(entry.bitmap.words, ewah_literal_words(&[0b101]));

    assert_eq!(
        parsed.name_hash_cache,
        Some(vec![0x1111_1111, 0x2222_2222, 0x3333_3333])
    );
}
6887
/// A SHA-256 bitmap index without a hash cache parses, and both checksums use
/// the SHA-256 format.
#[test]
fn parses_pack_bitmap_index_sha256() {
    let format = ObjectFormat::Sha256;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");

    let encoded = pack_bitmap_index(
        format,
        2,
        PackBitmapIndex::OPTION_FULL_DAG,
        &pack_checksum,
        &[(0, 0, 0, &[0b11])],
        None,
    );
    let parsed =
        PackBitmapIndex::parse(&encoded, format, 2).expect("test operation should succeed");

    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.format, format);
    assert_eq!(parsed.pack_checksum, pack_checksum);
    assert_eq!(parsed.index_checksum.format(), format);
    assert_eq!(parsed.entries[0].object_position, 0);
    // No hash-cache option was requested, so no cache is expected.
    assert_eq!(parsed.name_hash_cache, None);
}
6910
/// Parses a `.bitmap` file written by the `git` binary found on `PATH`.
///
/// A scratch repository with two empty commits is repacked with
/// `git repack -adb`; the resulting `.idx` and `.bitmap` files are parsed and
/// must agree on the pack checksum, with at least one bitmap entry present.
///
/// The scratch directory is owned by a drop guard so it is removed even when
/// a git invocation, an `expect`, or an assertion panics part-way through.
#[test]
fn parses_upstream_git_written_pack_bitmap_index() {
    /// Deletes the wrapped directory tree when dropped.
    struct ScratchDir(std::path::PathBuf);

    impl Drop for ScratchDir {
        fn drop(&mut self) {
            // Best effort: a failed cleanup must not mask the test outcome.
            let _ = std::fs::remove_dir_all(&self.0);
        }
    }

    let scratch = ScratchDir(unique_temp_dir("git-pack-bitmap-upstream"));
    let root = scratch.0.as_path();
    fs::create_dir_all(root).expect("test operation should succeed");

    run_git_success(root, &["init", "-q", "-b", "main"]);
    // Two empty commits, each with an explicit identity passed via `-c`.
    for message in ["one", "two"] {
        run_git_success(
            root,
            &[
                "-c",
                "user.name=Example User",
                "-c",
                "user.email=example@example.invalid",
                "commit",
                "--allow-empty",
                "-q",
                "-m",
                message,
            ],
        );
    }
    run_git_success(root, &["repack", "-adb"]);

    let pack_dir = root.join(".git").join("objects").join("pack");
    let idx_path = single_path_with_extension(&pack_dir, "idx");
    let bitmap_path = single_path_with_extension(&pack_dir, "bitmap");

    let index = PackIndex::parse(
        &fs::read(idx_path).expect("test operation should succeed"),
        ObjectFormat::Sha1,
    )
    .expect("test operation should succeed");
    let bitmap = PackBitmapIndex::parse(
        &fs::read(bitmap_path).expect("test operation should succeed"),
        ObjectFormat::Sha1,
        index.entries.len(),
    )
    .expect("test operation should succeed");

    assert_eq!(bitmap.pack_checksum, index.pack_checksum);
    assert!(!bitmap.entries.is_empty());
}
6965
/// Corrupting single header bytes or the trailing checksum of an otherwise
/// valid bitmap index must make parsing fail.
#[test]
fn rejects_bad_pack_bitmap_index_header_and_checksum() {
    let format = ObjectFormat::Sha1;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let valid = pack_bitmap_index(
        format,
        1,
        PackBitmapIndex::OPTION_FULL_DAG,
        &pack_checksum,
        &[(0, 0, 0, &[1])],
        None,
    );

    // First byte overwritten.
    let mut bad_signature = valid.clone();
    bad_signature[0] = b'X';
    assert!(PackBitmapIndex::parse(&bad_signature, format, 1).is_err());

    // Byte 5 set to 2 (presumably the version field), checksum refreshed.
    let mut bad_version = valid.clone();
    bad_version[5] = 2;
    refresh_trailing_checksum(format, &mut bad_version);
    assert!(PackBitmapIndex::parse(&bad_version, format, 1).is_err());

    // Byte 7 set to 0x20 (presumably an option bit), checksum refreshed.
    let mut bad_option = valid.clone();
    bad_option[7] = 0x20;
    refresh_trailing_checksum(format, &mut bad_option);
    assert!(PackBitmapIndex::parse(&bad_option, format, 1).is_err());

    // Lowest bit of the final byte flipped, checksum left stale.
    let mut bad_checksum = valid;
    let final_index = bad_checksum.len() - 1;
    bad_checksum[final_index] ^= 1;
    assert!(PackBitmapIndex::parse(&bad_checksum, format, 1).is_err());
}
6998
/// Truncated bitmap data, an entry position beyond the object count, and an
/// XOR offset on the first entry must each be rejected.
#[test]
fn rejects_bad_pack_bitmap_index_ewah_and_entries() {
    let format = ObjectFormat::Sha1;
    let full_dag = PackBitmapIndex::OPTION_FULL_DAG;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");

    let valid = pack_bitmap_index(
        format,
        2,
        full_dag,
        &pack_checksum,
        &[(0, 0, 0, &[0b01]), (1, 1, 0, &[0b11])],
        None,
    );

    // Cut off one hash length plus one byte, then refresh the checksum.
    let mut truncated = valid.clone();
    let shortened_len = truncated.len() - format.raw_len() - 1;
    truncated.truncate(shortened_len);
    refresh_trailing_checksum(format, &mut truncated);
    assert!(PackBitmapIndex::parse(&truncated, format, 2).is_err());

    // Entry position 2 with only two objects; checked before and after the
    // checksum is refreshed.
    let mut out_of_range_position = pack_bitmap_index(
        format,
        2,
        full_dag,
        &pack_checksum,
        &[(2, 0, 0, &[0b01])],
        None,
    );
    assert!(PackBitmapIndex::parse(&out_of_range_position, format, 2).is_err());
    refresh_trailing_checksum(format, &mut out_of_range_position);
    assert!(PackBitmapIndex::parse(&out_of_range_position, format, 2).is_err());

    // XOR offset 1 on the only entry, which has no earlier entry to refer to.
    let invalid_xor = pack_bitmap_index(
        format,
        2,
        full_dag,
        &pack_checksum,
        &[(0, 1, 0, &[0b01])],
        None,
    );
    assert!(PackBitmapIndex::parse(&invalid_xor, format, 2).is_err());
}
7039
/// A one-entry SHA-256 pack index parses, exposes the entry's offset and
/// CRC, and the borrowed view agrees with the owned parse.
#[test]
fn parses_single_entry_pack_index_sha256() {
    let format = ObjectFormat::Sha256;
    let oid = sley_core::object_id_for_bytes(format, "blob", b"hello sha256\n")
        .expect("test operation should succeed");
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");

    let index = single_entry_index(format, oid, 0x1234_5678, 12, pack_checksum.clone());
    let parsed = PackIndex::parse(&index, format).expect("test operation should succeed");

    assert_eq!(parsed.version, 2);
    assert_eq!(parsed.pack_checksum, pack_checksum);
    assert_eq!(parsed.entries.len(), 1);

    let located = parsed.find(&oid).expect("test operation should succeed");
    assert_eq!(located.offset, 12);
    assert_eq!(located.crc32, 0x1234_5678);

    assert_eq!(parsed.index_checksum.format(), format);
    assert_pack_index_view_matches_owned(&index, format);
}
7075
/// Runs the shared deltify-and-round-trip scenario with SHA-1 object ids.
#[test]
fn write_packed_deltifies_similar_blobs_and_round_trips_sha1() {
    let format = ObjectFormat::Sha1;
    write_packed_deltifies_similar_blobs_and_round_trips(format);
}
7080
/// Runs the shared deltify-and-round-trip scenario with SHA-256 object ids.
#[test]
fn write_packed_deltifies_similar_blobs_and_round_trips_sha256() {
    let format = ObjectFormat::Sha256;
    write_packed_deltifies_similar_blobs_and_round_trips(format);
}
7085
/// Packing the same blob twice in one call must be reported as an error.
#[test]
fn write_packed_rejects_duplicate_objects() {
    let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
    let twin = object.clone();
    let pair = [twin, object];
    let outcome = PackFile::write_packed(&pair, ObjectFormat::Sha1);
    assert!(outcome.is_err());
}
7091
/// Caller-supplied ids are still checked: a repeated id and an id of the
/// wrong hash format must both be rejected.
#[test]
fn write_packed_with_known_ids_validates_ids_before_trusting_them() {
    let blob = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
    let sha1_id = blob
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let sha256_id = blob
        .object_id(ObjectFormat::Sha256)
        .expect("test operation should succeed");

    // The same (id, object) pair supplied twice.
    let repeated = [
        PackInput {
            oid: &sha1_id,
            object: &blob,
        },
        PackInput {
            oid: &sha1_id,
            object: &blob,
        },
    ];
    let repeated_outcome = PackFile::write_packed_with_known_ids(&repeated, ObjectFormat::Sha1);
    assert!(repeated_outcome.is_err());

    // A SHA-256 id handed to a SHA-1 pack write.
    let mismatched = [PackInput {
        oid: &sha256_id,
        object: &blob,
    }];
    let mismatched_outcome =
        PackFile::write_packed_with_known_ids(&mismatched, ObjectFormat::Sha1);
    assert!(mismatched_outcome.is_err());
}
7119
7120 fn write_packed_deltifies_similar_blobs_and_round_trips(format: ObjectFormat) {
7121 let objects = similar_blob_family(8);
7122 let packed =
7123 PackFile::write_packed(&objects, format).expect("test operation should succeed");
7124 let undeltified =
7125 PackFile::write_undeltified(&objects, format).expect("test operation should succeed");
7126
7127 assert!(
7130 packed.pack.len() < undeltified.pack.len(),
7131 "expected delta pack ({}) smaller than undeltified pack ({})",
7132 packed.pack.len(),
7133 undeltified.pack.len()
7134 );
7135
7136 let kinds = pack_entry_kinds(&packed.pack, format);
7138 let delta_count = kinds
7139 .iter()
7140 .filter(|kind| matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta))
7141 .count();
7142 assert!(
7143 delta_count >= 1,
7144 "expected at least one delta entry, found kinds {kinds:?}"
7145 );
7146
7147 let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
7149 assert_eq!(parsed.entries.len(), objects.len());
7150 for object in &objects {
7151 let oid = object
7152 .object_id(format)
7153 .expect("test operation should succeed");
7154 let found = parsed
7155 .entries
7156 .iter()
7157 .find(|entry| entry.entry.oid == oid)
7158 .unwrap_or_else(|| panic!("object {oid} missing from parsed pack"));
7159 assert_eq!(&found.object, object, "object {oid} did not round-trip");
7160 }
7161
7162 let index = PackIndex::parse(&packed.index, format).expect("test operation should succeed");
7164 assert_eq!(index.pack_checksum, packed.checksum);
7165 for object in &objects {
7166 let oid = object
7167 .object_id(format)
7168 .expect("test operation should succeed");
7169 assert!(index.find(&oid).is_some(), "index missing {oid}");
7170 }
7171 }
7172
/// With default options the writer uses ofs-delta entries and never
/// ref-delta, and the resulting pack parses on its own.
#[test]
fn write_packed_emits_ofs_delta_by_default() {
    let format = ObjectFormat::Sha1;
    let blobs = similar_blob_family(6);
    let packed = PackFile::write_packed(&blobs, format).expect("test operation should succeed");

    let kinds = pack_entry_kinds(&packed.pack, format);
    let uses_ofs = kinds.contains(&PackObjectKind::OfsDelta);
    let uses_ref = kinds.contains(&PackObjectKind::RefDelta);
    assert!(
        uses_ofs,
        "expected an ofs-delta entry by default, found {kinds:?}"
    );
    assert!(
        !uses_ref,
        "default self-contained pack must not use ref-delta, found {kinds:?}"
    );

    let reparsed = PackFile::parse(&packed.pack, format);
    assert!(reparsed.is_ok());
}
7190
/// Turning off `prefer_ofs_delta` makes the writer emit ref-delta entries
/// only, and the pack still parses with every object present.
#[test]
fn write_packed_can_emit_ref_delta() {
    let format = ObjectFormat::Sha1;
    let objects = similar_blob_family(6);
    let ref_delta_options = PackWriteOptions::new().with_prefer_ofs_delta(false);
    let packed = PackFile::write_packed_with_options(&objects, format, &ref_delta_options)
        .expect("test operation should succeed");

    let kinds = pack_entry_kinds(&packed.pack, format);
    let uses_ref = kinds.contains(&PackObjectKind::RefDelta);
    let uses_ofs = kinds.contains(&PackObjectKind::OfsDelta);
    assert!(uses_ref, "expected a ref-delta entry, found {kinds:?}");
    assert!(
        !uses_ofs,
        "ref-delta mode must not emit ofs-delta, found {kinds:?}"
    );

    let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
    assert_eq!(parsed.entries.len(), objects.len());
}
7213
/// For several depth limits, no delta chain in the written pack may exceed
/// the limit, and every object must still round-trip.
#[test]
fn write_packed_bounds_delta_chain_depth() {
    let format = ObjectFormat::Sha1;
    let objects = incremental_blob_chain(20);

    for max_depth in [1usize, 2, 5] {
        // Window 20 lets every object in the chain be considered as a base.
        let options = PackWriteOptions::new()
            .with_window(20)
            .with_depth(max_depth);
        let packed = PackFile::write_packed_with_options(&objects, format, &options)
            .expect("test operation should succeed");

        let observed = pack_entry_depths(&packed.pack, format)
            .into_iter()
            .max()
            .unwrap_or(0);
        assert!(
            observed <= max_depth,
            "max chain depth {observed} exceeded bound {max_depth}"
        );

        let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
        for expected in &objects {
            let oid = expected
                .object_id(format)
                .expect("test operation should succeed");
            let found = parsed
                .entries
                .iter()
                .find(|candidate| candidate.entry.oid == oid)
                .expect("test operation should succeed");
            assert_eq!(&found.object, expected);
        }
    }
}
7252
/// A depth limit of zero must leave no delta entries in the pack.
#[test]
fn write_packed_depth_zero_stores_everything_undeltified() {
    let format = ObjectFormat::Sha1;
    let blobs = similar_blob_family(5);
    let no_delta_options = PackWriteOptions::new().with_depth(0);
    let packed = PackFile::write_packed_with_options(&blobs, format, &no_delta_options)
        .expect("test operation should succeed");

    let kinds = pack_entry_kinds(&packed.pack, format);
    let any_delta = kinds
        .iter()
        .any(|kind| matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta));
    assert!(!any_delta, "depth 0 must disable deltas, found {kinds:?}");
}
7267
/// Runs the shared thin-pack scenario with SHA-1 object ids.
#[test]
fn write_thin_uses_external_base_and_round_trips_sha1() {
    let format = ObjectFormat::Sha1;
    write_thin_uses_external_base_and_round_trips(format);
}
7272
/// Runs the shared thin-pack scenario with SHA-256 object ids.
#[test]
fn write_thin_uses_external_base_and_round_trips_sha256() {
    let format = ObjectFormat::Sha256;
    write_thin_uses_external_base_and_round_trips(format);
}
7277
7278 fn write_thin_uses_external_base_and_round_trips(format: ObjectFormat) {
7279 let base = blob_with_marker("EXTERNAL-BASE");
7282 let target = blob_with_marker("EXTERNAL-TARGET");
7283 let base_oid = base
7284 .object_id(format)
7285 .expect("test operation should succeed");
7286
7287 let mut external = HashMap::new();
7288 external.insert(base_oid, base.clone());
7289 let packed = PackFile::write_thin(std::slice::from_ref(&target), format, external)
7290 .expect("test operation should succeed");
7291
7292 let kinds = pack_entry_kinds(&packed.pack, format);
7294 assert_eq!(kinds, vec![PackObjectKind::RefDelta]);
7295
7296 let mut offset = 12usize;
7298 let header =
7299 parse_entry_header(&packed.pack, &mut offset).expect("test operation should succeed");
7300 assert_eq!(header.kind, PackObjectKind::RefDelta);
7301 let referenced =
7302 ObjectId::from_raw(format, &packed.pack[offset..offset + format.raw_len()])
7303 .expect("test operation should succeed");
7304 assert_eq!(referenced, base_oid);
7305
7306 assert!(PackFile::parse(&packed.pack, format).is_err());
7308
7309 let parsed = PackFile::parse_thin(&packed.pack, format, |oid| {
7311 if oid == &base_oid {
7312 Ok(Some(base.clone()))
7313 } else {
7314 Ok(None)
7315 }
7316 })
7317 .expect("test operation should succeed");
7318 assert_eq!(parsed.entries.len(), 1);
7319 assert_eq!(parsed.entries[0].object, target);
7320 }
7321
/// Three unrelated objects of different types (one of them empty) all
/// survive a write/parse cycle.
#[test]
fn write_packed_preserves_distinct_objects_with_no_similarity() {
    let format = ObjectFormat::Sha1;
    let objects = vec![
        EncodedObject::new(ObjectType::Blob, b"alpha distinct\n".to_vec()),
        EncodedObject::new(ObjectType::Tree, Vec::<u8>::new()),
        EncodedObject::new(ObjectType::Commit, b"tree 0000\n".to_vec()),
    ];

    let packed = PackFile::write_packed(&objects, format).expect("test operation should succeed");
    let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
    assert_eq!(parsed.entries.len(), objects.len());

    for object in &objects {
        let oid = object
            .object_id(format)
            .expect("test operation should succeed");
        let present = parsed
            .entries
            .iter()
            .any(|candidate| candidate.entry.oid == oid);
        assert!(present);
    }
}
7343
7344 fn similar_blob_family(count: usize) -> Vec<EncodedObject> {
7348 let mut common_head = Vec::new();
7349 for _ in 0..200 {
7350 common_head.extend_from_slice(b"shared header line for delta testing\n");
7351 }
7352 let mut common_tail = Vec::new();
7353 for _ in 0..200 {
7354 common_tail.extend_from_slice(b"shared trailer line for delta testing\n");
7355 }
7356 (0..count)
7357 .map(|idx| {
7358 let mut body = common_head.clone();
7359 body.extend_from_slice(format!("UNIQUE MIDDLE MARKER NUMBER {idx}\n").as_bytes());
7360 body.extend_from_slice(&common_tail);
7361 EncodedObject::new(ObjectType::Blob, body)
7362 })
7363 .collect()
7364 }
7365
7366 fn incremental_blob_chain(count: usize) -> Vec<EncodedObject> {
7369 let mut body = Vec::new();
7370 for _ in 0..100 {
7371 body.extend_from_slice(b"baseline content shared across the whole chain\n");
7372 }
7373 let mut objects = Vec::with_capacity(count);
7374 for idx in 0..count {
7375 body.extend_from_slice(format!("appended unique line {idx}\n").as_bytes());
7376 objects.push(EncodedObject::new(ObjectType::Blob, body.clone()));
7377 }
7378 objects
7379 }
7380
7381 fn blob_with_marker(marker: &str) -> EncodedObject {
7382 let mut body = Vec::new();
7383 for _ in 0..150 {
7384 body.extend_from_slice(b"common body shared between base and target\n");
7385 }
7386 body.extend_from_slice(marker.as_bytes());
7387 body.push(b'\n');
7388 for _ in 0..150 {
7389 body.extend_from_slice(b"more common body shared between objects\n");
7390 }
7391 EncodedObject::new(ObjectType::Blob, body)
7392 }
7393
7394 fn pack_entry_kinds(pack: &[u8], format: ObjectFormat) -> Vec<PackObjectKind> {
7396 pack_entry_descriptors(pack, format)
7397 .into_iter()
7398 .map(|descriptor| descriptor.kind)
7399 .collect()
7400 }
7401
7402 fn pack_entry_depths(pack: &[u8], format: ObjectFormat) -> Vec<usize> {
7406 let descriptors = pack_entry_descriptors(pack, format);
7407 let mut depth_by_offset: HashMap<u64, usize> = HashMap::new();
7408 let mut depths = Vec::with_capacity(descriptors.len());
7409 for descriptor in &descriptors {
7410 let depth = match &descriptor.base {
7411 EntryBase::None => 0,
7412 EntryBase::Offset(base_offset) => {
7413 depth_by_offset.get(base_offset).copied().unwrap_or(0) + 1
7414 }
7415 EntryBase::Ref => 1,
7419 };
7420 depth_by_offset.insert(descriptor.offset, depth);
7421 depths.push(depth);
7422 }
7423 depths
7424 }
7425
7426 struct EntryDescriptor {
7427 offset: u64,
7428 kind: PackObjectKind,
7429 base: EntryBase,
7430 }
7431
/// How a pack entry refers to its delta base.
enum EntryBase {
    /// The entry is not an ofs-delta or ref-delta.
    None,
    /// Ofs-delta: the base offset decoded for the entry.
    Offset(u64),
    /// Ref-delta: the base is named by object id, which is not recorded here.
    Ref,
}
7437
7438 fn pack_entry_descriptors(pack: &[u8], format: ObjectFormat) -> Vec<EntryDescriptor> {
7439 let trailer_offset = pack.len() - format.raw_len();
7440 let count = u32_be(&pack[8..12]) as usize;
7441 let mut offset = 12usize;
7442 let mut descriptors = Vec::with_capacity(count);
7443 for _ in 0..count {
7444 let entry_offset = offset as u64;
7445 let header =
7446 parse_entry_header(pack, &mut offset).expect("test operation should succeed");
7447 let base = match header.kind {
7448 PackObjectKind::OfsDelta => {
7449 let base_offset = parse_ofs_delta_base_offset(pack, &mut offset, entry_offset)
7450 .expect("test operation should succeed");
7451 EntryBase::Offset(base_offset)
7452 }
7453 PackObjectKind::RefDelta => {
7454 offset += format.raw_len();
7455 EntryBase::Ref
7456 }
7457 _ => EntryBase::None,
7458 };
7459 let mut decoder = ZlibDecoder::new(&pack[offset..trailer_offset]);
7460 let mut body = Vec::new();
7461 decoder
7462 .read_to_end(&mut body)
7463 .expect("test operation should succeed");
7464 offset += decoder.total_in() as usize;
7465 descriptors.push(EntryDescriptor {
7466 offset: entry_offset,
7467 kind: header.kind,
7468 base,
7469 });
7470 }
7471 descriptors
7472 }
7473
7474 fn similar_blob_objects() -> (EncodedObject, EncodedObject) {
7475 let mut base = Vec::new();
7476 for _ in 0..300 {
7477 base.extend_from_slice(b"common payload\n");
7478 }
7479 base.extend_from_slice(b"base\n");
7480 let mut changed = Vec::new();
7481 for _ in 0..300 {
7482 changed.extend_from_slice(b"common payload\n");
7483 }
7484 changed.extend_from_slice(b"changed\n");
7485 (
7486 EncodedObject::new(ObjectType::Blob, base),
7487 EncodedObject::new(ObjectType::Blob, changed),
7488 )
7489 }
7490
7491 fn single_object_pack(format: ObjectFormat, object_type: ObjectType, body: &[u8]) -> Vec<u8> {
7492 let mut pack = Vec::new();
7493 pack.extend_from_slice(b"PACK");
7494 pack.extend_from_slice(&2u32.to_be_bytes());
7495 pack.extend_from_slice(&1u32.to_be_bytes());
7496 write_entry_header(&mut pack, object_type, body.len() as u64);
7497 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7498 encoder
7499 .write_all(body)
7500 .expect("test operation should succeed");
7501 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7502 let checksum =
7503 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7504 pack.extend_from_slice(checksum.as_bytes());
7505 pack
7506 }
7507
/// Which delta encoding `two_object_delta_pack` should emit for its second
/// entry.
#[derive(Debug, Clone, Copy)]
enum DeltaKind {
    /// Refer to the base by its offset within the pack (entry type 6).
    Offset,
    /// Refer to the base by its object id (entry type 7).
    Ref,
}
7513
7514 fn two_object_delta_pack(
7515 format: ObjectFormat,
7516 base: &[u8],
7517 result: &[u8],
7518 delta_kind: DeltaKind,
7519 ) -> Vec<u8> {
7520 let mut pack = Vec::new();
7521 pack.extend_from_slice(b"PACK");
7522 pack.extend_from_slice(&2u32.to_be_bytes());
7523 pack.extend_from_slice(&2u32.to_be_bytes());
7524
7525 let base_offset = pack.len();
7526 write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
7527 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7528 encoder
7529 .write_all(base)
7530 .expect("test operation should succeed");
7531 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7532
7533 let delta = append_suffix_delta(base, result);
7534 let delta_offset = pack.len();
7535 write_pack_entry_header_kind(
7536 &mut pack,
7537 match delta_kind {
7538 DeltaKind::Offset => 6,
7539 DeltaKind::Ref => 7,
7540 },
7541 delta.len() as u64,
7542 );
7543 match delta_kind {
7544 DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
7545 DeltaKind::Ref => {
7546 let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
7547 .expect("test operation should succeed");
7548 pack.extend_from_slice(base_oid.as_bytes());
7549 }
7550 }
7551 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7552 encoder
7553 .write_all(&delta)
7554 .expect("test operation should succeed");
7555 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7556
7557 let checksum =
7558 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7559 pack.extend_from_slice(checksum.as_bytes());
7560 pack
7561 }
7562
7563 fn thin_ref_delta_pack(format: ObjectFormat, base: &[u8], result: &[u8]) -> Vec<u8> {
7564 let mut pack = Vec::new();
7565 pack.extend_from_slice(b"PACK");
7566 pack.extend_from_slice(&2u32.to_be_bytes());
7567 pack.extend_from_slice(&1u32.to_be_bytes());
7568
7569 let delta = append_suffix_delta(base, result);
7570 write_pack_entry_header_kind(&mut pack, 7, delta.len() as u64);
7571 let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
7572 .expect("test operation should succeed");
7573 pack.extend_from_slice(base_oid.as_bytes());
7574 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7575 encoder
7576 .write_all(&delta)
7577 .expect("test operation should succeed");
7578 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7579
7580 let checksum =
7581 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7582 pack.extend_from_slice(checksum.as_bytes());
7583 pack
7584 }
7585
/// Returns a path under the system temp directory named
/// `sley-{name}-{pid}-{nanos}`, where `nanos` is the current time since the
/// Unix epoch. The directory itself is not created.
fn unique_temp_dir(name: &str) -> PathBuf {
    let since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("test operation should succeed");
    let pid = std::process::id();
    let nanos = since_epoch.as_nanos();

    let mut dir = std::env::temp_dir();
    dir.push(format!("sley-{name}-{pid}-{nanos}"));
    dir
}
7593
/// Runs `git` with `args` in `cwd` and panics unless it exits successfully.
///
/// The panic message includes the arguments and, when the process ran, its
/// exit code plus captured stdout and stderr.
fn run_git_success(cwd: &Path, args: &[&str]) {
    let mut command = Command::new("git");
    command.current_dir(cwd).args(args);

    let output = match command.output() {
        Ok(output) => output,
        Err(err) => panic!("failed to run git {args:?}: {err}"),
    };

    let status = output.status;
    assert!(
        status.success(),
        "git {args:?} failed with status {:?}\nstdout:\n{}\nstderr:\n{}",
        status.code(),
        String::from_utf8_lossy(&output.stdout),
        String::from_utf8_lossy(&output.stderr)
    );
}
7608
/// Returns the one entry of `dir` whose file extension equals `extension`.
///
/// Panics if the directory cannot be read or if the number of matching
/// entries is not exactly one.
fn single_path_with_extension(dir: &Path, extension: &str) -> PathBuf {
    let mut matches = Vec::new();
    for entry in fs::read_dir(dir).expect("test operation should succeed") {
        let path = entry.expect("test operation should succeed").path();
        let has_extension = path.extension().and_then(|ext| ext.to_str()) == Some(extension);
        if has_extension {
            matches.push(path);
        }
    }
    assert_eq!(matches.len(), 1, "expected one .{extension} file");
    matches.swap_remove(0)
}
7618
7619 fn pack_bitmap_index(
7620 format: ObjectFormat,
7621 object_count: u32,
7622 options: u16,
7623 pack_checksum: &ObjectId,
7624 entries: &[(u32, u8, u8, &[u64])],
7625 name_hash_cache: Option<&[u32]>,
7626 ) -> Vec<u8> {
7627 let mut out = Vec::new();
7628 out.extend_from_slice(b"BITM");
7629 out.extend_from_slice(&1u16.to_be_bytes());
7630 out.extend_from_slice(&options.to_be_bytes());
7631 out.extend_from_slice(&(entries.len() as u32).to_be_bytes());
7632 out.extend_from_slice(pack_checksum.as_bytes());
7633 write_test_ewah(&mut out, object_count, &[0b001]);
7634 write_test_ewah(&mut out, object_count, &[0b010]);
7635 write_test_ewah(&mut out, object_count, &[0b100]);
7636 write_test_ewah(&mut out, object_count, &[0]);
7637 for (position, xor_offset, flags, words) in entries {
7638 out.extend_from_slice(&position.to_be_bytes());
7639 out.push(*xor_offset);
7640 out.push(*flags);
7641 write_test_ewah(&mut out, object_count, words);
7642 }
7643 if let Some(cache) = name_hash_cache {
7644 for value in cache {
7645 out.extend_from_slice(&value.to_be_bytes());
7646 }
7647 }
7648 let checksum =
7649 sley_core::digest_bytes(format, &out).expect("test operation should succeed");
7650 out.extend_from_slice(checksum.as_bytes());
7651 out
7652 }
7653
7654 fn write_test_ewah(out: &mut Vec<u8>, bit_size: u32, literals: &[u64]) {
7655 out.extend_from_slice(&bit_size.to_be_bytes());
7656 let words = ewah_literal_words(literals);
7657 out.extend_from_slice(&(words.len() as u32).to_be_bytes());
7658 for word in words {
7659 out.extend_from_slice(&word.to_be_bytes());
7660 }
7661 out.extend_from_slice(&0u32.to_be_bytes());
7662 }
7663
/// Returns a leading header word followed by `literals` unchanged.
///
/// The header word stores the number of literals shifted left by 33 bits and
/// leaves every lower bit clear.
fn ewah_literal_words(literals: &[u64]) -> Vec<u64> {
    let header = (literals.len() as u64) << 33;
    std::iter::once(header)
        .chain(literals.iter().copied())
        .collect()
}
7670
7671 fn refresh_trailing_checksum(format: ObjectFormat, bytes: &mut [u8]) {
7672 let checksum_offset = bytes.len() - format.raw_len();
7673 let checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])
7674 .expect("test operation should succeed");
7675 bytes[checksum_offset..].copy_from_slice(checksum.as_bytes());
7676 }
7677
/// Builds a delta that reproduces `result` from `base` by copying all of
/// `base` and then inserting the remaining suffix of `result`.
///
/// Layout: base length and result length as varints, then at most one copy
/// instruction (offset 0, size `base.len()`) and at most one insert
/// instruction carrying the suffix.
///
/// The copy size is written with as many size bytes as it needs (up to two),
/// so every base length accepted by the assertion below is encoded without
/// truncation. An empty base emits no copy instruction and an empty suffix
/// emits no insert instruction, because a zero size byte or a zero opcode
/// does not mean "zero bytes" in the delta format.
///
/// # Panics
///
/// Panics if `result` does not start with `base`, if `base` is 0x10000 bytes
/// or longer, or if the suffix is 0x80 bytes or longer.
fn append_suffix_delta(base: &[u8], result: &[u8]) -> Vec<u8> {
    assert!(result.starts_with(base));
    let suffix = &result[base.len()..];
    assert!(base.len() < 0x10000);
    assert!(suffix.len() < 0x80);

    let mut delta = Vec::new();
    write_delta_varint(&mut delta, base.len() as u64);
    write_delta_varint(&mut delta, result.len() as u64);

    if !base.is_empty() {
        // Copy instruction with offset 0: flag 0x10 announces the low size
        // byte, flag 0x20 the high one; zero bytes are omitted.
        let size_low = (base.len() & 0xff) as u8;
        let size_high = ((base.len() >> 8) & 0xff) as u8;
        let mut opcode = 0x80u8;
        if size_low != 0 {
            opcode |= 0x10;
        }
        if size_high != 0 {
            opcode |= 0x20;
        }
        delta.push(opcode);
        if size_low != 0 {
            delta.push(size_low);
        }
        if size_high != 0 {
            delta.push(size_high);
        }
    }

    if !suffix.is_empty() {
        // Insert instruction: length byte (1..=0x7f) followed by the data.
        delta.push(suffix.len() as u8);
        delta.extend_from_slice(suffix);
    }
    delta
}

/// Appends `value` to `out` as a little-endian base-128 varint: seven bits
/// per byte, with the high bit set on every byte except the last.
fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
    loop {
        let low_bits = (value as u8) & 0x7f;
        value >>= 7;
        if value == 0 {
            out.push(low_bits);
            break;
        }
        out.push(low_bits | 0x80);
    }
}
7706
/// Appends a pack entry header for a raw `type_code` and `size`.
///
/// The first byte holds `type_code` shifted left by four plus the low four
/// bits of `size`; each following byte holds seven more bits of `size`. Every
/// byte except the last has its high bit set.
fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, mut size: u64) {
    let mut current = (type_code << 4) | ((size & 0x0f) as u8);
    size >>= 4;
    loop {
        if size == 0 {
            // Nothing left to encode: this byte ends the header.
            out.push(current);
            return;
        }
        out.push(current | 0x80);
        current = (size & 0x7f) as u8;
        size >>= 7;
    }
}
7723
/// Appends the relative base offset of an offset delta to `out`.
///
/// Offsets below 0x80 take a single byte, exactly as before. Larger offsets,
/// which previously triggered a panic, are written in the pack format's
/// multi-byte form: most significant group first, the high bit set on every
/// byte but the last, and each continuation group stored minus one.
fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: usize) {
    let mut remaining = relative;
    // Groups are produced least significant first and reversed on output.
    let mut encoded = vec![(remaining & 0x7f) as u8];
    remaining >>= 7;
    while remaining != 0 {
        // The "minus one" bias removes redundant encodings of the same value.
        remaining -= 1;
        encoded.push(0x80 | (remaining & 0x7f) as u8);
        remaining >>= 7;
    }
    out.extend(encoded.iter().rev());
}
7728
7729 fn single_entry_index(
7730 format: ObjectFormat,
7731 oid: ObjectId,
7732 crc32: u32,
7733 offset: u32,
7734 pack_checksum: ObjectId,
7735 ) -> Vec<u8> {
7736 let mut index = Vec::new();
7737 index.extend_from_slice(&[0xff, b't', b'O', b'c']);
7738 index.extend_from_slice(&2u32.to_be_bytes());
7739 for idx in 0..256 {
7740 let count = if idx >= usize::from(oid.as_bytes()[0]) {
7741 1u32
7742 } else {
7743 0u32
7744 };
7745 index.extend_from_slice(&count.to_be_bytes());
7746 }
7747 index.extend_from_slice(oid.as_bytes());
7748 index.extend_from_slice(&crc32.to_be_bytes());
7749 index.extend_from_slice(&offset.to_be_bytes());
7750 index.extend_from_slice(pack_checksum.as_bytes());
7751 let checksum =
7752 sley_core::digest_bytes(format, &index).expect("test operation should succeed");
7753 index.extend_from_slice(checksum.as_bytes());
7754 index
7755 }
7756
7757 fn single_entry_index_v1(
7758 format: ObjectFormat,
7759 oid: ObjectId,
7760 offset: u32,
7761 pack_checksum: ObjectId,
7762 ) -> Vec<u8> {
7763 let mut index = Vec::new();
7764 for idx in 0..256 {
7765 let count = if idx >= usize::from(oid.as_bytes()[0]) {
7766 1u32
7767 } else {
7768 0u32
7769 };
7770 index.extend_from_slice(&count.to_be_bytes());
7771 }
7772 index.extend_from_slice(&offset.to_be_bytes());
7773 index.extend_from_slice(oid.as_bytes());
7774 index.extend_from_slice(pack_checksum.as_bytes());
7775 let checksum =
7776 sley_core::digest_bytes(format, &index).expect("test operation should succeed");
7777 index.extend_from_slice(checksum.as_bytes());
7778 index
7779 }
7780
7781 fn pack_reverse_index(
7782 format: ObjectFormat,
7783 positions: &[u32],
7784 pack_checksum: ObjectId,
7785 ) -> Vec<u8> {
7786 let mut reverse_index = Vec::new();
7787 reverse_index.extend_from_slice(b"RIDX");
7788 reverse_index.extend_from_slice(&1u32.to_be_bytes());
7789 reverse_index.extend_from_slice(&hash_function_id(format).to_be_bytes());
7790 for position in positions {
7791 reverse_index.extend_from_slice(&position.to_be_bytes());
7792 }
7793 reverse_index.extend_from_slice(pack_checksum.as_bytes());
7794 let checksum =
7795 sley_core::digest_bytes(format, &reverse_index).expect("test operation should succeed");
7796 reverse_index.extend_from_slice(checksum.as_bytes());
7797 reverse_index
7798 }
7799
7800 fn pack_mtimes(format: ObjectFormat, mtimes: &[u32], pack_checksum: ObjectId) -> Vec<u8> {
7801 let mut out = Vec::new();
7802 out.extend_from_slice(b"MTME");
7803 out.extend_from_slice(&1u32.to_be_bytes());
7804 out.extend_from_slice(&hash_function_id(format).to_be_bytes());
7805 for mtime in mtimes {
7806 out.extend_from_slice(&mtime.to_be_bytes());
7807 }
7808 out.extend_from_slice(pack_checksum.as_bytes());
7809 let checksum =
7810 sley_core::digest_bytes(format, &out).expect("test operation should succeed");
7811 out.extend_from_slice(checksum.as_bytes());
7812 out
7813 }
7814
7815 fn midx_chunks_with_pack_names(
7816 _format: ObjectFormat,
7817 pack_names: Vec<u8>,
7818 entries: &[(ObjectId, u32, u64)],
7819 ) -> Vec<([u8; 4], Vec<u8>)> {
7820 let mut entries = entries.to_vec();
7821 entries.sort_by(|left, right| left.0.as_bytes().cmp(right.0.as_bytes()));
7822 let object_ids: Vec<ObjectId> = entries.iter().map(|entry| entry.0).collect();
7823 let mut large_offsets = Vec::new();
7824 let mut chunks = vec![
7825 (*b"PNAM", pack_names),
7826 (*b"OIDF", midx_oid_fanout(&object_ids)),
7827 (*b"OIDL", midx_oid_lookup(&object_ids)),
7828 (
7829 *b"OOFF",
7830 midx_ooff_entries(
7831 &entries
7832 .iter()
7833 .map(|(_oid, pack_int_id, offset)| (*pack_int_id, *offset))
7834 .collect::<Vec<_>>(),
7835 &mut large_offsets,
7836 ),
7837 ),
7838 ];
7839 if !large_offsets.is_empty() {
7840 chunks.push((*b"LOFF", large_offsets));
7841 }
7842 chunks
7843 }
7844
7845 fn midx_oid_fanout(object_ids: &[ObjectId]) -> Vec<u8> {
7846 let mut counts = [0u32; 256];
7847 for oid in object_ids {
7848 counts[oid.as_bytes()[0] as usize] += 1;
7849 }
7850 let mut running = 0u32;
7851 let mut out = Vec::new();
7852 for count in counts {
7853 running += count;
7854 out.extend_from_slice(&running.to_be_bytes());
7855 }
7856 out
7857 }
7858
7859 fn midx_oid_lookup(object_ids: &[ObjectId]) -> Vec<u8> {
7860 let mut out = Vec::new();
7861 for oid in object_ids {
7862 out.extend_from_slice(oid.as_bytes());
7863 }
7864 out
7865 }
7866
/// Serialises `(pack id, offset)` pairs as two big-endian `u32` fields each.
///
/// Offsets below `0x8000_0000` are stored directly. Larger offsets are
/// appended to `large_offsets` as big-endian `u64`, and the field instead
/// holds the index of that slot with the top bit set.
fn midx_ooff_entries(entries: &[(u32, u64)], large_offsets: &mut Vec<u8>) -> Vec<u8> {
    let mut table = Vec::with_capacity(entries.len() * 8);
    for &(pack_int_id, offset) in entries {
        let offset_field = if offset >= 0x8000_0000 {
            // Each spilled offset occupies eight bytes, so the slot index is
            // the current byte length divided by eight.
            let slot = (large_offsets.len() / 8) as u32;
            large_offsets.extend_from_slice(&offset.to_be_bytes());
            0x8000_0000 | slot
        } else {
            offset as u32
        };
        table.extend_from_slice(&pack_int_id.to_be_bytes());
        table.extend_from_slice(&offset_field.to_be_bytes());
    }
    table
}
7881
/// Serialises `values` as consecutive big-endian `u32`s.
fn midx_u32_table(values: &[u32]) -> Vec<u8> {
    values
        .iter()
        .flat_map(|value| value.to_be_bytes())
        .collect()
}
7889
/// Serialises `(bitmap_pos, bitmap_nr)` pairs as two consecutive big-endian
/// `u32`s per pair.
fn midx_bitmap_packs(entries: &[(u32, u32)]) -> Vec<u8> {
    let mut table = Vec::with_capacity(entries.len() * 8);
    for &(bitmap_pos, bitmap_nr) in entries {
        for field in [bitmap_pos, bitmap_nr] {
            table.extend_from_slice(&field.to_be_bytes());
        }
    }
    table
}
7898
7899 fn multi_pack_index(
7900 format: ObjectFormat,
7901 version: u8,
7902 pack_count: u32,
7903 chunks: &[([u8; 4], Vec<u8>)],
7904 ) -> Vec<u8> {
7905 let lookup_len = (chunks.len() + 1) * 12;
7906 let mut out = Vec::new();
7907 out.extend_from_slice(b"MIDX");
7908 out.push(version);
7909 out.push(hash_function_id(format) as u8);
7910 out.push(chunks.len() as u8);
7911 out.push(0);
7912 out.extend_from_slice(&pack_count.to_be_bytes());
7913 let mut chunk_offset = (12 + lookup_len) as u64;
7914 for (id, data) in chunks {
7915 out.extend_from_slice(id);
7916 out.extend_from_slice(&chunk_offset.to_be_bytes());
7917 chunk_offset += data.len() as u64;
7918 }
7919 out.extend_from_slice(&[0, 0, 0, 0]);
7920 out.extend_from_slice(&chunk_offset.to_be_bytes());
7921 for (_id, data) in chunks {
7922 out.extend_from_slice(data);
7923 }
7924 let checksum =
7925 sley_core::digest_bytes(format, &out).expect("test operation should succeed");
7926 out.extend_from_slice(checksum.as_bytes());
7927 out
7928 }
7929
7930 fn pack_checksum_sha1() -> ObjectId {
7933 sley_core::digest_bytes(ObjectFormat::Sha1, b"pack").expect("test operation should succeed")
7934 }
7935
7936 fn parse_ewah_bytes(bytes: &[u8]) -> EwahBitmap {
7937 let mut offset = 0usize;
7940 let checksum_offset = bytes.len();
7941 parse_bitmap_ewah(bytes, &mut offset, checksum_offset, 0)
7942 .expect("test operation should succeed")
7943 }
7944
/// A single literal word must encode to the same words the
/// `ewah_literal_words` helper produces.
#[test]
fn ewah_encodes_single_literal_word_matching_helper() {
    let literal = [0b101u64];
    let bitmap = EwahBitmap::from_words(64, &literal).expect("test operation should succeed");
    let expected_words = ewah_literal_words(&literal);
    assert_eq!(bitmap.words, expected_words);
    assert_eq!(bitmap.rlw_position, 0);
    assert_eq!(bitmap.bit_size, 64);
}
7955
/// The serialised form must be big-endian throughout: a `u32` of 64, a `u32`
/// of 2, two `u64` words, and a trailing zero `u32`.
#[test]
fn ewah_byte_layout_is_big_endian() {
    let literal = 0x0102_0304_0506_0708u64;
    let ewah = EwahBitmap::from_words(64, &[literal]).expect("test operation should succeed");

    let bit_size = 64u32.to_be_bytes();
    let word_count = 2u32.to_be_bytes();
    let header_word = (1u64 << 33).to_be_bytes();
    let literal_word = literal.to_be_bytes();
    let trailing_u32 = 0u32.to_be_bytes();
    let expected: Vec<u8> = [
        &bit_size[..],
        &word_count[..],
        &header_word[..],
        &literal_word[..],
        &trailing_u32[..],
    ]
    .concat();

    assert_eq!(ewah.to_bytes(), expected);
}
7969
/// An empty bitmap serialises to twelve zero bytes, reparses to an equal
/// bitmap, and has no set positions.
#[test]
fn ewah_empty_bitmap_serialises_like_git() {
    let empty = EwahBitmap::empty();
    let serialised = empty.to_bytes();
    assert_eq!(serialised, vec![0u8; 12]);

    let reparsed = parse_ewah_bytes(&serialised);
    assert_eq!(reparsed, empty);

    let positions = reparsed
        .to_positions()
        .expect("test operation should succeed");
    assert!(positions.is_empty());
}
7986
/// Three all-zero words followed by one literal must collapse into a single
/// header word (run of 3, one literal) plus the literal itself.
#[test]
fn ewah_compresses_clean_zero_run() {
    let input = [0u64, 0, 0, 0b1];
    let ewah = EwahBitmap::from_words(256, &input).expect("test operation should succeed");
    assert_eq!(ewah.words.len(), 2, "expected one RLW plus one literal");

    // Pull the three header fields apart before checking them.
    let rlw = ewah.words[0];
    let run_bit = rlw & 1;
    let run_length = (rlw >> 1) & 0xffff_ffff;
    let literal_count = rlw >> 33;
    assert_eq!(run_bit, 0, "run bit should be zero");
    assert_eq!(run_length, 3, "run length should be 3");
    assert_eq!(literal_count, 1, "literal length should be 1");
    assert_eq!(ewah.words[1], 0b1);
}
8000
/// Three all-ones words must collapse into a single header word with the run
/// bit set, a run length of 3 and no literals.
#[test]
fn ewah_compresses_clean_ones_run() {
    let input = [u64::MAX; 3];
    let ewah = EwahBitmap::from_words(192, &input).expect("test operation should succeed");
    assert_eq!(ewah.words.len(), 1);

    let rlw = ewah.words[0];
    let run_bit = rlw & 1;
    let run_length = (rlw >> 1) & 0xffff_ffff;
    let literal_count = rlw >> 33;
    assert_eq!(run_bit, 1, "run bit should be one");
    assert_eq!(run_length, 3, "run length should be 3");
    assert_eq!(literal_count, 0, "no literals");
}
8012
/// A mix of zero runs, one runs and literal words must decode back to
/// exactly the words that were encoded.
#[test]
fn ewah_run_then_literal_then_run_roundtrips() {
    let words: Vec<u64> = vec![0, 0, 0xdead_beef, u64::MAX, u64::MAX, 0, 0xabc];
    let bit_size = (words.len() * 64) as u32;
    let ewah = EwahBitmap::from_words(bit_size, &words).expect("test operation should succeed");

    let decoded = ewah.to_words().expect("test operation should succeed");
    assert_eq!(decoded, words);
}
8023
/// With a bit size of 1, the three all-zero words after the first word must
/// not appear in the decoded output.
#[test]
fn ewah_drops_trailing_clean_zero_words() {
    let words: Vec<u64> = vec![0b1, 0, 0, 0];
    let ewah = EwahBitmap::from_words(1, &words).expect("test operation should succeed");
    assert_eq!(ewah.bit_size, 1);

    let decoded = ewah.to_words().expect("test operation should succeed");
    assert_eq!(decoded, vec![0b1]);
}
8037
/// Positions spread across several words must come back unchanged once the
/// decoded list is sorted.
#[test]
fn ewah_from_positions_roundtrips_via_positions() {
    let positions = [0u32, 1, 63, 64, 65, 200, 511];
    let bitmap =
        EwahBitmap::from_positions(512, &positions).expect("test operation should succeed");

    let mut roundtripped = bitmap
        .to_positions()
        .expect("test operation should succeed");
    roundtripped.sort_unstable();
    assert_eq!(roundtripped, positions);
}
8047
/// Repeated, unordered input positions must decode as each position once, in
/// ascending order.
#[test]
fn ewah_from_positions_dedupes_and_orders() {
    let noisy_positions = [100, 5, 100, 5, 5];
    let bitmap = EwahBitmap::from_positions(128, &noisy_positions)
        .expect("test operation should succeed");

    let decoded = bitmap
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(decoded, vec![5, 100]);
}
8057
/// A zero run of `0xffff_ffff` words followed by five more must be split
/// across two header words, with the second one recorded as the current
/// header position.
#[test]
fn ewah_huge_zero_run_spans_multiple_rlws() {
    // Extracts the run-length field of a header word.
    let run_length = |word: u64| (word >> 1) & 0xffff_ffff;

    let mut builder = EwahBuilder::new(0);
    builder.add_empty_words(false, 0xffff_ffff);
    builder.add_empty_words(false, 5);
    let ewah = builder.finish().expect("test operation should succeed");

    assert_eq!(ewah.words.len(), 2, "run split across two RLWs");
    let (first, second) = (ewah.words[0], ewah.words[1]);
    assert_eq!(run_length(first), 0xffff_ffff);
    assert_eq!(second & 1, 0);
    assert_eq!(run_length(second), 5);
    assert_eq!(ewah.rlw_position, 1);
}
8074
/// A bit size of 65 cannot be backed by a single 64-bit word, so
/// construction must fail.
#[test]
fn ewah_from_words_rejects_oversized_bit_size() {
    let outcome = EwahBitmap::from_words(65, &[0]);
    assert!(outcome.is_err());
}
8080
/// Position 64 is outside a bitmap of 64 bits, so construction must fail.
#[test]
fn ewah_from_positions_rejects_out_of_range() {
    let outcome = EwahBitmap::from_positions(64, &[64]);
    assert!(outcome.is_err());
}
8085
/// Serialising a mixed bitmap and parsing the bytes back must give an equal
/// bitmap that decodes to the original words.
#[test]
fn ewah_serialised_bytes_reparse_to_equal_bitmap() {
    let words: Vec<u64> = vec![0, u64::MAX, 0x1234_5678_9abc_def0, 0, 0, 0xff];
    let bit_size = (words.len() * 64) as u32;
    let original =
        EwahBitmap::from_words(bit_size, &words).expect("test operation should succeed");

    let serialised = original.to_bytes();
    let reparsed = parse_ewah_bytes(&serialised);
    assert_eq!(reparsed, original);

    let decoded = reparsed.to_words().expect("test operation should succeed");
    assert_eq!(decoded, words);
}
8101
/// Writes a SHA-1 bitmap for a commit, a tree and a blob with one selected
/// commit, then checks every parsed field: header values, the four type
/// bitmaps, the single entry and the absent name-hash cache.
#[test]
fn pack_bitmap_index_write_parse_roundtrip_sha1() {
    let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
    let selected = [(0u32, 0u32, vec![1u32, 2u32])];
    let bytes = write_bitmap(
        ObjectFormat::Sha1,
        pack_checksum_sha1(),
        &object_types,
        &selected,
        None,
    )
    .expect("test operation should succeed");
    assert_eq!(&bytes[..4], b"BITM");

    let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
        .expect("test operation should succeed");

    // Header fields.
    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.options, PackBitmapIndex::OPTION_FULL_DAG);
    assert_eq!(parsed.pack_checksum, pack_checksum_sha1());

    // Type bitmaps: one object of each of the first three kinds, no tags.
    let type_bitmaps = &parsed.type_bitmaps;
    let commits = type_bitmaps
        .commits
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(commits, vec![0]);
    let trees = type_bitmaps
        .trees
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(trees, vec![1]);
    let blobs = type_bitmaps
        .blobs
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(blobs, vec![2]);
    let tags = type_bitmaps
        .tags
        .to_positions()
        .expect("test operation should succeed");
    assert!(tags.is_empty());

    // The single commit entry covers the commit itself plus what was listed.
    assert_eq!(parsed.entries.len(), 1);
    let entry = parsed
        .entry_for_index_position(0)
        .expect("test operation should succeed");
    assert_eq!(entry.xor_offset, 0);
    assert_eq!(entry.flags, 0);
    let covered = entry
        .bitmap
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(covered, vec![0, 1, 2]);

    assert_eq!(parsed.name_hash_cache, None);
}
8168
/// Same roundtrip with SHA-256: the parsed index must report the SHA-256
/// format for itself and its checksum, and keep the entry bitmap intact.
#[test]
fn pack_bitmap_index_write_parse_roundtrip_sha256() {
    let format = ObjectFormat::Sha256;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let object_types = [ObjectType::Commit, ObjectType::Tree];
    let selected = [(0u32, 0u32, vec![1u32])];

    let bytes = write_bitmap(
        ObjectFormat::Sha256,
        pack_checksum.clone(),
        &object_types,
        &selected,
        None,
    )
    .expect("test operation should succeed");
    let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha256, 2)
        .expect("test operation should succeed");

    assert_eq!(parsed.format, ObjectFormat::Sha256);
    assert_eq!(parsed.pack_checksum, pack_checksum);
    assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);

    let covered = parsed.entries[0]
        .bitmap
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(covered, vec![0, 1]);
}
8195
/// Supplying a name-hash cache must set the hash-cache option bit and
/// return the same cache values after parsing.
#[test]
fn pack_bitmap_index_write_includes_name_hash_cache() {
    let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
    let cache = vec![0x1111_1111u32, 0x2222_2222, 0x3333_3333];
    let selected = [(0u32, 0u32, vec![1u32, 2u32])];

    let bytes = write_bitmap(
        ObjectFormat::Sha1,
        pack_checksum_sha1(),
        &object_types,
        &selected,
        Some(cache.clone()),
    )
    .expect("test operation should succeed");
    let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
        .expect("test operation should succeed");

    let expected_options =
        PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE;
    assert_eq!(parsed.options, expected_options);
    assert_eq!(parsed.name_hash_cache, Some(cache));
}
8216
/// Two commits added to one writer must yield two entries, each covering the
/// commit itself plus the positions passed for it.
#[test]
fn pack_bitmap_writer_supports_multiple_commits() {
    let object_types = [
        ObjectType::Commit,
        ObjectType::Commit,
        ObjectType::Tree,
        ObjectType::Blob,
    ];
    let mut writer =
        PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
            .expect("test operation should succeed");
    writer
        .add_commit(0, 0, &[2, 3])
        .expect("test operation should succeed");
    writer
        .add_commit(1, 1, &[2])
        .expect("test operation should succeed");

    let bytes = writer.write().expect("test operation should succeed");
    let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 4)
        .expect("test operation should succeed");
    assert_eq!(parsed.entries.len(), 2);

    let commit_positions = parsed
        .type_bitmaps
        .commits
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(commit_positions, vec![0, 1]);

    let first = parsed
        .entry_for_index_position(0)
        .expect("test operation should succeed");
    let first_covered = first
        .bitmap
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(first_covered, vec![0, 2, 3]);

    let second = parsed
        .entry_for_index_position(1)
        .expect("test operation should succeed");
    let second_covered = second
        .bitmap
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(second_covered, vec![1, 2]);
}
8267
/// An index obtained from `build()` carries an all-zero checksum; after
/// replacing its entries and calling `write()`, the parsed result must carry
/// a non-zero one.
#[test]
fn pack_bitmap_index_recomputes_checksum_on_write() {
    let object_types = [ObjectType::Commit, ObjectType::Blob];
    let writer = PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
        .expect("test operation should succeed");
    let mut index = writer.build().expect("test operation should succeed");
    assert_eq!(index.index_checksum.as_bytes(), [0u8; 20]);

    // Swap in a hand-built entry so the written bytes differ from `build()`.
    let replacement = PackBitmapEntry {
        object_position: 0,
        xor_offset: 0,
        flags: 0,
        bitmap: EwahBitmap::from_positions(2, &[0, 1]).expect("test operation should succeed"),
    };
    index.entries.clear();
    index.entries.push(replacement);

    let bytes = index.write().expect("test operation should succeed");
    let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 2)
        .expect("test operation should succeed");
    assert_ne!(parsed.index_checksum.as_bytes(), [0u8; 20]);
}
8291
/// `add_commit` must fail for each of four argument combinations on a
/// two-object (commit, blob) writer.
#[test]
fn pack_bitmap_writer_rejects_non_commit_selection() {
    let object_types = [ObjectType::Commit, ObjectType::Blob];
    let mut writer =
        PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
            .expect("test operation should succeed");

    // Position 1 holds a blob, not a commit.
    let blob_selected = writer.add_commit(1, 1, &[]);
    assert!(blob_selected.is_err());

    // 5 exceeds the two-object range as the first argument...
    let first_out_of_range = writer.add_commit(5, 5, &[]);
    assert!(first_out_of_range.is_err());

    // ...as the second argument...
    let second_out_of_range = writer.add_commit(0, 5, &[]);
    assert!(second_out_of_range.is_err());

    // ...and 9 exceeds it inside the position list.
    let listed_out_of_range = writer.add_commit(0, 0, &[9]);
    assert!(listed_out_of_range.is_err());
}
8307
/// A SHA-1 writer must refuse a pack checksum produced with SHA-256.
#[test]
fn pack_bitmap_writer_rejects_checksum_format_mismatch() {
    let sha256_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
        .expect("test operation should succeed");
    let outcome =
        PackBitmapWriter::new(ObjectFormat::Sha1, sha256_checksum, &[ObjectType::Commit]);
    assert!(outcome.is_err());
}
8317
/// A name-hash cache with two values must be rejected by a writer that was
/// built for a single object.
#[test]
fn pack_bitmap_writer_rejects_bad_name_hash_cache_len() {
    let object_types = [ObjectType::Commit];
    let writer = PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
        .expect("test operation should succeed");
    let outcome = writer.with_name_hash_cache(vec![1, 2]);
    assert!(outcome.is_err());
}
8328
/// `write()` must fail when the hash-cache option bit and the presence of a
/// name-hash cache disagree, in either direction.
#[test]
fn pack_bitmap_index_write_rejects_inconsistent_cache_flag() {
    let object_types = [ObjectType::Commit];
    let writer = PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
        .expect("test operation should succeed");
    let mut index = writer.build().expect("test operation should succeed");

    // Option bit set, but no cache was ever attached.
    index.options |= PackBitmapIndex::OPTION_HASH_CACHE;
    let flag_without_cache = index.write();
    assert!(flag_without_cache.is_err());

    // Cache attached, but the option bit is cleared again.
    index.options = PackBitmapIndex::OPTION_FULL_DAG;
    index.name_hash_cache = Some(vec![0]);
    let cache_without_flag = index.write();
    assert!(cache_without_flag.is_err());
}
8347
/// Creates a real one-commit repository with the `git` binary, repacks it,
/// writes a bitmap for the resulting pack with `write_bitmap`, and checks
/// that the bytes parse back with matching checksum and full coverage.
///
/// The scratch repository is removed by a drop guard, so it is cleaned up
/// even when an assertion or `expect` below panics.
///
/// NOTE(review): despite the test name, the written bitmap is only re-read
/// with `PackBitmapIndex::parse`; `git` is not asked to read it here.
#[test]
fn write_bitmap_roundtrips_through_upstream_git_parser() {
    /// Deletes the wrapped directory tree when dropped.
    struct RemoveDirOnDrop(std::path::PathBuf);

    impl Drop for RemoveDirOnDrop {
        fn drop(&mut self) {
            // Best effort: a failed delete must not mask the test outcome.
            let _ = std::fs::remove_dir_all(&self.0);
        }
    }

    let root = unique_temp_dir("git-pack-bitmap-writer");
    let _cleanup = RemoveDirOnDrop(root.clone());
    fs::create_dir_all(&root).expect("test operation should succeed");

    // Build a repository with a single empty commit and pack it.
    run_git_success(&root, &["init", "-q", "-b", "main"]);
    run_git_success(
        &root,
        &[
            "-c",
            "user.name=Example User",
            "-c",
            "user.email=example@example.invalid",
            "commit",
            "--allow-empty",
            "-q",
            "-m",
            "one",
        ],
    );
    run_git_success(&root, &["repack", "-adb"]);

    // Load the index and pack that git produced.
    let pack_dir = root.join(".git").join("objects").join("pack");
    let idx_path = single_path_with_extension(&pack_dir, "idx");
    let index = PackIndex::parse(
        &fs::read(idx_path).expect("test operation should succeed"),
        ObjectFormat::Sha1,
    )
    .expect("test operation should succeed");
    let pack_path = single_path_with_extension(&pack_dir, "pack");
    let pack = PackFile::parse_sha1(&fs::read(pack_path).expect("test operation should succeed"))
        .expect("test operation should succeed");

    // Rank of each object when the objects are ordered by pack offset.
    let mut offsets: Vec<u64> = index.entries.iter().map(|entry| entry.offset).collect();
    offsets.sort_unstable();
    let position_of = |offset: u64| -> u32 {
        offsets
            .iter()
            .position(|value| *value == offset)
            .expect("test operation should succeed") as u32
    };

    // Object types indexed by that rank; objects not found in the parsed
    // pack keep the `Blob` placeholder.
    let mut object_types = vec![ObjectType::Blob; index.entries.len()];
    for entry in &index.entries {
        let position = position_of(entry.offset) as usize;
        if let Some(parsed) = pack
            .entries
            .iter()
            .find(|po| po.entry.offset == entry.offset)
        {
            object_types[position] = parsed.object.object_type;
        }
    }

    // Locate the commit both by offset rank and by index order.
    let commit_position = object_types
        .iter()
        .position(|ty| *ty == ObjectType::Commit)
        .expect("test operation should succeed") as u32;
    let commit_index_position = index
        .entries
        .iter()
        .position(|entry| position_of(entry.offset) == commit_position)
        .expect("test operation should succeed") as u32;

    // Mark every object in the pack as reachable from the commit.
    let reachable: Vec<u32> = (0..index.entries.len() as u32).collect();
    let bytes = write_bitmap(
        ObjectFormat::Sha1,
        index.pack_checksum.clone(),
        &object_types,
        &[(commit_position, commit_index_position, reachable)],
        None,
    )
    .expect("test operation should succeed");

    let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, index.entries.len())
        .expect("test operation should succeed");
    assert_eq!(parsed.pack_checksum, index.pack_checksum);
    assert_eq!(parsed.entries.len(), 1);
    assert_eq!(
        parsed.entries[0]
            .bitmap
            .to_positions()
            .expect("test operation should succeed")
            .len(),
        index.entries.len()
    );
}
8442}