1#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::{Compress, Compression, FlushCompress, Status};
6use sley_core::{GitError, ObjectFormat, ObjectId, Result};
7use sley_formats::Bundle;
8use sley_object::{EncodedObject, ObjectType};
9use std::borrow::Borrow;
10use std::cell::RefCell;
11use std::collections::{HashMap, HashSet};
12use std::fmt;
13use std::ops::Range;
14use std::sync::Arc;
15
/// Size and position metadata for one object stored in a pack.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackEntry {
    /// Id of the object.
    pub oid: ObjectId,
    /// Number of compressed bytes the entry's stream occupies in the pack.
    pub compressed_size: u64,
    /// Size declared in the entry header (the decoded body length for
    /// non-delta entries).
    pub uncompressed_size: u64,
    /// Byte offset of the entry header, measured from the start of the pack.
    pub offset: u64,
}
23
/// Value [`PackWriteOptions::new`] uses for `window`.
pub const DEFAULT_PACK_WINDOW: usize = 10;

/// Value [`PackWriteOptions::new`] uses for `depth`.
pub const DEFAULT_PACK_DEPTH: usize = 50;

// NOTE(review): presumably the object count below which payload compression
// stays single-threaded; the consumer is not in this part of the file.
const PACK_PARALLEL_COMPRESSION_MIN_OBJECTS: usize = 64;

// NOTE(review): presumably the upper bound on compression worker threads;
// confirm against the code that reads it.
const PACK_PARALLEL_COMPRESSION_MAX_THREADS: usize = 4;
47
/// Tunables consumed by the pack writer.
///
/// Build one with [`PackWriteOptions::new`] and adjust it with the `with_*`
/// builder methods.
#[derive(Debug, Clone)]
pub struct PackWriteOptions {
    /// Delta search window; defaults to [`DEFAULT_PACK_WINDOW`].
    pub window: usize,
    /// Delta chain depth limit; defaults to [`DEFAULT_PACK_DEPTH`].
    /// `write_undeltified` sets this to `0`.
    pub depth: usize,
    /// When `true`, deltas against in-pack bases are written as offset deltas
    /// (entry kind 6); otherwise as ref deltas (kind 7) naming the base id.
    pub prefer_ofs_delta: bool,
    /// Objects outside the pack that may serve as delta bases. Every key must
    /// use the same object format as the pack being written.
    pub thin_bases: HashMap<ObjectId, EncodedObject>,
    /// Whether the writer may emit objects in an order other than the input
    /// order. NOTE(review): exact ordering policy lives in the delta planner.
    pub reorder: bool,
}
76
/// `Default` is identical to [`PackWriteOptions::new`].
impl Default for PackWriteOptions {
    fn default() -> Self {
        Self::new()
    }
}
82
83impl PackWriteOptions {
84 pub fn new() -> Self {
88 Self {
89 window: DEFAULT_PACK_WINDOW,
90 depth: DEFAULT_PACK_DEPTH,
91 prefer_ofs_delta: true,
92 thin_bases: HashMap::new(),
93 reorder: true,
94 }
95 }
96
97 pub fn with_window(mut self, window: usize) -> Self {
99 self.window = window;
100 self
101 }
102
103 pub fn with_depth(mut self, depth: usize) -> Self {
105 self.depth = depth;
106 self
107 }
108
109 pub fn with_prefer_ofs_delta(mut self, prefer_ofs_delta: bool) -> Self {
112 self.prefer_ofs_delta = prefer_ofs_delta;
113 self
114 }
115
116 pub fn with_thin_bases(mut self, thin_bases: HashMap<ObjectId, EncodedObject>) -> Self {
118 self.thin_bases = thin_bases;
119 self
120 }
121
122 pub fn with_reorder(mut self, reorder: bool) -> Self {
125 self.reorder = reorder;
126 self
127 }
128}
129
/// Switches describing how a repack should behave.
///
/// NOTE(review): no consumer is visible in this part of the file; the field
/// descriptions below are inferred from the names and should be confirmed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepackPolicy {
    /// Presumably: write bitmap indexes alongside the pack.
    pub write_bitmaps: bool,
    /// Presumably: collect unreachable objects into cruft packs.
    pub cruft_packs: bool,
    /// Presumably: factor for geometric repacking; `None` disables it.
    pub geometric_factor: Option<u8>,
}
136
/// A fully parsed pack: every entry decoded and every delta resolved.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackFile {
    /// Pack format version from the header (the parser accepts 2 and 3).
    pub version: u32,
    /// The resolved objects contained in the pack.
    pub entries: Vec<PackObject>,
    /// Digest of everything before the trailer; verified against the trailer
    /// during parsing.
    pub checksum: ObjectId,
}
143
/// One resolved object from a pack together with its pack metadata.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackObject {
    /// Where and how large the entry is inside the pack.
    pub entry: PackEntry,
    /// The decoded object (type and body).
    pub object: EncodedObject,
}
149
/// Per-object statistics produced by [`PackFile::verify_pack_stats`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackVerifyStat {
    /// Id of the resolved object.
    pub oid: ObjectId,
    /// Type of the resolved object.
    pub object_type: ObjectType,
    /// Size declared in the on-disk entry header. For delta entries this is
    /// the size of the delta, not of the resolved object.
    pub size: u64,
    /// Bytes from this entry's offset to the next entry (or to the trailer
    /// for the last entry).
    pub size_in_pack: u64,
    /// Byte offset of the entry within the pack.
    pub offset: u64,
    /// `0` for non-delta entries; otherwise one more than the depth of the
    /// base, or `1` when the base is not found in the pack.
    pub delta_depth: u32,
    /// Id of the delta base, when the entry is stored as a delta and the base
    /// id is known.
    pub base_oid: Option<ObjectId>,
}
171
/// Result of [`PackFile::verify_pack_stats`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackVerifyStats {
    /// One record per pack entry, sorted by pack offset.
    pub objects: Vec<PackVerifyStat>,
    /// Checksum of the verified pack.
    pub checksum: ObjectId,
}
179
/// A serialized pack together with its index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackWrite {
    /// The complete pack bytes, trailer included.
    pub pack: Vec<u8>,
    /// The serialized (version 2) pack index for `pack`.
    pub index: Vec<u8>,
    /// The pack checksum (also the pack's trailer).
    pub checksum: ObjectId,
    /// Index entries for the objects in the pack.
    pub entries: Vec<PackIndexEntry>,
}
187
/// A borrowed object paired with an id the caller already knows, so the
/// writer does not have to hash the object again.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PackInput<'a> {
    /// Id of `object`; must use the pack's object format.
    pub oid: &'a ObjectId,
    /// The object to write.
    pub object: &'a EncodedObject,
}
193
/// Output of building an index for an existing pack
/// (see `PackIndex::write_v2_for_pack`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexBuild {
    /// Serialized index bytes.
    pub index: Vec<u8>,
    /// Checksum of the pack the index describes.
    pub pack_checksum: ObjectId,
    /// The entries recorded in the index.
    pub entries: Vec<PackIndexEntry>,
}
200
/// An owned, fully decoded pack index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndex {
    /// Index format version.
    pub version: u32,
    /// Fan-out table: 256 cumulative counts keyed by the first id byte.
    pub fanout: [u32; 256],
    /// All index entries.
    pub entries: Vec<PackIndexEntry>,
    /// Checksum of the pack this index belongs to.
    pub pack_checksum: ObjectId,
    /// Checksum of the index itself.
    pub index_checksum: ObjectId,
}
209
/// A zero-copy view over serialized pack index bytes.
///
/// Lookups read directly from the borrowed buffer instead of materializing
/// every entry.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexView<'a> {
    /// Index format version (`1` or `2`).
    pub version: u32,
    /// Number of objects in the index (the last fan-out slot).
    pub count: usize,
    /// Fan-out table: 256 cumulative counts keyed by the first id byte.
    pub fanout: [u32; 256],
    /// Checksum of the pack this index belongs to.
    pub pack_checksum: ObjectId,
    /// Checksum stored at the end of the index.
    pub index_checksum: ObjectId,
    // The raw index bytes that all table ranges point into.
    bytes: &'a [u8],
    // Object format the index was parsed with; fixes the id length.
    format: ObjectFormat,
    // Byte ranges of the version-specific tables inside `bytes`.
    tables: PackIndexViewTables,
}
221
/// A shareable owner of serialized pack index bytes.
///
/// Implemented for every `AsRef<[u8]> + Debug + Send + Sync` type through a
/// blanket impl.
pub trait PackIndexByteSource: fmt::Debug + Send + Sync {
    /// Returns the index bytes.
    fn as_bytes(&self) -> &[u8];
}
225
226impl<T> PackIndexByteSource for T
227where
228 T: AsRef<[u8]> + fmt::Debug + Send + Sync + ?Sized,
229{
230 fn as_bytes(&self) -> &[u8] {
231 self.as_ref()
232 }
233}
234
/// Adapter that lets an `Arc<[u8]>` act as a [`PackIndexByteSource`].
#[derive(Debug)]
struct SharedIndexBytes(Arc<[u8]>);
237
238impl PackIndexByteSource for SharedIndexBytes {
239 fn as_bytes(&self) -> &[u8] {
240 self.0.as_ref()
241 }
242}
243
/// Owning counterpart of [`PackIndexView`]: the same parsed header data, but
/// the index bytes are held through a shared [`PackIndexByteSource`].
#[derive(Debug, Clone)]
pub struct PackIndexViewData {
    /// Index format version.
    pub version: u32,
    /// Number of objects in the index.
    pub count: usize,
    /// Fan-out table: 256 cumulative counts keyed by the first id byte.
    pub fanout: [u32; 256],
    /// Checksum of the pack this index belongs to.
    pub pack_checksum: ObjectId,
    /// Checksum stored at the end of the index.
    pub index_checksum: ObjectId,
    // Shared owner of the raw index bytes.
    bytes: Arc<dyn PackIndexByteSource>,
    // Object format the index was parsed with.
    format: ObjectFormat,
    // Byte ranges of the version-specific tables inside the index bytes.
    tables: PackIndexViewTables,
}
255
/// One record of a pack index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexEntry {
    /// Id of the object.
    pub oid: ObjectId,
    /// CRC-32 of the entry's on-disk bytes (header, base reference and
    /// compressed payload).
    pub crc32: u32,
    /// Byte offset of the entry within the pack.
    pub offset: u64,
}
262
/// Result of looking an object id up in a pack index view.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PackIndexLookup {
    /// CRC-32 recorded for the entry; reported as `0` for version 1 indexes.
    pub crc32: u32,
    /// Byte offset of the entry within the pack.
    pub offset: u64,
}
268
/// Byte ranges of the tables inside a serialized pack index.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PackIndexViewTables {
    /// Version 1 layout: a single table of fixed-size records, each a 4-byte
    /// pack offset followed by the object id.
    V1 {
        entry_table: Range<usize>,
    },
    /// Version 2 layout: separate parallel tables.
    V2 {
        /// Object ids, one per object.
        oid_table: Range<usize>,
        /// 4-byte CRC-32 values, one per object.
        crc_table: Range<usize>,
        /// 4-byte offset words, one per object.
        small_offset_table: Range<usize>,
        /// 8-byte offsets used by words whose top bit is set.
        large_offset_table: Range<usize>,
    },
}
281
/// Decoded pack reverse index.
///
/// NOTE(review): the parser is outside this part of the file; field meanings
/// are inferred from their names.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackReverseIndex {
    /// Reverse-index format version.
    pub version: u32,
    /// Object format of the associated pack.
    pub format: ObjectFormat,
    /// Presumably index positions listed in pack-offset order — confirm.
    pub positions: Vec<u32>,
    /// Checksum of the pack this file belongs to.
    pub pack_checksum: ObjectId,
    /// Checksum of this file.
    pub index_checksum: ObjectId,
}
290
/// Decoded per-object modification time table for a pack.
///
/// NOTE(review): the parser is outside this part of the file; field meanings
/// are inferred from their names.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackMtimes {
    /// Format version.
    pub version: u32,
    /// Object format of the associated pack.
    pub format: ObjectFormat,
    /// One modification time per object (units and ordering not visible
    /// here — confirm).
    pub mtimes: Vec<u32>,
    /// Checksum of the pack this file belongs to.
    pub pack_checksum: ObjectId,
    /// Checksum of this file.
    pub index_checksum: ObjectId,
}
299
/// Decoded pack bitmap index.
///
/// NOTE(review): the parser is outside this part of the file; field meanings
/// are inferred from their names.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapIndex {
    /// Bitmap format version.
    pub version: u16,
    /// Object format of the associated pack.
    pub format: ObjectFormat,
    /// Raw option flags from the header.
    pub options: u16,
    /// Checksum of the pack this bitmap belongs to.
    pub pack_checksum: ObjectId,
    /// Checksum of the bitmap file.
    pub index_checksum: ObjectId,
    /// One bitmap per object type.
    pub type_bitmaps: PackBitmapTypeBitmaps,
    /// The stored bitmap entries.
    pub entries: Vec<PackBitmapEntry>,
    /// Optional name-hash cache, when present in the file.
    pub name_hash_cache: Option<Vec<u32>>,
}
311
/// The four per-type bitmaps of a pack bitmap index, one per object type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapTypeBitmaps {
    pub commits: EwahBitmap,
    pub trees: EwahBitmap,
    pub blobs: EwahBitmap,
    pub tags: EwahBitmap,
}
319
/// One stored bitmap of a pack bitmap index.
///
/// NOTE(review): field meanings are inferred from their names; the parser is
/// outside this part of the file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapEntry {
    /// Position of the object this bitmap belongs to.
    pub object_position: u32,
    /// Presumably how many entries back the XOR base bitmap lives; confirm.
    pub xor_offset: u8,
    /// Raw entry flags.
    pub flags: u8,
    /// The bitmap itself, as stored.
    pub bitmap: EwahBitmap,
}
333
/// A compressed bitmap kept in its serialized word form.
///
/// NOTE(review): presumably the EWAH encoding used by git bitmap files; the
/// codec is outside this part of the file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EwahBitmap {
    /// Number of bits the bitmap represents.
    pub bit_size: u32,
    /// The compressed words.
    pub words: Vec<u64>,
    /// Presumably the position of the last running-length word — confirm.
    pub rlw_position: u32,
}
340
/// A fully decoded multi-pack index.
///
/// NOTE(review): the parser is outside this part of the file; field meanings
/// are inferred from their names and types.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndex {
    /// Format version.
    pub version: u8,
    /// Object format of the indexed packs.
    pub format: ObjectFormat,
    /// Number of packs covered.
    pub pack_count: u32,
    /// Names of the covered packs.
    pub pack_names: Vec<String>,
    /// Number of objects covered.
    pub object_count: u32,
    /// Fan-out table: 256 cumulative counts keyed by the first id byte.
    pub fanout: [u32; 256],
    /// One entry per object.
    pub objects: Vec<MultiPackIndexEntry>,
    /// Optional reverse index, when present.
    pub reverse_index: Option<Vec<u32>>,
    /// Optional per-pack bitmap ranges, when present.
    pub bitmapped_packs: Option<Vec<MultiPackBitmapPack>>,
    /// Directory of the chunks found in the file.
    pub chunks: Vec<MultiPackIndexChunk>,
    /// Checksum of the file.
    pub checksum: ObjectId,
}
355
/// Lookup-oriented handle on a multi-pack index that keeps the raw bytes and
/// the positions of the tables instead of decoded entries.
///
/// NOTE(review): the methods using these fields are outside this part of the
/// file; the descriptions follow the field names.
#[derive(Debug, Clone)]
pub struct MultiPackIndexOidLookup {
    // Object format of the indexed packs.
    format: ObjectFormat,
    // Number of packs covered.
    pack_count: u32,
    // Names of the covered packs.
    pack_names: Vec<String>,
    // Fan-out table keyed by the first id byte.
    fanout: [u32; 256],
    // Number of objects covered.
    object_count: usize,
    // Start of the object id table within `bytes`.
    oid_lookup_offset: usize,
    // Start of the object offset table within `bytes`.
    object_offsets_offset: usize,
    // Start of the large offset table within `bytes`, if there is one.
    large_offsets_offset: Option<usize>,
    // Length of the large offset table.
    large_offsets_len: usize,
    // Shared raw index bytes.
    bytes: Arc<Vec<u8>>,
}
369
/// One object record of a multi-pack index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndexEntry {
    /// Id of the object.
    pub oid: ObjectId,
    /// Numeric id of the pack holding the object.
    pub pack_int_id: u32,
    /// Byte offset of the object within that pack.
    pub offset: u64,
}
376
/// Bitmap range recorded for one pack of a multi-pack index.
///
/// NOTE(review): presumably the first bitmap position and the number of
/// positions belonging to the pack — confirm against the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackBitmapPack {
    pub bitmap_pos: u32,
    pub bitmap_nr: u32,
}
382
/// Directory record for one chunk of a multi-pack index file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndexChunk {
    /// Four-byte chunk identifier.
    pub id: [u8; 4],
    /// Offset of the chunk.
    pub offset: u64,
    /// Length of the chunk in bytes.
    pub len: u64,
}
389
/// Kind recorded in a pack entry header: the four object types plus the two
/// delta encodings.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PackObjectKind {
    Commit,
    Tree,
    Blob,
    Tag,
    /// Delta whose base is addressed by a pack offset.
    OfsDelta,
    /// Delta whose base is addressed by object id.
    RefDelta,
}
399
/// A pack entry after decompression but before delta resolution.
#[derive(Debug, Clone, PartialEq, Eq)]
enum ParsedPackEntry {
    /// A non-delta entry; the object is already complete.
    Resolved(PackObject),
    /// A delta entry that still has to be applied to its base.
    Delta {
        /// How the base is addressed.
        base: DeltaBase,
        /// Compressed bytes consumed by the delta stream.
        compressed_size: u64,
        /// Size declared in the entry header (the decoded delta length).
        delta_size: u64,
        /// Byte offset of the entry header within the pack.
        offset: u64,
        /// The decoded delta instructions.
        delta: Vec<u8>,
    },
}
411
/// Reference from a delta entry to its base object.
#[derive(Debug, Clone, PartialEq, Eq)]
enum DeltaBase {
    /// Pack offset of the base entry (from an offset delta).
    Offset(u64),
    /// Object id of the base (from a ref delta).
    Ref(ObjectId),
}
417
/// Raw header facts about one pack entry, collected by
/// `PackFile::verify_pack_stats` while re-walking the pack.
struct OnDiskEntry {
    /// Byte offset of the entry header within the pack.
    offset: u64,
    /// Delta base reference, or `None` for non-delta entries.
    base: Option<DeltaBase>,
    /// Size declared in the entry header.
    stream_size: u64,
}
426
427impl PackFile {
    /// Parses a self-contained SHA-1 pack. Shorthand for
    /// [`PackFile::parse`] with [`ObjectFormat::Sha1`].
    pub fn parse_sha1(bytes: &[u8]) -> Result<Self> {
        Self::parse(bytes, ObjectFormat::Sha1)
    }
431
    /// Parses a pack using the given object format.
    ///
    /// No external delta bases are offered: the lookup callback always
    /// answers `Ok(None)`.
    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
        Self::parse_with_base(bytes, format, |_| Ok(None))
    }
435
    /// Parses the pack carried by `bundle`, using the bundle's object format.
    pub fn parse_bundle(bundle: &Bundle) -> Result<Self> {
        Self::parse(&bundle.pack, bundle.format)
    }
439
440 pub fn index_pack(bytes: &[u8], format: ObjectFormat) -> Result<PackWrite> {
441 let PackIndexBuild {
442 index,
443 pack_checksum,
444 entries,
445 } = PackIndex::write_v2_for_pack(bytes, format)?;
446 Ok(PackWrite {
447 pack: bytes.to_vec(),
448 index,
449 checksum: pack_checksum,
450 entries,
451 })
452 }
453
    /// Parses a pack whose deltas may refer to objects outside the pack.
    ///
    /// `external_base` is handed to the delta resolver so bases that are not
    /// in the pack can be looked up by id; it returns `Ok(None)` when the
    /// object is unknown.
    pub fn parse_thin<F>(bytes: &[u8], format: ObjectFormat, external_base: F) -> Result<Self>
    where
        F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
    {
        Self::parse_with_base(bytes, format, external_base)
    }
460
461 fn parse_with_base<F>(bytes: &[u8], format: ObjectFormat, mut external_base: F) -> Result<Self>
462 where
463 F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
464 {
465 let trailer_len = format.raw_len();
466 if bytes.len() < 12 + trailer_len {
467 return Err(GitError::InvalidFormat("pack file too short".into()));
468 }
469 let trailer_offset = bytes.len() - trailer_len;
470 let checksum = sley_core::digest_bytes(format, &bytes[..trailer_offset])?;
471 let expected = ObjectId::from_raw(format, &bytes[trailer_offset..])?;
472 if checksum != expected {
473 return Err(GitError::InvalidFormat(format!(
474 "pack checksum mismatch: expected {expected}, got {checksum}"
475 )));
476 }
477
478 if &bytes[..4] != b"PACK" {
479 return Err(GitError::InvalidFormat("missing PACK signature".into()));
480 }
481 let version = u32_be(&bytes[4..8]);
482 if version != 2 && version != 3 {
483 return Err(GitError::Unsupported(format!("pack version {version}")));
484 }
485 let count = u32_be(&bytes[8..12]) as usize;
486 let mut offset = 12usize;
487 let mut entries = Vec::with_capacity(count);
488 for _ in 0..count {
489 let entry_offset = offset;
490 let header = parse_entry_header(bytes, &mut offset)?;
491 let base =
492 match header.kind {
493 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
494 parse_ofs_delta_base_offset(bytes, &mut offset, entry_offset as u64)?,
495 )),
496 PackObjectKind::RefDelta => {
497 let hash_len = format.raw_len();
498 if offset + hash_len > trailer_offset {
499 return Err(GitError::InvalidFormat(
500 "truncated ref-delta base object id".into(),
501 ));
502 }
503 let oid = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
504 offset += hash_len;
505 Some(DeltaBase::Ref(oid))
506 }
507 _ => None,
508 };
509 let mut body = Vec::new();
510 let consumed = inflate_into(
511 &bytes[offset..trailer_offset],
512 &mut body,
513 header.size.min(usize::MAX as u64) as usize,
514 )?;
515 if body.len() as u64 != header.size {
516 return Err(GitError::InvalidObject(format!(
517 "pack object declared {} bytes, decoded {}",
518 header.size,
519 body.len()
520 )));
521 }
522 if consumed == 0 {
523 return Err(GitError::InvalidFormat(
524 "empty compressed pack entry".into(),
525 ));
526 }
527 offset = offset
528 .checked_add(consumed)
529 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
530 if offset > trailer_offset {
531 return Err(GitError::InvalidFormat(
532 "pack entry extends past checksum".into(),
533 ));
534 }
535 if let Some(base) = base {
536 entries.push(ParsedPackEntry::Delta {
537 base,
538 compressed_size: consumed as u64,
539 delta_size: header.size,
540 offset: entry_offset as u64,
541 delta: body,
542 });
543 } else {
544 let object_type = match header.kind {
545 PackObjectKind::Commit => ObjectType::Commit,
546 PackObjectKind::Tree => ObjectType::Tree,
547 PackObjectKind::Blob => ObjectType::Blob,
548 PackObjectKind::Tag => ObjectType::Tag,
549 PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
550 };
551 let object = EncodedObject::new(object_type, body);
552 let oid = object.object_id(format)?;
553 entries.push(ParsedPackEntry::Resolved(PackObject {
554 entry: PackEntry {
555 oid,
556 compressed_size: consumed as u64,
557 uncompressed_size: header.size,
558 offset: entry_offset as u64,
559 },
560 object,
561 }));
562 }
563 }
564 if offset != trailer_offset {
565 return Err(GitError::InvalidFormat(format!(
566 "pack has {} trailing bytes before checksum",
567 trailer_offset - offset
568 )));
569 }
570 Ok(Self {
571 version,
572 entries: resolve_pack_entries(entries, format, &mut external_base)?,
573 checksum,
574 })
575 }
576
    /// Verifies a pack and reports per-object statistics.
    ///
    /// The pack is first parsed (and thereby checksum-verified and fully
    /// resolved) with [`PackFile::parse`]. It is then walked a second time to
    /// recover the on-disk facts the resolved objects no longer carry:
    /// whether an entry is a delta, what its base is, and the size declared
    /// in its header.
    ///
    /// # Errors
    /// Propagates parse errors, and fails if an on-disk entry has no resolved
    /// object at the same offset.
    pub fn verify_pack_stats(bytes: &[u8], format: ObjectFormat) -> Result<PackVerifyStats> {
        // A successful parse guarantees the buffer holds a header and a
        // trailer, so the arithmetic and slicing below are in range.
        let pack = Self::parse(bytes, format)?;

        let trailer_len = format.raw_len();
        let trailer_offset = bytes.len() - trailer_len;
        let count = u32_be(&bytes[8..12]) as usize;
        let mut offset = 12usize;
        let mut on_disk: Vec<OnDiskEntry> = Vec::with_capacity(count);
        // Second walk: record offset, base reference and declared size.
        for _ in 0..count {
            let entry_offset = offset as u64;
            let header = parse_entry_header(bytes, &mut offset)?;
            let stream_size = header.size;
            let base = match header.kind {
                PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
                    parse_ofs_delta_base_offset(bytes, &mut offset, entry_offset)?,
                )),
                PackObjectKind::RefDelta => {
                    let hash_len = format.raw_len();
                    if offset + hash_len > trailer_offset {
                        return Err(GitError::InvalidFormat(
                            "truncated ref-delta base object id".into(),
                        ));
                    }
                    let oid = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
                    offset += hash_len;
                    Some(DeltaBase::Ref(oid))
                }
                _ => None,
            };
            // The body is decompressed only to learn how many compressed
            // bytes the entry occupies; the decoded bytes are discarded.
            let mut body = Vec::new();
            let consumed = inflate_into(
                &bytes[offset..trailer_offset],
                &mut body,
                header.size.min(usize::MAX as u64) as usize,
            )?;
            offset = offset
                .checked_add(consumed)
                .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
            on_disk.push(OnDiskEntry {
                offset: entry_offset,
                base,
                stream_size,
            });
        }

        // Lookup tables keyed by pack offset.
        let mut resolved_by_offset: HashMap<u64, &PackObject> =
            HashMap::with_capacity(pack.entries.len());
        for object in &pack.entries {
            resolved_by_offset.insert(object.entry.offset, object);
        }
        let mut oid_by_offset: HashMap<u64, ObjectId> = HashMap::with_capacity(on_disk.len());
        for entry in &on_disk {
            if let Some(object) = resolved_by_offset.get(&entry.offset) {
                oid_by_offset.insert(entry.offset, object.entry.oid);
            }
        }
        let mut index_by_offset: HashMap<u64, usize> = HashMap::with_capacity(on_disk.len());
        for (idx, entry) in on_disk.iter().enumerate() {
            index_by_offset.insert(entry.offset, idx);
        }

        // Map each entry offset to the offset of the entry that follows it;
        // the last entry is followed by the trailer.
        let mut sorted_offsets: Vec<u64> = on_disk.iter().map(|entry| entry.offset).collect();
        sorted_offsets.sort_unstable();
        let mut next_offset: HashMap<u64, u64> = HashMap::with_capacity(sorted_offsets.len());
        for window in sorted_offsets.windows(2) {
            next_offset.insert(window[0], window[1]);
        }
        if let Some(last) = sorted_offsets.last() {
            next_offset.insert(*last, trailer_offset as u64);
        }

        // Memo table for delta depths; `None` means "not computed yet".
        let mut depth = vec![None; on_disk.len()];
        // Computes the delta chain depth of entry `idx`, memoizing in `depth`.
        // Non-delta entries have depth 0; a delta is one deeper than its base,
        // or depth 1 when the base cannot be located in the pack.
        fn resolve_depth(
            idx: usize,
            on_disk: &[OnDiskEntry],
            index_by_offset: &HashMap<u64, usize>,
            offset_of_oid: &HashMap<ObjectId, u64>,
            depth: &mut [Option<u32>],
        ) -> u32 {
            if let Some(d) = depth[idx] {
                return d;
            }
            let computed = match &on_disk[idx].base {
                None => 0,
                Some(base) => {
                    let base_idx = match base {
                        DeltaBase::Offset(off) => index_by_offset.get(off).copied(),
                        DeltaBase::Ref(oid) => offset_of_oid
                            .get(oid)
                            .and_then(|off| index_by_offset.get(off).copied()),
                    };
                    match base_idx {
                        Some(bi) => {
                            resolve_depth(bi, on_disk, index_by_offset, offset_of_oid, depth) + 1
                        }
                        None => 1,
                    }
                }
            };
            depth[idx] = Some(computed);
            computed
        }
        // Reverse map so ref-delta bases can be located by object id.
        let mut offset_of_oid: HashMap<ObjectId, u64> = HashMap::with_capacity(oid_by_offset.len());
        for (off, oid) in &oid_by_offset {
            offset_of_oid.insert(*oid, *off);
        }
        for idx in 0..on_disk.len() {
            resolve_depth(
                idx,
                &on_disk,
                &index_by_offset,
                &offset_of_oid,
                &mut depth,
            );
        }

        let mut stats = Vec::with_capacity(on_disk.len());
        for (idx, entry) in on_disk.iter().enumerate() {
            let off = entry.offset;
            let object = resolved_by_offset.get(&off).ok_or_else(|| {
                GitError::InvalidFormat("pack offset missing from resolved set".into())
            })?;
            let size_in_pack = next_offset
                .get(&off)
                .copied()
                .unwrap_or(trailer_offset as u64)
                .saturating_sub(off);
            let base_oid = match &entry.base {
                None => None,
                Some(DeltaBase::Offset(base_off)) => oid_by_offset.get(base_off).copied(),
                Some(DeltaBase::Ref(oid)) => Some(*oid),
            };
            stats.push(PackVerifyStat {
                oid: object.entry.oid,
                object_type: object.object.object_type,
                // The size from the entry header, i.e. the delta size for
                // delta entries rather than the resolved object size.
                size: entry.stream_size,
                size_in_pack,
                offset: off,
                delta_depth: depth[idx].unwrap_or(0),
                base_oid,
            });
        }
        stats.sort_by_key(|stat| stat.offset);

        Ok(PackVerifyStats {
            objects: stats,
            checksum: pack.checksum,
        })
    }
762
    /// Writes a SHA-1 pack without deltas. Shorthand for
    /// [`PackFile::write_undeltified`] with [`ObjectFormat::Sha1`].
    pub fn write_undeltified_sha1<T>(objects: &[T]) -> Result<PackWrite>
    where
        T: Borrow<EncodedObject>,
    {
        Self::write_undeltified(objects, ObjectFormat::Sha1)
    }
769
    /// Writes a pack using a delta depth of `0` and with reordering disabled.
    pub fn write_undeltified<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
    where
        T: Borrow<EncodedObject>,
    {
        let options = PackWriteOptions::new().with_depth(0).with_reorder(false);
        Self::write_packed_impl(objects, format, &options)
    }
782
    /// Writes a pack with the default [`PackWriteOptions`].
    pub fn write_packed<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
    where
        T: Borrow<EncodedObject>,
    {
        Self::write_packed_with_options(objects, format, &PackWriteOptions::new())
    }
797
    /// Writes a pack with caller-supplied options. Object ids are computed
    /// from the objects themselves.
    pub fn write_packed_with_options<T>(
        objects: &[T],
        format: ObjectFormat,
        options: &PackWriteOptions,
    ) -> Result<PackWrite>
    where
        T: Borrow<EncodedObject>,
    {
        Self::write_packed_impl(objects, format, options)
    }
811
    /// Writes a pack from objects whose ids are already known, using the
    /// default [`PackWriteOptions`].
    pub fn write_packed_with_known_ids(
        inputs: &[PackInput<'_>],
        format: ObjectFormat,
    ) -> Result<PackWrite> {
        Self::write_packed_with_known_ids_and_options(inputs, format, &PackWriteOptions::new())
    }
826
827 pub fn write_packed_with_known_ids_and_options(
830 inputs: &[PackInput<'_>],
831 format: ObjectFormat,
832 options: &PackWriteOptions,
833 ) -> Result<PackWrite> {
834 if inputs.len() > u32::MAX as usize {
835 return Err(GitError::InvalidFormat("too many pack objects".into()));
836 }
837 let mut objects = Vec::with_capacity(inputs.len());
838 let mut object_ids = Vec::with_capacity(inputs.len());
839 for input in inputs {
840 if input.oid.format() != format {
841 return Err(GitError::InvalidObjectId(format!(
842 "pack object id {} uses {}, pack uses {}",
843 input.oid,
844 input.oid.format().name(),
845 format.name()
846 )));
847 }
848 objects.push(input.object);
849 object_ids.push(*input.oid);
850 }
851 Self::write_packed_from_parts(objects, object_ids, format, options)
852 }
853
    /// Writes a pack that may delta against `external_bases`, objects that
    /// are not themselves included in the pack. All other options are the
    /// defaults.
    pub fn write_thin<T>(
        objects: &[T],
        format: ObjectFormat,
        external_bases: HashMap<ObjectId, EncodedObject>,
    ) -> Result<PackWrite>
    where
        T: Borrow<EncodedObject>,
    {
        let options = PackWriteOptions::new().with_thin_bases(external_bases);
        Self::write_packed_impl(objects, format, &options)
    }
873
874 fn write_packed_impl<T>(
875 objects: &[T],
876 format: ObjectFormat,
877 options: &PackWriteOptions,
878 ) -> Result<PackWrite>
879 where
880 T: Borrow<EncodedObject>,
881 {
882 if objects.len() > u32::MAX as usize {
883 return Err(GitError::InvalidFormat("too many pack objects".into()));
884 }
885 let objects: Vec<&EncodedObject> = objects.iter().map(Borrow::borrow).collect();
886
887 let mut object_ids: Vec<ObjectId> = Vec::with_capacity(objects.len());
890 for object in &objects {
891 object_ids.push(object.object_id(format)?);
892 }
893 Self::write_packed_from_parts(objects, object_ids, format, options)
894 }
895
896 fn write_packed_from_parts(
897 objects: Vec<&EncodedObject>,
898 object_ids: Vec<ObjectId>,
899 format: ObjectFormat,
900 options: &PackWriteOptions,
901 ) -> Result<PackWrite> {
902 let mut seen = HashSet::with_capacity(object_ids.len());
903 for oid in &object_ids {
904 if !seen.insert(oid) {
905 return Err(GitError::InvalidFormat(format!(
906 "pack contains duplicate object id {oid}"
907 )));
908 }
909 }
910
911 for oid in options.thin_bases.keys() {
913 if oid.format() != format {
914 return Err(GitError::InvalidObjectId(
915 "thin pack base object id format does not match pack format".into(),
916 ));
917 }
918 }
919
920 let (plan, order) = plan_pack_deltas(&objects, &object_ids, options)?;
926
927 let mut pack = Vec::new();
928 pack.extend_from_slice(b"PACK");
929 pack.extend_from_slice(&2u32.to_be_bytes());
930 pack.extend_from_slice(&(objects.len() as u32).to_be_bytes());
931
932 let mut index_entries = Vec::with_capacity(objects.len());
933 let mut written_offsets: Vec<Option<u64>> = vec![None; objects.len()];
936
937 let compressed_payloads = compress_planned_payloads(&objects, &plan, &order)?;
938
939 for (order_pos, &idx) in order.iter().enumerate() {
940 let offset = pack.len() as u64;
941 let mut entry_bytes = Vec::new();
942 match &plan[idx].base {
943 PlannedBase::None => {
944 write_entry_header(
945 &mut entry_bytes,
946 objects[idx].object_type,
947 objects[idx].body.len() as u64,
948 );
949 }
950 PlannedBase::InPack { base_idx, delta } => {
951 let base_offset = written_offsets[*base_idx].ok_or_else(|| {
952 GitError::InvalidFormat(
953 "in-pack delta base emitted after dependent object".into(),
954 )
955 })?;
956 if options.prefer_ofs_delta {
957 write_pack_entry_header_kind(&mut entry_bytes, 6, delta.len() as u64);
958 let relative = offset.checked_sub(base_offset).ok_or_else(|| {
959 GitError::InvalidFormat("ofs-delta base offset is after delta".into())
960 })?;
961 write_ofs_delta_offset(&mut entry_bytes, relative)?;
962 } else {
963 write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
964 entry_bytes.extend_from_slice(object_ids[*base_idx].as_bytes());
965 }
966 }
967 PlannedBase::External { base_oid, delta } => {
968 write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
969 entry_bytes.extend_from_slice(base_oid.as_bytes());
970 }
971 }
972 entry_bytes.extend_from_slice(&compressed_payloads[order_pos]);
973 let crc32 = crc32fast::hash(&entry_bytes);
974 pack.extend_from_slice(&entry_bytes);
975 written_offsets[idx] = Some(offset);
976 index_entries.push(PackIndexEntry {
977 oid: object_ids[idx].clone(),
978 crc32,
979 offset,
980 });
981 }
982
983 let checksum = sley_core::digest_bytes(format, &pack)?;
984 pack.extend_from_slice(checksum.as_bytes());
985 let index = PackIndex::write_v2(format, &index_entries, &checksum)?;
986 Ok(PackWrite {
987 pack,
988 index,
989 checksum,
990 entries: index_entries,
991 })
992 }
993}
994
995impl<'a> PackIndexView<'a> {
    /// Parses a SHA-1 pack index. Shorthand for [`PackIndexView::parse`] with
    /// [`ObjectFormat::Sha1`].
    pub fn parse_v2_sha1(bytes: &'a [u8]) -> Result<Self> {
        Self::parse(bytes, ObjectFormat::Sha1)
    }
999
    /// Parses a pack index with full checking: the index checksum is verified
    /// and every entry is validated.
    pub fn parse(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
        Self::parse_impl(bytes, format, true, true)
    }
1003
    /// Parses a pack index without verifying the index checksum; entries are
    /// still validated.
    pub fn parse_without_checksum(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
        Self::parse_impl(bytes, format, false, true)
    }
1010
    /// Parses a pack index from a trusted source: neither the index checksum
    /// nor the individual entries are validated. Only the table layout is
    /// checked.
    pub fn parse_trusted_without_checksum(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
        Self::parse_impl(bytes, format, false, false)
    }
1020
    /// Number of objects in the index.
    pub fn count(&self) -> usize {
        self.count
    }
1024
    /// The 256-slot fan-out table.
    pub fn fanout(&self) -> &[u32; 256] {
        &self.fanout
    }
1028
1029 pub fn find(&self, oid: &ObjectId) -> Option<PackIndexLookup> {
1030 if oid.format() != self.format {
1031 return None;
1032 }
1033 let bucket = usize::from(oid.as_bytes()[0]);
1034 let mut start = if bucket == 0 {
1035 0
1036 } else {
1037 self.fanout[bucket - 1] as usize
1038 };
1039 let mut end = self.fanout[bucket] as usize;
1040 let target = oid.as_bytes();
1041
1042 while start < end {
1043 let mid = start + (end - start) / 2;
1044 match self.oid_bytes_at(mid).cmp(target) {
1045 std::cmp::Ordering::Less => start = mid + 1,
1046 std::cmp::Ordering::Equal => return self.lookup_at(mid),
1047 std::cmp::Ordering::Greater => end = mid,
1048 }
1049 }
1050 None
1051 }
1052
1053 fn parse_impl(
1054 bytes: &'a [u8],
1055 format: ObjectFormat,
1056 verify_checksum: bool,
1057 validate_entries: bool,
1058 ) -> Result<Self> {
1059 let hash_len = format.raw_len();
1060 if bytes.len() < 4 {
1061 return Err(GitError::InvalidFormat("pack index too short".into()));
1062 }
1063 if bytes[..4] != [0xff, b't', b'O', b'c'] {
1064 return Self::parse_v1_impl(bytes, format, verify_checksum, validate_entries);
1065 }
1066 if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
1067 return Err(GitError::InvalidFormat("pack index too short".into()));
1068 }
1069 let version = u32_be(&bytes[4..8]);
1070 if version != 2 {
1071 return Err(GitError::Unsupported(format!(
1072 "pack index version {version}"
1073 )));
1074 }
1075 let index_checksum_offset = bytes.len() - hash_len;
1076 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1077 if verify_checksum {
1078 let actual_index_checksum =
1079 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1080 if actual_index_checksum != index_checksum {
1081 return Err(GitError::InvalidFormat(format!(
1082 "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1083 )));
1084 }
1085 }
1086
1087 let mut offset = 8usize;
1088 let fanout = read_pack_index_fanout(bytes, &mut offset)?;
1089 let count = fanout[255] as usize;
1090 let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
1091 offset = oid_table.end;
1092 let crc_table = checked_range(offset, count, 4, bytes.len())?;
1093 offset = crc_table.end;
1094 let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
1095 offset = small_offset_table.end;
1096
1097 let large_offset_count = (0..count)
1098 .filter(|idx| {
1099 let start = small_offset_table.start + idx * 4;
1100 u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
1101 })
1102 .count();
1103 let large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
1104 offset = large_offset_table.end;
1105
1106 let expected_trailer_offset = bytes.len() - hash_len * 2;
1107 if offset != expected_trailer_offset {
1108 return Err(GitError::InvalidFormat(format!(
1109 "pack index has {} unexpected bytes before trailer",
1110 expected_trailer_offset.saturating_sub(offset)
1111 )));
1112 }
1113 let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1114
1115 let view = Self {
1116 version,
1117 count,
1118 fanout,
1119 pack_checksum,
1120 index_checksum,
1121 bytes,
1122 format,
1123 tables: PackIndexViewTables::V2 {
1124 oid_table,
1125 crc_table,
1126 small_offset_table,
1127 large_offset_table,
1128 },
1129 };
1130 if validate_entries {
1131 view.validate_v2_entries()?;
1132 }
1133 Ok(view)
1134 }
1135
1136 fn parse_v1_impl(
1137 bytes: &'a [u8],
1138 format: ObjectFormat,
1139 verify_checksum: bool,
1140 validate_entries: bool,
1141 ) -> Result<Self> {
1142 let hash_len = format.raw_len();
1143 if bytes.len() < 256 * 4 + 2 * hash_len {
1144 return Err(GitError::InvalidFormat("pack index too short".into()));
1145 }
1146 let index_checksum_offset = bytes.len() - hash_len;
1147 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1148 if verify_checksum {
1149 let actual_index_checksum =
1150 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1151 if actual_index_checksum != index_checksum {
1152 return Err(GitError::InvalidFormat(format!(
1153 "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1154 )));
1155 }
1156 }
1157
1158 let mut offset = 0usize;
1159 let fanout = read_pack_index_fanout(bytes, &mut offset)?;
1160 let count = fanout[255] as usize;
1161 let entry_len = hash_len
1162 .checked_add(4)
1163 .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1164 let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
1165 offset = entry_table.end;
1166 let expected_trailer_offset = bytes.len() - hash_len * 2;
1167 if offset != expected_trailer_offset {
1168 return Err(GitError::InvalidFormat(format!(
1169 "pack index has {} unexpected bytes before trailer",
1170 expected_trailer_offset.saturating_sub(offset)
1171 )));
1172 }
1173 let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1174
1175 let view = Self {
1176 version: 1,
1177 count,
1178 fanout,
1179 pack_checksum,
1180 index_checksum,
1181 bytes,
1182 format,
1183 tables: PackIndexViewTables::V1 { entry_table },
1184 };
1185 if validate_entries {
1186 view.validate_v1_entries()?;
1187 }
1188 Ok(view)
1189 }
1190
1191 fn validate_v2_entries(&self) -> Result<()> {
1192 let PackIndexViewTables::V2 {
1193 oid_table,
1194 small_offset_table,
1195 large_offset_table,
1196 ..
1197 } = &self.tables
1198 else {
1199 unreachable!("v2 validation only runs for v2 views");
1200 };
1201 let oid_table = self.slice(oid_table.clone());
1202 let small_offset_table = self.slice(small_offset_table.clone());
1203 let large_offset_table = self.slice(large_offset_table.clone());
1204 let hash_len = self.format.raw_len();
1205 for idx in 0..self.count {
1206 let oid_start = idx * hash_len;
1207 let oid_bytes = &oid_table[oid_start..oid_start + hash_len];
1208 if idx > 0 && oid_bytes <= &oid_table[oid_start - hash_len..oid_start] {
1209 return Err(GitError::InvalidFormat(
1210 "pack index object ids are not strictly ascending".into(),
1211 ));
1212 }
1213 validate_pack_index_oid_fanout(idx, oid_bytes, &self.fanout)?;
1214
1215 let offset_start = idx * 4;
1216 let raw_offset = u32_be(&small_offset_table[offset_start..offset_start + 4]);
1217 pack_index_v2_offset(raw_offset, large_offset_table)?;
1218 }
1219 Ok(())
1220 }
1221
1222 fn validate_v1_entries(&self) -> Result<()> {
1223 let PackIndexViewTables::V1 { entry_table } = &self.tables else {
1224 unreachable!("v1 validation only runs for v1 views");
1225 };
1226 let entry_table = self.slice(entry_table.clone());
1227 let hash_len = self.format.raw_len();
1228 let entry_len = hash_len
1229 .checked_add(4)
1230 .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1231 for idx in 0..self.count {
1232 let start = idx * entry_len;
1233 let oid_start = start + 4;
1234 let oid_bytes = &entry_table[oid_start..start + entry_len];
1235 if idx > 0 {
1236 let previous_oid_start = oid_start - entry_len;
1237 let previous_oid = &entry_table[previous_oid_start..previous_oid_start + hash_len];
1238 if previous_oid >= oid_bytes {
1239 return Err(GitError::InvalidFormat(
1240 "pack index object ids are not strictly sorted".into(),
1241 ));
1242 }
1243 }
1244 validate_pack_index_oid_fanout(idx, oid_bytes, &self.fanout)?;
1245 }
1246 Ok(())
1247 }
1248
1249 fn oid_bytes_at(&self, idx: usize) -> &'a [u8] {
1250 let hash_len = self.format.raw_len();
1251 match &self.tables {
1252 PackIndexViewTables::V1 { entry_table } => {
1253 let entry_table = self.slice(entry_table.clone());
1254 let entry_len = hash_len + 4;
1255 let start = idx * entry_len + 4;
1256 &entry_table[start..start + hash_len]
1257 }
1258 PackIndexViewTables::V2 { oid_table, .. } => {
1259 let oid_table = self.slice(oid_table.clone());
1260 let start = idx * hash_len;
1261 &oid_table[start..start + hash_len]
1262 }
1263 }
1264 }
1265
1266 fn lookup_at(&self, idx: usize) -> Option<PackIndexLookup> {
1267 if idx >= self.count {
1268 return None;
1269 }
1270 let hash_len = self.format.raw_len();
1271 match &self.tables {
1272 PackIndexViewTables::V1 { entry_table } => {
1273 let entry_table = self.slice(entry_table.clone());
1274 let entry_len = hash_len + 4;
1275 let start = idx * entry_len;
1276 Some(PackIndexLookup {
1277 crc32: 0,
1278 offset: u64::from(u32_be(&entry_table[start..start + 4])),
1279 })
1280 }
1281 PackIndexViewTables::V2 {
1282 crc_table,
1283 small_offset_table,
1284 large_offset_table,
1285 ..
1286 } => {
1287 let crc_table = self.slice(crc_table.clone());
1288 let small_offset_table = self.slice(small_offset_table.clone());
1289 let large_offset_table = self.slice(large_offset_table.clone());
1290 let crc_start = idx * 4;
1291 let raw_offset = u32_be(&small_offset_table[crc_start..crc_start + 4]);
1292 Some(PackIndexLookup {
1293 crc32: u32_be(&crc_table[crc_start..crc_start + 4]),
1294 offset: pack_index_v2_offset(raw_offset, large_offset_table).ok()?,
1295 })
1296 }
1297 }
1298 }
1299
    /// Borrows the bytes of the underlying index that fall inside `range`.
    ///
    /// # Panics
    /// Panics if `range` is out of bounds for the index bytes.
    fn slice(&self, range: Range<usize>) -> &'a [u8] {
        &self.bytes[range]
    }
1303}
1304
1305impl PackIndexViewData {
1306 pub fn parse(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1307 Self::parse_source(Arc::new(SharedIndexBytes(bytes)), format)
1308 }
1309
1310 pub fn parse_without_checksum(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1314 Self::parse_source_without_checksum(Arc::new(SharedIndexBytes(bytes)), format)
1315 }
1316
1317 pub fn parse_trusted_without_checksum(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1320 Self::parse_trusted_source_without_checksum(Arc::new(SharedIndexBytes(bytes)), format)
1321 }
1322
1323 pub fn parse_source(
1324 bytes: Arc<dyn PackIndexByteSource>,
1325 format: ObjectFormat,
1326 ) -> Result<Self> {
1327 Self::parse_impl(bytes, format, true, true)
1328 }
1329
1330 pub fn parse_source_without_checksum(
1331 bytes: Arc<dyn PackIndexByteSource>,
1332 format: ObjectFormat,
1333 ) -> Result<Self> {
1334 Self::parse_impl(bytes, format, false, true)
1335 }
1336
1337 pub fn parse_trusted_source_without_checksum(
1338 bytes: Arc<dyn PackIndexByteSource>,
1339 format: ObjectFormat,
1340 ) -> Result<Self> {
1341 Self::parse_impl(bytes, format, false, false)
1342 }
1343
1344 pub fn count(&self) -> usize {
1345 self.count
1346 }
1347
1348 pub fn fanout(&self) -> &[u32; 256] {
1349 &self.fanout
1350 }
1351
1352 pub fn find(&self, oid: &ObjectId) -> Option<PackIndexLookup> {
1353 self.as_view().find(oid)
1354 }
1355
1356 pub fn as_view(&self) -> PackIndexView<'_> {
1357 PackIndexView {
1358 version: self.version,
1359 count: self.count,
1360 fanout: self.fanout,
1361 pack_checksum: self.pack_checksum,
1362 index_checksum: self.index_checksum,
1363 bytes: self.bytes.as_bytes(),
1364 format: self.format,
1365 tables: self.tables.clone(),
1366 }
1367 }
1368
1369 fn parse_impl(
1370 bytes: Arc<dyn PackIndexByteSource>,
1371 format: ObjectFormat,
1372 verify_checksum: bool,
1373 validate_entries: bool,
1374 ) -> Result<Self> {
1375 let (version, count, fanout, pack_checksum, index_checksum, tables) = {
1376 let view = PackIndexView::parse_impl(
1377 bytes.as_bytes(),
1378 format,
1379 verify_checksum,
1380 validate_entries,
1381 )?;
1382 (
1383 view.version,
1384 view.count,
1385 view.fanout,
1386 view.pack_checksum,
1387 view.index_checksum,
1388 view.tables,
1389 )
1390 };
1391 Ok(Self {
1392 version,
1393 count,
1394 fanout,
1395 pack_checksum,
1396 index_checksum,
1397 bytes,
1398 format,
1399 tables,
1400 })
1401 }
1402}
1403
1404impl PackIndex {
    /// Builds a version-2 index for `pack_bytes` using the SHA-1 object format.
    ///
    /// Convenience wrapper around [`Self::write_v2_for_pack`].
    pub fn write_v2_for_pack_sha1(pack_bytes: &[u8]) -> Result<PackIndexBuild> {
        Self::write_v2_for_pack(pack_bytes, ObjectFormat::Sha1)
    }
1408
1409 pub fn write_v2_for_pack(pack_bytes: &[u8], format: ObjectFormat) -> Result<PackIndexBuild> {
1410 let trailer_len = format.raw_len();
1411 if pack_bytes.len() < 12 + trailer_len {
1412 return Err(GitError::InvalidFormat("pack file too short".into()));
1413 }
1414 let trailer_offset = pack_bytes.len() - trailer_len;
1415 let pack_checksum = sley_core::digest_bytes(format, &pack_bytes[..trailer_offset])?;
1416 let expected = ObjectId::from_raw(format, &pack_bytes[trailer_offset..])?;
1417 if pack_checksum != expected {
1418 return Err(GitError::InvalidFormat(format!(
1419 "pack checksum mismatch: expected {expected}, got {pack_checksum}"
1420 )));
1421 }
1422
1423 if &pack_bytes[..4] != b"PACK" {
1424 return Err(GitError::InvalidFormat("missing PACK signature".into()));
1425 }
1426 let version = u32_be(&pack_bytes[4..8]);
1427 if version != 2 && version != 3 {
1428 return Err(GitError::Unsupported(format!("pack version {version}")));
1429 }
1430 let count = u32_be(&pack_bytes[8..12]) as usize;
1431 let mut offset = 12usize;
1432 let mut parsed_entries = Vec::with_capacity(count);
1433 let mut raw_entries = Vec::with_capacity(count);
1434 for _ in 0..count {
1435 let entry_offset = offset;
1436 let header = parse_entry_header(pack_bytes, &mut offset)?;
1437 let base = match header.kind {
1438 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
1439 pack_bytes,
1440 &mut offset,
1441 entry_offset as u64,
1442 )?)),
1443 PackObjectKind::RefDelta => {
1444 let hash_len = format.raw_len();
1445 if offset + hash_len > trailer_offset {
1446 return Err(GitError::InvalidFormat(
1447 "truncated ref-delta base object id".into(),
1448 ));
1449 }
1450 let oid = ObjectId::from_raw(format, &pack_bytes[offset..offset + hash_len])?;
1451 offset += hash_len;
1452 Some(DeltaBase::Ref(oid))
1453 }
1454 _ => None,
1455 };
1456 let mut body = Vec::new();
1457 let consumed = inflate_into(
1458 &pack_bytes[offset..trailer_offset],
1459 &mut body,
1460 header.size.min(usize::MAX as u64) as usize,
1461 )?;
1462 if body.len() as u64 != header.size {
1463 return Err(GitError::InvalidObject(format!(
1464 "pack object declared {} bytes, decoded {}",
1465 header.size,
1466 body.len()
1467 )));
1468 }
1469 if consumed == 0 {
1470 return Err(GitError::InvalidFormat(
1471 "empty compressed pack entry".into(),
1472 ));
1473 }
1474 offset = offset
1475 .checked_add(consumed)
1476 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
1477 if offset > trailer_offset {
1478 return Err(GitError::InvalidFormat(
1479 "pack entry extends past checksum".into(),
1480 ));
1481 }
1482 raw_entries.push((
1483 entry_offset as u64,
1484 crc32fast::hash(&pack_bytes[entry_offset..offset]),
1485 ));
1486 if let Some(base) = base {
1487 parsed_entries.push(ParsedPackEntry::Delta {
1488 base,
1489 compressed_size: consumed as u64,
1490 delta_size: header.size,
1491 offset: entry_offset as u64,
1492 delta: body,
1493 });
1494 } else {
1495 let object_type = match header.kind {
1496 PackObjectKind::Commit => ObjectType::Commit,
1497 PackObjectKind::Tree => ObjectType::Tree,
1498 PackObjectKind::Blob => ObjectType::Blob,
1499 PackObjectKind::Tag => ObjectType::Tag,
1500 PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
1501 };
1502 let object = EncodedObject::new(object_type, body);
1503 let oid = object.object_id(format)?;
1504 parsed_entries.push(ParsedPackEntry::Resolved(PackObject {
1505 entry: PackEntry {
1506 oid,
1507 compressed_size: consumed as u64,
1508 uncompressed_size: header.size,
1509 offset: entry_offset as u64,
1510 },
1511 object,
1512 }));
1513 }
1514 }
1515 if offset != trailer_offset {
1516 return Err(GitError::InvalidFormat(format!(
1517 "pack has {} trailing bytes before checksum",
1518 trailer_offset - offset
1519 )));
1520 }
1521
1522 let resolved = resolve_pack_entries(parsed_entries, format, &mut |_| Ok(None))?;
1523 let entries = resolved
1524 .iter()
1525 .zip(raw_entries)
1526 .map(|(object, (offset, crc32))| PackIndexEntry {
1527 oid: object.entry.oid,
1528 crc32,
1529 offset,
1530 })
1531 .collect::<Vec<_>>();
1532 let index = PackIndex::write_v2(format, &entries, &pack_checksum)?;
1533 Ok(PackIndexBuild {
1534 index,
1535 pack_checksum,
1536 entries,
1537 })
1538 }
1539
    /// Parses a pack index using the SHA-1 object format.
    ///
    /// Delegates to [`Self::parse`], which also accepts version-1 files
    /// despite this method's name.
    pub fn parse_v2_sha1(bytes: &[u8]) -> Result<Self> {
        Self::parse(bytes, ObjectFormat::Sha1)
    }
1543
1544 pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
1545 let hash_len = format.raw_len();
1546 if bytes.len() < 4 {
1547 return Err(GitError::InvalidFormat("pack index too short".into()));
1548 }
1549 if bytes[..4] != [0xff, b't', b'O', b'c'] {
1550 return Self::parse_v1(bytes, format);
1551 }
1552 if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
1553 return Err(GitError::InvalidFormat("pack index too short".into()));
1554 }
1555 let version = u32_be(&bytes[4..8]);
1556 if version != 2 {
1557 return Err(GitError::Unsupported(format!(
1558 "pack index version {version}"
1559 )));
1560 }
1561 let index_checksum_offset = bytes.len() - hash_len;
1562 let actual_index_checksum =
1563 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1564 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1565 if actual_index_checksum != index_checksum {
1566 return Err(GitError::InvalidFormat(format!(
1567 "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1568 )));
1569 }
1570
1571 let mut offset = 8usize;
1572 let mut fanout = [0u32; 256];
1573 let mut previous = 0u32;
1574 for slot in &mut fanout {
1575 *slot = u32_be(&bytes[offset..offset + 4]);
1576 if *slot < previous {
1577 return Err(GitError::InvalidFormat(
1578 "pack index fanout is not monotonic".into(),
1579 ));
1580 }
1581 previous = *slot;
1582 offset += 4;
1583 }
1584 let count = fanout[255] as usize;
1585 let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
1586 offset = oid_table.end;
1587 let crc_table = checked_range(offset, count, 4, bytes.len())?;
1588 offset = crc_table.end;
1589 let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
1590 offset = small_offset_table.end;
1591
1592 let large_offset_count = (0..count)
1593 .filter(|idx| {
1594 let start = small_offset_table.start + idx * 4;
1595 u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
1596 })
1597 .count();
1598 let large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
1599 offset = large_offset_table.end;
1600
1601 let expected_trailer_offset = bytes.len() - hash_len * 2;
1602 if offset != expected_trailer_offset {
1603 return Err(GitError::InvalidFormat(format!(
1604 "pack index has {} unexpected bytes before trailer",
1605 expected_trailer_offset.saturating_sub(offset)
1606 )));
1607 }
1608 let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1609
1610 let mut entries = Vec::with_capacity(count);
1611 for idx in 0..count {
1612 let oid_start = oid_table.start + idx * hash_len;
1613 let crc_start = crc_table.start + idx * 4;
1614 let offset_start = small_offset_table.start + idx * 4;
1615 let oid_bytes = &bytes[oid_start..oid_start + hash_len];
1616 if idx > 0 && oid_bytes <= &bytes[oid_start - hash_len..oid_start] {
1620 return Err(GitError::InvalidFormat(
1621 "pack index object ids are not strictly ascending".into(),
1622 ));
1623 }
1624 let expected_min = if oid_bytes[0] == 0 {
1625 0
1626 } else {
1627 fanout[usize::from(oid_bytes[0] - 1)]
1628 };
1629 if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
1630 return Err(GitError::InvalidFormat(
1631 "pack index object id is outside its fanout bucket".into(),
1632 ));
1633 }
1634 let raw_offset = u32_be(&bytes[offset_start..offset_start + 4]);
1635 let offset = if raw_offset & 0x8000_0000 == 0 {
1636 u64::from(raw_offset)
1637 } else {
1638 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
1639 let large_start = large_offset_table.start + large_idx * 8;
1640 if large_idx >= large_offset_count {
1641 return Err(GitError::InvalidFormat(
1642 "pack index large offset points past table".into(),
1643 ));
1644 }
1645 u64_be(&bytes[large_start..large_start + 8])
1646 };
1647 entries.push(PackIndexEntry {
1648 oid: ObjectId::from_raw(format, oid_bytes)?,
1649 crc32: u32_be(&bytes[crc_start..crc_start + 4]),
1650 offset,
1651 });
1652 }
1653 Ok(Self {
1654 version,
1655 fanout,
1656 entries,
1657 pack_checksum,
1658 index_checksum,
1659 })
1660 }
1661
1662 fn parse_v1(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
1663 let hash_len = format.raw_len();
1664 if bytes.len() < 256 * 4 + 2 * hash_len {
1665 return Err(GitError::InvalidFormat("pack index too short".into()));
1666 }
1667 let index_checksum_offset = bytes.len() - hash_len;
1668 let actual_index_checksum =
1669 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1670 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1671 if actual_index_checksum != index_checksum {
1672 return Err(GitError::InvalidFormat(format!(
1673 "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1674 )));
1675 }
1676
1677 let mut offset = 0usize;
1678 let mut fanout = [0u32; 256];
1679 let mut previous = 0u32;
1680 for slot in &mut fanout {
1681 *slot = u32_be(&bytes[offset..offset + 4]);
1682 if *slot < previous {
1683 return Err(GitError::InvalidFormat(
1684 "pack index fanout is not monotonic".into(),
1685 ));
1686 }
1687 previous = *slot;
1688 offset += 4;
1689 }
1690 let count = fanout[255] as usize;
1691 let entry_len = hash_len
1692 .checked_add(4)
1693 .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1694 let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
1695 offset = entry_table.end;
1696 let expected_trailer_offset = bytes.len() - hash_len * 2;
1697 if offset != expected_trailer_offset {
1698 return Err(GitError::InvalidFormat(format!(
1699 "pack index has {} unexpected bytes before trailer",
1700 expected_trailer_offset.saturating_sub(offset)
1701 )));
1702 }
1703 let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1704
1705 let mut entries = Vec::with_capacity(count);
1706 let mut previous_oid: Option<ObjectId> = None;
1707 for idx in 0..count {
1708 let start = entry_table.start + idx * entry_len;
1709 let oid = ObjectId::from_raw(format, &bytes[start + 4..start + entry_len])?;
1710 if let Some(previous) = &previous_oid
1711 && previous.as_bytes() >= oid.as_bytes()
1712 {
1713 return Err(GitError::InvalidFormat(
1714 "pack index object ids are not strictly sorted".into(),
1715 ));
1716 }
1717 previous_oid = Some(oid);
1718 entries.push(PackIndexEntry {
1719 oid,
1720 crc32: 0,
1721 offset: u64::from(u32_be(&bytes[start..start + 4])),
1722 });
1723 }
1724 Ok(Self {
1725 version: 1,
1726 fanout,
1727 entries,
1728 pack_checksum,
1729 index_checksum,
1730 })
1731 }
1732
1733 pub fn find(&self, oid: &ObjectId) -> Option<&PackIndexEntry> {
1734 self.entries
1735 .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
1736 .ok()
1737 .map(|idx| &self.entries[idx])
1738 }
1739
    /// Serializes a version-2 index using the SHA-1 object format.
    ///
    /// Convenience wrapper around [`Self::write_v2`].
    pub fn write_v2_sha1(entries: &[PackIndexEntry], pack_checksum: &ObjectId) -> Result<Vec<u8>> {
        Self::write_v2(ObjectFormat::Sha1, entries, pack_checksum)
    }
1743
1744 pub fn write_v2(
1745 format: ObjectFormat,
1746 entries: &[PackIndexEntry],
1747 pack_checksum: &ObjectId,
1748 ) -> Result<Vec<u8>> {
1749 if pack_checksum.format() != format {
1750 return Err(GitError::InvalidObjectId(
1751 "pack checksum format does not match index format".into(),
1752 ));
1753 }
1754 let mut entries = entries.iter().collect::<Vec<_>>();
1755 entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
1756 for pair in entries.windows(2) {
1757 if pair[0].oid.as_bytes() == pair[1].oid.as_bytes() {
1758 return Err(GitError::InvalidFormat(format!(
1759 "pack index contains duplicate object id {}",
1760 pair[0].oid
1761 )));
1762 }
1763 }
1764 let mut fanout = [0u32; 256];
1765 for entry in &entries {
1766 if entry.oid.format() != format {
1767 return Err(GitError::InvalidObjectId(
1768 "pack index entry format does not match index format".into(),
1769 ));
1770 }
1771 let first = entry.oid.as_bytes()[0] as usize;
1772 fanout[first] = fanout[first]
1773 .checked_add(1)
1774 .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1775 }
1776 let mut running = 0u32;
1777 for slot in &mut fanout {
1778 running = running
1779 .checked_add(*slot)
1780 .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1781 *slot = running;
1782 }
1783
1784 let mut index = Vec::new();
1785 index.extend_from_slice(&[0xff, b't', b'O', b'c']);
1786 index.extend_from_slice(&2u32.to_be_bytes());
1787 for count in fanout {
1788 index.extend_from_slice(&count.to_be_bytes());
1789 }
1790 for entry in &entries {
1791 index.extend_from_slice(entry.oid.as_bytes());
1792 }
1793 for entry in &entries {
1794 index.extend_from_slice(&entry.crc32.to_be_bytes());
1795 }
1796
1797 let mut large_offsets = Vec::new();
1798 for entry in &entries {
1799 if entry.offset < 0x8000_0000 {
1800 index.extend_from_slice(&(entry.offset as u32).to_be_bytes());
1801 } else {
1802 if large_offsets.len() > 0x7fff_ffff {
1803 return Err(GitError::InvalidFormat(
1804 "too many large pack offsets".into(),
1805 ));
1806 }
1807 let large_idx = large_offsets.len() as u32;
1808 index.extend_from_slice(&(0x8000_0000 | large_idx).to_be_bytes());
1809 large_offsets.push(entry.offset);
1810 }
1811 }
1812 for offset in large_offsets {
1813 index.extend_from_slice(&offset.to_be_bytes());
1814 }
1815 index.extend_from_slice(pack_checksum.as_bytes());
1816 let index_checksum = sley_core::digest_bytes(format, &index)?;
1817 index.extend_from_slice(index_checksum.as_bytes());
1818 Ok(index)
1819 }
1820
1821 pub fn write_v1(
1827 format: ObjectFormat,
1828 entries: &[PackIndexEntry],
1829 pack_checksum: &ObjectId,
1830 ) -> Result<Vec<u8>> {
1831 if pack_checksum.format() != format {
1832 return Err(GitError::InvalidObjectId(
1833 "pack checksum format does not match index format".into(),
1834 ));
1835 }
1836 let mut entries = entries.iter().collect::<Vec<_>>();
1837 entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
1838 for pair in entries.windows(2) {
1839 if pair[0].oid.as_bytes() == pair[1].oid.as_bytes() {
1840 return Err(GitError::InvalidFormat(format!(
1841 "pack index contains duplicate object id {}",
1842 pair[0].oid
1843 )));
1844 }
1845 }
1846 let mut fanout = [0u32; 256];
1847 for entry in &entries {
1848 if entry.oid.format() != format {
1849 return Err(GitError::InvalidObjectId(
1850 "pack index entry format does not match index format".into(),
1851 ));
1852 }
1853 if entry.offset > 0xffff_ffff {
1854 return Err(GitError::InvalidFormat(
1855 "pack offset too large for a version-1 index".into(),
1856 ));
1857 }
1858 let first = entry.oid.as_bytes()[0] as usize;
1859 fanout[first] = fanout[first]
1860 .checked_add(1)
1861 .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1862 }
1863 let mut running = 0u32;
1864 for slot in &mut fanout {
1865 running = running
1866 .checked_add(*slot)
1867 .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1868 *slot = running;
1869 }
1870
1871 let mut index = Vec::new();
1872 for count in fanout {
1873 index.extend_from_slice(&count.to_be_bytes());
1874 }
1875 for entry in &entries {
1876 index.extend_from_slice(&(entry.offset as u32).to_be_bytes());
1877 index.extend_from_slice(entry.oid.as_bytes());
1878 }
1879 index.extend_from_slice(pack_checksum.as_bytes());
1880 let index_checksum = sley_core::digest_bytes(format, &index)?;
1881 index.extend_from_slice(index_checksum.as_bytes());
1882 Ok(index)
1883 }
1884}
1885
1886pub fn pack_order_index_positions(entries: &[PackIndexEntry]) -> Vec<u32> {
1891 let mut oid_sorted: Vec<usize> = (0..entries.len()).collect();
1892 oid_sorted.sort_by(|&a, &b| entries[a].oid.as_bytes().cmp(entries[b].oid.as_bytes()));
1893 let mut index_position = vec![0u32; entries.len()];
1894 for (position, &entry) in oid_sorted.iter().enumerate() {
1895 index_position[entry] = position as u32;
1896 }
1897 let mut by_offset: Vec<usize> = (0..entries.len()).collect();
1898 by_offset.sort_by_key(|&entry| entries[entry].offset);
1899 by_offset
1900 .into_iter()
1901 .map(|entry| index_position[entry])
1902 .collect()
1903}
1904
1905impl PackReverseIndex {
1906 pub fn write(
1907 format: ObjectFormat,
1908 positions: &[u32],
1909 pack_checksum: &ObjectId,
1910 ) -> Result<Vec<u8>> {
1911 if pack_checksum.format() != format {
1912 return Err(GitError::InvalidObjectId(
1913 "pack checksum format does not match reverse index format".into(),
1914 ));
1915 }
1916 validate_position_permutation(positions)?;
1917
1918 let mut out = Vec::new();
1919 out.extend_from_slice(b"RIDX");
1920 out.extend_from_slice(&1u32.to_be_bytes());
1921 out.extend_from_slice(&hash_function_id(format).to_be_bytes());
1922 for position in positions {
1923 out.extend_from_slice(&position.to_be_bytes());
1924 }
1925 out.extend_from_slice(pack_checksum.as_bytes());
1926 let checksum = sley_core::digest_bytes(format, &out)?;
1927 out.extend_from_slice(checksum.as_bytes());
1928 Ok(out)
1929 }
1930
1931 pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
1932 let hash_len = format.raw_len();
1933 let table_len = object_count
1934 .checked_mul(4)
1935 .ok_or_else(|| GitError::InvalidFormat("reverse index table overflow".into()))?;
1936 let min_len = 12usize
1937 .checked_add(table_len)
1938 .and_then(|len| len.checked_add(hash_len * 2))
1939 .ok_or_else(|| GitError::InvalidFormat("reverse index length overflow".into()))?;
1940 if bytes.len() < min_len {
1941 return Err(GitError::InvalidFormat("reverse index too short".into()));
1942 }
1943 if bytes.len() != min_len {
1944 return Err(GitError::InvalidFormat(format!(
1945 "reverse index has {} trailing bytes",
1946 bytes.len() - min_len
1947 )));
1948 }
1949 if &bytes[..4] != b"RIDX" {
1950 return Err(GitError::InvalidFormat(
1951 "missing reverse index signature".into(),
1952 ));
1953 }
1954 let version = u32_be(&bytes[4..8]);
1955 if version != 1 {
1956 return Err(GitError::Unsupported(format!(
1957 "reverse index version {version}"
1958 )));
1959 }
1960 let hash_id = u32_be(&bytes[8..12]);
1961 if hash_id != hash_function_id(format) {
1962 return Err(GitError::InvalidFormat(format!(
1963 "reverse index hash id {hash_id} does not match {}",
1964 format.name()
1965 )));
1966 }
1967
1968 let index_checksum_offset = bytes.len() - hash_len;
1969 let actual_index_checksum =
1970 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1971 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1972 if actual_index_checksum != index_checksum {
1973 return Err(GitError::InvalidFormat(format!(
1974 "reverse index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1975 )));
1976 }
1977
1978 let pack_checksum_offset = index_checksum_offset - hash_len;
1979 let pack_checksum =
1980 ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
1981 let mut positions = Vec::with_capacity(object_count);
1982 let mut offset = 12usize;
1983 for _ in 0..object_count {
1984 let position = u32_be(&bytes[offset..offset + 4]);
1985 positions.push(position);
1986 offset += 4;
1987 }
1988 validate_position_permutation(&positions)?;
1989
1990 Ok(Self {
1991 version,
1992 format,
1993 positions,
1994 pack_checksum,
1995 index_checksum,
1996 })
1997 }
1998}
1999
2000impl PackMtimes {
2001 pub fn write(
2002 format: ObjectFormat,
2003 mtimes: &[u32],
2004 pack_checksum: &ObjectId,
2005 ) -> Result<Vec<u8>> {
2006 if pack_checksum.format() != format {
2007 return Err(GitError::InvalidObjectId(
2008 "pack checksum format does not match mtimes format".into(),
2009 ));
2010 }
2011
2012 let mut out = Vec::new();
2013 out.extend_from_slice(b"MTME");
2014 out.extend_from_slice(&1u32.to_be_bytes());
2015 out.extend_from_slice(&hash_function_id(format).to_be_bytes());
2016 for mtime in mtimes {
2017 out.extend_from_slice(&mtime.to_be_bytes());
2018 }
2019 out.extend_from_slice(pack_checksum.as_bytes());
2020 let checksum = sley_core::digest_bytes(format, &out)?;
2021 out.extend_from_slice(checksum.as_bytes());
2022 Ok(out)
2023 }
2024
2025 pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
2026 let hash_len = format.raw_len();
2027 let table_len = object_count
2028 .checked_mul(4)
2029 .ok_or_else(|| GitError::InvalidFormat("mtimes table overflow".into()))?;
2030 let expected_len = 12usize
2031 .checked_add(table_len)
2032 .and_then(|len| len.checked_add(hash_len * 2))
2033 .ok_or_else(|| GitError::InvalidFormat("mtimes length overflow".into()))?;
2034 if bytes.len() < expected_len {
2035 return Err(GitError::InvalidFormat("mtimes file too short".into()));
2036 }
2037 if bytes.len() != expected_len {
2038 return Err(GitError::InvalidFormat(format!(
2039 "mtimes file has {} trailing bytes",
2040 bytes.len() - expected_len
2041 )));
2042 }
2043 if &bytes[..4] != b"MTME" {
2044 return Err(GitError::InvalidFormat("missing mtimes signature".into()));
2045 }
2046 let version = u32_be(&bytes[4..8]);
2047 if version != 1 {
2048 return Err(GitError::Unsupported(format!("mtimes version {version}")));
2049 }
2050 let hash_id = u32_be(&bytes[8..12]);
2051 if hash_id != hash_function_id(format) {
2052 return Err(GitError::InvalidFormat(format!(
2053 "mtimes hash id {hash_id} does not match {}",
2054 format.name()
2055 )));
2056 }
2057
2058 let index_checksum_offset = bytes.len() - hash_len;
2059 let actual_index_checksum =
2060 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
2061 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
2062 if actual_index_checksum != index_checksum {
2063 return Err(GitError::InvalidFormat(format!(
2064 "mtimes checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2065 )));
2066 }
2067
2068 let pack_checksum_offset = index_checksum_offset - hash_len;
2069 let pack_checksum =
2070 ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
2071 let mut mtimes = Vec::with_capacity(object_count);
2072 let mut offset = 12usize;
2073 for _ in 0..object_count {
2074 mtimes.push(u32_be(&bytes[offset..offset + 4]));
2075 offset += 4;
2076 }
2077
2078 Ok(Self {
2079 version,
2080 format,
2081 mtimes,
2082 pack_checksum,
2083 index_checksum,
2084 })
2085 }
2086}
2087
2088impl PackBitmapIndex {
    /// Option bit `0x0001` of the bitmap header; `parse` accepts it as a known
    /// option but does not otherwise act on it.
    /// NOTE(review): presumably Git's `BITMAP_OPT_FULL_DAG` - confirm against
    /// the bitmap format documentation.
    pub const OPTION_FULL_DAG: u16 = 0x0001;
    /// Option bit `0x0004` of the bitmap header; when set, `parse` expects a
    /// name-hash cache of one `u32` per object after the bitmap entries.
    pub const OPTION_HASH_CACHE: u16 = 0x0004;
2091
2092 pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
2093 let hash_len = format.raw_len();
2094 let min_len = 12usize
2095 .checked_add(hash_len * 2)
2096 .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
2097 if bytes.len() < min_len {
2098 return Err(GitError::InvalidFormat("bitmap index too short".into()));
2099 }
2100 if &bytes[..4] != b"BITM" {
2101 return Err(GitError::InvalidFormat(
2102 "missing bitmap index signature".into(),
2103 ));
2104 }
2105 let version = u16_be(&bytes[4..6]);
2106 if version != 1 {
2107 return Err(GitError::Unsupported(format!(
2108 "bitmap index version {version}"
2109 )));
2110 }
2111 let options = u16_be(&bytes[6..8]);
2112 let known_options = Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE;
2113 if options & !known_options != 0 {
2114 return Err(GitError::Unsupported(format!(
2115 "bitmap index options {:#06x}",
2116 options & !known_options
2117 )));
2118 }
2119 let entry_count = u32_be(&bytes[8..12]) as usize;
2120 let checksum_offset = bytes.len() - hash_len;
2121 let actual_index_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
2122 let index_checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
2123 if actual_index_checksum != index_checksum {
2124 return Err(GitError::InvalidFormat(format!(
2125 "bitmap index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2126 )));
2127 }
2128
2129 let pack_checksum_end = 12usize
2130 .checked_add(hash_len)
2131 .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
2132 let pack_checksum = ObjectId::from_raw(format, &bytes[12..pack_checksum_end])?;
2133 let mut offset = pack_checksum_end;
2134 let commits = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2135 let trees = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2136 let blobs = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2137 let tags = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2138
2139 let mut entries = Vec::with_capacity(entry_count);
2140 for idx in 0..entry_count {
2141 if checksum_offset.saturating_sub(offset) < 6 {
2142 return Err(GitError::InvalidFormat(
2143 "truncated bitmap index entry".into(),
2144 ));
2145 }
2146 let object_position = u32_be(&bytes[offset..offset + 4]);
2147 offset += 4;
2148 if object_position as usize >= object_count {
2149 return Err(GitError::InvalidFormat(
2150 "bitmap index entry points past object table".into(),
2151 ));
2152 }
2153 let xor_offset = bytes[offset];
2154 offset += 1;
2155 if xor_offset as usize > idx || xor_offset > 160 {
2156 return Err(GitError::InvalidFormat(
2157 "bitmap index entry has invalid XOR offset".into(),
2158 ));
2159 }
2160 let flags = bytes[offset];
2161 offset += 1;
2162 let bitmap = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2163 entries.push(PackBitmapEntry {
2164 object_position,
2165 xor_offset,
2166 flags,
2167 bitmap,
2168 });
2169 }
2170
2171 let name_hash_cache = if options & Self::OPTION_HASH_CACHE != 0 {
2172 let cache_len = object_count
2173 .checked_mul(4)
2174 .ok_or_else(|| GitError::InvalidFormat("bitmap hash cache overflow".into()))?;
2175 if checksum_offset.saturating_sub(offset) < cache_len {
2176 return Err(GitError::InvalidFormat(
2177 "truncated bitmap hash cache".into(),
2178 ));
2179 }
2180 let mut cache = Vec::with_capacity(object_count);
2181 for _ in 0..object_count {
2182 cache.push(u32_be(&bytes[offset..offset + 4]));
2183 offset += 4;
2184 }
2185 Some(cache)
2186 } else {
2187 None
2188 };
2189
2190 if offset != checksum_offset {
2191 return Err(GitError::InvalidFormat(format!(
2192 "bitmap index has {} trailing bytes",
2193 checksum_offset - offset
2194 )));
2195 }
2196
2197 Ok(Self {
2198 version,
2199 format,
2200 options,
2201 pack_checksum,
2202 index_checksum,
2203 type_bitmaps: PackBitmapTypeBitmaps {
2204 commits,
2205 trees,
2206 blobs,
2207 tags,
2208 },
2209 entries,
2210 name_hash_cache,
2211 })
2212 }
2213
2214 pub fn entry_for_index_position(&self, position: u32) -> Option<&PackBitmapEntry> {
2217 self.entries
2218 .iter()
2219 .find(|entry| entry.object_position == position)
2220 }
2221}
2222
2223fn parse_bitmap_ewah(
2224 bytes: &[u8],
2225 offset: &mut usize,
2226 checksum_offset: usize,
2227 _object_count: usize,
2228) -> Result<EwahBitmap> {
2229 if checksum_offset.saturating_sub(*offset) < 12 {
2230 return Err(GitError::InvalidFormat("truncated EWAH bitmap".into()));
2231 }
2232 let bit_size = u32_be(&bytes[*offset..*offset + 4]);
2233 *offset += 4;
2234 let word_count = u32_be(&bytes[*offset..*offset + 4]) as usize;
2235 *offset += 4;
2236 let words_len = word_count
2237 .checked_mul(8)
2238 .ok_or_else(|| GitError::InvalidFormat("EWAH word table overflow".into()))?;
2239 if checksum_offset.saturating_sub(*offset) < words_len + 4 {
2240 return Err(GitError::InvalidFormat("truncated EWAH word table".into()));
2241 }
2242 let mut words = Vec::with_capacity(word_count);
2243 for _ in 0..word_count {
2244 words.push(u64_be(&bytes[*offset..*offset + 8]));
2245 *offset += 8;
2246 }
2247 let rlw_position = u32_be(&bytes[*offset..*offset + 4]);
2248 *offset += 4;
2249 validate_ewah_words(bit_size, &words, rlw_position)?;
2250 Ok(EwahBitmap {
2251 bit_size,
2252 words,
2253 rlw_position,
2254 })
2255}
2256
2257fn validate_ewah_words(bit_size: u32, words: &[u64], rlw_position: u32) -> Result<()> {
2258 if words.is_empty() {
2259 if rlw_position != 0 || bit_size != 0 {
2260 return Err(GitError::InvalidFormat(
2261 "EWAH bitmap has invalid empty RLW".into(),
2262 ));
2263 }
2264 return Ok(());
2265 }
2266 if rlw_position as usize >= words.len() {
2267 return Err(GitError::InvalidFormat(
2268 "EWAH RLW position points past word table".into(),
2269 ));
2270 }
2271 let mut word_idx = 0usize;
2272 let mut decoded_words = 0u64;
2273 while word_idx < words.len() {
2274 let rlw = words[word_idx];
2275 let run_words = (rlw >> 1) & 0xffff_ffff;
2276 let literal_words = (rlw >> 33) as usize;
2277 word_idx += 1;
2278 word_idx = word_idx
2279 .checked_add(literal_words)
2280 .ok_or_else(|| GitError::InvalidFormat("EWAH literal word overflow".into()))?;
2281 if word_idx > words.len() {
2282 return Err(GitError::InvalidFormat(
2283 "EWAH literal words extend past word table".into(),
2284 ));
2285 }
2286 decoded_words = decoded_words
2287 .checked_add(run_words)
2288 .and_then(|value| value.checked_add(literal_words as u64))
2289 .ok_or_else(|| GitError::InvalidFormat("EWAH decoded size overflow".into()))?;
2290 }
2291 let decoded_bits = decoded_words
2292 .checked_mul(64)
2293 .ok_or_else(|| GitError::InvalidFormat("EWAH decoded bit size overflow".into()))?;
2294 if decoded_bits < u64::from(bit_size) {
2295 return Err(GitError::InvalidFormat(
2296 "EWAH bitmap decodes fewer bits than declared".into(),
2297 ));
2298 }
2299 Ok(())
2300}
2301
2302impl MultiPackIndex {
2303 pub fn write(
2304 format: ObjectFormat,
2305 version: u8,
2306 pack_names: &[String],
2307 objects: &[MultiPackIndexEntry],
2308 ) -> Result<Vec<u8>> {
2309 Self::write_with_reverse_index(format, version, pack_names, objects, None)
2310 }
2311
2312 pub fn write_with_reverse_index(
2321 format: ObjectFormat,
2322 version: u8,
2323 pack_names: &[String],
2324 objects: &[MultiPackIndexEntry],
2325 preferred_pack: Option<u32>,
2326 ) -> Result<Vec<u8>> {
2327 if let Some(preferred) = preferred_pack
2328 && preferred as usize >= pack_names.len()
2329 {
2330 return Err(GitError::InvalidFormat(format!(
2331 "preferred pack {preferred} out of range for {} packs",
2332 pack_names.len()
2333 )));
2334 }
2335 if version != 1 && version != 2 {
2336 return Err(GitError::Unsupported(format!(
2337 "multi-pack-index version {version}"
2338 )));
2339 }
2340 if pack_names.len() > u32::MAX as usize {
2341 return Err(GitError::InvalidFormat(
2342 "too many multi-pack-index packs".into(),
2343 ));
2344 }
2345 if objects.len() > u32::MAX as usize {
2346 return Err(GitError::InvalidFormat(
2347 "too many multi-pack-index objects".into(),
2348 ));
2349 }
2350 validate_midx_pack_names(pack_names)?;
2351 if version == 1 && pack_names.windows(2).any(|pair| pair[0] > pair[1]) {
2352 return Err(GitError::InvalidFormat(
2353 "multi-pack-index v1 pack names must be sorted".into(),
2354 ));
2355 }
2356
2357 let mut objects = objects.iter().collect::<Vec<_>>();
2358 objects.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
2359 let mut previous_oid: Option<&ObjectId> = None;
2360 for object in &objects {
2361 if object.oid.format() != format {
2362 return Err(GitError::InvalidObjectId(
2363 "multi-pack-index object format does not match index format".into(),
2364 ));
2365 }
2366 if let Some(previous) = previous_oid
2367 && previous.as_bytes() == object.oid.as_bytes()
2368 {
2369 return Err(GitError::InvalidFormat(
2370 "multi-pack-index contains duplicate object ids".into(),
2371 ));
2372 }
2373 if object.pack_int_id as usize >= pack_names.len() {
2374 return Err(GitError::InvalidFormat(
2375 "multi-pack-index object points past pack table".into(),
2376 ));
2377 }
2378 previous_oid = Some(&object.oid);
2379 }
2380
2381 let mut large_offsets = Vec::new();
2382 let mut chunks = vec![
2383 (*b"PNAM", write_midx_pack_names(pack_names)),
2384 (*b"OIDF", write_midx_oid_fanout(&objects)?),
2385 (*b"OIDL", write_midx_oid_lookup(&objects)),
2386 (
2387 *b"OOFF",
2388 write_midx_object_offsets(&objects, &mut large_offsets)?,
2389 ),
2390 ];
2391 if !large_offsets.is_empty() {
2392 chunks.push((*b"LOFF", large_offsets));
2393 }
2394 if let Some(preferred) = preferred_pack {
2395 let mut pseudo: Vec<u32> = (0..objects.len() as u32).collect();
2398 pseudo.sort_by_key(|&midx_pos| {
2399 let object = objects[midx_pos as usize];
2400 (
2401 object.pack_int_id != preferred,
2402 object.pack_int_id,
2403 object.offset,
2404 )
2405 });
2406 let mut ridx = Vec::with_capacity(pseudo.len() * 4);
2407 for midx_pos in pseudo {
2408 ridx.extend_from_slice(&midx_pos.to_be_bytes());
2409 }
2410 chunks.push((*b"RIDX", ridx));
2411 }
2412 write_multi_pack_index_chunks(format, version, pack_names.len() as u32, &chunks)
2413 }
2414
2415 pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
2416 let hash_len = format.raw_len();
2417 if bytes.len() < 12 + 12 + hash_len {
2418 return Err(GitError::InvalidFormat(
2419 "multi-pack-index file too short".into(),
2420 ));
2421 }
2422 if &bytes[..4] != b"MIDX" {
2423 return Err(GitError::InvalidFormat(
2424 "missing multi-pack-index signature".into(),
2425 ));
2426 }
2427 let version = bytes[4];
2428 if version != 1 && version != 2 {
2429 return Err(GitError::Unsupported(format!(
2430 "multi-pack-index version {version}"
2431 )));
2432 }
2433 let hash_id = bytes[5];
2434 if u32::from(hash_id) != hash_function_id(format) {
2435 return Err(GitError::InvalidFormat(format!(
2436 "multi-pack-index hash id {hash_id} does not match {}",
2437 format.name()
2438 )));
2439 }
2440 let chunk_count = bytes[6] as usize;
2441 let base_midx_count = bytes[7];
2442 if base_midx_count != 0 {
2443 return Err(GitError::Unsupported(format!(
2444 "multi-pack-index base count {base_midx_count}"
2445 )));
2446 }
2447 let pack_count = u32_be(&bytes[8..12]);
2448 let lookup_len = (chunk_count + 1)
2449 .checked_mul(12)
2450 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2451 let data_start = 12usize
2452 .checked_add(lookup_len)
2453 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2454 let checksum_offset = bytes.len() - hash_len;
2455 if data_start > checksum_offset {
2456 return Err(GitError::InvalidFormat(
2457 "truncated multi-pack-index chunk lookup".into(),
2458 ));
2459 }
2460
2461 let actual_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
2462 let checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
2463 if actual_checksum != checksum {
2464 return Err(GitError::InvalidFormat(format!(
2465 "multi-pack-index checksum mismatch: expected {checksum}, got {actual_checksum}"
2466 )));
2467 }
2468
2469 let mut entries = Vec::with_capacity(chunk_count + 1);
2470 let mut offset = 12usize;
2471 for _ in 0..=chunk_count {
2472 let id = [
2473 bytes[offset],
2474 bytes[offset + 1],
2475 bytes[offset + 2],
2476 bytes[offset + 3],
2477 ];
2478 let chunk_offset = u64_be(&bytes[offset + 4..offset + 12]);
2479 entries.push((id, chunk_offset));
2480 offset += 12;
2481 }
2482 let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
2483 return Err(GitError::InvalidFormat(
2484 "multi-pack-index chunk lookup is empty".into(),
2485 ));
2486 };
2487 if terminator_id != [0, 0, 0, 0] {
2488 return Err(GitError::InvalidFormat(
2489 "multi-pack-index chunk lookup missing terminator".into(),
2490 ));
2491 }
2492 if terminator_offset != checksum_offset as u64 {
2493 return Err(GitError::InvalidFormat(
2494 "multi-pack-index terminator does not point at checksum".into(),
2495 ));
2496 }
2497
2498 let mut chunks = Vec::with_capacity(chunk_count);
2499 let mut previous_offset = data_start as u64;
2500 for pair in entries.windows(2) {
2501 let (id, chunk_offset) = pair[0];
2502 let (_next_id, next_offset) = pair[1];
2503 if id == [0, 0, 0, 0] {
2504 return Err(GitError::InvalidFormat(
2505 "multi-pack-index chunk id is zero before terminator".into(),
2506 ));
2507 }
2508 if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
2509 return Err(GitError::InvalidFormat(
2510 "multi-pack-index chunk offsets are not monotonic".into(),
2511 ));
2512 }
2513 if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
2514 return Err(GitError::InvalidFormat(
2515 "multi-pack-index chunk length is invalid".into(),
2516 ));
2517 }
2518 chunks.push(MultiPackIndexChunk {
2519 id,
2520 offset: chunk_offset,
2521 len: next_offset - chunk_offset,
2522 });
2523 previous_offset = chunk_offset;
2524 }
2525
2526 let pack_names = parse_midx_pack_names(bytes, &chunks, pack_count as usize, version)?;
2527 let (fanout, object_count) = parse_midx_oid_fanout(bytes, &chunks)?;
2528 let object_ids = parse_midx_object_ids(bytes, &chunks, format, object_count, &fanout)?;
2529 let objects = parse_midx_object_offsets(bytes, &chunks, object_ids, pack_count)?;
2530 let reverse_index = parse_midx_reverse_index(bytes, &chunks, object_count)?;
2531 let bitmapped_packs =
2532 parse_midx_bitmapped_packs(bytes, &chunks, pack_count as usize, object_count)?;
2533
2534 Ok(Self {
2535 version,
2536 format,
2537 pack_count,
2538 pack_names,
2539 object_count: object_count as u32,
2540 fanout,
2541 objects,
2542 reverse_index,
2543 bitmapped_packs,
2544 chunks,
2545 checksum,
2546 })
2547 }
2548
2549 pub fn find(&self, oid: &ObjectId) -> Option<&MultiPackIndexEntry> {
2550 self.objects
2551 .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
2552 .ok()
2553 .map(|idx| &self.objects[idx])
2554 }
2555}
2556
2557impl MultiPackIndexOidLookup {
2558 pub fn parse(bytes: Arc<Vec<u8>>, format: ObjectFormat) -> Result<Self> {
2559 let hash_len = format.raw_len();
2560 if bytes.len() < 12 + 12 + hash_len {
2561 return Err(GitError::InvalidFormat(
2562 "multi-pack-index file too short".into(),
2563 ));
2564 }
2565 if &bytes[..4] != b"MIDX" {
2566 return Err(GitError::InvalidFormat(
2567 "missing multi-pack-index signature".into(),
2568 ));
2569 }
2570 let version = bytes[4];
2571 if version != 1 && version != 2 {
2572 return Err(GitError::Unsupported(format!(
2573 "multi-pack-index version {version}"
2574 )));
2575 }
2576 let hash_id = bytes[5];
2577 if u32::from(hash_id) != hash_function_id(format) {
2578 return Err(GitError::InvalidFormat(format!(
2579 "multi-pack-index hash id {hash_id} does not match {}",
2580 format.name()
2581 )));
2582 }
2583 let chunk_count = bytes[6] as usize;
2584 let base_midx_count = bytes[7];
2585 if base_midx_count != 0 {
2586 return Err(GitError::Unsupported(format!(
2587 "multi-pack-index base count {base_midx_count}"
2588 )));
2589 }
2590 let pack_count = u32_be(&bytes[8..12]);
2591 let lookup_len = (chunk_count + 1)
2592 .checked_mul(12)
2593 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2594 let data_start = 12usize
2595 .checked_add(lookup_len)
2596 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2597 let checksum_offset = bytes.len() - hash_len;
2598 if data_start > checksum_offset {
2599 return Err(GitError::InvalidFormat(
2600 "truncated multi-pack-index chunk lookup".into(),
2601 ));
2602 }
2603
2604 let mut entries = Vec::with_capacity(chunk_count + 1);
2605 let mut offset = 12usize;
2606 for _ in 0..=chunk_count {
2607 let id = [
2608 bytes[offset],
2609 bytes[offset + 1],
2610 bytes[offset + 2],
2611 bytes[offset + 3],
2612 ];
2613 let chunk_offset = u64_be(&bytes[offset + 4..offset + 12]);
2614 entries.push((id, chunk_offset));
2615 offset += 12;
2616 }
2617 let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
2618 return Err(GitError::InvalidFormat(
2619 "multi-pack-index chunk lookup is empty".into(),
2620 ));
2621 };
2622 if terminator_id != [0, 0, 0, 0] {
2623 return Err(GitError::InvalidFormat(
2624 "multi-pack-index chunk lookup missing terminator".into(),
2625 ));
2626 }
2627 if terminator_offset != checksum_offset as u64 {
2628 return Err(GitError::InvalidFormat(
2629 "multi-pack-index terminator does not point at checksum".into(),
2630 ));
2631 }
2632
2633 let mut chunks = Vec::with_capacity(chunk_count);
2634 let mut previous_offset = data_start as u64;
2635 for pair in entries.windows(2) {
2636 let (id, chunk_offset) = pair[0];
2637 let (_next_id, next_offset) = pair[1];
2638 if id == [0, 0, 0, 0] {
2639 return Err(GitError::InvalidFormat(
2640 "multi-pack-index chunk id is zero before terminator".into(),
2641 ));
2642 }
2643 if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
2644 return Err(GitError::InvalidFormat(
2645 "multi-pack-index chunk offsets are not monotonic".into(),
2646 ));
2647 }
2648 if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
2649 return Err(GitError::InvalidFormat(
2650 "multi-pack-index chunk length is invalid".into(),
2651 ));
2652 }
2653 chunks.push(MultiPackIndexChunk {
2654 id,
2655 offset: chunk_offset,
2656 len: next_offset - chunk_offset,
2657 });
2658 previous_offset = chunk_offset;
2659 }
2660
2661 let pack_names = parse_midx_pack_names(&bytes, &chunks, pack_count as usize, version)?;
2662 let (fanout, object_count) = parse_midx_oid_fanout(&bytes, &chunks)?;
2663 let oid_lookup = midx_chunk_data(&bytes, &chunks, *b"OIDL", true)?
2664 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
2665 let expected_len = object_count.checked_mul(hash_len).ok_or_else(|| {
2666 GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into())
2667 })?;
2668 if oid_lookup.len() != expected_len {
2669 return Err(GitError::InvalidFormat(
2670 "multi-pack-index OIDL chunk has invalid length".into(),
2671 ));
2672 }
2673 let object_offsets = midx_chunk_data(&bytes, &chunks, *b"OOFF", true)?
2674 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
2675 let expected_offsets_len = object_count.checked_mul(8).ok_or_else(|| {
2676 GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into())
2677 })?;
2678 if object_offsets.len() != expected_offsets_len {
2679 return Err(GitError::InvalidFormat(
2680 "multi-pack-index OOFF chunk has invalid length".into(),
2681 ));
2682 }
2683 let large_offsets = midx_chunk_data(&bytes, &chunks, *b"LOFF", false)?;
2684 if let Some(large_offsets) = large_offsets
2685 && large_offsets.len() % 8 != 0
2686 {
2687 return Err(GitError::InvalidFormat(
2688 "multi-pack-index LOFF chunk has invalid length".into(),
2689 ));
2690 }
2691 let oid_lookup_offset = oid_lookup.as_ptr() as usize - bytes.as_ptr() as usize;
2692 let object_offsets_offset = object_offsets.as_ptr() as usize - bytes.as_ptr() as usize;
2693 let (large_offsets_offset, large_offsets_len) = match large_offsets {
2694 Some(large_offsets) => (
2695 Some(large_offsets.as_ptr() as usize - bytes.as_ptr() as usize),
2696 large_offsets.len(),
2697 ),
2698 None => (None, 0),
2699 };
2700 Ok(Self {
2701 format,
2702 pack_count,
2703 pack_names,
2704 fanout,
2705 object_count,
2706 oid_lookup_offset,
2707 object_offsets_offset,
2708 large_offsets_offset,
2709 large_offsets_len,
2710 bytes,
2711 })
2712 }
2713
2714 pub fn contains(&self, oid: &ObjectId) -> bool {
2715 self.find_position(oid).is_some()
2716 }
2717
2718 pub fn find(&self, oid: &ObjectId) -> Result<Option<MultiPackIndexEntry>> {
2719 let Some(position) = self.find_position(oid) else {
2720 return Ok(None);
2721 };
2722 let hash_len = self.format.raw_len();
2723 let oid_start = self
2724 .oid_lookup_offset
2725 .checked_add(position * hash_len)
2726 .ok_or_else(|| {
2727 GitError::InvalidFormat("multi-pack-index OIDL offset overflow".into())
2728 })?;
2729 let oid = ObjectId::from_raw(self.format, &self.bytes[oid_start..oid_start + hash_len])?;
2730 let offset_start = self
2731 .object_offsets_offset
2732 .checked_add(position * 8)
2733 .ok_or_else(|| {
2734 GitError::InvalidFormat("multi-pack-index OOFF offset overflow".into())
2735 })?;
2736 let data = &self.bytes[offset_start..offset_start + 8];
2737 let pack_int_id = u32_be(&data[..4]);
2738 if pack_int_id >= self.pack_count {
2739 return Err(GitError::InvalidFormat(
2740 "multi-pack-index object points past pack table".into(),
2741 ));
2742 }
2743 let raw_offset = u32_be(&data[4..8]);
2744 let offset = if raw_offset & 0x8000_0000 == 0 {
2745 u64::from(raw_offset)
2746 } else {
2747 let Some(large_offsets_offset) = self.large_offsets_offset else {
2748 return Err(GitError::InvalidFormat(
2749 "multi-pack-index large offset missing LOFF chunk".into(),
2750 ));
2751 };
2752 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
2753 let large_start = large_idx.checked_mul(8).ok_or_else(|| {
2754 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
2755 })?;
2756 let large_end = large_start.checked_add(8).ok_or_else(|| {
2757 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
2758 })?;
2759 if large_end > self.large_offsets_len {
2760 return Err(GitError::InvalidFormat(
2761 "multi-pack-index large offset points past LOFF chunk".into(),
2762 ));
2763 }
2764 let start = large_offsets_offset + large_start;
2765 u64_be(&self.bytes[start..start + 8])
2766 };
2767 Ok(Some(MultiPackIndexEntry {
2768 oid,
2769 pack_int_id,
2770 offset,
2771 }))
2772 }
2773
2774 pub fn pack_name(&self, pack_int_id: u32) -> Option<&str> {
2775 self.pack_names
2776 .get(pack_int_id as usize)
2777 .map(String::as_str)
2778 }
2779
2780 fn find_position(&self, oid: &ObjectId) -> Option<usize> {
2781 if oid.format() != self.format || self.object_count == 0 {
2782 return None;
2783 }
2784 let first = oid.as_bytes()[0] as usize;
2785 let start = if first == 0 {
2786 0
2787 } else {
2788 self.fanout[first - 1] as usize
2789 };
2790 let end = self.fanout[first] as usize;
2791 if start >= end || end > self.object_count {
2792 return None;
2793 }
2794 let hash_len = self.format.raw_len();
2795 let table_start = self.oid_lookup_offset;
2796 let table_end = table_start + self.object_count * hash_len;
2797 let table = &self.bytes[table_start..table_end];
2798 let needle = oid.as_bytes();
2799 let mut low = start;
2800 let mut high = end;
2801 while low < high {
2802 let mid = low + (high - low) / 2;
2803 let raw = &table[mid * hash_len..(mid + 1) * hash_len];
2804 match raw.cmp(needle) {
2805 std::cmp::Ordering::Less => low = mid + 1,
2806 std::cmp::Ordering::Equal => return Some(mid),
2807 std::cmp::Ordering::Greater => high = mid,
2808 }
2809 }
2810 None
2811 }
2812}
2813
2814fn validate_midx_pack_names(pack_names: &[String]) -> Result<()> {
2815 for name in pack_names {
2816 if name.is_empty() {
2817 return Err(GitError::InvalidFormat(
2818 "multi-pack-index pack name is empty".into(),
2819 ));
2820 }
2821 if name
2822 .bytes()
2823 .any(|byte| byte == 0 || matches!(byte, b'/' | b'\\'))
2824 {
2825 return Err(GitError::InvalidFormat(
2826 "multi-pack-index pack name contains an invalid byte".into(),
2827 ));
2828 }
2829 }
2830 Ok(())
2831}
2832
/// Builds the `PNAM` chunk: every pack name followed by a NUL byte, with the
/// whole chunk zero-padded up to a multiple of four bytes.
fn write_midx_pack_names(pack_names: &[String]) -> Vec<u8> {
    let mut chunk: Vec<u8> = pack_names
        .iter()
        .flat_map(|name| name.bytes().chain(std::iter::once(0u8)))
        .collect();
    let padding = (4 - chunk.len() % 4) % 4;
    let padded_len = chunk.len() + padding;
    chunk.resize(padded_len, 0);
    chunk
}
2844
2845fn write_midx_oid_fanout(objects: &[&MultiPackIndexEntry]) -> Result<Vec<u8>> {
2846 let mut counts = [0u32; 256];
2847 for object in objects {
2848 let first = object.oid.as_bytes()[0] as usize;
2849 counts[first] = counts[first]
2850 .checked_add(1)
2851 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
2852 }
2853 let mut running = 0u32;
2854 let mut out = Vec::with_capacity(256 * 4);
2855 for count in counts {
2856 running = running
2857 .checked_add(count)
2858 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
2859 out.extend_from_slice(&running.to_be_bytes());
2860 }
2861 Ok(out)
2862}
2863
2864fn write_midx_oid_lookup(objects: &[&MultiPackIndexEntry]) -> Vec<u8> {
2865 let mut out = Vec::new();
2866 for object in objects {
2867 out.extend_from_slice(object.oid.as_bytes());
2868 }
2869 out
2870}
2871
2872fn write_midx_object_offsets(
2873 objects: &[&MultiPackIndexEntry],
2874 large_offsets: &mut Vec<u8>,
2875) -> Result<Vec<u8>> {
2876 let mut out = Vec::new();
2877 for object in objects {
2878 out.extend_from_slice(&object.pack_int_id.to_be_bytes());
2879 if object.offset < 0x8000_0000 {
2880 out.extend_from_slice(&(object.offset as u32).to_be_bytes());
2881 } else {
2882 let large_idx = large_offsets.len() / 8;
2883 if large_idx > 0x7fff_ffff {
2884 return Err(GitError::InvalidFormat(
2885 "too many multi-pack-index large offsets".into(),
2886 ));
2887 }
2888 out.extend_from_slice(&(0x8000_0000 | large_idx as u32).to_be_bytes());
2889 large_offsets.extend_from_slice(&object.offset.to_be_bytes());
2890 }
2891 }
2892 Ok(out)
2893}
2894
2895fn write_multi_pack_index_chunks(
2896 format: ObjectFormat,
2897 version: u8,
2898 pack_count: u32,
2899 chunks: &[([u8; 4], Vec<u8>)],
2900) -> Result<Vec<u8>> {
2901 if chunks.len() > u8::MAX as usize {
2902 return Err(GitError::InvalidFormat(
2903 "too many multi-pack-index chunks".into(),
2904 ));
2905 }
2906 let lookup_len = (chunks.len() + 1)
2907 .checked_mul(12)
2908 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2909 let mut out = Vec::new();
2910 out.extend_from_slice(b"MIDX");
2911 out.push(version);
2912 out.push(hash_function_id(format) as u8);
2913 out.push(chunks.len() as u8);
2914 out.push(0);
2915 out.extend_from_slice(&pack_count.to_be_bytes());
2916 let mut chunk_offset = (12usize)
2917 .checked_add(lookup_len)
2918 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?
2919 as u64;
2920 for (id, data) in chunks {
2921 out.extend_from_slice(id);
2922 out.extend_from_slice(&chunk_offset.to_be_bytes());
2923 chunk_offset = chunk_offset
2924 .checked_add(data.len() as u64)
2925 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index size overflow".into()))?;
2926 }
2927 out.extend_from_slice(&[0, 0, 0, 0]);
2928 out.extend_from_slice(&chunk_offset.to_be_bytes());
2929 for (_id, data) in chunks {
2930 out.extend_from_slice(data);
2931 }
2932 let checksum = sley_core::digest_bytes(format, &out)?;
2933 out.extend_from_slice(checksum.as_bytes());
2934 Ok(out)
2935}
2936
/// Decoded leading header of a single pack entry, as produced by
/// `parse_entry_header`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct EntryHeader {
    /// Entry type taken from the header's type bits (plain object or delta).
    kind: PackObjectKind,
    /// Size declared by the header; readers compare it against the length of
    /// the inflated entry body.
    size: u64,
}
2942
/// Cache of fully decoded pack objects keyed by their offset in the pack.
///
/// `read_object_at_with_cache_arc` consults the cache before decoding an entry
/// (including every ofs-delta base it walks through) and stores each object it
/// resolves. Both methods take `&self`, so an implementation that actually
/// stores anything needs interior mutability.
pub trait PackDeltaCache {
    /// Returns the object previously stored for `offset`, if any.
    fn get(&self, offset: u64) -> Option<Arc<EncodedObject>>;
    /// Records `object` as the decoded result for the entry at `offset`.
    fn insert(&self, offset: u64, object: Arc<EncodedObject>);
}
2962
/// `PackDeltaCache` that stores nothing; used by `read_object_at_arc` when the
/// caller did not supply a cache.
struct NoopDeltaCache;

impl PackDeltaCache for NoopDeltaCache {
    /// Always misses.
    fn get(&self, _offset: u64) -> Option<Arc<EncodedObject>> {
        None
    }
    /// Discards the object.
    fn insert(&self, _offset: u64, _object: Arc<EncodedObject>) {}
}
2973
// Per-thread zlib decompressor shared by `inflate_into` and `inflate_prefix`.
// Both reset it before use, so no state carries over between streams.
// `Decompress::new(true)` makes it expect a zlib header.
thread_local! {
    static INFLATE: RefCell<flate2::Decompress> = RefCell::new(flate2::Decompress::new(true));
}
2982
/// Multiplier applied to the compressed length in `bounded_inflate_reserve` to
/// cap how much output is reserved up front.
// NOTE(review): 1032 presumably reflects zlib's documented maximum expansion
// ratio of roughly 1032:1 — confirm.
const MAX_INFLATE_EXPANSION: usize = 1032;
2994
/// Upper bound (64 MiB) on the capacity `bounded_inflate_reserve` asks for
/// before any data has been inflated; the output buffer can still grow beyond
/// it while inflating.
const MAX_INFLATE_RESERVE: usize = 64 * 1024 * 1024;
3001
3002fn bounded_inflate_reserve(size_hint: usize, compressed_len: usize) -> usize {
3010 let input_ceiling = compressed_len.saturating_mul(MAX_INFLATE_EXPANSION);
3011 size_hint.min(input_ceiling).clamp(64, MAX_INFLATE_RESERVE)
3013}
3014
3015fn inflate_into(compressed: &[u8], out: &mut Vec<u8>, size_hint: usize) -> Result<usize> {
3024 INFLATE.with(|cell| {
3025 let mut decompress = cell.borrow_mut();
3026 decompress.reset(true);
3027 out.reserve(bounded_inflate_reserve(size_hint, compressed.len()));
3028 let mut input = compressed;
3029 let mut consumed_total = 0usize;
3030 loop {
3031 if out.len() == out.capacity() {
3034 out.reserve(out.len().max(64));
3035 }
3036 let before_in = decompress.total_in();
3037 let before_out = decompress.total_out();
3038 let status = decompress
3039 .decompress_vec(input, out, flate2::FlushDecompress::None)
3040 .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
3041 let consumed = (decompress.total_in() - before_in) as usize;
3042 let produced = decompress.total_out() - before_out;
3043 input = &input[consumed..];
3044 consumed_total += consumed;
3045 match status {
3046 flate2::Status::StreamEnd => return Ok(consumed_total),
3047 _ if consumed == 0 && produced == 0 => {
3048 return Err(GitError::InvalidObject("truncated zlib stream".into()));
3049 }
3050 _ => {}
3051 }
3052 }
3053 })
3054}
3055
3056fn inflate_prefix(compressed: &[u8], max_out: usize, out: &mut Vec<u8>) -> Result<()> {
3060 INFLATE.with(|cell| {
3061 let mut decompress = cell.borrow_mut();
3062 decompress.reset(true);
3063 out.reserve(max_out.max(16));
3064 let mut input = compressed;
3065 while out.len() < max_out {
3066 if out.len() == out.capacity() {
3067 out.reserve(out.len().max(16));
3068 }
3069 let before_in = decompress.total_in();
3070 let before_out = decompress.total_out();
3071 let status = decompress
3072 .decompress_vec(input, out, flate2::FlushDecompress::None)
3073 .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
3074 let consumed = (decompress.total_in() - before_in) as usize;
3075 let produced = decompress.total_out() - before_out;
3076 input = &input[consumed..];
3077 if status == flate2::Status::StreamEnd || (consumed == 0 && produced == 0) {
3078 break;
3079 }
3080 }
3081 Ok(())
3082 })
3083}
3084
3085pub fn read_object_at_arc<F>(
3093 pack_bytes: &[u8],
3094 offset: u64,
3095 format: ObjectFormat,
3096 resolve_ref_base: F,
3097) -> Result<Arc<EncodedObject>>
3098where
3099 F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
3100{
3101 read_object_at_with_cache_arc(
3102 pack_bytes,
3103 offset,
3104 format,
3105 resolve_ref_base,
3106 &NoopDeltaCache,
3107 )
3108}
3109
3110pub fn read_object_at_with_cache_arc<F, C>(
3119 pack_bytes: &[u8],
3120 offset: u64,
3121 format: ObjectFormat,
3122 mut resolve_ref_base: F,
3123 cache: &C,
3124) -> Result<Arc<EncodedObject>>
3125where
3126 F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
3127 C: PackDeltaCache + ?Sized,
3128{
3129 read_object_at_inner(pack_bytes, offset, format, &mut resolve_ref_base, cache)
3130}
3131
3132fn read_object_at_inner<F, C>(
3133 pack_bytes: &[u8],
3134 offset: u64,
3135 format: ObjectFormat,
3136 resolve_ref_base: &mut F,
3137 cache: &C,
3138) -> Result<Arc<EncodedObject>>
3139where
3140 F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
3141 C: PackDeltaCache + ?Sized,
3142{
3143 if let Some(object) = cache.get(offset) {
3146 return Ok(object);
3147 }
3148 let trailer_offset = pack_bytes
3149 .len()
3150 .checked_sub(format.raw_len())
3151 .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
3152 let mut cursor = usize::try_from(offset)
3153 .ok()
3154 .filter(|&value| value < trailer_offset)
3155 .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
3156 let header = parse_entry_header(pack_bytes, &mut cursor)?;
3157 let base = match header.kind {
3158 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
3159 pack_bytes,
3160 &mut cursor,
3161 offset,
3162 )?)),
3163 PackObjectKind::RefDelta => {
3164 let hash_len = format.raw_len();
3165 if cursor + hash_len > trailer_offset {
3166 return Err(GitError::InvalidFormat(
3167 "truncated ref-delta base object id".into(),
3168 ));
3169 }
3170 let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
3171 cursor += hash_len;
3172 Some(DeltaBase::Ref(oid))
3173 }
3174 _ => None,
3175 };
3176 let mut body = Vec::new();
3177 inflate_into(
3178 &pack_bytes[cursor..trailer_offset],
3179 &mut body,
3180 header.size.min(usize::MAX as u64) as usize,
3181 )?;
3182 if body.len() as u64 != header.size {
3183 return Err(GitError::InvalidObject(format!(
3184 "pack object declared {} bytes, decoded {}",
3185 header.size,
3186 body.len()
3187 )));
3188 }
3189 let object = match base {
3190 None => {
3191 let object_type = match header.kind {
3192 PackObjectKind::Commit => ObjectType::Commit,
3193 PackObjectKind::Tree => ObjectType::Tree,
3194 PackObjectKind::Blob => ObjectType::Blob,
3195 PackObjectKind::Tag => ObjectType::Tag,
3196 PackObjectKind::OfsDelta | PackObjectKind::RefDelta => {
3197 return Err(GitError::InvalidFormat(
3198 "delta pack entry decoded without a base".into(),
3199 ));
3200 }
3201 };
3202 Arc::new(EncodedObject::new(object_type, body))
3203 }
3204 Some(DeltaBase::Offset(base_offset)) => {
3205 let base =
3206 read_object_at_inner(pack_bytes, base_offset, format, resolve_ref_base, cache)?;
3207 let resolved = apply_pack_delta(&base.body, &body)?;
3208 Arc::new(EncodedObject::new(base.object_type, resolved))
3209 }
3210 Some(DeltaBase::Ref(base_oid)) => {
3211 let base = resolve_ref_base(&base_oid)?
3212 .ok_or_else(|| GitError::not_found(format!("ref-delta base object {base_oid}")))?;
3213 let resolved = apply_pack_delta(&base.body, &body)?;
3214 Arc::new(EncodedObject::new(base.object_type, resolved))
3215 }
3216 };
3217 cache.insert(offset, Arc::clone(&object));
3221 Ok(object)
3222}
3223
3224pub fn read_object_header_at<F>(
3234 pack_bytes: &[u8],
3235 offset: u64,
3236 format: ObjectFormat,
3237 mut resolve_ref_base_type: F,
3238) -> Result<(ObjectType, u64)>
3239where
3240 F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
3241{
3242 read_object_header_at_inner(
3243 pack_bytes,
3244 offset,
3245 format,
3246 &mut resolve_ref_base_type,
3247 &mut NoopHeaderTypeCache,
3248 )
3249}
3250
/// Cache of resolved object headers (type and size) keyed by pack offset.
///
/// `read_object_header_at_with_cache` checks it before parsing an entry, looks
/// up ofs-delta bases in it while walking a delta chain, and records every
/// header it resolves.
pub trait HeaderTypeCache {
    /// Returns the header previously recorded for `pack_offset`, if any.
    fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)>;
    /// Records the resolved `header` for the entry at `pack_offset`.
    fn put(&mut self, pack_offset: u64, header: (ObjectType, u64));
}
3273
/// `HeaderTypeCache` that stores nothing; used by `read_object_header_at`.
struct NoopHeaderTypeCache;

impl HeaderTypeCache for NoopHeaderTypeCache {
    /// Always misses.
    fn get(&self, _pack_offset: u64) -> Option<(ObjectType, u64)> {
        None
    }
    /// Discards the header.
    fn put(&mut self, _pack_offset: u64, _header: (ObjectType, u64)) {}
}
3282
3283pub fn read_object_header_at_with_cache<F, C>(
3289 pack_bytes: &[u8],
3290 offset: u64,
3291 format: ObjectFormat,
3292 mut resolve_ref_base_type: F,
3293 type_cache: &mut C,
3294) -> Result<(ObjectType, u64)>
3295where
3296 F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
3297 C: HeaderTypeCache + ?Sized,
3298{
3299 if let Some(header) = type_cache.get(offset) {
3300 return Ok(header);
3301 }
3302 read_object_header_at_inner(
3303 pack_bytes,
3304 offset,
3305 format,
3306 &mut resolve_ref_base_type,
3307 type_cache,
3308 )
3309}
3310
3311fn read_object_header_at_inner<F, C>(
3312 pack_bytes: &[u8],
3313 offset: u64,
3314 format: ObjectFormat,
3315 resolve_ref_base_type: &mut F,
3316 type_cache: &mut C,
3317) -> Result<(ObjectType, u64)>
3318where
3319 F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
3320 C: HeaderTypeCache + ?Sized,
3321{
3322 let trailer_offset = pack_bytes
3323 .len()
3324 .checked_sub(format.raw_len())
3325 .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
3326 let mut cursor = usize::try_from(offset)
3327 .ok()
3328 .filter(|&value| value < trailer_offset)
3329 .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
3330 let header = parse_entry_header(pack_bytes, &mut cursor)?;
3331 let resolved = match header.kind {
3332 PackObjectKind::Commit => (ObjectType::Commit, header.size),
3333 PackObjectKind::Tree => (ObjectType::Tree, header.size),
3334 PackObjectKind::Blob => (ObjectType::Blob, header.size),
3335 PackObjectKind::Tag => (ObjectType::Tag, header.size),
3336 PackObjectKind::OfsDelta => {
3337 let base_offset = parse_ofs_delta_base_offset(pack_bytes, &mut cursor, offset)?;
3338 let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
3339 let base_type = match type_cache.get(base_offset) {
3342 Some((base_type, _)) => base_type,
3343 None => {
3344 let (base_type, _) = read_object_header_at_inner(
3345 pack_bytes,
3346 base_offset,
3347 format,
3348 resolve_ref_base_type,
3349 type_cache,
3350 )?;
3351 base_type
3352 }
3353 };
3354 (base_type, size)
3355 }
3356 PackObjectKind::RefDelta => {
3357 let hash_len = format.raw_len();
3358 if cursor + hash_len > trailer_offset {
3359 return Err(GitError::InvalidFormat(
3360 "truncated ref-delta base object id".into(),
3361 ));
3362 }
3363 let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
3364 cursor += hash_len;
3365 let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
3366 let base_type = resolve_ref_base_type(&oid)?
3367 .ok_or_else(|| GitError::not_found(format!("ref-delta base object {oid}")))?;
3368 (base_type, size)
3369 }
3370 };
3371 type_cache.put(offset, resolved);
3374 Ok(resolved)
3375}
3376
3377const DELTA_HEADER_PREFIX_LEN: usize = 32;
3381
3382fn delta_result_size_from_stream(compressed: &[u8]) -> Result<u64> {
3385 let mut prefix = Vec::new();
3386 inflate_prefix(compressed, DELTA_HEADER_PREFIX_LEN, &mut prefix)?;
3387 decoded_delta_result_size(&prefix)
3388}
3389
3390fn parse_entry_header(bytes: &[u8], offset: &mut usize) -> Result<EntryHeader> {
3391 let first = next_byte(bytes, offset)?;
3392 let mut size = u64::from(first & 0x0f);
3393 let kind = match (first >> 4) & 0x07 {
3394 1 => PackObjectKind::Commit,
3395 2 => PackObjectKind::Tree,
3396 3 => PackObjectKind::Blob,
3397 4 => PackObjectKind::Tag,
3398 6 => PackObjectKind::OfsDelta,
3399 7 => PackObjectKind::RefDelta,
3400 other => {
3401 return Err(GitError::InvalidFormat(format!(
3402 "invalid pack object type {other}"
3403 )));
3404 }
3405 };
3406 let mut shift = 4;
3407 let mut byte = first;
3408 while byte & 0x80 != 0 {
3409 byte = next_byte(bytes, offset)?;
3410 let part = u64::from(byte & 0x7f);
3411 size = size
3412 .checked_add(
3413 part.checked_shl(shift)
3414 .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?,
3415 )
3416 .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?;
3417 shift += 7;
3418 }
3419 Ok(EntryHeader { kind, size })
3420}
3421
3422fn parse_ofs_delta_base_offset(bytes: &[u8], offset: &mut usize, entry_offset: u64) -> Result<u64> {
3423 let mut byte = next_byte(bytes, offset)?;
3424 let mut relative = u64::from(byte & 0x7f);
3425 while byte & 0x80 != 0 {
3426 byte = next_byte(bytes, offset)?;
3427 relative = relative
3428 .checked_add(1)
3429 .and_then(|value| value.checked_shl(7))
3430 .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
3431 .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
3432 }
3433 entry_offset
3434 .checked_sub(relative)
3435 .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
3436}
3437
3438fn resolve_pack_entries<F>(
3439 parsed: Vec<ParsedPackEntry>,
3440 format: ObjectFormat,
3441 external_base: &mut F,
3442) -> Result<Vec<PackObject>>
3443where
3444 F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
3445{
3446 let mut offset_to_index = HashMap::with_capacity(parsed.len());
3447 for (idx, entry) in parsed.iter().enumerate() {
3448 offset_to_index.insert(parsed_entry_offset(entry), idx);
3449 }
3450
3451 let mut resolved = vec![None; parsed.len()];
3452 let mut oid_to_index = HashMap::new();
3453 let mut unresolved = 0usize;
3454 for (idx, entry) in parsed.iter().enumerate() {
3455 match entry {
3456 ParsedPackEntry::Resolved(object) => {
3457 oid_to_index.insert(object.entry.oid, idx);
3458 resolved[idx] = Some(object.clone());
3459 }
3460 ParsedPackEntry::Delta { .. } => unresolved += 1,
3461 }
3462 }
3463
3464 while unresolved != 0 {
3465 let mut progress = false;
3466 for idx in 0..parsed.len() {
3467 if resolved[idx].is_some() {
3468 continue;
3469 }
3470 let ParsedPackEntry::Delta {
3471 base,
3472 compressed_size,
3473 delta_size,
3474 offset,
3475 delta,
3476 } = &parsed[idx]
3477 else {
3478 continue;
3479 };
3480 let Some(base_object) = delta_base_object(
3481 base,
3482 &offset_to_index,
3483 &oid_to_index,
3484 &resolved,
3485 external_base,
3486 )?
3487 else {
3488 continue;
3489 };
3490 let body = apply_pack_delta(base_object.body(), delta)?;
3491 let object = EncodedObject::new(base_object.object_type(), body);
3492 let oid = object.object_id(format)?;
3493 let pack_object = PackObject {
3494 entry: PackEntry {
3495 oid,
3496 compressed_size: *compressed_size,
3497 uncompressed_size: object.body.len() as u64,
3498 offset: *offset,
3499 },
3500 object,
3501 };
3502 if pack_object.entry.uncompressed_size != decoded_delta_result_size(delta)? {
3503 return Err(GitError::InvalidObject(
3504 "resolved delta size does not match delta header".into(),
3505 ));
3506 }
3507 if *delta_size != delta.len() as u64 {
3508 return Err(GitError::InvalidObject(format!(
3509 "pack delta declared {delta_size} bytes, decoded {}",
3510 delta.len()
3511 )));
3512 }
3513 oid_to_index.insert(oid, idx);
3514 resolved[idx] = Some(pack_object);
3515 unresolved -= 1;
3516 progress = true;
3517 }
3518 if !progress {
3519 return Err(GitError::Unsupported("unresolved delta base".into()));
3520 }
3521 }
3522
3523 resolved
3524 .into_iter()
3525 .map(|entry| entry.ok_or_else(|| GitError::InvalidFormat("unresolved pack entry".into())))
3526 .collect()
3527}
3528
3529fn parsed_entry_offset(entry: &ParsedPackEntry) -> u64 {
3530 match entry {
3531 ParsedPackEntry::Resolved(object) => object.entry.offset,
3532 ParsedPackEntry::Delta { offset, .. } => *offset,
3533 }
3534}
3535
3536enum DeltaBaseObject<'a> {
3537 Borrowed(&'a EncodedObject),
3538 Owned(EncodedObject),
3539}
3540
3541impl DeltaBaseObject<'_> {
3542 fn object_type(&self) -> ObjectType {
3543 match self {
3544 Self::Borrowed(object) => object.object_type,
3545 Self::Owned(object) => object.object_type,
3546 }
3547 }
3548
3549 fn body(&self) -> &[u8] {
3550 match self {
3551 Self::Borrowed(object) => &object.body,
3552 Self::Owned(object) => &object.body,
3553 }
3554 }
3555}
3556
3557fn delta_base_object<'a, F>(
3558 base: &DeltaBase,
3559 offset_to_index: &HashMap<u64, usize>,
3560 oid_to_index: &HashMap<ObjectId, usize>,
3561 resolved: &'a [Option<PackObject>],
3562 external_base: &mut F,
3563) -> Result<Option<DeltaBaseObject<'a>>>
3564where
3565 F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
3566{
3567 match base {
3568 DeltaBase::Offset(offset) => {
3569 let Some(index) = offset_to_index.get(offset).copied() else {
3570 return Err(GitError::InvalidFormat(format!(
3571 "ofs-delta base offset {offset} not found"
3572 )));
3573 };
3574 Ok(resolved[index]
3575 .as_ref()
3576 .map(|object| DeltaBaseObject::Borrowed(&object.object)))
3577 }
3578 DeltaBase::Ref(oid) => {
3579 if let Some(index) = oid_to_index.get(oid).copied() {
3580 return Ok(resolved[index]
3581 .as_ref()
3582 .map(|object| DeltaBaseObject::Borrowed(&object.object)));
3583 }
3584 external_base(oid).map(|object| object.map(DeltaBaseObject::Owned))
3585 }
3586 }
3587}
3588
3589fn apply_pack_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
3590 let mut cursor = 0usize;
3591 let base_size = read_delta_varint(delta, &mut cursor)?;
3592 if base_size != base.len() as u64 {
3593 return Err(GitError::InvalidObject(format!(
3594 "delta base size mismatch: expected {base_size}, got {}",
3595 base.len()
3596 )));
3597 }
3598 let result_size = read_delta_varint(delta, &mut cursor)?;
3599 let result_size_hint = usize::try_from(result_size).unwrap_or(usize::MAX);
3608 let mut result = Vec::with_capacity(bounded_inflate_reserve(result_size_hint, delta.len()));
3609 while cursor < delta.len() {
3610 let command = delta[cursor];
3611 cursor += 1;
3612 if command & 0x80 != 0 {
3613 let copy_offset =
3614 read_delta_copy_value(delta, &mut cursor, command, &[0x01, 0x02, 0x04, 0x08])?;
3615 let mut copy_size =
3616 read_delta_copy_value(delta, &mut cursor, command, &[0x10, 0x20, 0x40])?;
3617 if copy_size == 0 {
3618 copy_size = 0x10000;
3619 }
3620 let start = usize::try_from(copy_offset)
3621 .map_err(|_| GitError::InvalidObject("delta copy offset overflows usize".into()))?;
3622 let len = usize::try_from(copy_size)
3623 .map_err(|_| GitError::InvalidObject("delta copy size overflows usize".into()))?;
3624 let end = start
3625 .checked_add(len)
3626 .ok_or_else(|| GitError::InvalidObject("delta copy range overflow".into()))?;
3627 let Some(slice) = base.get(start..end) else {
3628 return Err(GitError::InvalidObject(
3629 "delta copy range exceeds base object".into(),
3630 ));
3631 };
3632 result.extend_from_slice(slice);
3633 } else if command != 0 {
3634 let len = usize::from(command);
3635 let end = cursor
3636 .checked_add(len)
3637 .ok_or_else(|| GitError::InvalidObject("delta insert range overflow".into()))?;
3638 let Some(slice) = delta.get(cursor..end) else {
3639 return Err(GitError::InvalidObject(
3640 "delta insert range exceeds delta data".into(),
3641 ));
3642 };
3643 result.extend_from_slice(slice);
3644 cursor = end;
3645 } else {
3646 return Err(GitError::InvalidObject(
3647 "delta contains reserved zero command".into(),
3648 ));
3649 }
3650 }
3651 if result.len() as u64 != result_size {
3652 return Err(GitError::InvalidObject(format!(
3653 "delta result size mismatch: expected {result_size}, got {}",
3654 result.len()
3655 )));
3656 }
3657 Ok(result)
3658}
3659
3660fn decoded_delta_result_size(delta: &[u8]) -> Result<u64> {
3661 let mut cursor = 0usize;
3662 let _ = read_delta_varint(delta, &mut cursor)?;
3663 read_delta_varint(delta, &mut cursor)
3664}
3665
/// Number of bytes hashed per block when indexing a delta base or probing a
/// target; also the shortest match that is emitted as a copy command.
const DELTA_BLOCK_SIZE: usize = 16;

/// Distance between consecutive anchor offsets that get indexed; equal to the
/// block size, so indexed blocks do not overlap (apart from the final one).
const DELTA_INDEX_STRIDE: usize = DELTA_BLOCK_SIZE;

/// Number of low hash bits used to select a bucket.
const DELTA_BUCKET_BITS: usize = 12;
/// Number of hash buckets in a `DeltaIndex`.
const DELTA_BUCKET_COUNT: usize = 1 << DELTA_BUCKET_BITS;
/// Mask extracting the bucket number from a block hash.
const DELTA_BUCKET_MASK: usize = DELTA_BUCKET_COUNT - 1;
3681
3682struct DeltaIndex<'a> {
3689 base: &'a [u8],
3690 blocks: Vec<DeltaBlock>,
3691 buckets: Vec<usize>,
3692}
3693
3694#[derive(Debug, Clone, Copy, PartialEq, Eq)]
3695struct DeltaBlock {
3696 hash: u32,
3697 offset: usize,
3698}
3699
3700impl<'a> DeltaIndex<'a> {
3701 fn new(base: &'a [u8]) -> Self {
3702 let mut buckets = vec![0usize; DELTA_BUCKET_COUNT + 1];
3703 let mut anchors = Vec::with_capacity(delta_anchor_count(base.len()));
3704 for_each_delta_anchor(base.len(), |offset| {
3705 let hash = block_hash(&base[offset..offset + DELTA_BLOCK_SIZE]);
3706 buckets[delta_bucket(hash) + 1] += 1;
3707 anchors.push(DeltaBlock { hash, offset });
3708 });
3709 for idx in 1..buckets.len() {
3710 buckets[idx] += buckets[idx - 1];
3711 }
3712
3713 let mut next_offsets = buckets[..DELTA_BUCKET_COUNT].to_vec();
3714 let mut blocks = vec![DeltaBlock { hash: 0, offset: 0 }; anchors.len()];
3715 for anchor in anchors {
3716 let bucket = delta_bucket(anchor.hash);
3717 let next = &mut next_offsets[bucket];
3718 blocks[*next] = anchor;
3719 *next += 1;
3720 }
3721
3722 Self {
3723 base,
3724 blocks,
3725 buckets,
3726 }
3727 }
3728
3729 fn candidate_blocks(&self, hash: u32) -> impl Iterator<Item = &DeltaBlock> {
3730 let bucket = delta_bucket(hash);
3731 let start = self.buckets[bucket];
3732 let end = self.buckets[bucket + 1];
3733 self.blocks[start..end]
3734 .iter()
3735 .filter(move |block| block.hash == hash)
3736 }
3737
3738 fn has_hash(&self, hash: u32) -> bool {
3739 self.candidate_blocks(hash).next().is_some()
3740 }
3741
3742 fn has_shared_anchor(&self, target: &[u8]) -> bool {
3743 if target.len() < DELTA_BLOCK_SIZE || self.blocks.is_empty() {
3744 return false;
3745 }
3746 let last = target.len() - DELTA_BLOCK_SIZE;
3747 for offset in (0..=last).step_by(DELTA_INDEX_STRIDE) {
3748 let hash = block_hash(&target[offset..offset + DELTA_BLOCK_SIZE]);
3749 if self.has_hash(hash) {
3750 return true;
3751 }
3752 }
3753 if !last.is_multiple_of(DELTA_INDEX_STRIDE) {
3754 let hash = block_hash(&target[last..last + DELTA_BLOCK_SIZE]);
3755 if self.has_hash(hash) {
3756 return true;
3757 }
3758 }
3759 false
3760 }
3761
3762 fn delta(&self, target: &[u8]) -> Option<Vec<u8>> {
3764 if !self.has_shared_anchor(target) {
3765 return None;
3766 }
3767 let base = self.base;
3768 let mut delta = Vec::new();
3769 write_delta_varint(&mut delta, base.len() as u64);
3770 write_delta_varint(&mut delta, target.len() as u64);
3771
3772 let mut pending_insert_start = 0usize;
3773 let mut pos = 0usize;
3774 while pos < target.len() {
3775 let mut best_len = 0usize;
3776 let mut best_offset = 0usize;
3777 if pos + DELTA_BLOCK_SIZE <= target.len() {
3778 let hash = block_hash(&target[pos..pos + DELTA_BLOCK_SIZE]);
3779 for candidate in self.candidate_blocks(hash).take(DELTA_MAX_CHAIN) {
3780 let candidate = candidate.offset;
3783 let max_len = (base.len() - candidate).min(target.len() - pos);
3784 let mut len = 0usize;
3785 while len < max_len && base[candidate + len] == target[pos + len] {
3786 len += 1;
3787 }
3788 if len > best_len {
3789 best_len = len;
3790 best_offset = candidate;
3791 }
3792 }
3793 }
3794
3795 if best_len >= DELTA_BLOCK_SIZE {
3796 if pending_insert_start < pos {
3797 write_delta_insert(&mut delta, &target[pending_insert_start..pos]);
3798 }
3799 write_delta_copy(&mut delta, best_offset as u64, best_len as u64);
3800 pos += best_len;
3801 pending_insert_start = pos;
3802 } else {
3803 pos += 1;
3804 }
3805 }
3806 if pending_insert_start < target.len() {
3807 write_delta_insert(&mut delta, &target[pending_insert_start..]);
3808 }
3809 Some(delta)
3810 }
3811}
3812
3813fn for_each_delta_anchor(mut len: usize, mut visit: impl FnMut(usize)) {
3814 if len < DELTA_BLOCK_SIZE {
3815 return;
3816 }
3817 len -= DELTA_BLOCK_SIZE;
3818 for offset in (0..=len).step_by(DELTA_INDEX_STRIDE) {
3819 visit(offset);
3820 }
3821 if !len.is_multiple_of(DELTA_INDEX_STRIDE) {
3822 visit(len);
3823 }
3824}
3825
3826fn delta_anchor_count(len: usize) -> usize {
3827 if len < DELTA_BLOCK_SIZE {
3828 return 0;
3829 }
3830 let last = len - DELTA_BLOCK_SIZE;
3831 (last / DELTA_INDEX_STRIDE) + 1 + usize::from(!last.is_multiple_of(DELTA_INDEX_STRIDE))
3832}
3833
3834fn delta_bucket(hash: u32) -> usize {
3835 (hash as usize) & DELTA_BUCKET_MASK
3836}
3837
/// Upper bound on how many same-hash candidate blocks `DeltaIndex::delta`
/// examines for a single target position.
const DELTA_MAX_CHAIN: usize = 64;
3841
/// Hashes a block of bytes: starting from zero, each byte multiplies the
/// running hash by `0x0100_0193` (wrapping) and is then XORed in.
fn block_hash(block: &[u8]) -> u32 {
    block.iter().fold(0u32, |hash, &byte| {
        hash.wrapping_mul(0x0100_0193) ^ u32::from(byte)
    })
}
3854
/// How one object is planned to be stored in the pack being written.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PlannedBase {
    /// Stored whole; the payload is the object's own body.
    None,
    /// Stored as `delta` against the object at index `base_idx` of the same
    /// object list.
    InPack { base_idx: usize, delta: Vec<u8> },
    /// Stored as `delta` against a base that is only identified by
    /// `base_oid` (chosen from `PackWriteOptions::thin_bases`).
    External { base_oid: ObjectId, delta: Vec<u8> },
}
3867
/// Per-object result of delta planning.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PlannedEntry {
    /// The storage decision for this object.
    base: PlannedBase,
}
3872
3873fn compress_planned_payloads(
3874 objects: &[&EncodedObject],
3875 plan: &[PlannedEntry],
3876 order: &[usize],
3877) -> Result<Vec<Vec<u8>>> {
3878 if order.is_empty() {
3879 return Ok(Vec::new());
3880 }
3881
3882 let worker_count = std::thread::available_parallelism()
3883 .map(|threads| threads.get())
3884 .unwrap_or(1)
3885 .min(PACK_PARALLEL_COMPRESSION_MAX_THREADS)
3886 .min(order.len());
3887 if worker_count <= 1 || order.len() < PACK_PARALLEL_COMPRESSION_MIN_OBJECTS {
3888 let mut payloads = Vec::with_capacity(order.len());
3889 for &idx in order {
3890 payloads.push(compressed_payload(planned_payload(objects, plan, idx))?);
3891 }
3892 return Ok(payloads);
3893 }
3894
3895 let chunk_len = order.len().div_ceil(worker_count);
3896 let mut payloads: Vec<Vec<u8>> = std::iter::repeat_with(Vec::new).take(order.len()).collect();
3897 std::thread::scope(|scope| {
3898 let mut handles = Vec::new();
3899 for (chunk_idx, chunk) in order.chunks(chunk_len).enumerate() {
3900 let chunk_start = chunk_idx * chunk_len;
3901 handles.push(scope.spawn(move || -> Result<Vec<(usize, Vec<u8>)>> {
3902 let mut chunk_payloads = Vec::with_capacity(chunk.len());
3903 for (offset, &idx) in chunk.iter().enumerate() {
3904 chunk_payloads.push((
3905 chunk_start + offset,
3906 compressed_payload(planned_payload(objects, plan, idx))?,
3907 ));
3908 }
3909 Ok(chunk_payloads)
3910 }));
3911 }
3912
3913 let mut first_error = None;
3914 for handle in handles {
3915 match handle.join() {
3916 Ok(Ok(chunk_payloads)) => {
3917 if first_error.is_none() {
3918 for (pos, payload) in chunk_payloads {
3919 payloads[pos] = payload;
3920 }
3921 }
3922 }
3923 Ok(Err(err)) => {
3924 first_error.get_or_insert(err);
3925 }
3926 Err(_) => {
3927 first_error.get_or_insert_with(|| {
3928 GitError::InvalidObject("pack compression worker panicked".into())
3929 });
3930 }
3931 }
3932 }
3933
3934 match first_error {
3935 Some(err) => Err(err),
3936 None => Ok(()),
3937 }
3938 })?;
3939 Ok(payloads)
3940}
3941
3942fn planned_payload<'a>(
3943 objects: &'a [&'a EncodedObject],
3944 plan: &'a [PlannedEntry],
3945 idx: usize,
3946) -> &'a [u8] {
3947 match &plan[idx].base {
3948 PlannedBase::None => &objects[idx].body,
3949 PlannedBase::InPack { delta, .. } | PlannedBase::External { delta, .. } => delta,
3950 }
3951}
3952
3953fn compressed_payload(body: &[u8]) -> Result<Vec<u8>> {
3954 let mut out = Vec::new();
3955 write_compressed_payload(&mut out, body)?;
3956 Ok(out)
3957}
3958
/// Maximum number of external (thin-pack) bases that `plan_pack_deltas`
/// tries for each object.
const DELTA_MAX_EXTERNAL_BASES: usize = 64;
3962
/// An already-planned object kept in the sliding delta window as a possible
/// base for later objects.
struct DeltaWindowEntry<'a> {
    /// Index of the object in the list being packed.
    idx: usize,
    /// Block index built over that object's body.
    index: DeltaIndex<'a>,
}
3967
/// Sort rank of an object type: commits first, then trees, blobs and tags.
/// `plan_pack_deltas` uses it as the primary sort key when reordering.
fn delta_type_rank(object_type: ObjectType) -> u8 {
    match object_type {
        ObjectType::Commit => 0,
        ObjectType::Tree => 1,
        ObjectType::Blob => 2,
        ObjectType::Tag => 3,
    }
}
3978
3979fn plan_pack_deltas(
4009 objects: &[&EncodedObject],
4010 object_ids: &[ObjectId],
4011 options: &PackWriteOptions,
4012) -> Result<(Vec<PlannedEntry>, Vec<usize>)> {
4013 let count = objects.len();
4014 let mut plan: Vec<PlannedEntry> = (0..count)
4015 .map(|_| PlannedEntry {
4016 base: PlannedBase::None,
4017 })
4018 .collect();
4019
4020 let mut order: Vec<usize> = (0..count).collect();
4024 if options.reorder && options.depth > 0 {
4025 order.sort_by(|&left, &right| {
4026 delta_type_rank(objects[left].object_type)
4027 .cmp(&delta_type_rank(objects[right].object_type))
4028 .then_with(|| objects[right].body.len().cmp(&objects[left].body.len()))
4029 .then_with(|| {
4030 object_ids[left]
4031 .as_bytes()
4032 .cmp(object_ids[right].as_bytes())
4033 })
4034 });
4035 }
4036
4037 if options.depth == 0 {
4038 return Ok((plan, order));
4039 }
4040
4041 let mut external_indexes: Vec<(ObjectId, ObjectType, DeltaIndex<'_>)> =
4044 Vec::with_capacity(options.thin_bases.len());
4045 for (oid, object) in &options.thin_bases {
4046 external_indexes.push((*oid, object.object_type, DeltaIndex::new(&object.body)));
4047 }
4048
4049 let mut depth = vec![0usize; count];
4052 let mut window: std::collections::VecDeque<DeltaWindowEntry<'_>> =
4054 std::collections::VecDeque::new();
4055
4056 for &idx in &order {
4057 let target = &objects[idx].body;
4058 let target_type = objects[idx].object_type;
4059
4060 let mut best_delta: Option<Vec<u8>> = None;
4061 let mut best_base = PlannedBase::None;
4062
4063 for base_entry in window.iter().rev() {
4065 let base_idx = base_entry.idx;
4066 if objects[base_idx].object_type != target_type {
4067 continue;
4068 }
4069 if depth[base_idx] + 1 > options.depth {
4072 continue;
4073 }
4074 let Some(delta) = base_entry.index.delta(target) else {
4075 continue;
4076 };
4077 if !delta_is_acceptable(&delta, target.len()) {
4078 continue;
4079 }
4080 if best_delta
4081 .as_ref()
4082 .is_none_or(|current| delta.len() < current.len())
4083 {
4084 best_delta = Some(delta);
4085 best_base = PlannedBase::InPack {
4086 base_idx,
4087 delta: Vec::new(),
4088 };
4089 }
4090 }
4091
4092 for (base_oid, base_type, base_index) in
4095 external_indexes.iter().take(DELTA_MAX_EXTERNAL_BASES)
4096 {
4097 if *base_type != target_type {
4098 continue;
4099 }
4100 let Some(delta) = base_index.delta(target) else {
4101 continue;
4102 };
4103 if !delta_is_acceptable(&delta, target.len()) {
4104 continue;
4105 }
4106 if best_delta
4107 .as_ref()
4108 .is_none_or(|current| delta.len() < current.len())
4109 {
4110 best_delta = Some(delta);
4111 best_base = PlannedBase::External {
4112 base_oid: *base_oid,
4113 delta: Vec::new(),
4114 };
4115 }
4116 }
4117
4118 if let Some(delta) = best_delta {
4119 match best_base {
4120 PlannedBase::InPack { base_idx, .. } => {
4121 depth[idx] = depth[base_idx] + 1;
4122 plan[idx].base = PlannedBase::InPack { base_idx, delta };
4123 }
4124 PlannedBase::External { base_oid, .. } => {
4125 depth[idx] = 1;
4126 plan[idx].base = PlannedBase::External { base_oid, delta };
4127 }
4128 PlannedBase::None => {}
4129 }
4130 }
4131
4132 window.push_back(DeltaWindowEntry {
4134 idx,
4135 index: DeltaIndex::new(&objects[idx].body),
4136 });
4137 while window.len() > options.window {
4138 window.pop_front();
4139 }
4140 }
4141
4142 Ok((plan, order))
4143}
4144
/// A delta is worth using only if it is non-empty and strictly smaller than
/// the object it encodes.
fn delta_is_acceptable(delta: &[u8], target_len: usize) -> bool {
    (1..target_len).contains(&delta.len())
}
4152
/// Appends `value` as a little-endian base-128 varint: seven payload bits per
/// byte, bit 7 set on every byte except the last.
fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
    while value >= 0x80 {
        out.push(((value as u8) & 0x7f) | 0x80);
        value >>= 7;
    }
    // What remains fits in seven bits and terminates the varint.
    out.push(value as u8);
}
4166
/// Appends copy commands that copy `size` bytes starting at `offset` of the
/// base object.
///
/// One command covers at most 0x10000 bytes (encoded as size zero); longer
/// ranges are split. Only non-zero bytes of the offset (low four bytes) and
/// of the size (low three bytes) are written, each announced by a flag bit in
/// the command byte.
fn write_delta_copy(out: &mut Vec<u8>, mut offset: u64, mut size: u64) {
    const MAX_COPY: u64 = 0x10000;
    const OFFSET_FLAGS: [u8; 4] = [0x01, 0x02, 0x04, 0x08];
    const SIZE_FLAGS: [u8; 3] = [0x10, 0x20, 0x40];

    while size != 0 {
        let chunk = size.min(MAX_COPY);
        let encoded_size = if chunk == MAX_COPY { 0 } else { chunk };

        // Reserve the command byte; its flag bits are filled in as operand
        // bytes are appended.
        let command_at = out.len();
        out.push(0x80);

        let offset_bytes = offset.to_le_bytes();
        let size_bytes = encoded_size.to_le_bytes();
        let operands = offset_bytes[..4]
            .iter()
            .zip(OFFSET_FLAGS)
            .chain(size_bytes[..3].iter().zip(SIZE_FLAGS));
        for (&byte, flag) in operands {
            if byte != 0 {
                out[command_at] |= flag;
                out.push(byte);
            }
        }

        offset += chunk;
        size -= chunk;
    }
}
4196
/// Appends insert commands carrying `bytes` literally. Each command holds at
/// most 0x7f bytes, preceded by its length.
fn write_delta_insert(out: &mut Vec<u8>, bytes: &[u8]) {
    for piece in bytes.chunks(0x7f) {
        out.push(piece.len() as u8);
        out.extend_from_slice(piece);
    }
}
4205
4206fn read_delta_varint(delta: &[u8], cursor: &mut usize) -> Result<u64> {
4207 let mut value = 0u64;
4208 let mut shift = 0u32;
4209 loop {
4210 let Some(byte) = delta.get(*cursor).copied() else {
4211 return Err(GitError::InvalidObject("truncated delta size".into()));
4212 };
4213 *cursor += 1;
4214 value = value
4215 .checked_add(
4216 u64::from(byte & 0x7f)
4217 .checked_shl(shift)
4218 .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?,
4219 )
4220 .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
4221 if byte & 0x80 == 0 {
4222 return Ok(value);
4223 }
4224 shift = shift
4225 .checked_add(7)
4226 .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
4227 }
4228}
4229
4230fn read_delta_copy_value(
4231 delta: &[u8],
4232 cursor: &mut usize,
4233 command: u8,
4234 masks: &[u8],
4235) -> Result<u64> {
4236 let mut value = 0u64;
4237 for (shift, mask) in masks.iter().enumerate() {
4238 if command & mask != 0 {
4239 let Some(byte) = delta.get(*cursor).copied() else {
4240 return Err(GitError::InvalidObject(
4241 "truncated delta copy command".into(),
4242 ));
4243 };
4244 *cursor += 1;
4245 value |= u64::from(byte) << (shift * 8);
4246 }
4247 }
4248 Ok(value)
4249}
4250
4251thread_local! {
4252 static DEFLATE: RefCell<Compress> = RefCell::new(Compress::new(Compression::default(), true));
4253}
4254
4255fn write_compressed_payload(out: &mut Vec<u8>, body: &[u8]) -> Result<()> {
4256 DEFLATE.with(|cell| {
4257 let mut compressor = cell.borrow_mut();
4258 compressor.reset();
4259 out.reserve(zlib_compress_bound(body.len()));
4260 let status = compressor
4261 .compress_vec(body, out, FlushCompress::Finish)
4262 .map_err(|err| GitError::InvalidObject(format!("zlib compression failed: {err}")))?;
4263 if status != Status::StreamEnd || compressor.total_in() != body.len() as u64 {
4264 return Err(GitError::InvalidObject(
4265 "zlib compression did not finish pack entry".into(),
4266 ));
4267 }
4268 Ok(())
4269 })
4270}
4271
/// Upper bound used when reserving space for compressing `len` bytes:
/// `len + len/4096 + len/16384 + len/2^25 + 13`, saturating at `usize::MAX`.
fn zlib_compress_bound(len: usize) -> usize {
    [12u32, 14, 25]
        .iter()
        .fold(len, |bound, &shift| bound.saturating_add(len >> shift))
        .saturating_add(13)
}
4278
4279fn write_entry_header(out: &mut Vec<u8>, object_type: ObjectType, size: u64) {
4280 let type_code = match object_type {
4281 ObjectType::Commit => 1,
4282 ObjectType::Tree => 2,
4283 ObjectType::Blob => 3,
4284 ObjectType::Tag => 4,
4285 };
4286 write_pack_entry_header_kind(out, type_code, size);
4287}
4288
/// Appends a pack entry header: `type_code` in bits 4..=6 of the first byte
/// together with the low four size bits, followed by the remaining size bits
/// in groups of seven. Bit 7 marks every byte that has a successor.
fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, mut size: u64) {
    let mut current = (type_code << 4) | ((size as u8) & 0x0f);
    size >>= 4;
    while size != 0 {
        // More size bits follow, so flag the byte about to be written.
        out.push(current | 0x80);
        current = (size as u8) & 0x7f;
        size >>= 7;
    }
    out.push(current);
}
4305
4306fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: u64) -> Result<()> {
4307 if relative == 0 {
4308 return Err(GitError::InvalidFormat(
4309 "ofs-delta relative offset cannot be zero".into(),
4310 ));
4311 }
4312 let mut value = relative;
4313 let mut bytes = vec![(value & 0x7f) as u8];
4314 value >>= 7;
4315 while value != 0 {
4316 value -= 1;
4317 bytes.push(((value & 0x7f) as u8) | 0x80);
4318 value >>= 7;
4319 }
4320 bytes.reverse();
4321 out.extend_from_slice(&bytes);
4322 Ok(())
4323}
4324
4325fn next_byte(bytes: &[u8], offset: &mut usize) -> Result<u8> {
4326 let Some(byte) = bytes.get(*offset).copied() else {
4327 return Err(GitError::InvalidFormat(
4328 "truncated pack entry header".into(),
4329 ));
4330 };
4331 *offset += 1;
4332 Ok(byte)
4333}
4334
/// Decodes the first two bytes of `bytes` as a big-endian `u16`.
///
/// Panics when fewer than two bytes are supplied.
fn u16_be(bytes: &[u8]) -> u16 {
    (u16::from(bytes[0]) << 8) | u16::from(bytes[1])
}
4338
/// Decodes the first four bytes of `bytes` as a big-endian `u32`.
///
/// Panics when fewer than four bytes are supplied.
fn u32_be(bytes: &[u8]) -> u32 {
    bytes[..4]
        .iter()
        .fold(0u32, |value, &byte| (value << 8) | u32::from(byte))
}
4342
/// Decodes the first eight bytes of `bytes` as a big-endian `u64`.
///
/// Panics when fewer than eight bytes are supplied.
fn u64_be(bytes: &[u8]) -> u64 {
    bytes[..8]
        .iter()
        .fold(0u64, |value, &byte| (value << 8) | u64::from(byte))
}
4348
4349fn read_pack_index_fanout(bytes: &[u8], offset: &mut usize) -> Result<[u32; 256]> {
4350 let mut fanout = [0u32; 256];
4351 let mut previous = 0u32;
4352 for slot in &mut fanout {
4353 *slot = u32_be(&bytes[*offset..*offset + 4]);
4354 if *slot < previous {
4355 return Err(GitError::InvalidFormat(
4356 "pack index fanout is not monotonic".into(),
4357 ));
4358 }
4359 previous = *slot;
4360 *offset += 4;
4361 }
4362 Ok(fanout)
4363}
4364
4365fn validate_pack_index_oid_fanout(idx: usize, oid_bytes: &[u8], fanout: &[u32; 256]) -> Result<()> {
4366 let expected_min = if oid_bytes[0] == 0 {
4367 0
4368 } else {
4369 fanout[usize::from(oid_bytes[0] - 1)]
4370 };
4371 if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
4372 return Err(GitError::InvalidFormat(
4373 "pack index object id is outside its fanout bucket".into(),
4374 ));
4375 }
4376 Ok(())
4377}
4378
4379fn pack_index_v2_offset(raw_offset: u32, large_offset_table: &[u8]) -> Result<u64> {
4380 if raw_offset & 0x8000_0000 == 0 {
4381 return Ok(u64::from(raw_offset));
4382 }
4383 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
4384 let large_start = large_idx
4385 .checked_mul(8)
4386 .ok_or_else(|| GitError::InvalidFormat("pack index large offset overflow".into()))?;
4387 let large_end = large_start
4388 .checked_add(8)
4389 .ok_or_else(|| GitError::InvalidFormat("pack index large offset overflow".into()))?;
4390 if large_end > large_offset_table.len() {
4391 return Err(GitError::InvalidFormat(
4392 "pack index large offset points past table".into(),
4393 ));
4394 }
4395 Ok(u64_be(&large_offset_table[large_start..large_end]))
4396}
4397
4398fn checked_range(
4399 start: usize,
4400 count: usize,
4401 width: usize,
4402 total: usize,
4403) -> Result<std::ops::Range<usize>> {
4404 let len = count
4405 .checked_mul(width)
4406 .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
4407 let end = start
4408 .checked_add(len)
4409 .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
4410 if end > total {
4411 return Err(GitError::InvalidFormat("truncated pack index table".into()));
4412 }
4413 Ok(start..end)
4414}
4415
4416fn validate_position_permutation(positions: &[u32]) -> Result<()> {
4417 let mut seen = vec![false; positions.len()];
4418 for position in positions {
4419 let idx = *position as usize;
4420 if idx >= positions.len() {
4421 return Err(GitError::InvalidFormat(
4422 "reverse index position points past object table".into(),
4423 ));
4424 }
4425 if seen[idx] {
4426 return Err(GitError::InvalidFormat(
4427 "reverse index position is duplicated".into(),
4428 ));
4429 }
4430 seen[idx] = true;
4431 }
4432 Ok(())
4433}
4434
4435fn parse_midx_pack_names(
4436 bytes: &[u8],
4437 chunks: &[MultiPackIndexChunk],
4438 pack_count: usize,
4439 version: u8,
4440) -> Result<Vec<String>> {
4441 let data = midx_chunk_data(bytes, chunks, *b"PNAM", true)?
4442 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing PNAM chunk".into()))?;
4443 let mut names = Vec::with_capacity(pack_count);
4444 let mut offset = 0usize;
4445 while names.len() < pack_count {
4446 let Some(relative_end) = data[offset..].iter().position(|byte| *byte == 0) else {
4447 return Err(GitError::InvalidFormat(
4448 "multi-pack-index PNAM entry is unterminated".into(),
4449 ));
4450 };
4451 let name_bytes = &data[offset..offset + relative_end];
4452 if name_bytes.is_empty() {
4453 return Err(GitError::InvalidFormat(
4454 "multi-pack-index PNAM entry is empty".into(),
4455 ));
4456 }
4457 let name = std::str::from_utf8(name_bytes)
4458 .map_err(|err| GitError::InvalidFormat(err.to_string()))?;
4459 if name.bytes().any(|byte| matches!(byte, b'/' | b'\\')) {
4460 return Err(GitError::InvalidFormat(
4461 "multi-pack-index PNAM entry contains a path separator".into(),
4462 ));
4463 }
4464 names.push(name.to_string());
4465 offset += relative_end + 1;
4466 }
4467 let padding = &data[offset..];
4468 if padding.len() > 3 || padding.iter().any(|byte| *byte != 0) {
4469 return Err(GitError::InvalidFormat(
4470 "multi-pack-index PNAM padding is invalid".into(),
4471 ));
4472 }
4473 if version == 1 && names.windows(2).any(|pair| pair[0] > pair[1]) {
4474 return Err(GitError::InvalidFormat(
4475 "multi-pack-index v1 PNAM entries are not sorted".into(),
4476 ));
4477 }
4478 Ok(names)
4479}
4480
4481fn parse_midx_oid_fanout(
4482 bytes: &[u8],
4483 chunks: &[MultiPackIndexChunk],
4484) -> Result<([u32; 256], usize)> {
4485 let data = midx_chunk_data(bytes, chunks, *b"OIDF", true)?
4486 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDF chunk".into()))?;
4487 if data.len() != 256 * 4 {
4488 return Err(GitError::InvalidFormat(
4489 "multi-pack-index OIDF chunk has invalid length".into(),
4490 ));
4491 }
4492 let mut fanout = [0u32; 256];
4493 let mut previous = 0u32;
4494 for (idx, slot) in fanout.iter_mut().enumerate() {
4495 let start = idx * 4;
4496 *slot = u32_be(&data[start..start + 4]);
4497 if *slot < previous {
4498 return Err(GitError::InvalidFormat(
4499 "multi-pack-index OIDF fanout is not monotonic".into(),
4500 ));
4501 }
4502 previous = *slot;
4503 }
4504 Ok((fanout, fanout[255] as usize))
4505}
4506
4507fn parse_midx_object_ids(
4508 bytes: &[u8],
4509 chunks: &[MultiPackIndexChunk],
4510 format: ObjectFormat,
4511 object_count: usize,
4512 fanout: &[u32; 256],
4513) -> Result<Vec<ObjectId>> {
4514 let data = midx_chunk_data(bytes, chunks, *b"OIDL", true)?
4515 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
4516 let expected_len = object_count
4517 .checked_mul(format.raw_len())
4518 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into()))?;
4519 if data.len() != expected_len {
4520 return Err(GitError::InvalidFormat(
4521 "multi-pack-index OIDL chunk has invalid length".into(),
4522 ));
4523 }
4524
4525 let mut ids = Vec::with_capacity(object_count);
4526 let mut counts = [0u32; 256];
4527 let mut previous_oid: Option<ObjectId> = None;
4528 for idx in 0..object_count {
4529 let start = idx * format.raw_len();
4530 let oid = ObjectId::from_raw(format, &data[start..start + format.raw_len()])?;
4531 if let Some(previous) = &previous_oid
4532 && previous.as_bytes() >= oid.as_bytes()
4533 {
4534 return Err(GitError::InvalidFormat(
4535 "multi-pack-index OIDL object ids are not strictly sorted".into(),
4536 ));
4537 }
4538 counts[oid.as_bytes()[0] as usize] = counts[oid.as_bytes()[0] as usize]
4539 .checked_add(1)
4540 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
4541 previous_oid = Some(oid);
4542 ids.push(oid);
4543 }
4544
4545 let mut running = 0u32;
4546 for (idx, count) in counts.iter().enumerate() {
4547 running = running
4548 .checked_add(*count)
4549 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
4550 if fanout[idx] != running {
4551 return Err(GitError::InvalidFormat(
4552 "multi-pack-index OIDF fanout does not match OIDL".into(),
4553 ));
4554 }
4555 }
4556 Ok(ids)
4557}
4558
4559fn parse_midx_object_offsets(
4560 bytes: &[u8],
4561 chunks: &[MultiPackIndexChunk],
4562 object_ids: Vec<ObjectId>,
4563 pack_count: u32,
4564) -> Result<Vec<MultiPackIndexEntry>> {
4565 let data = midx_chunk_data(bytes, chunks, *b"OOFF", true)?
4566 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
4567 let expected_len = object_ids
4568 .len()
4569 .checked_mul(8)
4570 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into()))?;
4571 if data.len() != expected_len {
4572 return Err(GitError::InvalidFormat(
4573 "multi-pack-index OOFF chunk has invalid length".into(),
4574 ));
4575 }
4576 let large_offsets = midx_chunk_data(bytes, chunks, *b"LOFF", false)?;
4577 if let Some(large_offsets) = large_offsets
4578 && large_offsets.len() % 8 != 0
4579 {
4580 return Err(GitError::InvalidFormat(
4581 "multi-pack-index LOFF chunk has invalid length".into(),
4582 ));
4583 }
4584
4585 let mut entries = Vec::with_capacity(object_ids.len());
4586 for (idx, oid) in object_ids.into_iter().enumerate() {
4587 let start = idx * 8;
4588 let pack_int_id = u32_be(&data[start..start + 4]);
4589 if pack_int_id >= pack_count {
4590 return Err(GitError::InvalidFormat(
4591 "multi-pack-index object points past pack table".into(),
4592 ));
4593 }
4594 let raw_offset = u32_be(&data[start + 4..start + 8]);
4595 let offset = if raw_offset & 0x8000_0000 == 0 {
4596 u64::from(raw_offset)
4597 } else {
4598 let Some(large_offsets) = large_offsets else {
4599 return Err(GitError::InvalidFormat(
4600 "multi-pack-index large offset missing LOFF chunk".into(),
4601 ));
4602 };
4603 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
4604 let large_start = large_idx.checked_mul(8).ok_or_else(|| {
4605 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
4606 })?;
4607 let large_end = large_start.checked_add(8).ok_or_else(|| {
4608 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
4609 })?;
4610 if large_end > large_offsets.len() {
4611 return Err(GitError::InvalidFormat(
4612 "multi-pack-index large offset points past LOFF chunk".into(),
4613 ));
4614 }
4615 u64_be(&large_offsets[large_start..large_end])
4616 };
4617 entries.push(MultiPackIndexEntry {
4618 oid,
4619 pack_int_id,
4620 offset,
4621 });
4622 }
4623 Ok(entries)
4624}
4625
4626fn parse_midx_reverse_index(
4627 bytes: &[u8],
4628 chunks: &[MultiPackIndexChunk],
4629 object_count: usize,
4630) -> Result<Option<Vec<u32>>> {
4631 let Some(data) = midx_chunk_data(bytes, chunks, *b"RIDX", false)? else {
4632 return Ok(None);
4633 };
4634 let expected_len = object_count
4635 .checked_mul(4)
4636 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index RIDX chunk overflow".into()))?;
4637 if data.len() != expected_len {
4638 return Err(GitError::InvalidFormat(
4639 "multi-pack-index RIDX chunk has invalid length".into(),
4640 ));
4641 }
4642 let mut positions = Vec::with_capacity(object_count);
4643 for idx in 0..object_count {
4644 let start = idx * 4;
4645 positions.push(u32_be(&data[start..start + 4]));
4646 }
4647 validate_position_permutation(&positions)?;
4648 Ok(Some(positions))
4649}
4650
4651fn parse_midx_bitmapped_packs(
4652 bytes: &[u8],
4653 chunks: &[MultiPackIndexChunk],
4654 pack_count: usize,
4655 object_count: usize,
4656) -> Result<Option<Vec<MultiPackBitmapPack>>> {
4657 let Some(data) = midx_chunk_data(bytes, chunks, *b"BTMP", false)? else {
4658 return Ok(None);
4659 };
4660 let expected_len = pack_count
4661 .checked_mul(8)
4662 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index BTMP chunk overflow".into()))?;
4663 if data.len() != expected_len {
4664 return Err(GitError::InvalidFormat(
4665 "multi-pack-index BTMP chunk has invalid length".into(),
4666 ));
4667 }
4668 let mut entries = Vec::with_capacity(pack_count);
4669 for idx in 0..pack_count {
4670 let start = idx * 8;
4671 let bitmap_pos = u32_be(&data[start..start + 4]);
4672 let bitmap_nr = u32_be(&data[start + 4..start + 8]);
4673 let bitmap_end = u64::from(bitmap_pos)
4674 .checked_add(u64::from(bitmap_nr))
4675 .ok_or_else(|| {
4676 GitError::InvalidFormat("multi-pack-index BTMP range overflow".into())
4677 })?;
4678 if bitmap_end > object_count as u64 {
4679 return Err(GitError::InvalidFormat(
4680 "multi-pack-index BTMP range points past object table".into(),
4681 ));
4682 }
4683 entries.push(MultiPackBitmapPack {
4684 bitmap_pos,
4685 bitmap_nr,
4686 });
4687 }
4688 Ok(Some(entries))
4689}
4690
4691fn midx_chunk_data<'a>(
4692 bytes: &'a [u8],
4693 chunks: &[MultiPackIndexChunk],
4694 id: [u8; 4],
4695 required: bool,
4696) -> Result<Option<&'a [u8]>> {
4697 let Some(chunk) = chunks.iter().find(|chunk| chunk.id == id) else {
4698 if required {
4699 return Err(GitError::InvalidFormat(format!(
4700 "multi-pack-index missing {} chunk",
4701 std::str::from_utf8(&id).unwrap_or("required")
4702 )));
4703 }
4704 return Ok(None);
4705 };
4706 let start = usize::try_from(chunk.offset)
4707 .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk offset overflow".into()))?;
4708 let len = usize::try_from(chunk.len)
4709 .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk length overflow".into()))?;
4710 let end = start
4711 .checked_add(len)
4712 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index chunk range overflow".into()))?;
4713 let Some(data) = bytes.get(start..end) else {
4714 return Err(GitError::InvalidFormat(
4715 "multi-pack-index chunk extends past file".into(),
4716 ));
4717 };
4718 Ok(Some(data))
4719}
4720
4721fn hash_function_id(format: ObjectFormat) -> u32 {
4722 match format {
4723 ObjectFormat::Sha1 => 1,
4724 ObjectFormat::Sha256 => 2,
4725 }
4726}
4727
4728const EWAH_MAX_RUNNING_LEN: u64 = 0xffff_ffff;
4731
4732const EWAH_MAX_LITERAL_LEN: u64 = 0x7fff_ffff;
4735
4736const EWAH_ALL_ONES: u64 = u64::MAX;
4738
4739impl EwahBitmap {
4740 pub fn from_words(bit_size: u32, words: &[u64]) -> Result<Self> {
4754 let required_words = bit_size.div_ceil(64) as usize;
4755 if required_words > words.len() {
4756 return Err(GitError::InvalidFormat(format!(
4757 "EWAH bit_size {bit_size} requires {required_words} words but only {} supplied",
4758 words.len()
4759 )));
4760 }
4761 let significant = &words[..required_words];
4764 let mut builder = EwahBuilder::new(bit_size);
4765 for &word in significant {
4766 if word == 0 {
4767 builder.add_empty_words(false, 1);
4768 } else if word == EWAH_ALL_ONES {
4769 builder.add_empty_words(true, 1);
4770 } else {
4771 builder.add_literal(word);
4772 }
4773 }
4774 builder.finish()
4775 }
4776
4777 pub fn from_positions(bit_size: u32, positions: &[u32]) -> Result<Self> {
4783 let word_count = bit_size.div_ceil(64) as usize;
4784 let mut words = vec![0u64; word_count];
4785 for &position in positions {
4786 if position >= bit_size {
4787 return Err(GitError::InvalidFormat(format!(
4788 "EWAH bit position {position} out of range for bit_size {bit_size}"
4789 )));
4790 }
4791 let word_index = (position / 64) as usize;
4792 let bit_index = position % 64;
4793 words[word_index] |= 1u64 << bit_index;
4794 }
4795 Self::from_words(bit_size, &words)
4796 }
4797
4798 pub fn empty() -> Self {
4801 Self {
4802 bit_size: 0,
4803 words: Vec::new(),
4804 rlw_position: 0,
4805 }
4806 }
4807
4808 pub fn to_words(&self) -> Result<Vec<u64>> {
4814 let mut out = Vec::new();
4815 let mut word_idx = 0usize;
4816 while word_idx < self.words.len() {
4817 let rlw = self.words[word_idx];
4818 let run_bit = rlw & 1;
4819 let run_words = (rlw >> 1) & EWAH_MAX_RUNNING_LEN;
4820 let literal_words = (rlw >> 33) as usize;
4821 word_idx += 1;
4822 let fill = if run_bit == 1 { EWAH_ALL_ONES } else { 0 };
4823 for _ in 0..run_words {
4824 out.push(fill);
4825 }
4826 let literal_end = word_idx
4827 .checked_add(literal_words)
4828 .filter(|end| *end <= self.words.len())
4829 .ok_or_else(|| {
4830 GitError::InvalidFormat("EWAH literal words extend past word table".into())
4831 })?;
4832 out.extend_from_slice(&self.words[word_idx..literal_end]);
4833 word_idx = literal_end;
4834 }
4835 let required_words = (self.bit_size as usize).div_ceil(64);
4836 if out.len() < required_words {
4837 out.resize(required_words, 0);
4838 }
4839 out.truncate(required_words);
4840 Ok(out)
4841 }
4842
4843 pub fn to_positions(&self) -> Result<Vec<u32>> {
4845 let words = self.to_words()?;
4846 let mut positions = Vec::new();
4847 for (word_index, word) in words.iter().enumerate() {
4848 let mut remaining = *word;
4849 while remaining != 0 {
4850 let bit = remaining.trailing_zeros();
4851 let position = (word_index as u64) * 64 + u64::from(bit);
4852 if position < u64::from(self.bit_size) {
4853 positions.push(position as u32);
4855 }
4856 remaining &= remaining - 1;
4857 }
4858 }
4859 Ok(positions)
4860 }
4861
4862 pub fn to_bytes(&self) -> Vec<u8> {
4866 let mut out = Vec::with_capacity(12 + self.words.len() * 8);
4867 self.append_bytes(&mut out);
4868 out
4869 }
4870
4871 fn append_bytes(&self, out: &mut Vec<u8>) {
4872 out.extend_from_slice(&self.bit_size.to_be_bytes());
4873 out.extend_from_slice(&(self.words.len() as u32).to_be_bytes());
4874 for word in &self.words {
4875 out.extend_from_slice(&word.to_be_bytes());
4876 }
4877 out.extend_from_slice(&self.rlw_position.to_be_bytes());
4878 }
4879}
4880
4881struct EwahBuilder {
4889 bit_size: u32,
4890 words: Vec<u64>,
4891 rlw_position: usize,
4892}
4893
4894impl EwahBuilder {
4895 fn new(bit_size: u32) -> Self {
4896 Self {
4898 bit_size,
4899 words: vec![0u64],
4900 rlw_position: 0,
4901 }
4902 }
4903
4904 fn rlw(&self) -> u64 {
4905 self.words[self.rlw_position]
4906 }
4907
4908 fn set_rlw(&mut self, value: u64) {
4909 self.words[self.rlw_position] = value;
4910 }
4911
4912 fn rlw_running_len(&self) -> u64 {
4913 (self.rlw() >> 1) & EWAH_MAX_RUNNING_LEN
4914 }
4915
4916 fn rlw_running_bit(&self) -> bool {
4917 self.rlw() & 1 == 1
4918 }
4919
4920 fn rlw_literal_len(&self) -> u64 {
4921 self.rlw() >> 33
4922 }
4923
4924 fn set_running_bit(&mut self, bit: bool) {
4925 let mut value = self.rlw();
4926 value &= !1;
4927 value |= u64::from(bit);
4928 self.set_rlw(value);
4929 }
4930
4931 fn set_running_len(&mut self, len: u64) {
4932 let mut value = self.rlw();
4933 value &= !(EWAH_MAX_RUNNING_LEN << 1);
4934 value |= (len & EWAH_MAX_RUNNING_LEN) << 1;
4935 self.set_rlw(value);
4936 }
4937
4938 fn set_literal_len(&mut self, len: u64) {
4939 let mut value = self.rlw();
4940 value &= (1u64 << 33) - 1;
4941 value |= (len & EWAH_MAX_LITERAL_LEN) << 33;
4942 self.set_rlw(value);
4943 }
4944
4945 fn push_rlw(&mut self) {
4947 self.rlw_position = self.words.len();
4948 self.words.push(0);
4949 }
4950
4951 fn add_empty_words(&mut self, value: bool, mut number: u64) {
4959 while number > 0 {
4960 let can_extend = self.rlw_literal_len() == 0
4964 && (self.rlw_running_len() == 0 || self.rlw_running_bit() == value)
4965 && self.rlw_running_len() < EWAH_MAX_RUNNING_LEN;
4966 if !can_extend {
4967 self.push_rlw();
4968 }
4969 if self.rlw_running_len() == 0 {
4970 self.set_running_bit(value);
4971 }
4972 let available = EWAH_MAX_RUNNING_LEN - self.rlw_running_len();
4973 let take = available.min(number);
4974 self.set_running_len(self.rlw_running_len() + take);
4975 number -= take;
4976 }
4977 }
4978
4979 fn add_literal(&mut self, word: u64) {
4982 if self.rlw_literal_len() >= EWAH_MAX_LITERAL_LEN {
4983 self.push_rlw();
4984 }
4985 let literal_len = self.rlw_literal_len();
4986 self.set_literal_len(literal_len + 1);
4987 self.words.push(word);
4988 }
4989
4990 fn finish(self) -> Result<EwahBitmap> {
4991 let rlw_position = u32::try_from(self.rlw_position)
4992 .map_err(|_| GitError::InvalidFormat("EWAH RLW position overflow".into()))?;
4993 if self.words.len() > u32::MAX as usize {
4994 return Err(GitError::InvalidFormat("EWAH word table overflow".into()));
4995 }
4996 Ok(EwahBitmap {
4997 bit_size: self.bit_size,
4998 words: self.words,
4999 rlw_position,
5000 })
5001 }
5002}
5003
5004#[derive(Debug, Clone)]
5017pub struct PackBitmapWriter {
5018 format: ObjectFormat,
5019 pack_checksum: ObjectId,
5020 object_count: u32,
5021 commit_positions: Vec<u32>,
5022 tree_positions: Vec<u32>,
5023 blob_positions: Vec<u32>,
5024 tag_positions: Vec<u32>,
5025 name_hash_cache: Option<Vec<u32>>,
5026 selected: Vec<SelectedCommit>,
5027}
5028
5029#[derive(Debug, Clone)]
5030struct SelectedCommit {
5031 commit_index_position: u32,
5035 flags: u8,
5036 reachable: Vec<u32>,
5037}
5038
5039impl PackBitmapWriter {
5040 pub const FLAG_NONE: u8 = 0;
5044
5045 pub fn new(
5052 format: ObjectFormat,
5053 pack_checksum: ObjectId,
5054 object_types: &[ObjectType],
5055 ) -> Result<Self> {
5056 if object_types.len() > u32::MAX as usize {
5057 return Err(GitError::InvalidFormat(
5058 "too many objects for a pack bitmap".into(),
5059 ));
5060 }
5061 if pack_checksum.format() != format {
5062 return Err(GitError::InvalidObjectId(
5063 "pack checksum format does not match bitmap format".into(),
5064 ));
5065 }
5066 let object_count = object_types.len() as u32;
5067 let mut commit_positions = Vec::new();
5068 let mut tree_positions = Vec::new();
5069 let mut blob_positions = Vec::new();
5070 let mut tag_positions = Vec::new();
5071 for (index, object_type) in object_types.iter().enumerate() {
5072 let position = index as u32;
5073 match object_type {
5074 ObjectType::Commit => commit_positions.push(position),
5075 ObjectType::Tree => tree_positions.push(position),
5076 ObjectType::Blob => blob_positions.push(position),
5077 ObjectType::Tag => tag_positions.push(position),
5078 }
5079 }
5080 Ok(Self {
5081 format,
5082 pack_checksum,
5083 object_count,
5084 commit_positions,
5085 tree_positions,
5086 blob_positions,
5087 tag_positions,
5088 name_hash_cache: None,
5089 selected: Vec::new(),
5090 })
5091 }
5092
5093 pub fn with_name_hash_cache(mut self, cache: Vec<u32>) -> Result<Self> {
5099 if cache.len() != self.object_count as usize {
5100 return Err(GitError::InvalidFormat(format!(
5101 "name hash cache has {} entries but pack has {} objects",
5102 cache.len(),
5103 self.object_count
5104 )));
5105 }
5106 self.name_hash_cache = Some(cache);
5107 Ok(self)
5108 }
5109
5110 pub fn add_commit(
5122 &mut self,
5123 commit_position: u32,
5124 commit_index_position: u32,
5125 reachable: &[u32],
5126 ) -> Result<()> {
5127 if commit_position >= self.object_count {
5128 return Err(GitError::InvalidFormat(format!(
5129 "commit position {commit_position} out of range for {} objects",
5130 self.object_count
5131 )));
5132 }
5133 if commit_index_position >= self.object_count {
5134 return Err(GitError::InvalidFormat(format!(
5135 "commit index position {commit_index_position} out of range for {} objects",
5136 self.object_count
5137 )));
5138 }
5139 if !self.commit_positions.contains(&commit_position) {
5140 return Err(GitError::InvalidFormat(format!(
5141 "bitmap commit position {commit_position} is not a commit object"
5142 )));
5143 }
5144 for &position in reachable {
5145 if position >= self.object_count {
5146 return Err(GitError::InvalidFormat(format!(
5147 "reachable position {position} out of range for {} objects",
5148 self.object_count
5149 )));
5150 }
5151 }
5152 let mut reachable = reachable.to_vec();
5153 reachable.push(commit_position);
5154 self.selected.push(SelectedCommit {
5155 commit_index_position,
5156 flags: Self::FLAG_NONE,
5157 reachable,
5158 });
5159 Ok(())
5160 }
5161
5162 pub fn build(&self) -> Result<PackBitmapIndex> {
5169 let commits = EwahBitmap::from_positions(self.object_count, &self.commit_positions)?;
5170 let trees = EwahBitmap::from_positions(self.object_count, &self.tree_positions)?;
5171 let blobs = EwahBitmap::from_positions(self.object_count, &self.blob_positions)?;
5172 let tags = EwahBitmap::from_positions(self.object_count, &self.tag_positions)?;
5173
5174 let mut entries = Vec::with_capacity(self.selected.len());
5175 for selected in &self.selected {
5176 let bitmap = EwahBitmap::from_positions(self.object_count, &selected.reachable)?;
5177 entries.push(PackBitmapEntry {
5178 object_position: selected.commit_index_position,
5179 xor_offset: 0,
5180 flags: selected.flags,
5181 bitmap,
5182 });
5183 }
5184
5185 let mut options = PackBitmapIndex::OPTION_FULL_DAG;
5186 if self.name_hash_cache.is_some() {
5187 options |= PackBitmapIndex::OPTION_HASH_CACHE;
5188 }
5189
5190 let placeholder_checksum = ObjectId::null(self.format);
5195 Ok(PackBitmapIndex {
5196 version: 1,
5197 format: self.format,
5198 options,
5199 pack_checksum: self.pack_checksum.clone(),
5200 index_checksum: placeholder_checksum,
5201 type_bitmaps: PackBitmapTypeBitmaps {
5202 commits,
5203 trees,
5204 blobs,
5205 tags,
5206 },
5207 entries,
5208 name_hash_cache: self.name_hash_cache.clone(),
5209 })
5210 }
5211
5212 pub fn write(&self) -> Result<Vec<u8>> {
5215 self.build()?.write()
5216 }
5217}
5218
5219impl PackBitmapIndex {
5220 pub fn write(&self) -> Result<Vec<u8>> {
5234 if self.version != 1 {
5235 return Err(GitError::Unsupported(format!(
5236 "bitmap index version {}",
5237 self.version
5238 )));
5239 }
5240 let known_options = Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE;
5241 if self.options & !known_options != 0 {
5242 return Err(GitError::Unsupported(format!(
5243 "bitmap index options {:#06x}",
5244 self.options & !known_options
5245 )));
5246 }
5247 if self.pack_checksum.format() != self.format {
5248 return Err(GitError::InvalidObjectId(
5249 "bitmap pack checksum format does not match index format".into(),
5250 ));
5251 }
5252 if self.entries.len() > u32::MAX as usize {
5253 return Err(GitError::InvalidFormat(
5254 "too many bitmap index entries".into(),
5255 ));
5256 }
5257 let want_cache = self.options & Self::OPTION_HASH_CACHE != 0;
5258 match (&self.name_hash_cache, want_cache) {
5259 (Some(_), false) => {
5260 return Err(GitError::InvalidFormat(
5261 "name hash cache present without OPTION_HASH_CACHE".into(),
5262 ));
5263 }
5264 (None, true) => {
5265 return Err(GitError::InvalidFormat(
5266 "OPTION_HASH_CACHE set without a name hash cache".into(),
5267 ));
5268 }
5269 _ => {}
5270 }
5271
5272 let mut out = Vec::new();
5273 out.extend_from_slice(b"BITM");
5274 out.extend_from_slice(&self.version.to_be_bytes());
5275 out.extend_from_slice(&self.options.to_be_bytes());
5276 out.extend_from_slice(&(self.entries.len() as u32).to_be_bytes());
5277 out.extend_from_slice(self.pack_checksum.as_bytes());
5278
5279 self.type_bitmaps.commits.append_bytes(&mut out);
5280 self.type_bitmaps.trees.append_bytes(&mut out);
5281 self.type_bitmaps.blobs.append_bytes(&mut out);
5282 self.type_bitmaps.tags.append_bytes(&mut out);
5283
5284 for (idx, entry) in self.entries.iter().enumerate() {
5285 if entry.xor_offset as usize > idx {
5286 return Err(GitError::InvalidFormat(
5287 "bitmap index entry has invalid XOR offset".into(),
5288 ));
5289 }
5290 out.extend_from_slice(&entry.object_position.to_be_bytes());
5291 out.push(entry.xor_offset);
5292 out.push(entry.flags);
5293 entry.bitmap.append_bytes(&mut out);
5294 }
5295
5296 if let Some(cache) = &self.name_hash_cache {
5297 for value in cache {
5298 out.extend_from_slice(&value.to_be_bytes());
5299 }
5300 }
5301
5302 let checksum = sley_core::digest_bytes(self.format, &out)?;
5303 out.extend_from_slice(checksum.as_bytes());
5304 Ok(out)
5305 }
5306}
5307
5308pub fn write_bitmap(
5317 format: ObjectFormat,
5318 pack_checksum: ObjectId,
5319 object_types: &[ObjectType],
5320 commits: &[(u32, u32, Vec<u32>)],
5321 name_hash_cache: Option<Vec<u32>>,
5322) -> Result<Vec<u8>> {
5323 let mut writer = PackBitmapWriter::new(format, pack_checksum, object_types)?;
5324 if let Some(cache) = name_hash_cache {
5325 writer = writer.with_name_hash_cache(cache)?;
5326 }
5327 for (commit_position, commit_index_position, reachable) in commits {
5328 writer.add_commit(*commit_position, *commit_index_position, reachable)?;
5329 }
5330 writer.write()
5331}
5332
5333#[cfg(test)]
5334mod tests {
5335 use super::*;
5336 use flate2::Compression;
5337 use flate2::read::ZlibDecoder;
5338 use flate2::write::ZlibEncoder;
5339 use std::fs;
5340 use std::io::Read;
5341 use std::io::Write;
5342 use std::path::{Path, PathBuf};
5343 use std::process::Command;
5344 use std::time::{SystemTime, UNIX_EPOCH};
5345
5346 fn delta_pack_options(prefer_ofs_delta: bool) -> PackWriteOptions {
5347 PackWriteOptions::new()
5348 .with_prefer_ofs_delta(prefer_ofs_delta)
5349 .with_reorder(false)
5350 }
5351
5352 #[test]
5353 fn parses_single_blob_pack() {
5354 let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
5355 let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
5356 assert_eq!(parsed.version, 2);
5357 assert_eq!(parsed.entries.len(), 1);
5358 let object = &parsed.entries[0].object;
5359 assert_eq!(object.object_type, ObjectType::Blob);
5360 assert_eq!(object.body, b"hello\n");
5361 assert_eq!(
5362 parsed.entries[0].entry.oid.to_hex(),
5363 "ce013625030ba8dba906f756967f9e9ca394464a"
5364 );
5365 }
5366
5367 #[test]
5368 fn parses_single_blob_pack_sha256() {
5369 let pack = single_object_pack(ObjectFormat::Sha256, ObjectType::Blob, b"hello\n");
5370 let parsed =
5371 PackFile::parse(&pack, ObjectFormat::Sha256).expect("test operation should succeed");
5372 assert_eq!(parsed.version, 2);
5373 assert_eq!(parsed.entries.len(), 1);
5374 let object = &parsed.entries[0].object;
5375 assert_eq!(object.object_type, ObjectType::Blob);
5376 assert_eq!(object.body, b"hello\n");
5377 assert_eq!(
5378 parsed.entries[0].entry.oid,
5379 object
5380 .object_id(ObjectFormat::Sha256)
5381 .expect("test operation should succeed")
5382 );
5383 }
5384
5385 #[test]
5386 fn parses_bundle_pack_payload_with_bundle_format() {
5387 let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
5388 let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"bundle\n")
5389 .expect("test operation should succeed");
5390 let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
5391 .into_bytes()
5392 .into_iter()
5393 .chain(pack)
5394 .collect::<Vec<_>>();
5395 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
5396 .expect("test operation should succeed");
5397
5398 let parsed = PackFile::parse_bundle(&bundle).expect("test operation should succeed");
5399 assert_eq!(parsed.entries.len(), 1);
5400 assert_eq!(parsed.entries[0].object.object_type, ObjectType::Blob);
5401 assert_eq!(parsed.entries[0].object.body, b"bundle\n");
5402 }
5403
5404 fn lying_size_blob_pack(format: ObjectFormat, declared_size: u64, real_body: &[u8]) -> Vec<u8> {
5410 let mut pack = Vec::new();
5411 pack.extend_from_slice(b"PACK");
5412 pack.extend_from_slice(&2u32.to_be_bytes());
5413 pack.extend_from_slice(&1u32.to_be_bytes());
5414 write_pack_entry_header_kind(&mut pack, 3, declared_size);
5416 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
5417 encoder
5418 .write_all(real_body)
5419 .expect("test operation should succeed");
5420 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
5421 let checksum =
5422 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
5423 pack.extend_from_slice(checksum.as_bytes());
5424 pack
5425 }
5426
5427 #[test]
5440 fn rejects_decompression_bomb_header_without_oom() {
5441 for &declared in &[u64::MAX, 100 * 1024 * 1024 * 1024, u64::from(u32::MAX) * 4] {
5442 let pack = lying_size_blob_pack(ObjectFormat::Sha1, declared, b"tiny\n");
5443 let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
5444 let result = handle.join();
5445 assert!(
5447 result.is_ok(),
5448 "parsing a bomb header (declared={declared}) panicked instead of erroring cleanly"
5449 );
5450 let parse_result = result.expect("parse thread should not panic on a bomb header");
5452 assert!(
5453 parse_result.is_err(),
5454 "bomb header (declared={declared}) should be rejected as invalid"
5455 );
5456 }
5457 }
5458
5459 fn lying_result_size_delta_pack(
5466 format: ObjectFormat,
5467 declared_result_size: u64,
5468 delta_kind: DeltaKind,
5469 ) -> Vec<u8> {
5470 let base = b"hello";
5471 let result = b"hello world"; let mut delta = Vec::new();
5475 write_delta_varint(&mut delta, base.len() as u64);
5476 write_delta_varint(&mut delta, declared_result_size);
5477 let suffix = &result[base.len()..];
5479 delta.push(0x90); delta.push(base.len() as u8);
5481 delta.push(suffix.len() as u8);
5482 delta.extend_from_slice(suffix);
5483
5484 let mut pack = Vec::new();
5485 pack.extend_from_slice(b"PACK");
5486 pack.extend_from_slice(&2u32.to_be_bytes());
5487 pack.extend_from_slice(&2u32.to_be_bytes());
5488
5489 let base_offset = pack.len();
5490 write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
5491 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
5492 encoder
5493 .write_all(base)
5494 .expect("test operation should succeed");
5495 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
5496
5497 let delta_offset = pack.len();
5498 write_pack_entry_header_kind(
5499 &mut pack,
5500 match delta_kind {
5501 DeltaKind::Offset => 6,
5502 DeltaKind::Ref => 7,
5503 },
5504 delta.len() as u64,
5505 );
5506 match delta_kind {
5507 DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
5508 DeltaKind::Ref => {
5509 let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
5510 .expect("test operation should succeed");
5511 pack.extend_from_slice(base_oid.as_bytes());
5512 }
5513 }
5514 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
5515 encoder
5516 .write_all(&delta)
5517 .expect("test operation should succeed");
5518 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
5519
5520 let checksum =
5521 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
5522 pack.extend_from_slice(checksum.as_bytes());
5523 pack
5524 }
5525
5526 #[test]
5536 fn rejects_delta_result_size_bomb_without_oom() {
5537 let bombs: &[u64] = &[u64::MAX, 1024 * 1024 * 1024 * 1024];
5538 for &declared in bombs {
5539 for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
5540 let pack = lying_result_size_delta_pack(ObjectFormat::Sha1, declared, delta_kind);
5541 let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
5542 let join_result = handle.join();
5543 assert!(
5544 join_result.is_ok(),
5545 "delta bomb (declared={declared}, kind={delta_kind:?}) panicked/aborted \
5546 instead of erroring cleanly"
5547 );
5548 let parse_result =
5549 join_result.expect("parse thread should not panic on a delta bomb");
5550 assert!(
5551 parse_result.is_err(),
5552 "delta bomb (declared={declared}, kind={delta_kind:?}) should be rejected \
5553 as invalid (result.len() != declared)"
5554 );
5555 }
5556 }
5557 }
5558
5559 #[test]
5563 fn applies_legitimate_delta_after_result_size_bound() {
5564 for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
5565 let base = b"hello";
5566 let result = b"hello world";
5567 let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, delta_kind);
5568 let parsed = PackFile::parse_sha1(&pack).expect("legitimate delta should resolve");
5569 assert_eq!(parsed.entries.len(), 2);
5570 assert_eq!(parsed.entries[0].object.body, base);
5571 assert_eq!(parsed.entries[1].object.body, result);
5572 }
5573 }
5574
5575 #[test]
5576 fn bounded_inflate_reserve_caps_attacker_declared_size() {
5577 assert_eq!(bounded_inflate_reserve(u64::MAX as usize, 10), 10 * 1032);
5579 assert_eq!(
5581 bounded_inflate_reserve(usize::MAX, usize::MAX),
5582 MAX_INFLATE_RESERVE
5583 );
5584 assert_eq!(bounded_inflate_reserve(1000, 500), 1000);
5588 assert_eq!(bounded_inflate_reserve(0, 0), 64);
5590 }
5591
5592 #[test]
5593 fn rejects_bundle_pack_payload_with_wrong_object_format() {
5594 let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
5595 let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha256, "blob", b"bundle\n")
5596 .expect("test operation should succeed");
5597 let bundle_bytes =
5598 format!("# v3 git bundle\n@object-format=sha256\n{oid} refs/heads/main\n\n")
5599 .into_bytes()
5600 .into_iter()
5601 .chain(pack)
5602 .collect::<Vec<_>>();
5603 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
5604 .expect("test operation should succeed");
5605
5606 assert!(PackFile::parse_bundle(&bundle).is_err());
5607 }
5608
5609 fn assert_pack_index_view_matches_owned(index: &[u8], format: ObjectFormat) {
5610 let owned = PackIndex::parse(index, format).expect("test operation should succeed");
5611 let view = PackIndexView::parse(index, format).expect("test operation should succeed");
5612 let owned_view =
5613 PackIndexViewData::parse(Arc::from(index.to_vec().into_boxed_slice()), format)
5614 .expect("test operation should succeed");
5615
5616 assert_eq!(view.version, owned.version);
5617 assert_eq!(view.count, owned.entries.len());
5618 assert_eq!(view.count(), owned.entries.len());
5619 assert_eq!(view.fanout(), &owned.fanout);
5620 assert_eq!(view.pack_checksum, owned.pack_checksum);
5621 assert_eq!(view.index_checksum, owned.index_checksum);
5622 assert_eq!(owned_view.version, owned.version);
5623 assert_eq!(owned_view.count(), owned.entries.len());
5624 assert_eq!(owned_view.fanout(), &owned.fanout);
5625 assert_eq!(owned_view.pack_checksum, owned.pack_checksum);
5626 assert_eq!(owned_view.index_checksum, owned.index_checksum);
5627 for entry in &owned.entries {
5628 let owned_found = owned
5629 .find(&entry.oid)
5630 .expect("test operation should succeed");
5631 let expected = Some(PackIndexLookup {
5632 crc32: owned_found.crc32,
5633 offset: owned_found.offset,
5634 });
5635 assert_eq!(view.find(&entry.oid), expected);
5636 assert_eq!(owned_view.find(&entry.oid), expected);
5637 }
5638 }
5639
5640 #[test]
5641 fn writes_pack_and_index_that_round_trip() {
5642 let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
5643 let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
5644 .expect("test operation should succeed");
5645 let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5646 let index =
5647 PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
5648 let oid = object
5649 .object_id(ObjectFormat::Sha1)
5650 .expect("test operation should succeed");
5651 assert_eq!(pack.entries[0].object, object);
5652 assert_eq!(index.pack_checksum, pack.checksum);
5653 assert_eq!(
5654 index
5655 .find(&oid)
5656 .expect("test operation should succeed")
5657 .offset,
5658 12
5659 );
5660 }
5661
5662 #[test]
5663 fn pack_index_view_matches_owned_index_for_generated_sha1_pack() {
5664 let objects = (0..8)
5665 .map(|idx| {
5666 EncodedObject::new(
5667 ObjectType::Blob,
5668 format!("borrowed pack index view sha1 object {idx}\n").into_bytes(),
5669 )
5670 })
5671 .collect::<Vec<_>>();
5672 let written = PackFile::write_packed(&objects, ObjectFormat::Sha1)
5673 .expect("test operation should succeed");
5674
5675 assert_pack_index_view_matches_owned(&written.index, ObjectFormat::Sha1);
5676
5677 let view =
5678 PackIndexView::parse_v2_sha1(&written.index).expect("test operation should succeed");
5679 let missing = sley_core::object_id_for_bytes(
5680 ObjectFormat::Sha1,
5681 "blob",
5682 b"not present in borrowed index\n",
5683 )
5684 .expect("test operation should succeed");
5685 assert_eq!(view.find(&missing), None);
5686 }
5687
5688 #[test]
5689 fn writes_sha256_pack_and_index_that_round_trip() {
5690 let object = EncodedObject::new(ObjectType::Blob, b"hello sha256\n".to_vec());
5691 let written =
5692 PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
5693 .expect("test operation should succeed");
5694 let pack = PackFile::parse(&written.pack, ObjectFormat::Sha256)
5695 .expect("test operation should succeed");
5696 let index = PackIndex::parse(&written.index, ObjectFormat::Sha256)
5697 .expect("test operation should succeed");
5698 let oid = object
5699 .object_id(ObjectFormat::Sha256)
5700 .expect("test operation should succeed");
5701 assert_eq!(pack.entries[0].object, object);
5702 assert_eq!(index.pack_checksum, pack.checksum);
5703 assert_eq!(index.pack_checksum.format(), ObjectFormat::Sha256);
5704 assert_eq!(index.index_checksum.format(), ObjectFormat::Sha256);
5705 assert_eq!(
5706 index
5707 .find(&oid)
5708 .expect("test operation should succeed")
5709 .offset,
5710 12
5711 );
5712 }
5713
5714 #[test]
5715 fn pack_index_view_matches_owned_index_for_generated_sha256_pack() {
5716 let objects = (0..4)
5717 .map(|idx| {
5718 EncodedObject::new(
5719 ObjectType::Blob,
5720 format!("borrowed pack index view sha256 object {idx}\n").into_bytes(),
5721 )
5722 })
5723 .collect::<Vec<_>>();
5724 let written = PackFile::write_undeltified(&objects, ObjectFormat::Sha256)
5725 .expect("test operation should succeed");
5726
5727 assert_pack_index_view_matches_owned(&written.index, ObjectFormat::Sha256);
5728 }
5729
5730 #[test]
5731 fn indexes_existing_sha256_pack_bytes() {
5732 let object = EncodedObject::new(ObjectType::Blob, b"index raw sha256 pack\n".to_vec());
5733 let written =
5734 PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
5735 .expect("test operation should succeed");
5736
5737 let indexed = PackIndex::write_v2_for_pack(&written.pack, ObjectFormat::Sha256)
5738 .expect("test operation should succeed");
5739 let index = PackIndex::parse(&indexed.index, ObjectFormat::Sha256)
5740 .expect("test operation should succeed");
5741
5742 assert_eq!(indexed.pack_checksum, written.checksum);
5743 assert_eq!(indexed.entries, written.entries);
5744 assert_eq!(index.pack_checksum, written.checksum);
5745 assert_eq!(index.entries, written.entries);
5746 }
5747
5748 #[test]
5749 fn indexes_existing_delta_pack_bytes() {
5750 let (base, changed) = similar_blob_objects();
5751 let options = delta_pack_options(true);
5752 let written = PackFile::write_packed_with_options(
5753 &[base, changed.clone()],
5754 ObjectFormat::Sha1,
5755 &options,
5756 )
5757 .expect("test operation should succeed");
5758
5759 let indexed = PackIndex::write_v2_for_pack_sha1(&written.pack)
5760 .expect("test operation should succeed");
5761 let index =
5762 PackIndex::parse_v2_sha1(&indexed.index).expect("test operation should succeed");
5763 let changed_oid = changed
5764 .object_id(ObjectFormat::Sha1)
5765 .expect("test operation should succeed");
5766
5767 assert_eq!(indexed.pack_checksum, written.checksum);
5768 assert_eq!(indexed.entries, written.entries);
5769 assert_eq!(
5770 index
5771 .find(&changed_oid)
5772 .expect("test operation should succeed")
5773 .offset,
5774 written.entries[1].offset
5775 );
5776 assert_eq!(
5777 index
5778 .find(&changed_oid)
5779 .expect("test operation should succeed")
5780 .crc32,
5781 written.entries[1].crc32
5782 );
5783 }
5784
5785 #[test]
5786 fn writes_ref_delta_pack_and_index_that_round_trip() {
5787 let (base, changed) = similar_blob_objects();
5788 let options = delta_pack_options(false);
5789 let written = PackFile::write_packed_with_options(
5790 &[base.clone(), changed.clone()],
5791 ObjectFormat::Sha1,
5792 &options,
5793 )
5794 .expect("test operation should succeed");
5795 let mut second_offset = written.entries[1].offset as usize;
5796 let header = parse_entry_header(&written.pack, &mut second_offset)
5797 .expect("test operation should succeed");
5798 assert_eq!(header.kind, PackObjectKind::RefDelta);
5799
5800 let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5801 let index =
5802 PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
5803 let oid = changed
5804 .object_id(ObjectFormat::Sha1)
5805 .expect("test operation should succeed");
5806 assert_eq!(pack.entries[0].object, base);
5807 assert_eq!(pack.entries[1].object, changed);
5808 assert_eq!(index.pack_checksum, pack.checksum);
5809 assert_eq!(
5810 index
5811 .find(&oid)
5812 .expect("test operation should succeed")
5813 .offset,
5814 written.entries[1].offset
5815 );
5816 }
5817
5818 #[test]
5819 fn read_object_at_matches_full_parse_for_ofs_delta_pack() {
5820 let (base, changed) = similar_blob_objects();
5821 let options = delta_pack_options(true);
5822 let written = PackFile::write_packed_with_options(
5823 &[base, changed.clone()],
5824 ObjectFormat::Sha1,
5825 &options,
5826 )
5827 .expect("test operation should succeed");
5828 let mut second = written.entries[1].offset as usize;
5830 assert_eq!(
5831 parse_entry_header(&written.pack, &mut second)
5832 .expect("test operation should succeed")
5833 .kind,
5834 PackObjectKind::OfsDelta
5835 );
5836 let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5838 for po in &parsed.entries {
5839 let got =
5840 read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |_| {
5841 Ok(None)
5842 })
5843 .expect("test operation should succeed");
5844 assert_eq!(*got, po.object, "offset {}", po.entry.offset);
5845 }
5846 }
5847
5848 #[derive(Default)]
5851 struct MapHeaderTypeCache(HashMap<u64, (ObjectType, u64)>);
5852
5853 impl HeaderTypeCache for MapHeaderTypeCache {
5854 fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
5855 self.0.get(&pack_offset).copied()
5856 }
5857 fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
5858 self.0.insert(pack_offset, header);
5859 }
5860 }
5861
5862 #[test]
5863 fn read_object_header_at_cached_matches_uncached_cold_and_warm_for_ofs_delta() {
5864 let (base, changed) = similar_blob_objects();
5865 let options = delta_pack_options(true);
5866 let written =
5867 PackFile::write_packed_with_options(&[base, changed], ObjectFormat::Sha1, &options)
5868 .expect("test operation should succeed");
5869 let mut second = written.entries[1].offset as usize;
5871 assert_eq!(
5872 parse_entry_header(&written.pack, &mut second)
5873 .expect("test operation should succeed")
5874 .kind,
5875 PackObjectKind::OfsDelta
5876 );
5877
5878 let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5879 let mut cache = MapHeaderTypeCache::default();
5880 for po in &parsed.entries {
5881 let uncached =
5882 read_object_header_at(&written.pack, po.entry.offset, ObjectFormat::Sha1, |_| {
5883 Ok(None)
5884 })
5885 .expect("test operation should succeed");
5886 assert_eq!(
5888 uncached,
5889 (po.object.object_type, po.object.body.len() as u64),
5890 "uncached header at offset {}",
5891 po.entry.offset
5892 );
5893 let cold = read_object_header_at_with_cache(
5895 &written.pack,
5896 po.entry.offset,
5897 ObjectFormat::Sha1,
5898 |_| Ok(None),
5899 &mut cache,
5900 )
5901 .expect("test operation should succeed");
5902 assert_eq!(cold, uncached, "cold cache at offset {}", po.entry.offset);
5903 }
5904 for po in &parsed.entries {
5907 let warm = read_object_header_at_with_cache(
5908 &written.pack,
5909 po.entry.offset,
5910 ObjectFormat::Sha1,
5911 |_| panic!("warm cache must not re-walk the chain"),
5912 &mut cache,
5913 )
5914 .expect("test operation should succeed");
5915 assert_eq!(
5916 warm,
5917 (po.object.object_type, po.object.body.len() as u64),
5918 "warm cache at offset {}",
5919 po.entry.offset
5920 );
5921 }
5922 }
5923
5924 #[test]
5925 fn read_object_at_matches_full_parse_for_ref_delta_pack() {
5926 let (base, changed) = similar_blob_objects();
5927 let options = delta_pack_options(false);
5928 let written = PackFile::write_packed_with_options(
5929 &[base, changed.clone()],
5930 ObjectFormat::Sha1,
5931 &options,
5932 )
5933 .expect("test operation should succeed");
5934 let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5935 let by_oid: HashMap<ObjectId, Arc<EncodedObject>> = parsed
5936 .entries
5937 .iter()
5938 .map(|po| (po.entry.oid, Arc::new(po.object.clone())))
5939 .collect();
5940 for po in &parsed.entries {
5941 let got =
5942 read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |oid| {
5943 Ok(by_oid.get(oid).cloned())
5944 })
5945 .expect("test operation should succeed");
5946 assert_eq!(*got, po.object);
5947 }
5948 }
5949
5950 #[derive(Default)]
5954 struct CountingDeltaCache {
5955 map: std::cell::RefCell<HashMap<u64, Arc<EncodedObject>>>,
5956 hits: std::cell::Cell<usize>,
5957 inserts: std::cell::Cell<usize>,
5958 }
5959
5960 impl PackDeltaCache for CountingDeltaCache {
5961 fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
5962 let hit = self.map.borrow().get(&offset).cloned();
5963 if hit.is_some() {
5964 self.hits.set(self.hits.get() + 1);
5965 }
5966 hit
5967 }
5968 fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
5969 self.inserts.set(self.inserts.get() + 1);
5970 self.map.borrow_mut().insert(offset, object);
5971 }
5972 }
5973
5974 #[test]
5975 fn read_object_at_with_cache_matches_uncached_and_reuses_bases() {
5976 let mut objects = Vec::new();
5979 for idx in 0..8u32 {
5980 let mut body = vec![b'x'; 4096];
5981 body.extend_from_slice(format!("\nvariant {idx}\n").as_bytes());
5982 objects.push(EncodedObject::new(ObjectType::Blob, body));
5983 }
5984 let options = delta_pack_options(true);
5985 let written = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
5986 .expect("test operation should succeed");
5987 let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5988
5989 let cache = CountingDeltaCache::default();
5990 for _ in 0..2 {
5993 for po in &parsed.entries {
5994 let got = read_object_at_with_cache_arc(
5995 &written.pack,
5996 po.entry.offset,
5997 ObjectFormat::Sha1,
5998 |_| Ok(None),
5999 &cache,
6000 )
6001 .expect("test operation should succeed");
6002 assert_eq!(*got, po.object, "offset {}", po.entry.offset);
6003 }
6004 }
6005 assert!(cache.hits.get() > 0, "cache never served a warm object");
6008 }
6009
6010 #[test]
6011 fn writes_ofs_delta_pack_and_index_that_round_trip() {
6012 let (base, changed) = similar_blob_objects();
6013 let options = delta_pack_options(true);
6014 let written = PackFile::write_packed_with_options(
6015 &[base.clone(), changed.clone()],
6016 ObjectFormat::Sha1,
6017 &options,
6018 )
6019 .expect("test operation should succeed");
6020 let mut second_offset = written.entries[1].offset as usize;
6021 let header = parse_entry_header(&written.pack, &mut second_offset)
6022 .expect("test operation should succeed");
6023 assert_eq!(header.kind, PackObjectKind::OfsDelta);
6024
6025 let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
6026 let index =
6027 PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
6028 let oid = changed
6029 .object_id(ObjectFormat::Sha1)
6030 .expect("test operation should succeed");
6031 assert_eq!(pack.entries[0].object, base);
6032 assert_eq!(pack.entries[1].object, changed);
6033 assert_eq!(index.pack_checksum, pack.checksum);
6034 assert_eq!(
6035 index
6036 .find(&oid)
6037 .expect("test operation should succeed")
6038 .offset,
6039 written.entries[1].offset
6040 );
6041 }
6042
6043 #[test]
6044 fn resolves_ofs_delta_pack_entry() {
6045 let base = b"hello";
6046 let result = b"hello world";
6047 let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, DeltaKind::Offset);
6048 let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
6049 assert_eq!(parsed.entries.len(), 2);
6050 assert_eq!(parsed.entries[0].object.body, base);
6051 assert_eq!(parsed.entries[1].object.body, result);
6052 assert_eq!(
6053 parsed.entries[1].entry.oid,
6054 sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
6055 .expect("test operation should succeed")
6056 );
6057 }
6058
6059 #[test]
6060 fn resolves_ref_delta_pack_entry() {
6061 let base = b"hello";
6062 let result = b"hello world";
6063 let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, DeltaKind::Ref);
6064 let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
6065 assert_eq!(parsed.entries.len(), 2);
6066 assert_eq!(parsed.entries[0].object.body, base);
6067 assert_eq!(parsed.entries[1].object.body, result);
6068 assert_eq!(
6069 parsed.entries[1].entry.oid,
6070 sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
6071 .expect("test operation should succeed")
6072 );
6073 }
6074
6075 #[test]
6076 fn resolves_thin_ref_delta_pack_entry_with_external_base() {
6077 let base = b"hello";
6078 let result = b"hello world";
6079 let pack = thin_ref_delta_pack(ObjectFormat::Sha1, base, result);
6080 assert!(PackFile::parse_sha1(&pack).is_err());
6081
6082 let base_oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", base)
6083 .expect("test operation should succeed");
6084 let parsed = PackFile::parse_thin(&pack, ObjectFormat::Sha1, |oid| {
6085 if oid == &base_oid {
6086 Ok(Some(EncodedObject::new(ObjectType::Blob, base.to_vec())))
6087 } else {
6088 Ok(None)
6089 }
6090 })
6091 .expect("test operation should succeed");
6092 assert_eq!(parsed.entries.len(), 1);
6093 assert_eq!(parsed.entries[0].object.body, result);
6094 assert_eq!(
6095 parsed.entries[0].entry.oid,
6096 sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
6097 .expect("test operation should succeed")
6098 );
6099 }
6100
6101 #[test]
6102 fn rejects_bad_pack_checksum() {
6103 let mut pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
6104 let last = pack.len() - 1;
6105 pack[last] ^= 1;
6106 assert!(PackFile::parse_sha1(&pack).is_err());
6107 }
6108
6109 #[test]
6110 fn raw_pack_index_rejects_bad_pack_checksum() {
6111 let mut pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
6112 let last = pack.len() - 1;
6113 pack[last] ^= 1;
6114 assert!(PackIndex::write_v2_for_pack_sha1(&pack).is_err());
6115 }
6116
6117 #[test]
6118 fn pack_index_writer_rejects_duplicate_object_ids() {
6119 let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"same\n")
6120 .expect("test operation should succeed");
6121 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6122 .expect("test operation should succeed");
6123 let entries = vec![
6124 PackIndexEntry {
6125 oid,
6126 crc32: 1,
6127 offset: 12,
6128 },
6129 PackIndexEntry {
6130 oid,
6131 crc32: 2,
6132 offset: 24,
6133 },
6134 ];
6135 assert!(PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack_checksum).is_err());
6136 }
6137
6138 #[test]
6139 fn parses_single_entry_pack_index() {
6140 let oid = ObjectId::from_hex(
6141 ObjectFormat::Sha1,
6142 "ce013625030ba8dba906f756967f9e9ca394464a",
6143 )
6144 .expect("test operation should succeed");
6145 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6146 .expect("test operation should succeed");
6147 let index = single_entry_index(
6148 ObjectFormat::Sha1,
6149 oid,
6150 0x1234_5678,
6151 12,
6152 pack_checksum.clone(),
6153 );
6154 let parsed = PackIndex::parse_v2_sha1(&index).expect("test operation should succeed");
6155 assert_eq!(parsed.version, 2);
6156 assert_eq!(parsed.pack_checksum, pack_checksum);
6157 assert_eq!(parsed.entries.len(), 1);
6158 assert_eq!(
6159 parsed
6160 .find(&oid)
6161 .expect("test operation should succeed")
6162 .offset,
6163 12
6164 );
6165 assert_eq!(
6166 parsed
6167 .find(&oid)
6168 .expect("test operation should succeed")
6169 .crc32,
6170 0x1234_5678
6171 );
6172 assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
6173 }
6174
6175 #[test]
6176 fn parses_single_entry_pack_index_v1() {
6177 let oid = ObjectId::from_hex(
6178 ObjectFormat::Sha1,
6179 "ce013625030ba8dba906f756967f9e9ca394464a",
6180 )
6181 .expect("test operation should succeed");
6182 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6183 .expect("test operation should succeed");
6184 let index =
6185 single_entry_index_v1(ObjectFormat::Sha1, oid, 0x1234_5678, pack_checksum.clone());
6186 let parsed =
6187 PackIndex::parse(&index, ObjectFormat::Sha1).expect("test operation should succeed");
6188 assert_eq!(parsed.version, 1);
6189 assert_eq!(parsed.pack_checksum, pack_checksum);
6190 assert_eq!(parsed.entries.len(), 1);
6191 assert_eq!(
6192 parsed
6193 .find(&oid)
6194 .expect("test operation should succeed")
6195 .offset,
6196 0x1234_5678
6197 );
6198 assert_eq!(
6199 parsed
6200 .find(&oid)
6201 .expect("test operation should succeed")
6202 .crc32,
6203 0
6204 );
6205 assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
6206 }
6207
6208 #[test]
6209 fn rejects_bad_pack_index_v1_checksum() {
6210 let oid = ObjectId::from_hex(
6211 ObjectFormat::Sha1,
6212 "ce013625030ba8dba906f756967f9e9ca394464a",
6213 )
6214 .expect("test operation should succeed");
6215 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6216 .expect("test operation should succeed");
6217 let mut index = single_entry_index_v1(ObjectFormat::Sha1, oid, 12, pack_checksum);
6218 let last = index.len() - 1;
6219 index[last] ^= 1;
6220 assert!(PackIndex::parse(&index, ObjectFormat::Sha1).is_err());
6221 }
6222
6223 #[test]
6224 fn pack_index_view_reads_v2_large_offsets() {
6225 let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"large offset a\n")
6226 .expect("test operation should succeed");
6227 let second =
6228 sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"large offset b\n")
6229 .expect("test operation should succeed");
6230 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6231 .expect("test operation should succeed");
6232 let entries = vec![
6233 PackIndexEntry {
6234 oid: first,
6235 crc32: 0x1111_2222,
6236 offset: 0x8000_0000,
6237 },
6238 PackIndexEntry {
6239 oid: second,
6240 crc32: 0x3333_4444,
6241 offset: 0x1_0000_0042,
6242 },
6243 ];
6244 let index = PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack_checksum)
6245 .expect("test operation should succeed");
6246
6247 assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
6248 let view = PackIndexView::parse(&index, ObjectFormat::Sha1)
6249 .expect("test operation should succeed");
6250 for entry in entries {
6251 assert_eq!(
6252 view.find(&entry.oid),
6253 Some(PackIndexLookup {
6254 crc32: entry.crc32,
6255 offset: entry.offset,
6256 })
6257 );
6258 }
6259 }
6260
6261 #[test]
6262 fn pack_index_view_default_parse_checks_index_checksum() {
6263 let oid = ObjectId::from_hex(
6264 ObjectFormat::Sha1,
6265 "ce013625030ba8dba906f756967f9e9ca394464a",
6266 )
6267 .expect("test operation should succeed");
6268 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6269 .expect("test operation should succeed");
6270 let mut index = single_entry_index(ObjectFormat::Sha1, oid, 0x1234_5678, 12, pack_checksum);
6271 let last = index.len() - 1;
6272 index[last] ^= 1;
6273
6274 assert!(PackIndexView::parse(&index, ObjectFormat::Sha1).is_err());
6275 let view = PackIndexView::parse_without_checksum(&index, ObjectFormat::Sha1)
6276 .expect("test operation should succeed");
6277 let trusted_view = PackIndexViewData::parse_trusted_without_checksum(
6278 Arc::from(index.clone().into_boxed_slice()),
6279 ObjectFormat::Sha1,
6280 )
6281 .expect("test operation should succeed");
6282 assert_eq!(
6283 view.find(&oid),
6284 Some(PackIndexLookup {
6285 crc32: 0x1234_5678,
6286 offset: 12,
6287 })
6288 );
6289 assert_eq!(
6290 trusted_view.find(&oid),
6291 Some(PackIndexLookup {
6292 crc32: 0x1234_5678,
6293 offset: 12,
6294 })
6295 );
6296 }
6297
6298 #[test]
6299 fn parses_pack_reverse_index() {
6300 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6301 .expect("test operation should succeed");
6302 let reverse_index = PackReverseIndex::write(ObjectFormat::Sha1, &[2, 0, 1], &pack_checksum)
6303 .expect("test operation should succeed");
6304 let parsed = PackReverseIndex::parse(&reverse_index, ObjectFormat::Sha1, 3)
6305 .expect("test operation should succeed");
6306 assert_eq!(parsed.version, 1);
6307 assert_eq!(parsed.format, ObjectFormat::Sha1);
6308 assert_eq!(parsed.positions, vec![2, 0, 1]);
6309 assert_eq!(parsed.pack_checksum, pack_checksum);
6310 assert_eq!(
6311 PackReverseIndex::write(ObjectFormat::Sha1, &parsed.positions, &parsed.pack_checksum)
6312 .expect("test operation should succeed"),
6313 reverse_index
6314 );
6315 }
6316
6317 #[test]
6318 fn rejects_bad_pack_reverse_index_checksum() {
6319 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6320 .expect("test operation should succeed");
6321 let mut reverse_index = PackReverseIndex::write(ObjectFormat::Sha1, &[0], &pack_checksum)
6322 .expect("test operation should succeed");
6323 let last = reverse_index.len() - 1;
6324 reverse_index[last] ^= 1;
6325 assert!(PackReverseIndex::parse(&reverse_index, ObjectFormat::Sha1, 1).is_err());
6326 }
6327
6328 #[test]
6329 fn rejects_bad_pack_reverse_index_positions() {
6330 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6331 .expect("test operation should succeed");
6332 let duplicate = pack_reverse_index(ObjectFormat::Sha1, &[0, 0], pack_checksum.clone());
6333 assert!(PackReverseIndex::parse(&duplicate, ObjectFormat::Sha1, 2).is_err());
6334 let out_of_range = pack_reverse_index(ObjectFormat::Sha1, &[0, 2], pack_checksum);
6335 assert!(PackReverseIndex::parse(&out_of_range, ObjectFormat::Sha1, 2).is_err());
6336 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6337 .expect("test operation should succeed");
6338 assert!(PackReverseIndex::write(ObjectFormat::Sha1, &[0, 0], &pack_checksum).is_err());
6339 assert!(PackReverseIndex::write(ObjectFormat::Sha1, &[0, 2], &pack_checksum).is_err());
6340 }
6341
6342 #[test]
6343 fn parses_pack_mtimes() {
6344 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6345 .expect("test operation should succeed");
6346 let mtimes = PackMtimes::write(
6347 ObjectFormat::Sha1,
6348 &[1, 1_700_000_000, u32::MAX],
6349 &pack_checksum,
6350 )
6351 .expect("test operation should succeed");
6352 let parsed = PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 3)
6353 .expect("test operation should succeed");
6354 assert_eq!(parsed.version, 1);
6355 assert_eq!(parsed.format, ObjectFormat::Sha1);
6356 assert_eq!(parsed.mtimes, vec![1, 1_700_000_000, u32::MAX]);
6357 assert_eq!(parsed.pack_checksum, pack_checksum);
6358 assert_eq!(
6359 PackMtimes::write(ObjectFormat::Sha1, &parsed.mtimes, &parsed.pack_checksum)
6360 .expect("test operation should succeed"),
6361 mtimes
6362 );
6363 }
6364
6365 #[test]
6366 fn rejects_bad_pack_mtimes_checksum() {
6367 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6368 .expect("test operation should succeed");
6369 let mut mtimes = PackMtimes::write(ObjectFormat::Sha1, &[1], &pack_checksum)
6370 .expect("test operation should succeed");
6371 let last = mtimes.len() - 1;
6372 mtimes[last] ^= 1;
6373 assert!(PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 1).is_err());
6374 }
6375
6376 #[test]
6377 fn rejects_bad_pack_mtimes_shape() {
6378 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6379 .expect("test operation should succeed");
6380 let mtimes = pack_mtimes(ObjectFormat::Sha1, &[1, 2], pack_checksum.clone());
6381 assert!(PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 1).is_err());
6382
6383 let mut wrong_hash = pack_mtimes(ObjectFormat::Sha1, &[1], pack_checksum);
6384 wrong_hash[11] = 2;
6385 let checksum_offset = wrong_hash.len() - ObjectFormat::Sha1.raw_len();
6386 let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &wrong_hash[..checksum_offset])
6387 .expect("test operation should succeed");
6388 wrong_hash[checksum_offset..].copy_from_slice(checksum.as_bytes());
6389 assert!(PackMtimes::parse(&wrong_hash, ObjectFormat::Sha1, 1).is_err());
6390 }
6391
6392 #[test]
6393 fn parses_multi_pack_index_header_and_chunk_lookup() {
6394 let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
6395 .expect("test operation should succeed");
6396 let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
6397 .expect("test operation should succeed");
6398 let chunks = midx_chunks_with_pack_names(
6399 ObjectFormat::Sha1,
6400 b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
6401 &[(first.clone(), 0, 12), (second.clone(), 1, 0x1_0000_0000)],
6402 );
6403 let midx = multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks);
6404 let parsed = MultiPackIndex::parse(&midx, ObjectFormat::Sha1)
6405 .expect("test operation should succeed");
6406 assert_eq!(parsed.version, 2);
6407 assert_eq!(parsed.format, ObjectFormat::Sha1);
6408 assert_eq!(parsed.pack_count, 2);
6409 assert_eq!(parsed.pack_names, vec!["pack-a.idx", "pack-b.idx"]);
6410 assert_eq!(parsed.object_count, 2);
6411 assert_eq!(parsed.objects.len(), 2);
6412 assert_eq!(
6413 parsed
6414 .find(&first)
6415 .expect("test operation should succeed")
6416 .pack_int_id,
6417 0
6418 );
6419 assert_eq!(
6420 parsed
6421 .find(&first)
6422 .expect("test operation should succeed")
6423 .offset,
6424 12
6425 );
6426 assert_eq!(
6427 parsed
6428 .find(&second)
6429 .expect("test operation should succeed")
6430 .pack_int_id,
6431 1
6432 );
6433 assert_eq!(
6434 parsed
6435 .find(&second)
6436 .expect("test operation should succeed")
6437 .offset,
6438 0x1_0000_0000
6439 );
6440 assert_eq!(parsed.reverse_index, None);
6441 assert_eq!(parsed.bitmapped_packs, None);
6442 assert_eq!(parsed.chunks.len(), 5);
6443 assert_eq!(parsed.chunks[0].id, *b"PNAM");
6444 assert_eq!(parsed.chunks[0].offset, 84);
6445 assert_eq!(parsed.chunks[0].len, 24);
6446 assert_eq!(parsed.chunks[1].id, *b"OIDF");
6447 assert_eq!(parsed.chunks[1].offset, 108);
6448 assert_eq!(parsed.chunks[1].len, 1024);
6449 }
6450
6451 #[test]
6452 fn raw_multi_pack_index_lookup_finds_pack_and_offset() {
6453 let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
6454 .expect("test operation should succeed");
6455 let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
6456 .expect("test operation should succeed");
6457 let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
6458 .expect("test operation should succeed");
6459 let chunks = midx_chunks_with_pack_names(
6460 ObjectFormat::Sha1,
6461 b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
6462 &[(first.clone(), 0, 12), (second.clone(), 1, 0x1_0000_0000)],
6463 );
6464 let midx = Arc::new(multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks));
6465 let lookup = MultiPackIndexOidLookup::parse(midx, ObjectFormat::Sha1)
6466 .expect("test operation should succeed");
6467
6468 assert!(lookup.contains(&first));
6469 assert!(lookup.contains(&second));
6470 assert!(!lookup.contains(&missing));
6471
6472 let first_entry = lookup
6473 .find(&first)
6474 .expect("test operation should succeed")
6475 .expect("object should be present");
6476 assert_eq!(lookup.pack_name(first_entry.pack_int_id), Some("pack-a.idx"));
6477 assert_eq!(first_entry.offset, 12);
6478
6479 let second_entry = lookup
6480 .find(&second)
6481 .expect("test operation should succeed")
6482 .expect("object should be present");
6483 assert_eq!(lookup.pack_name(second_entry.pack_int_id), Some("pack-b.idx"));
6484 assert_eq!(second_entry.offset, 0x1_0000_0000);
6485 assert!(
6486 lookup
6487 .find(&missing)
6488 .expect("test operation should succeed")
6489 .is_none()
6490 );
6491 }
6492
6493 #[test]
6494 fn rejects_bad_multi_pack_index_checksum() {
6495 let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
6496 let mut midx = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
6497 let last = midx.len() - 1;
6498 midx[last] ^= 1;
6499 assert!(MultiPackIndex::parse(&midx, ObjectFormat::Sha1).is_err());
6500 }
6501
6502 #[test]
6503 fn rejects_bad_multi_pack_index_shape() {
6504 let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
6505 let mut wrong_hash = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
6506 wrong_hash[5] = 2;
6507 let checksum_offset = wrong_hash.len() - ObjectFormat::Sha1.raw_len();
6508 let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &wrong_hash[..checksum_offset])
6509 .expect("test operation should succeed");
6510 wrong_hash[checksum_offset..].copy_from_slice(checksum.as_bytes());
6511 assert!(MultiPackIndex::parse(&wrong_hash, ObjectFormat::Sha1).is_err());
6512
6513 let mut missing_terminator = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
6514 missing_terminator[12] = b'B';
6515 let checksum_offset = missing_terminator.len() - ObjectFormat::Sha1.raw_len();
6516 let checksum =
6517 sley_core::digest_bytes(ObjectFormat::Sha1, &missing_terminator[..checksum_offset])
6518 .expect("test operation should succeed");
6519 missing_terminator[checksum_offset..].copy_from_slice(checksum.as_bytes());
6520 assert!(MultiPackIndex::parse(&missing_terminator, ObjectFormat::Sha1).is_err());
6521
6522 let mut bad_offset = multi_pack_index(
6523 ObjectFormat::Sha1,
6524 2,
6525 0,
6526 &midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]),
6527 );
6528 bad_offset[16..24].copy_from_slice(&0u64.to_be_bytes());
6529 let checksum_offset = bad_offset.len() - ObjectFormat::Sha1.raw_len();
6530 let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &bad_offset[..checksum_offset])
6531 .expect("test operation should succeed");
6532 bad_offset[checksum_offset..].copy_from_slice(checksum.as_bytes());
6533 assert!(MultiPackIndex::parse(&bad_offset, ObjectFormat::Sha1).is_err());
6534 }
6535
/// Exercises pack-name (`PNAM`) handling: an index without chunks, one with a single name where
/// two are announced, one with non-NUL bytes after the name, and an unsorted index built with
/// `1` as its second header argument must all fail to parse. The same unsorted names built with
/// `2` parse and keep their stored order.
#[test]
fn rejects_bad_multi_pack_index_pack_names() {
    let format = ObjectFormat::Sha1;

    // No chunks at all.
    let without_chunks = multi_pack_index(format, 2, 1, &[]);
    assert!(MultiPackIndex::parse(&without_chunks, format).is_err());

    // One stored name while the header arguments are `2, 2`.
    let single_name = midx_chunks_with_pack_names(format, b"pack-a.idx\0".to_vec(), &[]);
    let too_few = multi_pack_index(format, 2, 2, &single_name);
    assert!(MultiPackIndex::parse(&too_few, format).is_err());

    // Bytes following the terminated name are `xxxx` instead of NULs.
    let padded_name = midx_chunks_with_pack_names(format, b"pack-a.idx\0xxxx".to_vec(), &[]);
    let bad_padding = multi_pack_index(format, 2, 1, &padded_name);
    assert!(MultiPackIndex::parse(&bad_padding, format).is_err());

    // "pack-b" before "pack-a": rejected for the `1` header, accepted for the `2` header.
    let unsorted_names = b"pack-b.idx\0pack-a.idx\0";

    let v1_chunks = midx_chunks_with_pack_names(format, unsorted_names.to_vec(), &[]);
    let unsorted_v1 = multi_pack_index(format, 1, 2, &v1_chunks);
    assert!(MultiPackIndex::parse(&unsorted_v1, format).is_err());

    let v2_chunks = midx_chunks_with_pack_names(format, unsorted_names.to_vec(), &[]);
    let unsorted_v2 = multi_pack_index(format, 2, 2, &v2_chunks);
    let parsed =
        MultiPackIndex::parse(&unsorted_v2, format).expect("test operation should succeed");
    assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
}
6583
/// Feeds the parser multi-pack indexes whose object tables are inconsistent and expects every
/// one of them to be rejected: a missing `OIDF`, an all-zero fanout, OIDs stored in descending
/// order, an entry pointing at pack id `1` with a single pack name, a large offset without a
/// `LOFF` chunk, and a one-byte `LOFF` chunk.
#[test]
fn rejects_bad_multi_pack_index_object_tables() {
    let format = ObjectFormat::Sha1;
    let oid_a = ObjectId::from_hex(format, "1111111111111111111111111111111111111111")
        .expect("test operation should succeed");
    let oid_b = ObjectId::from_hex(format, "2222222222222222222222222222222222222222")
        .expect("test operation should succeed");
    // Every hand-assembled chunk list below starts with the same single-pack PNAM chunk.
    let pack_names = || (*b"PNAM", b"pack-a.idx\0\0".to_vec());

    // PNAM only: the OIDF chunk is absent.
    let missing_oidf = multi_pack_index(format, 2, 1, &[pack_names()]);
    assert!(MultiPackIndex::parse(&missing_oidf, format).is_err());

    // A zeroed fanout table next to one stored OID.
    let zero_fanout_chunks = vec![
        pack_names(),
        (*b"OIDF", vec![0; 256 * 4]),
        (*b"OIDL", oid_a.as_bytes().to_vec()),
        (*b"OOFF", midx_ooff_entries(&[(0, 12)], &mut Vec::new())),
    ];
    let bad_fanout = multi_pack_index(format, 2, 1, &zero_fanout_chunks);
    assert!(MultiPackIndex::parse(&bad_fanout, format).is_err());

    // The lookup table lists `oid_b` before `oid_a`.
    let mut descending_oids = oid_b.as_bytes().to_vec();
    descending_oids.extend_from_slice(oid_a.as_bytes());
    let unsorted_chunks = vec![
        pack_names(),
        (*b"OIDF", midx_oid_fanout(&[oid_a.clone(), oid_b.clone()])),
        (*b"OIDL", descending_oids),
        (
            *b"OOFF",
            midx_ooff_entries(&[(0, 12), (0, 24)], &mut Vec::new()),
        ),
    ];
    let unsorted = multi_pack_index(format, 2, 1, &unsorted_chunks);
    assert!(MultiPackIndex::parse(&unsorted, format).is_err());

    // The only object names pack id 1 although a single pack name is stored.
    let foreign_pack_chunks = midx_chunks_with_pack_names(
        format,
        b"pack-a.idx\0\0".to_vec(),
        &[(oid_a.clone(), 1, 12)],
    );
    let bad_pack = multi_pack_index(format, 2, 1, &foreign_pack_chunks);
    assert!(MultiPackIndex::parse(&bad_pack, format).is_err());

    // An offset of 0x1_0000_0000 is encoded, but the collected large offsets are never written.
    let mut large_offsets = Vec::new();
    let missing_loff_chunks = vec![
        pack_names(),
        (*b"OIDF", midx_oid_fanout(std::slice::from_ref(&oid_a))),
        (*b"OIDL", oid_a.as_bytes().to_vec()),
        (
            *b"OOFF",
            midx_ooff_entries(&[(0, 0x1_0000_0000)], &mut large_offsets),
        ),
    ];
    let missing_loff = multi_pack_index(format, 2, 1, &missing_loff_chunks);
    assert!(MultiPackIndex::parse(&missing_loff, format).is_err());

    // A LOFF chunk holding a single byte.
    let mut short_loff_chunks =
        midx_chunks_with_pack_names(format, b"pack-a.idx\0\0".to_vec(), &[]);
    short_loff_chunks.push((*b"LOFF", vec![0]));
    let bad_loff = multi_pack_index(format, 2, 1, &short_loff_chunks);
    assert!(MultiPackIndex::parse(&bad_loff, format).is_err());
}
6659
/// Builds a two-pack index that also carries `RIDX` and `BTMP` chunks and checks that parsing
/// exposes them as `reverse_index` and `bitmapped_packs`.
#[test]
fn parses_multi_pack_index_bitmap_chunks() {
    let format = ObjectFormat::Sha1;
    let first = sley_core::object_id_for_bytes(format, "blob", b"first object\n")
        .expect("test operation should succeed");
    let second = sley_core::object_id_for_bytes(format, "blob", b"second object\n")
        .expect("test operation should succeed");

    let mut chunks = midx_chunks_with_pack_names(
        format,
        b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
        &[(first, 0, 12), (second, 1, 24)],
    );
    let reverse_index_chunk = (*b"RIDX", midx_u32_table(&[1, 0]));
    chunks.push(reverse_index_chunk);
    let bitmap_packs_chunk = (*b"BTMP", midx_bitmap_packs(&[(0, 1), (1, 1)]));
    chunks.push(bitmap_packs_chunk);

    let midx = multi_pack_index(format, 2, 2, &chunks);
    let parsed = MultiPackIndex::parse(&midx, format).expect("test operation should succeed");

    let expected_packs = vec![
        MultiPackBitmapPack {
            bitmap_pos: 0,
            bitmap_nr: 1,
        },
        MultiPackBitmapPack {
            bitmap_pos: 1,
            bitmap_nr: 1,
        },
    ];
    assert_eq!(parsed.reverse_index, Some(vec![1, 0]));
    assert_eq!(parsed.bitmapped_packs, Some(expected_packs));
}
6692
/// Writes a two-pack index (one entry with an offset of 0x1_0000_0000) and parses it back,
/// checking version, pack names, object count, both lookups and the presence of a `LOFF` chunk.
#[test]
fn writes_multi_pack_index_that_round_trips() {
    let format = ObjectFormat::Sha1;
    let first = sley_core::object_id_for_bytes(format, "blob", b"first object\n")
        .expect("test operation should succeed");
    let second = sley_core::object_id_for_bytes(format, "blob", b"second object\n")
        .expect("test operation should succeed");

    let entries = [
        MultiPackIndexEntry {
            oid: second.clone(),
            pack_int_id: 0,
            offset: 0x1_0000_0000,
        },
        MultiPackIndexEntry {
            oid: first.clone(),
            pack_int_id: 1,
            offset: 12,
        },
    ];
    let bytes = MultiPackIndex::write(
        format,
        2,
        &["pack-b.idx".into(), "pack-a.idx".into()],
        &entries,
    )
    .expect("test operation should succeed");

    let parsed = MultiPackIndex::parse(&bytes, format).expect("test operation should succeed");
    assert_eq!(parsed.version, 2);
    assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
    assert_eq!(parsed.object_count, 2);

    // Each object must resolve to the pack id and offset it was written with.
    let located_first = parsed
        .find(&first)
        .expect("test operation should succeed");
    assert_eq!(located_first.pack_int_id, 1);
    assert_eq!(located_first.offset, 12);

    let located_second = parsed
        .find(&second)
        .expect("test operation should succeed");
    assert_eq!(located_second.pack_int_id, 0);
    assert_eq!(located_second.offset, 0x1_0000_0000);

    let has_large_offset_chunk = parsed.chunks.iter().any(|chunk| chunk.id == *b"LOFF");
    assert!(has_large_offset_chunk);
}
6753
/// Checks that `MultiPackIndex::write` returns an error for version 3, for unsorted names with
/// version 1, for a pack name containing `/`, for an entry whose `pack_int_id` is 1 with a single
/// pack name, and for two entries sharing one object id.
#[test]
fn write_multi_pack_index_rejects_invalid_inputs() {
    let format = ObjectFormat::Sha1;
    let oid = sley_core::object_id_for_bytes(format, "blob", b"object\n")
        .expect("test operation should succeed");

    let unknown_version = MultiPackIndex::write(format, 3, &["pack-a.idx".into()], &[]);
    assert!(unknown_version.is_err());

    let unsorted_v1 = MultiPackIndex::write(
        format,
        1,
        &["pack-b.idx".into(), "pack-a.idx".into()],
        &[],
    );
    assert!(unsorted_v1.is_err());

    let slash_in_name = MultiPackIndex::write(format, 2, &["pack/a.idx".into()], &[]);
    assert!(slash_in_name.is_err());

    let unknown_pack_id = MultiPackIndex::write(
        format,
        2,
        &["pack-a.idx".into()],
        &[MultiPackIndexEntry {
            oid,
            pack_int_id: 1,
            offset: 12,
        }],
    );
    assert!(unknown_pack_id.is_err());

    let repeated_oid = MultiPackIndex::write(
        format,
        2,
        &["pack-a.idx".into()],
        &[
            MultiPackIndexEntry {
                oid,
                pack_int_id: 0,
                offset: 12,
            },
            MultiPackIndexEntry {
                oid,
                pack_int_id: 0,
                offset: 24,
            },
        ],
    );
    assert!(repeated_oid.is_err());
}
6803
/// Appends malformed `RIDX` / `BTMP` chunks to otherwise well-formed indexes and expects parsing
/// to fail: a reverse index listing position 0 twice, a `BTMP` table with one row for two packs,
/// and a `BTMP` row of `(1, 2)` for an index holding two objects.
#[test]
fn rejects_bad_multi_pack_index_bitmap_chunks() {
    let format = ObjectFormat::Sha1;
    let oid_a = ObjectId::from_hex(format, "1111111111111111111111111111111111111111")
        .expect("test operation should succeed");
    let oid_b = ObjectId::from_hex(format, "2222222222222222222222222222222222222222")
        .expect("test operation should succeed");

    // RIDX repeats the same position.
    let mut ridx_chunks = midx_chunks_with_pack_names(
        format,
        b"pack-a.idx\0\0".to_vec(),
        &[(oid_a.clone(), 0, 12), (oid_b.clone(), 0, 24)],
    );
    ridx_chunks.push((*b"RIDX", midx_u32_table(&[0, 0])));
    let duplicate_ridx = multi_pack_index(format, 2, 1, &ridx_chunks);
    assert!(MultiPackIndex::parse(&duplicate_ridx, format).is_err());

    // Two packs, but BTMP describes only one.
    let mut short_chunks = midx_chunks_with_pack_names(
        format,
        b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
        &[(oid_a.clone(), 0, 12), (oid_b.clone(), 1, 24)],
    );
    short_chunks.push((*b"BTMP", midx_bitmap_packs(&[(0, 1)])));
    let short_btmp = multi_pack_index(format, 2, 2, &short_chunks);
    assert!(MultiPackIndex::parse(&short_btmp, format).is_err());

    // BTMP row `(1, 2)` against an index with two objects.
    let mut range_chunks = midx_chunks_with_pack_names(
        format,
        b"pack-a.idx\0\0".to_vec(),
        &[(oid_a, 0, 12), (oid_b, 0, 24)],
    );
    range_chunks.push((*b"BTMP", midx_bitmap_packs(&[(1, 2)])));
    let out_of_range_btmp = multi_pack_index(format, 2, 1, &range_chunks);
    assert!(MultiPackIndex::parse(&out_of_range_btmp, format).is_err());
}
6844
/// Parses a SHA-1 bitmap index built with the full-DAG and hash-cache options and checks the
/// header fields, the type bitmaps' bit size, the single entry and the name-hash cache.
#[test]
fn parses_pack_bitmap_index_with_hash_cache() {
    let format = ObjectFormat::Sha1;
    let options = PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let bitmap = pack_bitmap_index(
        format,
        3,
        options,
        &pack_checksum,
        &[(2, 0, 1, &[0b101])],
        Some(&[0x1111_1111, 0x2222_2222, 0x3333_3333]),
    );

    let parsed =
        PackBitmapIndex::parse(&bitmap, format, 3).expect("test operation should succeed");

    // Header fields.
    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.format, ObjectFormat::Sha1);
    assert_eq!(parsed.options, options);
    assert_eq!(parsed.pack_checksum, pack_checksum);

    // Type bitmaps span all three objects.
    assert_eq!(parsed.type_bitmaps.commits.bit_size, 3);
    assert_eq!(parsed.type_bitmaps.trees.bit_size, 3);

    // The single entry is reachable through its index position.
    assert_eq!(parsed.entries.len(), 1);
    let entry = parsed
        .entry_for_index_position(2)
        .expect("test operation should succeed");
    assert_eq!(entry.xor_offset, 0);
    assert_eq!(entry.flags, 1);
    assert_eq!(entry.bitmap.words, ewah_literal_words(&[0b101]));

    let expected_cache = Some(vec![0x1111_1111, 0x2222_2222, 0x3333_3333]);
    assert_eq!(parsed.name_hash_cache, expected_cache);
}
6881
/// Parses a SHA-256 bitmap index without a hash cache and checks that the format, checksums and
/// the first entry come back as written.
#[test]
fn parses_pack_bitmap_index_sha256() {
    let format = ObjectFormat::Sha256;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let bitmap = pack_bitmap_index(
        format,
        2,
        PackBitmapIndex::OPTION_FULL_DAG,
        &pack_checksum,
        &[(0, 0, 0, &[0b11])],
        None,
    );

    let parsed =
        PackBitmapIndex::parse(&bitmap, format, 2).expect("test operation should succeed");

    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.format, ObjectFormat::Sha256);
    assert_eq!(parsed.pack_checksum, pack_checksum);
    assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
    assert_eq!(parsed.entries[0].object_position, 0);
    assert_eq!(parsed.name_hash_cache, None);
}
6904
/// Parses a `.bitmap` file written by the `git` executable found on `PATH`.
///
/// A scratch repository with two empty commits is repacked with `git repack -adb`; the resulting
/// `.idx` and `.bitmap` files are parsed and must agree on the pack checksum, and the bitmap
/// must contain at least one entry.
///
/// The scratch directory is owned by a drop guard, so it is removed even when a git invocation
/// or an assertion panics part-way through the test.
#[test]
fn parses_upstream_git_written_pack_bitmap_index() {
    /// Deletes the wrapped directory tree when dropped, including during unwinding.
    struct ScratchDir(std::path::PathBuf);

    impl Drop for ScratchDir {
        fn drop(&mut self) {
            // Best effort: a failed removal must not mask the outcome of the test itself.
            let _ = std::fs::remove_dir_all(&self.0);
        }
    }

    let scratch = ScratchDir(unique_temp_dir("git-pack-bitmap-upstream"));
    let root = scratch.0.as_path();
    fs::create_dir_all(root).expect("test operation should succeed");

    run_git_success(root, &["init", "-q", "-b", "main"]);
    // Two empty commits, with the identity supplied on the command line.
    for message in ["one", "two"] {
        run_git_success(
            root,
            &[
                "-c",
                "user.name=Example User",
                "-c",
                "user.email=example@example.invalid",
                "commit",
                "--allow-empty",
                "-q",
                "-m",
                message,
            ],
        );
    }
    run_git_success(root, &["repack", "-adb"]);

    let pack_dir = root.join(".git").join("objects").join("pack");
    let idx_path = single_path_with_extension(&pack_dir, "idx");
    let bitmap_path = single_path_with_extension(&pack_dir, "bitmap");
    let index = PackIndex::parse(
        &fs::read(idx_path).expect("test operation should succeed"),
        ObjectFormat::Sha1,
    )
    .expect("test operation should succeed");
    let bitmap = PackBitmapIndex::parse(
        &fs::read(bitmap_path).expect("test operation should succeed"),
        ObjectFormat::Sha1,
        index.entries.len(),
    )
    .expect("test operation should succeed");

    assert_eq!(bitmap.pack_checksum, index.pack_checksum);
    assert!(!bitmap.entries.is_empty());
}
6959
/// Corrupts one header byte (or the trailing checksum) of a valid bitmap index at a time and
/// expects each variant to be rejected. Variants that change bytes 5 or 7 get their trailing
/// checksum refreshed first, so the failure cannot come from a checksum mismatch.
#[test]
fn rejects_bad_pack_bitmap_index_header_and_checksum() {
    let format = ObjectFormat::Sha1;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let valid = pack_bitmap_index(
        format,
        1,
        PackBitmapIndex::OPTION_FULL_DAG,
        &pack_checksum,
        &[(0, 0, 0, &[1])],
        None,
    );

    // First signature byte replaced.
    let mut bad_signature = valid.clone();
    bad_signature[0] = b'X';
    assert!(PackBitmapIndex::parse(&bad_signature, format, 1).is_err());

    // Byte 5 set to 2.
    let mut bad_version = valid.clone();
    bad_version[5] = 2;
    refresh_trailing_checksum(format, &mut bad_version);
    assert!(PackBitmapIndex::parse(&bad_version, format, 1).is_err());

    // Byte 7 set to 0x20.
    let mut bad_option = valid.clone();
    bad_option[7] = 0x20;
    refresh_trailing_checksum(format, &mut bad_option);
    assert!(PackBitmapIndex::parse(&bad_option, format, 1).is_err());

    // Lowest bit of the final byte flipped, checksum left stale.
    let mut bad_checksum = valid;
    let final_index = bad_checksum.len() - 1;
    bad_checksum[final_index] ^= 1;
    assert!(PackBitmapIndex::parse(&bad_checksum, format, 1).is_err());
}
6992
/// Expects bitmap indexes with a truncated body, an entry position of 2 for two objects, or a
/// first entry whose XOR offset is 1 to be rejected.
#[test]
fn rejects_bad_pack_bitmap_index_ewah_and_entries() {
    let format = ObjectFormat::Sha1;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let bitmap = pack_bitmap_index(
        format,
        2,
        PackBitmapIndex::OPTION_FULL_DAG,
        &pack_checksum,
        &[(0, 0, 0, &[0b01]), (1, 1, 0, &[0b11])],
        None,
    );

    // Drop the trailing checksum plus one more byte, then refresh the checksum.
    let shortened_len = bitmap.len() - format.raw_len() - 1;
    let mut truncated = bitmap[..shortened_len].to_vec();
    refresh_trailing_checksum(format, &mut truncated);
    assert!(PackBitmapIndex::parse(&truncated, format, 2).is_err());

    // Entry position 2 with an object count of 2; checked before and after a checksum refresh.
    let mut out_of_range_position = pack_bitmap_index(
        format,
        2,
        PackBitmapIndex::OPTION_FULL_DAG,
        &pack_checksum,
        &[(2, 0, 0, &[0b01])],
        None,
    );
    assert!(PackBitmapIndex::parse(&out_of_range_position, format, 2).is_err());
    refresh_trailing_checksum(format, &mut out_of_range_position);
    assert!(PackBitmapIndex::parse(&out_of_range_position, format, 2).is_err());

    // The first and only entry carries an XOR offset of 1.
    let invalid_xor = pack_bitmap_index(
        format,
        2,
        PackBitmapIndex::OPTION_FULL_DAG,
        &pack_checksum,
        &[(0, 1, 0, &[0b01])],
        None,
    );
    assert!(PackBitmapIndex::parse(&invalid_xor, format, 2).is_err());
}
7033
/// Parses a one-object SHA-256 pack index and checks version, checksums, the stored offset and
/// CRC, and that the borrowed view agrees with the owned parse.
#[test]
fn parses_single_entry_pack_index_sha256() {
    let format = ObjectFormat::Sha256;
    let oid = sley_core::object_id_for_bytes(format, "blob", b"hello sha256\n")
        .expect("test operation should succeed");
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let index = single_entry_index(format, oid, 0x1234_5678, 12, pack_checksum.clone());

    let parsed = PackIndex::parse(&index, format).expect("test operation should succeed");
    assert_eq!(parsed.version, 2);
    assert_eq!(parsed.pack_checksum, pack_checksum);
    assert_eq!(parsed.entries.len(), 1);

    let located = parsed.find(&oid).expect("test operation should succeed");
    assert_eq!(located.offset, 12);
    assert_eq!(located.crc32, 0x1234_5678);

    assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
    assert_pack_index_view_matches_owned(&index, format);
}
7069
/// Runs the shared delta round-trip scenario with SHA-1 object ids.
#[test]
fn write_packed_deltifies_similar_blobs_and_round_trips_sha1() {
    let format = ObjectFormat::Sha1;
    write_packed_deltifies_similar_blobs_and_round_trips(format);
}
7074
/// Runs the shared delta round-trip scenario with SHA-256 object ids.
#[test]
fn write_packed_deltifies_similar_blobs_and_round_trips_sha256() {
    let format = ObjectFormat::Sha256;
    write_packed_deltifies_similar_blobs_and_round_trips(format);
}
7079
/// Passing the same blob twice to `PackFile::write_packed` must yield an error.
#[test]
fn write_packed_rejects_duplicate_objects() {
    let blob = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
    let duplicated = [blob.clone(), blob];
    let outcome = PackFile::write_packed(&duplicated, ObjectFormat::Sha1);
    assert!(outcome.is_err());
}
7085
/// Caller-supplied ids are still validated: the same id given twice, or a SHA-256 id given while
/// writing a SHA-1 pack, makes `write_packed_with_known_ids` return an error.
#[test]
fn write_packed_with_known_ids_validates_ids_before_trusting_them() {
    let blob = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
    let sha1_id = blob
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let sha256_id = blob
        .object_id(ObjectFormat::Sha256)
        .expect("test operation should succeed");

    // Same id and object listed twice.
    let repeated_input = [
        PackInput {
            oid: &sha1_id,
            object: &blob,
        },
        PackInput {
            oid: &sha1_id,
            object: &blob,
        },
    ];
    let repeated_outcome =
        PackFile::write_packed_with_known_ids(&repeated_input, ObjectFormat::Sha1);
    assert!(repeated_outcome.is_err());

    // SHA-256 id handed to a SHA-1 pack writer.
    let mismatched_input = [PackInput {
        oid: &sha256_id,
        object: &blob,
    }];
    let mismatched_outcome =
        PackFile::write_packed_with_known_ids(&mismatched_input, ObjectFormat::Sha1);
    assert!(mismatched_outcome.is_err());
}
7113
7114 fn write_packed_deltifies_similar_blobs_and_round_trips(format: ObjectFormat) {
7115 let objects = similar_blob_family(8);
7116 let packed =
7117 PackFile::write_packed(&objects, format).expect("test operation should succeed");
7118 let undeltified =
7119 PackFile::write_undeltified(&objects, format).expect("test operation should succeed");
7120
7121 assert!(
7124 packed.pack.len() < undeltified.pack.len(),
7125 "expected delta pack ({}) smaller than undeltified pack ({})",
7126 packed.pack.len(),
7127 undeltified.pack.len()
7128 );
7129
7130 let kinds = pack_entry_kinds(&packed.pack, format);
7132 let delta_count = kinds
7133 .iter()
7134 .filter(|kind| matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta))
7135 .count();
7136 assert!(
7137 delta_count >= 1,
7138 "expected at least one delta entry, found kinds {kinds:?}"
7139 );
7140
7141 let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
7143 assert_eq!(parsed.entries.len(), objects.len());
7144 for object in &objects {
7145 let oid = object
7146 .object_id(format)
7147 .expect("test operation should succeed");
7148 let found = parsed
7149 .entries
7150 .iter()
7151 .find(|entry| entry.entry.oid == oid)
7152 .unwrap_or_else(|| panic!("object {oid} missing from parsed pack"));
7153 assert_eq!(&found.object, object, "object {oid} did not round-trip");
7154 }
7155
7156 let index = PackIndex::parse(&packed.index, format).expect("test operation should succeed");
7158 assert_eq!(index.pack_checksum, packed.checksum);
7159 for object in &objects {
7160 let oid = object
7161 .object_id(format)
7162 .expect("test operation should succeed");
7163 assert!(index.find(&oid).is_some(), "index missing {oid}");
7164 }
7165 }
7166
/// With default options the writer must use ofs-delta entries, never ref-delta, and the pack
/// must parse on its own.
#[test]
fn write_packed_emits_ofs_delta_by_default() {
    let format = ObjectFormat::Sha1;
    let blobs = similar_blob_family(6);
    let packed = PackFile::write_packed(&blobs, format).expect("test operation should succeed");

    let kinds = pack_entry_kinds(&packed.pack, format);
    let has_ofs_delta = kinds.contains(&PackObjectKind::OfsDelta);
    let has_ref_delta = kinds.contains(&PackObjectKind::RefDelta);
    assert!(
        has_ofs_delta,
        "expected an ofs-delta entry by default, found {kinds:?}"
    );
    assert!(
        !has_ref_delta,
        "default self-contained pack must not use ref-delta, found {kinds:?}"
    );

    let reparsed = PackFile::parse(&packed.pack, format);
    assert!(reparsed.is_ok());
}
7184
/// With `prefer_ofs_delta` disabled the writer must use ref-delta entries, never ofs-delta, and
/// the pack must still parse into the same number of objects.
#[test]
fn write_packed_can_emit_ref_delta() {
    let format = ObjectFormat::Sha1;
    let blobs = similar_blob_family(6);
    let options = PackWriteOptions::new().with_prefer_ofs_delta(false);
    let packed = PackFile::write_packed_with_options(&blobs, format, &options)
        .expect("test operation should succeed");

    let kinds = pack_entry_kinds(&packed.pack, format);
    let has_ref_delta = kinds.contains(&PackObjectKind::RefDelta);
    let has_ofs_delta = kinds.contains(&PackObjectKind::OfsDelta);
    assert!(
        has_ref_delta,
        "expected a ref-delta entry, found {kinds:?}"
    );
    assert!(
        !has_ofs_delta,
        "ref-delta mode must not emit ofs-delta, found {kinds:?}"
    );

    let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
    assert_eq!(parsed.entries.len(), blobs.len());
}
7207
/// For depth limits 1, 2 and 5 (window 20) the deepest delta chain reported by
/// `pack_entry_depths` must stay within the limit, and every object must still round-trip.
#[test]
fn write_packed_bounds_delta_chain_depth() {
    let chain = incremental_blob_chain(20);
    let format = ObjectFormat::Sha1;

    for max_depth in [1usize, 2, 5] {
        let options = PackWriteOptions::new()
            .with_window(20)
            .with_depth(max_depth);
        let packed = PackFile::write_packed_with_options(&chain, format, &options)
            .expect("test operation should succeed");

        // An empty depth list counts as depth 0.
        let observed = pack_entry_depths(&packed.pack, format)
            .into_iter()
            .max()
            .unwrap_or(0);
        assert!(
            observed <= max_depth,
            "max chain depth {observed} exceeded bound {max_depth}"
        );

        let parsed =
            PackFile::parse(&packed.pack, format).expect("test operation should succeed");
        for expected in &chain {
            let oid = expected
                .object_id(format)
                .expect("test operation should succeed");
            let found = parsed
                .entries
                .iter()
                .find(|candidate| candidate.entry.oid == oid)
                .expect("test operation should succeed");
            assert_eq!(&found.object, expected);
        }
    }
}
7246
/// A depth limit of 0 must leave no ofs-delta or ref-delta entry in the pack.
#[test]
fn write_packed_depth_zero_stores_everything_undeltified() {
    let format = ObjectFormat::Sha1;
    let blobs = similar_blob_family(5);
    let options = PackWriteOptions::new().with_depth(0);
    let packed = PackFile::write_packed_with_options(&blobs, format, &options)
        .expect("test operation should succeed");

    let kinds = pack_entry_kinds(&packed.pack, format);
    let any_delta = kinds
        .iter()
        .any(|kind| matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta));
    assert!(!any_delta, "depth 0 must disable deltas, found {kinds:?}");
}
7261
/// Runs the shared thin-pack scenario with SHA-1 object ids.
#[test]
fn write_thin_uses_external_base_and_round_trips_sha1() {
    let format = ObjectFormat::Sha1;
    write_thin_uses_external_base_and_round_trips(format);
}
7266
/// Runs the shared thin-pack scenario with SHA-256 object ids.
#[test]
fn write_thin_uses_external_base_and_round_trips_sha256() {
    let format = ObjectFormat::Sha256;
    write_thin_uses_external_base_and_round_trips(format);
}
7271
7272 fn write_thin_uses_external_base_and_round_trips(format: ObjectFormat) {
7273 let base = blob_with_marker("EXTERNAL-BASE");
7276 let target = blob_with_marker("EXTERNAL-TARGET");
7277 let base_oid = base
7278 .object_id(format)
7279 .expect("test operation should succeed");
7280
7281 let mut external = HashMap::new();
7282 external.insert(base_oid, base.clone());
7283 let packed = PackFile::write_thin(std::slice::from_ref(&target), format, external)
7284 .expect("test operation should succeed");
7285
7286 let kinds = pack_entry_kinds(&packed.pack, format);
7288 assert_eq!(kinds, vec![PackObjectKind::RefDelta]);
7289
7290 let mut offset = 12usize;
7292 let header =
7293 parse_entry_header(&packed.pack, &mut offset).expect("test operation should succeed");
7294 assert_eq!(header.kind, PackObjectKind::RefDelta);
7295 let referenced =
7296 ObjectId::from_raw(format, &packed.pack[offset..offset + format.raw_len()])
7297 .expect("test operation should succeed");
7298 assert_eq!(referenced, base_oid);
7299
7300 assert!(PackFile::parse(&packed.pack, format).is_err());
7302
7303 let parsed = PackFile::parse_thin(&packed.pack, format, |oid| {
7305 if oid == &base_oid {
7306 Ok(Some(base.clone()))
7307 } else {
7308 Ok(None)
7309 }
7310 })
7311 .expect("test operation should succeed");
7312 assert_eq!(parsed.entries.len(), 1);
7313 assert_eq!(parsed.entries[0].object, target);
7314 }
7315
/// Packs a blob, an empty tree and a commit and checks that each object id is present in the
/// parsed pack.
#[test]
fn write_packed_preserves_distinct_objects_with_no_similarity() {
    let format = ObjectFormat::Sha1;
    let objects = vec![
        EncodedObject::new(ObjectType::Blob, b"alpha distinct\n".to_vec()),
        EncodedObject::new(ObjectType::Tree, Vec::<u8>::new()),
        EncodedObject::new(ObjectType::Commit, b"tree 0000\n".to_vec()),
    ];

    let packed = PackFile::write_packed(&objects, format).expect("test operation should succeed");
    let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
    assert_eq!(parsed.entries.len(), objects.len());

    let packed_ids: Vec<_> = parsed
        .entries
        .iter()
        .map(|entry| &entry.entry.oid)
        .collect();
    for object in &objects {
        let oid = object
            .object_id(format)
            .expect("test operation should succeed");
        assert!(packed_ids.contains(&&oid));
    }
}
7337
7338 fn similar_blob_family(count: usize) -> Vec<EncodedObject> {
7342 let mut common_head = Vec::new();
7343 for _ in 0..200 {
7344 common_head.extend_from_slice(b"shared header line for delta testing\n");
7345 }
7346 let mut common_tail = Vec::new();
7347 for _ in 0..200 {
7348 common_tail.extend_from_slice(b"shared trailer line for delta testing\n");
7349 }
7350 (0..count)
7351 .map(|idx| {
7352 let mut body = common_head.clone();
7353 body.extend_from_slice(format!("UNIQUE MIDDLE MARKER NUMBER {idx}\n").as_bytes());
7354 body.extend_from_slice(&common_tail);
7355 EncodedObject::new(ObjectType::Blob, body)
7356 })
7357 .collect()
7358 }
7359
7360 fn incremental_blob_chain(count: usize) -> Vec<EncodedObject> {
7363 let mut body = Vec::new();
7364 for _ in 0..100 {
7365 body.extend_from_slice(b"baseline content shared across the whole chain\n");
7366 }
7367 let mut objects = Vec::with_capacity(count);
7368 for idx in 0..count {
7369 body.extend_from_slice(format!("appended unique line {idx}\n").as_bytes());
7370 objects.push(EncodedObject::new(ObjectType::Blob, body.clone()));
7371 }
7372 objects
7373 }
7374
7375 fn blob_with_marker(marker: &str) -> EncodedObject {
7376 let mut body = Vec::new();
7377 for _ in 0..150 {
7378 body.extend_from_slice(b"common body shared between base and target\n");
7379 }
7380 body.extend_from_slice(marker.as_bytes());
7381 body.push(b'\n');
7382 for _ in 0..150 {
7383 body.extend_from_slice(b"more common body shared between objects\n");
7384 }
7385 EncodedObject::new(ObjectType::Blob, body)
7386 }
7387
7388 fn pack_entry_kinds(pack: &[u8], format: ObjectFormat) -> Vec<PackObjectKind> {
7390 pack_entry_descriptors(pack, format)
7391 .into_iter()
7392 .map(|descriptor| descriptor.kind)
7393 .collect()
7394 }
7395
7396 fn pack_entry_depths(pack: &[u8], format: ObjectFormat) -> Vec<usize> {
7400 let descriptors = pack_entry_descriptors(pack, format);
7401 let mut depth_by_offset: HashMap<u64, usize> = HashMap::new();
7402 let mut depths = Vec::with_capacity(descriptors.len());
7403 for descriptor in &descriptors {
7404 let depth = match &descriptor.base {
7405 EntryBase::None => 0,
7406 EntryBase::Offset(base_offset) => {
7407 depth_by_offset.get(base_offset).copied().unwrap_or(0) + 1
7408 }
7409 EntryBase::Ref => 1,
7413 };
7414 depth_by_offset.insert(descriptor.offset, depth);
7415 depths.push(depth);
7416 }
7417 depths
7418 }
7419
7420 struct EntryDescriptor {
7421 offset: u64,
7422 kind: PackObjectKind,
7423 base: EntryBase,
7424 }
7425
/// Delta-base reference of a pack entry as seen by the test helpers.
enum EntryBase {
    /// The entry is not an ofs-delta or ref-delta.
    None,
    /// Ref-delta: the base is named by object id, which the helpers skip over.
    Ref,
    /// Ofs-delta: the value produced by `parse_ofs_delta_base_offset` for this entry.
    Offset(u64),
}
7431
7432 fn pack_entry_descriptors(pack: &[u8], format: ObjectFormat) -> Vec<EntryDescriptor> {
7433 let trailer_offset = pack.len() - format.raw_len();
7434 let count = u32_be(&pack[8..12]) as usize;
7435 let mut offset = 12usize;
7436 let mut descriptors = Vec::with_capacity(count);
7437 for _ in 0..count {
7438 let entry_offset = offset as u64;
7439 let header =
7440 parse_entry_header(pack, &mut offset).expect("test operation should succeed");
7441 let base = match header.kind {
7442 PackObjectKind::OfsDelta => {
7443 let base_offset = parse_ofs_delta_base_offset(pack, &mut offset, entry_offset)
7444 .expect("test operation should succeed");
7445 EntryBase::Offset(base_offset)
7446 }
7447 PackObjectKind::RefDelta => {
7448 offset += format.raw_len();
7449 EntryBase::Ref
7450 }
7451 _ => EntryBase::None,
7452 };
7453 let mut decoder = ZlibDecoder::new(&pack[offset..trailer_offset]);
7454 let mut body = Vec::new();
7455 decoder
7456 .read_to_end(&mut body)
7457 .expect("test operation should succeed");
7458 offset += decoder.total_in() as usize;
7459 descriptors.push(EntryDescriptor {
7460 offset: entry_offset,
7461 kind: header.kind,
7462 base,
7463 });
7464 }
7465 descriptors
7466 }
7467
7468 fn similar_blob_objects() -> (EncodedObject, EncodedObject) {
7469 let mut base = Vec::new();
7470 for _ in 0..300 {
7471 base.extend_from_slice(b"common payload\n");
7472 }
7473 base.extend_from_slice(b"base\n");
7474 let mut changed = Vec::new();
7475 for _ in 0..300 {
7476 changed.extend_from_slice(b"common payload\n");
7477 }
7478 changed.extend_from_slice(b"changed\n");
7479 (
7480 EncodedObject::new(ObjectType::Blob, base),
7481 EncodedObject::new(ObjectType::Blob, changed),
7482 )
7483 }
7484
7485 fn single_object_pack(format: ObjectFormat, object_type: ObjectType, body: &[u8]) -> Vec<u8> {
7486 let mut pack = Vec::new();
7487 pack.extend_from_slice(b"PACK");
7488 pack.extend_from_slice(&2u32.to_be_bytes());
7489 pack.extend_from_slice(&1u32.to_be_bytes());
7490 write_entry_header(&mut pack, object_type, body.len() as u64);
7491 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7492 encoder
7493 .write_all(body)
7494 .expect("test operation should succeed");
7495 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7496 let checksum =
7497 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7498 pack.extend_from_slice(checksum.as_bytes());
7499 pack
7500 }
7501
/// Selects how `two_object_delta_pack` encodes the reference to the delta base.
#[derive(Clone, Copy, Debug)]
enum DeltaKind {
    /// Entry type code 6, followed by the distance back to the base entry.
    Offset,
    /// Entry type code 7, followed by the raw object id of the base.
    Ref,
}
7507
7508 fn two_object_delta_pack(
7509 format: ObjectFormat,
7510 base: &[u8],
7511 result: &[u8],
7512 delta_kind: DeltaKind,
7513 ) -> Vec<u8> {
7514 let mut pack = Vec::new();
7515 pack.extend_from_slice(b"PACK");
7516 pack.extend_from_slice(&2u32.to_be_bytes());
7517 pack.extend_from_slice(&2u32.to_be_bytes());
7518
7519 let base_offset = pack.len();
7520 write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
7521 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7522 encoder
7523 .write_all(base)
7524 .expect("test operation should succeed");
7525 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7526
7527 let delta = append_suffix_delta(base, result);
7528 let delta_offset = pack.len();
7529 write_pack_entry_header_kind(
7530 &mut pack,
7531 match delta_kind {
7532 DeltaKind::Offset => 6,
7533 DeltaKind::Ref => 7,
7534 },
7535 delta.len() as u64,
7536 );
7537 match delta_kind {
7538 DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
7539 DeltaKind::Ref => {
7540 let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
7541 .expect("test operation should succeed");
7542 pack.extend_from_slice(base_oid.as_bytes());
7543 }
7544 }
7545 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7546 encoder
7547 .write_all(&delta)
7548 .expect("test operation should succeed");
7549 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7550
7551 let checksum =
7552 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7553 pack.extend_from_slice(checksum.as_bytes());
7554 pack
7555 }
7556
7557 fn thin_ref_delta_pack(format: ObjectFormat, base: &[u8], result: &[u8]) -> Vec<u8> {
7558 let mut pack = Vec::new();
7559 pack.extend_from_slice(b"PACK");
7560 pack.extend_from_slice(&2u32.to_be_bytes());
7561 pack.extend_from_slice(&1u32.to_be_bytes());
7562
7563 let delta = append_suffix_delta(base, result);
7564 write_pack_entry_header_kind(&mut pack, 7, delta.len() as u64);
7565 let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
7566 .expect("test operation should succeed");
7567 pack.extend_from_slice(base_oid.as_bytes());
7568 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7569 encoder
7570 .write_all(&delta)
7571 .expect("test operation should succeed");
7572 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7573
7574 let checksum =
7575 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7576 pack.extend_from_slice(checksum.as_bytes());
7577 pack
7578 }
7579
/// Returns a path under the system temp directory named
/// `sley-<name>-<pid>-<nanoseconds since the Unix epoch>`.
///
/// Only the path is computed; nothing is created on disk.
fn unique_temp_dir(name: &str) -> PathBuf {
    let since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("test operation should succeed");
    let stamp = since_epoch.as_nanos();
    let pid = std::process::id();
    let mut dir = std::env::temp_dir();
    dir.push(format!("sley-{name}-{pid}-{stamp}"));
    dir
}
7587
/// Runs `git` with `args` in directory `cwd` and panics unless it both starts
/// and exits successfully.
///
/// On a non-zero exit the panic message includes the exit code and the
/// captured stdout and stderr.
fn run_git_success(cwd: &Path, args: &[&str]) {
    let mut command = Command::new("git");
    command.current_dir(cwd).args(args);
    let output = match command.output() {
        Ok(output) => output,
        Err(err) => panic!("failed to run git {args:?}: {err}"),
    };
    let stdout = String::from_utf8_lossy(&output.stdout);
    let stderr = String::from_utf8_lossy(&output.stderr);
    assert!(
        output.status.success(),
        "git {args:?} failed with status {:?}\nstdout:\n{}\nstderr:\n{}",
        output.status.code(),
        stdout,
        stderr
    );
}
7602
/// Returns the one path directly inside `dir` whose extension equals
/// `extension`.
///
/// Panics if the directory cannot be read or if the number of matching
/// entries is anything other than one.
fn single_path_with_extension(dir: &Path, extension: &str) -> PathBuf {
    let mut matches = Vec::new();
    for entry in fs::read_dir(dir).expect("test operation should succeed") {
        let candidate = entry.expect("test operation should succeed").path();
        let has_extension = candidate.extension().and_then(|ext| ext.to_str()) == Some(extension);
        if has_extension {
            matches.push(candidate);
        }
    }
    assert_eq!(matches.len(), 1, "expected one .{extension} file");
    matches.swap_remove(0)
}
7612
7613 fn pack_bitmap_index(
7614 format: ObjectFormat,
7615 object_count: u32,
7616 options: u16,
7617 pack_checksum: &ObjectId,
7618 entries: &[(u32, u8, u8, &[u64])],
7619 name_hash_cache: Option<&[u32]>,
7620 ) -> Vec<u8> {
7621 let mut out = Vec::new();
7622 out.extend_from_slice(b"BITM");
7623 out.extend_from_slice(&1u16.to_be_bytes());
7624 out.extend_from_slice(&options.to_be_bytes());
7625 out.extend_from_slice(&(entries.len() as u32).to_be_bytes());
7626 out.extend_from_slice(pack_checksum.as_bytes());
7627 write_test_ewah(&mut out, object_count, &[0b001]);
7628 write_test_ewah(&mut out, object_count, &[0b010]);
7629 write_test_ewah(&mut out, object_count, &[0b100]);
7630 write_test_ewah(&mut out, object_count, &[0]);
7631 for (position, xor_offset, flags, words) in entries {
7632 out.extend_from_slice(&position.to_be_bytes());
7633 out.push(*xor_offset);
7634 out.push(*flags);
7635 write_test_ewah(&mut out, object_count, words);
7636 }
7637 if let Some(cache) = name_hash_cache {
7638 for value in cache {
7639 out.extend_from_slice(&value.to_be_bytes());
7640 }
7641 }
7642 let checksum =
7643 sley_core::digest_bytes(format, &out).expect("test operation should succeed");
7644 out.extend_from_slice(checksum.as_bytes());
7645 out
7646 }
7647
7648 fn write_test_ewah(out: &mut Vec<u8>, bit_size: u32, literals: &[u64]) {
7649 out.extend_from_slice(&bit_size.to_be_bytes());
7650 let words = ewah_literal_words(literals);
7651 out.extend_from_slice(&(words.len() as u32).to_be_bytes());
7652 for word in words {
7653 out.extend_from_slice(&word.to_be_bytes());
7654 }
7655 out.extend_from_slice(&0u32.to_be_bytes());
7656 }
7657
/// Returns a header word followed by `literals` unchanged.
///
/// The header stores `literals.len()` shifted left by 33 bits, leaving the
/// low 33 bits zero.
fn ewah_literal_words(literals: &[u64]) -> Vec<u64> {
    let header = (literals.len() as u64) << 33;
    std::iter::once(header)
        .chain(literals.iter().copied())
        .collect()
}
7664
7665 fn refresh_trailing_checksum(format: ObjectFormat, bytes: &mut [u8]) {
7666 let checksum_offset = bytes.len() - format.raw_len();
7667 let checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])
7668 .expect("test operation should succeed");
7669 bytes[checksum_offset..].copy_from_slice(checksum.as_bytes());
7670 }
7671
7672 fn append_suffix_delta(base: &[u8], result: &[u8]) -> Vec<u8> {
7673 assert!(result.starts_with(base));
7674 let suffix = &result[base.len()..];
7675 assert!(base.len() < 0x10000);
7676 assert!(suffix.len() < 0x80);
7677 let mut delta = Vec::new();
7678 write_delta_varint(&mut delta, base.len() as u64);
7679 write_delta_varint(&mut delta, result.len() as u64);
7680 delta.push(0x90);
7681 delta.push(base.len() as u8);
7682 delta.push(suffix.len() as u8);
7683 delta.extend_from_slice(suffix);
7684 delta
7685 }
7686
/// Appends `value` to `out` as a little-endian base-128 varint: seven bits per
/// byte, least-significant group first, with the high bit set on every byte
/// except the last.
fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
    while value >= 0x80 {
        out.push(((value & 0x7f) as u8) | 0x80);
        value >>= 7;
    }
    // The final group fits in seven bits, so no continuation bit is set.
    out.push(value as u8);
}
7700
/// Appends a pack entry header for a raw `type_code` and object `size`.
///
/// The first byte holds `type_code` in bits 4 and up and the low four bits of
/// `size`. Remaining size bits follow seven at a time, least-significant
/// group first, and every byte except the last has its high bit set.
fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, mut size: u64) {
    let lead = (type_code << 4) | ((size as u8) & 0x0f);
    size >>= 4;
    if size == 0 {
        out.push(lead);
        return;
    }
    out.push(lead | 0x80);
    while size >= 0x80 {
        out.push(((size & 0x7f) as u8) | 0x80);
        size >>= 7;
    }
    out.push(size as u8);
}
7717
/// Appends the relative base offset of an offset-delta entry to `out`.
///
/// The encoding is big-endian base-128: every byte except the last has its
/// high bit set, and each continuation step subtracts one from the value
/// before emitting the next more-significant group. Offsets below 0x80 are
/// written as a single byte, which is the only case the helper handled
/// before; larger offsets previously panicked.
fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: usize) {
    // Groups are produced least-significant first and reversed at the end.
    let mut remaining = relative;
    let mut groups = vec![(remaining & 0x7f) as u8];
    remaining >>= 7;
    while remaining != 0 {
        remaining -= 1;
        groups.push(0x80 | (remaining & 0x7f) as u8);
        remaining >>= 7;
    }
    out.extend(groups.iter().rev());
}
7722
7723 fn single_entry_index(
7724 format: ObjectFormat,
7725 oid: ObjectId,
7726 crc32: u32,
7727 offset: u32,
7728 pack_checksum: ObjectId,
7729 ) -> Vec<u8> {
7730 let mut index = Vec::new();
7731 index.extend_from_slice(&[0xff, b't', b'O', b'c']);
7732 index.extend_from_slice(&2u32.to_be_bytes());
7733 for idx in 0..256 {
7734 let count = if idx >= usize::from(oid.as_bytes()[0]) {
7735 1u32
7736 } else {
7737 0u32
7738 };
7739 index.extend_from_slice(&count.to_be_bytes());
7740 }
7741 index.extend_from_slice(oid.as_bytes());
7742 index.extend_from_slice(&crc32.to_be_bytes());
7743 index.extend_from_slice(&offset.to_be_bytes());
7744 index.extend_from_slice(pack_checksum.as_bytes());
7745 let checksum =
7746 sley_core::digest_bytes(format, &index).expect("test operation should succeed");
7747 index.extend_from_slice(checksum.as_bytes());
7748 index
7749 }
7750
7751 fn single_entry_index_v1(
7752 format: ObjectFormat,
7753 oid: ObjectId,
7754 offset: u32,
7755 pack_checksum: ObjectId,
7756 ) -> Vec<u8> {
7757 let mut index = Vec::new();
7758 for idx in 0..256 {
7759 let count = if idx >= usize::from(oid.as_bytes()[0]) {
7760 1u32
7761 } else {
7762 0u32
7763 };
7764 index.extend_from_slice(&count.to_be_bytes());
7765 }
7766 index.extend_from_slice(&offset.to_be_bytes());
7767 index.extend_from_slice(oid.as_bytes());
7768 index.extend_from_slice(pack_checksum.as_bytes());
7769 let checksum =
7770 sley_core::digest_bytes(format, &index).expect("test operation should succeed");
7771 index.extend_from_slice(checksum.as_bytes());
7772 index
7773 }
7774
7775 fn pack_reverse_index(
7776 format: ObjectFormat,
7777 positions: &[u32],
7778 pack_checksum: ObjectId,
7779 ) -> Vec<u8> {
7780 let mut reverse_index = Vec::new();
7781 reverse_index.extend_from_slice(b"RIDX");
7782 reverse_index.extend_from_slice(&1u32.to_be_bytes());
7783 reverse_index.extend_from_slice(&hash_function_id(format).to_be_bytes());
7784 for position in positions {
7785 reverse_index.extend_from_slice(&position.to_be_bytes());
7786 }
7787 reverse_index.extend_from_slice(pack_checksum.as_bytes());
7788 let checksum =
7789 sley_core::digest_bytes(format, &reverse_index).expect("test operation should succeed");
7790 reverse_index.extend_from_slice(checksum.as_bytes());
7791 reverse_index
7792 }
7793
7794 fn pack_mtimes(format: ObjectFormat, mtimes: &[u32], pack_checksum: ObjectId) -> Vec<u8> {
7795 let mut out = Vec::new();
7796 out.extend_from_slice(b"MTME");
7797 out.extend_from_slice(&1u32.to_be_bytes());
7798 out.extend_from_slice(&hash_function_id(format).to_be_bytes());
7799 for mtime in mtimes {
7800 out.extend_from_slice(&mtime.to_be_bytes());
7801 }
7802 out.extend_from_slice(pack_checksum.as_bytes());
7803 let checksum =
7804 sley_core::digest_bytes(format, &out).expect("test operation should succeed");
7805 out.extend_from_slice(checksum.as_bytes());
7806 out
7807 }
7808
7809 fn midx_chunks_with_pack_names(
7810 _format: ObjectFormat,
7811 pack_names: Vec<u8>,
7812 entries: &[(ObjectId, u32, u64)],
7813 ) -> Vec<([u8; 4], Vec<u8>)> {
7814 let mut entries = entries.to_vec();
7815 entries.sort_by(|left, right| left.0.as_bytes().cmp(right.0.as_bytes()));
7816 let object_ids: Vec<ObjectId> = entries.iter().map(|entry| entry.0).collect();
7817 let mut large_offsets = Vec::new();
7818 let mut chunks = vec![
7819 (*b"PNAM", pack_names),
7820 (*b"OIDF", midx_oid_fanout(&object_ids)),
7821 (*b"OIDL", midx_oid_lookup(&object_ids)),
7822 (
7823 *b"OOFF",
7824 midx_ooff_entries(
7825 &entries
7826 .iter()
7827 .map(|(_oid, pack_int_id, offset)| (*pack_int_id, *offset))
7828 .collect::<Vec<_>>(),
7829 &mut large_offsets,
7830 ),
7831 ),
7832 ];
7833 if !large_offsets.is_empty() {
7834 chunks.push((*b"LOFF", large_offsets));
7835 }
7836 chunks
7837 }
7838
7839 fn midx_oid_fanout(object_ids: &[ObjectId]) -> Vec<u8> {
7840 let mut counts = [0u32; 256];
7841 for oid in object_ids {
7842 counts[oid.as_bytes()[0] as usize] += 1;
7843 }
7844 let mut running = 0u32;
7845 let mut out = Vec::new();
7846 for count in counts {
7847 running += count;
7848 out.extend_from_slice(&running.to_be_bytes());
7849 }
7850 out
7851 }
7852
7853 fn midx_oid_lookup(object_ids: &[ObjectId]) -> Vec<u8> {
7854 let mut out = Vec::new();
7855 for oid in object_ids {
7856 out.extend_from_slice(oid.as_bytes());
7857 }
7858 out
7859 }
7860
/// Encodes `(pack id, offset)` pairs as two big-endian `u32`s each.
///
/// Offsets below `0x8000_0000` are stored directly. A larger offset is
/// appended to `large_offsets` as a big-endian `u64`, and the entry stores
/// `0x8000_0000 | index` instead, where `index` is that value's position in
/// `large_offsets`.
fn midx_ooff_entries(entries: &[(u32, u64)], large_offsets: &mut Vec<u8>) -> Vec<u8> {
    let mut table = Vec::new();
    for &(pack_int_id, offset) in entries {
        let stored_offset = if offset < 0x8000_0000 {
            offset as u32
        } else {
            // Each large offset occupies eight bytes, so len / 8 is its index.
            let slot = (large_offsets.len() / 8) as u32;
            large_offsets.extend_from_slice(&offset.to_be_bytes());
            0x8000_0000 | slot
        };
        table.extend_from_slice(&pack_int_id.to_be_bytes());
        table.extend_from_slice(&stored_offset.to_be_bytes());
    }
    table
}
7875
/// Serialises `values` as consecutive big-endian `u32`s.
fn midx_u32_table(values: &[u32]) -> Vec<u8> {
    values
        .iter()
        .flat_map(|value| value.to_be_bytes())
        .collect()
}
7883
/// Serialises each `(bitmap_pos, bitmap_nr)` pair as two consecutive
/// big-endian `u32`s.
fn midx_bitmap_packs(entries: &[(u32, u32)]) -> Vec<u8> {
    let mut table = Vec::new();
    for &(bitmap_pos, bitmap_nr) in entries {
        for field in [bitmap_pos, bitmap_nr] {
            table.extend_from_slice(&field.to_be_bytes());
        }
    }
    table
}
7892
7893 fn multi_pack_index(
7894 format: ObjectFormat,
7895 version: u8,
7896 pack_count: u32,
7897 chunks: &[([u8; 4], Vec<u8>)],
7898 ) -> Vec<u8> {
7899 let lookup_len = (chunks.len() + 1) * 12;
7900 let mut out = Vec::new();
7901 out.extend_from_slice(b"MIDX");
7902 out.push(version);
7903 out.push(hash_function_id(format) as u8);
7904 out.push(chunks.len() as u8);
7905 out.push(0);
7906 out.extend_from_slice(&pack_count.to_be_bytes());
7907 let mut chunk_offset = (12 + lookup_len) as u64;
7908 for (id, data) in chunks {
7909 out.extend_from_slice(id);
7910 out.extend_from_slice(&chunk_offset.to_be_bytes());
7911 chunk_offset += data.len() as u64;
7912 }
7913 out.extend_from_slice(&[0, 0, 0, 0]);
7914 out.extend_from_slice(&chunk_offset.to_be_bytes());
7915 for (_id, data) in chunks {
7916 out.extend_from_slice(data);
7917 }
7918 let checksum =
7919 sley_core::digest_bytes(format, &out).expect("test operation should succeed");
7920 out.extend_from_slice(checksum.as_bytes());
7921 out
7922 }
7923
7924 fn pack_checksum_sha1() -> ObjectId {
7927 sley_core::digest_bytes(ObjectFormat::Sha1, b"pack").expect("test operation should succeed")
7928 }
7929
7930 fn parse_ewah_bytes(bytes: &[u8]) -> EwahBitmap {
7931 let mut offset = 0usize;
7934 let checksum_offset = bytes.len();
7935 parse_bitmap_ewah(bytes, &mut offset, checksum_offset, 0)
7936 .expect("test operation should succeed")
7937 }
7938
/// One literal word encodes to the same word list the `ewah_literal_words`
/// test helper produces.
#[test]
fn ewah_encodes_single_literal_word_matching_helper() {
    let literal = 0b101u64;
    let bitmap = EwahBitmap::from_words(64, &[literal]).expect("test operation should succeed");
    assert_eq!(bitmap.words, ewah_literal_words(&[literal]));
    assert_eq!(bitmap.rlw_position, 0);
    assert_eq!(bitmap.bit_size, 64);
}
7949
/// Every field of a serialised bitmap is written big-endian.
#[test]
fn ewah_byte_layout_is_big_endian() {
    let literal = 0x0102_0304_0506_0708u64;
    let bitmap = EwahBitmap::from_words(64, &[literal]).expect("test operation should succeed");
    let bytes = bitmap.to_bytes();

    let mut expected = Vec::new();
    // Bit size.
    expected.extend_from_slice(&64u32.to_be_bytes());
    // Word count: one header word plus one literal.
    expected.extend_from_slice(&2u32.to_be_bytes());
    // Header word announcing a single literal.
    expected.extend_from_slice(&(1u64 << 33).to_be_bytes());
    // The literal itself.
    expected.extend_from_slice(&literal.to_be_bytes());
    // Trailing u32, zero here.
    expected.extend_from_slice(&0u32.to_be_bytes());

    assert_eq!(bytes, expected);
}
7963
/// An empty bitmap serialises to twelve zero bytes and parses back to an
/// equal bitmap with no set positions.
#[test]
fn ewah_empty_bitmap_serialises_like_git() {
    let empty = EwahBitmap::empty();
    let serialised = empty.to_bytes();
    assert_eq!(serialised, vec![0u8; 12]);

    let reparsed = parse_ewah_bytes(&serialised);
    assert_eq!(reparsed, empty);
    let positions = reparsed
        .to_positions()
        .expect("test operation should succeed");
    assert!(positions.is_empty());
}
7980
/// Three all-zero words followed by one literal collapse into a single
/// header word (run bit 0, run length 3, one literal) plus that literal.
#[test]
fn ewah_compresses_clean_zero_run() {
    let bitmap =
        EwahBitmap::from_words(256, &[0, 0, 0, 0b1]).expect("test operation should succeed");
    assert_eq!(bitmap.words.len(), 2, "expected one RLW plus one literal");

    let header = bitmap.words[0];
    let run_bit = header & 1;
    let run_length = (header >> 1) & 0xffff_ffff;
    let literal_count = header >> 33;
    assert_eq!(run_bit, 0, "run bit should be zero");
    assert_eq!(run_length, 3, "run length should be 3");
    assert_eq!(literal_count, 1, "literal length should be 1");
    assert_eq!(bitmap.words[1], 0b1);
}
7994
/// Three all-ones words collapse into a single header word with run bit 1,
/// run length 3 and no literals.
#[test]
fn ewah_compresses_clean_ones_run() {
    let all_ones = [u64::MAX; 3];
    let bitmap = EwahBitmap::from_words(192, &all_ones).expect("test operation should succeed");
    assert_eq!(bitmap.words.len(), 1);

    let header = bitmap.words[0];
    let run_bit = header & 1;
    let run_length = (header >> 1) & 0xffff_ffff;
    let literal_count = header >> 33;
    assert_eq!(run_bit, 1, "run bit should be one");
    assert_eq!(run_length, 3, "run length should be 3");
    assert_eq!(literal_count, 0, "no literals");
}
8006
/// A mix of zero runs, literals and a ones run decodes back to the exact
/// input words.
#[test]
fn ewah_run_then_literal_then_run_roundtrips() {
    let input = vec![0, 0, 0xdead_beef, u64::MAX, u64::MAX, 0, 0xabc];
    let total_bits = (input.len() * 64) as u32;
    let bitmap = EwahBitmap::from_words(total_bits, &input).expect("test operation should succeed");
    let decoded = bitmap.to_words().expect("test operation should succeed");
    assert_eq!(decoded, input);
}
8017
/// With a bit size of 1, the all-zero words after the first are not
/// reproduced when decoding.
#[test]
fn ewah_drops_trailing_clean_zero_words() {
    let input = vec![0b1, 0, 0, 0];
    let bitmap = EwahBitmap::from_words(1, &input).expect("test operation should succeed");
    assert_eq!(bitmap.bit_size, 1);
    let decoded = bitmap.to_words().expect("test operation should succeed");
    assert_eq!(decoded, vec![0b1]);
}
8031
/// Positions spread across several words survive an encode/decode cycle.
#[test]
fn ewah_from_positions_roundtrips_via_positions() {
    let expected = [0u32, 1, 63, 64, 65, 200, 511];
    let bitmap =
        EwahBitmap::from_positions(512, &expected).expect("test operation should succeed");
    let mut actual = bitmap.to_positions().expect("test operation should succeed");
    // Sort before comparing so the check does not depend on output order.
    actual.sort_unstable();
    assert_eq!(actual, expected);
}
8041
/// Duplicate, unordered input positions decode as a sorted, unique list.
#[test]
fn ewah_from_positions_dedupes_and_orders() {
    let unordered = [100, 5, 100, 5, 5];
    let bitmap =
        EwahBitmap::from_positions(128, &unordered).expect("test operation should succeed");
    let decoded = bitmap.to_positions().expect("test operation should succeed");
    assert_eq!(decoded, vec![5, 100]);
}
8051
/// A zero run longer than one header word's run-length field is split across
/// two header words, and `rlw_position` ends on the second.
#[test]
fn ewah_huge_zero_run_spans_multiple_rlws() {
    let run_length_of = |word: u64| (word >> 1) & 0xffff_ffff;

    let mut builder = EwahBuilder::new(0);
    builder.add_empty_words(false, 0xffff_ffff);
    builder.add_empty_words(false, 5);
    let bitmap = builder.finish().expect("test operation should succeed");

    assert_eq!(bitmap.words.len(), 2, "run split across two RLWs");
    assert_eq!(run_length_of(bitmap.words[0]), 0xffff_ffff);
    assert_eq!(bitmap.words[1] & 1, 0);
    assert_eq!(run_length_of(bitmap.words[1]), 5);
    assert_eq!(bitmap.rlw_position, 1);
}
8068
/// A bit size of 65 cannot be backed by a single 64-bit word.
#[test]
fn ewah_from_words_rejects_oversized_bit_size() {
    let outcome = EwahBitmap::from_words(65, &[0]);
    assert!(outcome.is_err());
}
8074
/// Position 64 is out of range for a 64-bit bitmap.
#[test]
fn ewah_from_positions_rejects_out_of_range() {
    let outcome = EwahBitmap::from_positions(64, &[64]);
    assert!(outcome.is_err());
}
8079
/// Serialising then parsing yields an equal bitmap that still decodes to the
/// original words.
#[test]
fn ewah_serialised_bytes_reparse_to_equal_bitmap() {
    let input = vec![0, u64::MAX, 0x1234_5678_9abc_def0, 0, 0, 0xff];
    let total_bits = (input.len() * 64) as u32;
    let bitmap = EwahBitmap::from_words(total_bits, &input).expect("test operation should succeed");

    let serialised = bitmap.to_bytes();
    let reparsed = parse_ewah_bytes(&serialised);
    assert_eq!(reparsed, bitmap);

    let decoded = reparsed.to_words().expect("test operation should succeed");
    assert_eq!(decoded, input);
}
8095
/// A SHA-1 bitmap index written by `write_bitmap` parses back with the
/// expected header fields, per-type bitmaps, single commit entry, and no name
/// hash cache.
#[test]
fn pack_bitmap_index_write_parse_roundtrip_sha1() {
    let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
    let selections = [(0u32, 0u32, vec![1u32, 2u32])];
    let bytes = write_bitmap(
        ObjectFormat::Sha1,
        pack_checksum_sha1(),
        &object_types,
        &selections,
        None,
    )
    .expect("test operation should succeed");
    assert_eq!(&bytes[..4], b"BITM");

    let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
        .expect("test operation should succeed");
    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.options, PackBitmapIndex::OPTION_FULL_DAG);
    assert_eq!(parsed.pack_checksum, pack_checksum_sha1());

    // Each object type owns exactly the position of its one object.
    let type_bitmaps = &parsed.type_bitmaps;
    let commits = type_bitmaps
        .commits
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(commits, vec![0]);
    let trees = type_bitmaps
        .trees
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(trees, vec![1]);
    let blobs = type_bitmaps
        .blobs
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(blobs, vec![2]);
    let tags = type_bitmaps
        .tags
        .to_positions()
        .expect("test operation should succeed");
    assert!(tags.is_empty());

    // The commit entry's bitmap covers the commit plus the listed objects.
    assert_eq!(parsed.entries.len(), 1);
    let entry = parsed
        .entry_for_index_position(0)
        .expect("test operation should succeed");
    assert_eq!(entry.xor_offset, 0);
    assert_eq!(entry.flags, 0);
    let reachable = entry
        .bitmap
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(reachable, vec![0, 1, 2]);
    assert_eq!(parsed.name_hash_cache, None);
}
8162
/// The write/parse round trip also works for SHA-256, keeping the format on
/// both the pack checksum and the index checksum.
#[test]
fn pack_bitmap_index_write_parse_roundtrip_sha256() {
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
        .expect("test operation should succeed");
    let object_types = [ObjectType::Commit, ObjectType::Tree];
    let selections = [(0u32, 0u32, vec![1u32])];
    let bytes = write_bitmap(
        ObjectFormat::Sha256,
        pack_checksum.clone(),
        &object_types,
        &selections,
        None,
    )
    .expect("test operation should succeed");

    let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha256, 2)
        .expect("test operation should succeed");
    assert_eq!(parsed.format, ObjectFormat::Sha256);
    assert_eq!(parsed.pack_checksum, pack_checksum);
    assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);

    let reachable = parsed.entries[0]
        .bitmap
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(reachable, vec![0, 1]);
}
8189
/// Supplying a name hash cache sets the hash-cache option flag and the cache
/// values survive the round trip.
#[test]
fn pack_bitmap_index_write_includes_name_hash_cache() {
    let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
    let cache = vec![0x1111_1111u32, 0x2222_2222, 0x3333_3333];
    let selections = [(0u32, 0u32, vec![1u32, 2u32])];
    let bytes = write_bitmap(
        ObjectFormat::Sha1,
        pack_checksum_sha1(),
        &object_types,
        &selections,
        Some(cache.clone()),
    )
    .expect("test operation should succeed");

    let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
        .expect("test operation should succeed");
    let expected_options = PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE;
    assert_eq!(parsed.options, expected_options);
    assert_eq!(parsed.name_hash_cache, Some(cache));
}
8210
/// Two commits added to one writer produce two entries, each with its own
/// reachability bitmap that includes the commit itself.
#[test]
fn pack_bitmap_writer_supports_multiple_commits() {
    let object_types = [
        ObjectType::Commit,
        ObjectType::Commit,
        ObjectType::Tree,
        ObjectType::Blob,
    ];
    let mut writer =
        PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
            .expect("test operation should succeed");
    writer
        .add_commit(0, 0, &[2, 3])
        .expect("test operation should succeed");
    writer
        .add_commit(1, 1, &[2])
        .expect("test operation should succeed");

    let bytes = writer.write().expect("test operation should succeed");
    let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 4)
        .expect("test operation should succeed");
    assert_eq!(parsed.entries.len(), 2);

    let commits = parsed
        .type_bitmaps
        .commits
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(commits, vec![0, 1]);

    let first_entry = parsed
        .entry_for_index_position(0)
        .expect("test operation should succeed");
    let first_reachable = first_entry
        .bitmap
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(first_reachable, vec![0, 2, 3]);

    let second_entry = parsed
        .entry_for_index_position(1)
        .expect("test operation should succeed");
    let second_reachable = second_entry
        .bitmap
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(second_reachable, vec![1, 2]);
}
8261
/// An index built in memory starts with an all-zero checksum; writing it
/// and parsing the bytes back yields a non-zero one.
#[test]
fn pack_bitmap_index_recomputes_checksum_on_write() {
    let zero_checksum = [0u8; 20];
    let object_types = [ObjectType::Commit, ObjectType::Blob];
    let writer = PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
        .expect("test operation should succeed");
    let mut index = writer.build().expect("test operation should succeed");
    assert_eq!(index.index_checksum.as_bytes(), zero_checksum);

    // Swap in a hand-built entry so the written index has content.
    index.entries.clear();
    let replacement = PackBitmapEntry {
        object_position: 0,
        xor_offset: 0,
        flags: 0,
        bitmap: EwahBitmap::from_positions(2, &[0, 1]).expect("test operation should succeed"),
    };
    index.entries.push(replacement);

    let bytes = index.write().expect("test operation should succeed");
    let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 2)
        .expect("test operation should succeed");
    assert_ne!(parsed.index_checksum.as_bytes(), zero_checksum);
}
8285
/// `add_commit` fails for each of four bad argument combinations against a
/// two-object pack made of one commit and one blob.
#[test]
fn pack_bitmap_writer_rejects_non_commit_selection() {
    let object_types = [ObjectType::Commit, ObjectType::Blob];
    let mut writer =
        PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
            .expect("test operation should succeed");

    // Position 1 holds a blob, not a commit.
    let selects_blob = writer.add_commit(1, 1, &[]);
    assert!(selects_blob.is_err());
    // Both arguments are beyond the two objects.
    let both_out_of_range = writer.add_commit(5, 5, &[]);
    assert!(both_out_of_range.is_err());
    // Only the second argument is beyond the two objects.
    let second_out_of_range = writer.add_commit(0, 5, &[]);
    assert!(second_out_of_range.is_err());
    // The listed position 9 is beyond the two objects.
    let listed_out_of_range = writer.add_commit(0, 0, &[9]);
    assert!(listed_out_of_range.is_err());
}
8301
/// A SHA-256 pack checksum is rejected by a writer created for SHA-1.
#[test]
fn pack_bitmap_writer_rejects_checksum_format_mismatch() {
    let sha256_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
        .expect("test operation should succeed");
    let outcome =
        PackBitmapWriter::new(ObjectFormat::Sha1, sha256_checksum, &[ObjectType::Commit]);
    assert!(outcome.is_err());
}
8311
/// A two-value name hash cache is rejected for a one-object pack.
#[test]
fn pack_bitmap_writer_rejects_bad_name_hash_cache_len() {
    let object_types = [ObjectType::Commit];
    let writer = PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
        .expect("test operation should succeed");
    let outcome = writer.with_name_hash_cache(vec![1, 2]);
    assert!(outcome.is_err());
}
8322
/// Writing fails when the hash-cache option flag and the presence of a name
/// hash cache disagree, in either direction.
#[test]
fn pack_bitmap_index_write_rejects_inconsistent_cache_flag() {
    let object_types = [ObjectType::Commit];
    let writer = PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
        .expect("test operation should succeed");
    let mut index = writer.build().expect("test operation should succeed");

    // Flag set, but no cache attached.
    index.options |= PackBitmapIndex::OPTION_HASH_CACHE;
    let flag_without_cache = index.write();
    assert!(flag_without_cache.is_err());

    // Cache attached, but flag cleared.
    index.options = PackBitmapIndex::OPTION_FULL_DAG;
    index.name_hash_cache = Some(vec![0]);
    let cache_without_flag = index.write();
    assert!(cache_without_flag.is_err());
}
8341
/// Creates a real one-commit repository with `git`, repacks it, then builds a
/// bitmap for that pack with `write_bitmap` and checks that parsing it back
/// keeps the pack checksum and a single entry covering every object.
///
/// Requires a `git` executable on `PATH`. The temporary repository is removed
/// at the end on a best-effort basis.
#[test]
fn write_bitmap_roundtrips_through_upstream_git_parser() {
    let root = unique_temp_dir("git-pack-bitmap-writer");
    fs::create_dir_all(&root).expect("test operation should succeed");
    {
        // Build the repository and consolidate it into a single pack.
        run_git_success(&root, &["init", "-q", "-b", "main"]);
        let commit_args = [
            "-c",
            "user.name=Example User",
            "-c",
            "user.email=example@example.invalid",
            "commit",
            "--allow-empty",
            "-q",
            "-m",
            "one",
        ];
        run_git_success(&root, &commit_args);
        run_git_success(&root, &["repack", "-adb"]);

        // Load the index and pack that git produced.
        let pack_dir = root.join(".git").join("objects").join("pack");
        let idx_path = single_path_with_extension(&pack_dir, "idx");
        let idx_bytes = fs::read(idx_path).expect("test operation should succeed");
        let index = PackIndex::parse(&idx_bytes, ObjectFormat::Sha1)
            .expect("test operation should succeed");
        let pack_path = single_path_with_extension(&pack_dir, "pack");
        let pack_bytes = fs::read(pack_path).expect("test operation should succeed");
        let pack = PackFile::parse_sha1(&pack_bytes).expect("test operation should succeed");

        // Bitmap positions follow pack order, i.e. ascending entry offset.
        let mut offsets: Vec<u64> = index.entries.iter().map(|entry| entry.offset).collect();
        offsets.sort_unstable();
        let position_of = |offset: u64| -> u32 {
            offsets
                .iter()
                .position(|&candidate| candidate == offset)
                .expect("test operation should succeed") as u32
        };

        // Record each object's type at its pack-order position.
        let mut object_types = vec![ObjectType::Blob; index.entries.len()];
        for index_entry in &index.entries {
            let slot = position_of(index_entry.offset) as usize;
            let packed = pack
                .entries
                .iter()
                .find(|candidate| candidate.entry.offset == index_entry.offset);
            if let Some(packed) = packed {
                object_types[slot] = packed.object.object_type;
            }
        }

        // Locate the commit both in pack order and in index order.
        let commit_position = object_types
            .iter()
            .position(|ty| *ty == ObjectType::Commit)
            .expect("test operation should succeed") as u32;
        let commit_index_position = index
            .entries
            .iter()
            .position(|entry| position_of(entry.offset) == commit_position)
            .expect("test operation should succeed")
            as u32;

        // Mark every object as reachable from the commit.
        let reachable: Vec<u32> = (0..index.entries.len() as u32).collect();
        let selections = [(commit_position, commit_index_position, reachable)];
        let bytes = write_bitmap(
            ObjectFormat::Sha1,
            index.pack_checksum.clone(),
            &object_types,
            &selections,
            None,
        )
        .expect("test operation should succeed");

        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, index.entries.len())
            .expect("test operation should succeed");
        assert_eq!(parsed.pack_checksum, index.pack_checksum);
        assert_eq!(parsed.entries.len(), 1);
        let covered = parsed.entries[0]
            .bitmap
            .to_positions()
            .expect("test operation should succeed")
            .len();
        assert_eq!(covered, index.entries.len());
    };
    let _ = fs::remove_dir_all(&root);
}
8436}