1#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::{Compress, Compression, FlushCompress, Status};
6use sley_core::{GitError, ObjectFormat, ObjectId, Result, StreamingDigest};
7use sley_formats::Bundle;
8use sley_object::{EncodedObject, ObjectType};
9use std::borrow::Borrow;
10use std::cell::RefCell;
11use std::collections::{HashMap, HashSet, VecDeque};
12use std::fmt;
13use std::fs::File;
14use std::io::{Read, Seek, SeekFrom, Write};
15use std::ops::Range;
16use std::path::Path;
17use std::sync::Arc;
18
/// Location and size bookkeeping for one object stored in a pack.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackEntry {
    /// Id of the stored object.
    pub oid: ObjectId,
    /// Number of compressed payload bytes the parser consumed for this entry.
    pub compressed_size: u64,
    /// Size declared in the entry header.
    pub uncompressed_size: u64,
    /// Byte offset of the entry header, measured from the start of the pack.
    pub offset: u64,
}
26
/// Value [`PackWriteOptions::new`] assigns to `window`.
// NOTE(review): presumably the delta search window size — confirm against the planner.
pub const DEFAULT_PACK_WINDOW: usize = 10;
33
/// Value [`PackWriteOptions::new`] assigns to `depth`.
// NOTE(review): presumably the maximum delta chain length — confirm against the planner.
pub const DEFAULT_PACK_DEPTH: usize = 50;
40
// NOTE(review): not referenced in this part of the file; presumably the object
// count below which payload compression stays single-threaded — confirm at the use site.
const PACK_PARALLEL_COMPRESSION_MIN_OBJECTS: usize = 64;
45
// NOTE(review): not referenced in this part of the file; presumably an upper bound
// on compression worker threads — confirm at the use site.
const PACK_PARALLEL_COMPRESSION_MAX_THREADS: usize = 4;
50
/// Tunables for the pack writers on [`PackFile`].
#[derive(Debug, Clone)]
pub struct PackWriteOptions {
    /// Starts at [`DEFAULT_PACK_WINDOW`]; read by the delta planner.
    // NOTE(review): presumably the number of candidate bases considered — confirm.
    pub window: usize,
    /// Starts at [`DEFAULT_PACK_DEPTH`]; `write_undeltified` sets it to 0.
    // NOTE(review): presumably the maximum delta chain depth — confirm.
    pub depth: usize,
    /// When `true`, deltas against in-pack bases are written as entry kind 6
    /// with a relative offset; otherwise as kind 7 with the base object id.
    pub prefer_ofs_delta: bool,
    /// Objects outside the pack handed to the delta planner. Every key must
    /// use the pack's object format or writing fails.
    pub thin_bases: HashMap<ObjectId, EncodedObject>,
    /// Starts as `true`; `write_undeltified` turns it off.
    // NOTE(review): presumably lets the planner reorder objects — confirm.
    pub reorder: bool,
    /// Level forwarded to payload compression. Defaults to 6;
    /// `with_compression_level` caps it at 9.
    pub compression_level: u32,
}
81
82impl Default for PackWriteOptions {
83 fn default() -> Self {
84 Self::new()
85 }
86}
87
88impl PackWriteOptions {
89 pub fn new() -> Self {
93 Self {
94 window: DEFAULT_PACK_WINDOW,
95 depth: DEFAULT_PACK_DEPTH,
96 prefer_ofs_delta: true,
97 thin_bases: HashMap::new(),
98 reorder: true,
99 compression_level: 6,
100 }
101 }
102
103 pub fn with_window(mut self, window: usize) -> Self {
105 self.window = window;
106 self
107 }
108
109 pub fn with_depth(mut self, depth: usize) -> Self {
111 self.depth = depth;
112 self
113 }
114
115 pub fn with_prefer_ofs_delta(mut self, prefer_ofs_delta: bool) -> Self {
118 self.prefer_ofs_delta = prefer_ofs_delta;
119 self
120 }
121
122 pub fn with_thin_bases(mut self, thin_bases: HashMap<ObjectId, EncodedObject>) -> Self {
124 self.thin_bases = thin_bases;
125 self
126 }
127
128 pub fn with_reorder(mut self, reorder: bool) -> Self {
131 self.reorder = reorder;
132 self
133 }
134
135 pub fn with_compression_level(mut self, level: u32) -> Self {
137 self.compression_level = level.min(9);
138 self
139 }
140}
141
/// Switches describing a repack run.
// NOTE(review): no consumer is visible in this part of the file; the field
// meanings below are inferred from names only — confirm at the use site.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepackPolicy {
    /// Presumably requests bitmap generation.
    pub write_bitmaps: bool,
    /// Presumably requests cruft packs.
    pub cruft_packs: bool,
    /// Presumably the geometric repack factor, when set.
    pub geometric_factor: Option<u8>,
}
148
/// A fully parsed pack: header version, resolved objects and trailer checksum.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackFile {
    /// Pack version from the header; the parser accepts 2 and 3.
    pub version: u32,
    /// Objects produced by resolving every entry of the pack.
    pub entries: Vec<PackObject>,
    /// Digest of all pack bytes before the trailer (verified against the trailer).
    pub checksum: ObjectId,
}
155
/// One object from a pack together with its in-pack bookkeeping.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackObject {
    /// Where and how large the entry is inside the pack.
    pub entry: PackEntry,
    /// The decoded object (type and body).
    pub object: EncodedObject,
}
161
/// Per-object record produced by [`PackFile::verify_pack_stats`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackVerifyStat {
    /// Id of the resolved object.
    pub oid: ObjectId,
    /// Type of the resolved object.
    pub object_type: ObjectType,
    /// Size declared in the on-disk entry header (for a delta entry this is
    /// the size of the delta stream, not of the resolved object).
    pub size: u64,
    /// Bytes from this entry's offset to the next entry (or to the trailer
    /// for the last entry).
    pub size_in_pack: u64,
    /// Byte offset of the entry header in the pack.
    pub offset: u64,
    /// Number of delta hops to a non-delta entry; 0 for non-delta entries.
    pub delta_depth: u32,
    /// Id of the delta base, when the entry is a delta and the base is known.
    pub base_oid: Option<ObjectId>,
}
183
/// Result of [`PackFile::verify_pack_stats`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackVerifyStats {
    /// One record per pack entry, sorted by ascending pack offset.
    pub objects: Vec<PackVerifyStat>,
    /// Checksum of the verified pack.
    pub checksum: ObjectId,
}
191
/// An in-memory pack together with its index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackWrite {
    /// Complete pack bytes, including the trailing checksum.
    pub pack: Vec<u8>,
    /// Serialized pack index for `pack`.
    pub index: Vec<u8>,
    /// Checksum stored in the pack trailer.
    pub checksum: ObjectId,
    /// Index entries (id, CRC32, offset) in the order objects were written.
    pub entries: Vec<PackIndexEntry>,
    /// Number of entries written as deltas.
    pub delta_count: u32,
}
200
/// Outcome of streaming a pack to a writer; like [`PackWrite`] but without
/// the pack bytes themselves.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackWriteSummary {
    /// Serialized pack index for the streamed pack.
    pub index: Vec<u8>,
    /// Checksum written as the pack trailer.
    pub checksum: ObjectId,
    /// Index entries (id, CRC32, offset) in the order objects were written.
    pub entries: Vec<PackIndexEntry>,
    /// Number of entries written as deltas.
    pub delta_count: u32,
    /// Total bytes written, trailer included.
    pub pack_size: u64,
}
209
/// Borrowed object plus its already-known id, so the writer does not have to
/// hash the object again.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PackInput<'a> {
    /// Id of `object`; must use the same object format as the pack being written.
    pub oid: &'a ObjectId,
    /// The object to store.
    pub object: &'a EncodedObject,
}
215
/// Index built for an existing pack (see `PackIndex::write_v2_for_pack`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexBuild {
    /// Serialized index bytes.
    pub index: Vec<u8>,
    /// Checksum of the indexed pack.
    pub pack_checksum: ObjectId,
    /// Entries described by the index.
    pub entries: Vec<PackIndexEntry>,
}
222
/// Like [`PackIndexBuild`], with an additional per-object summary.
// NOTE(review): the producer is not visible in this part of the file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackStreamIndexBuild {
    /// Serialized index bytes.
    pub index: Vec<u8>,
    /// Checksum of the indexed pack.
    pub pack_checksum: ObjectId,
    /// Entries described by the index.
    pub entries: Vec<PackIndexEntry>,
    /// Type, size and offset of each indexed object.
    pub objects: Vec<PackIndexedObject>,
}
230
/// Summary of one object encountered while indexing a pack.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexedObject {
    /// Object id.
    pub oid: ObjectId,
    /// Object type.
    pub object_type: ObjectType,
    /// Object size in bytes.
    // NOTE(review): presumably the resolved (uncompressed) size — confirm at the producer.
    pub size: u64,
    /// Offset of the entry in the pack.
    pub offset: u64,
}
238
/// Fully decoded pack index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndex {
    /// Index format version.
    pub version: u32,
    /// 256-entry fanout table; slot `b` is the number of objects whose first
    /// id byte is `<= b`.
    pub fanout: [u32; 256],
    /// One entry per indexed object.
    pub entries: Vec<PackIndexEntry>,
    /// Checksum of the pack this index describes.
    pub pack_checksum: ObjectId,
    /// Checksum of the index itself.
    pub index_checksum: ObjectId,
}
247
/// Borrowed view over serialized pack index bytes that answers lookups
/// without decoding every entry up front.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexView<'a> {
    /// Index format version.
    pub version: u32,
    /// Number of objects in the index (the last fanout slot).
    pub count: usize,
    /// 256-entry fanout table keyed by the first byte of the object id.
    pub fanout: [u32; 256],
    /// Checksum of the pack this index describes.
    pub pack_checksum: ObjectId,
    /// Checksum stored at the end of the index.
    pub index_checksum: ObjectId,
    /// The raw index bytes being viewed.
    bytes: &'a [u8],
    /// Object format the index was parsed with.
    format: ObjectFormat,
    /// Byte ranges of the tables inside `bytes`.
    tables: PackIndexViewTables,
}
259
/// Backing storage that can expose index bytes as a slice. Implementors must
/// be `Debug + Send + Sync` so they can be shared behind an `Arc`.
pub trait PackIndexByteSource: fmt::Debug + Send + Sync {
    /// Returns the stored bytes.
    fn as_bytes(&self) -> &[u8];
}
263
264impl<T> PackIndexByteSource for T
265where
266 T: AsRef<[u8]> + fmt::Debug + Send + Sync + ?Sized,
267{
268 fn as_bytes(&self) -> &[u8] {
269 self.as_ref()
270 }
271}
272
/// Reference-counted, immutable index bytes.
#[derive(Debug)]
struct SharedIndexBytes(Arc<[u8]>);
275
276impl PackIndexByteSource for SharedIndexBytes {
277 fn as_bytes(&self) -> &[u8] {
278 self.0.as_ref()
279 }
280}
281
/// Owned counterpart of [`PackIndexView`]: the same header fields, but the
/// index bytes are held behind a shared [`PackIndexByteSource`].
#[derive(Debug, Clone)]
pub struct PackIndexViewData {
    /// Index format version.
    pub version: u32,
    /// Number of objects in the index.
    pub count: usize,
    /// 256-entry fanout table keyed by the first byte of the object id.
    pub fanout: [u32; 256],
    /// Checksum of the pack this index describes.
    pub pack_checksum: ObjectId,
    /// Checksum stored at the end of the index.
    pub index_checksum: ObjectId,
    /// Shared backing storage for the index bytes.
    bytes: Arc<dyn PackIndexByteSource>,
    /// Object format the index was parsed with.
    format: ObjectFormat,
    /// Byte ranges of the tables inside the backing bytes.
    tables: PackIndexViewTables,
}
293
/// One row of a pack index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexEntry {
    /// Object id.
    pub oid: ObjectId,
    /// CRC32 over the entry's header bytes followed by its compressed payload.
    pub crc32: u32,
    /// Byte offset of the entry in the pack.
    pub offset: u64,
}
300
/// Result of looking an object id up in a pack index.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PackIndexLookup {
    /// CRC32 recorded for the entry.
    pub crc32: u32,
    /// Byte offset of the entry in the pack.
    pub offset: u64,
}
306
/// Byte ranges of the lookup tables inside serialized index bytes.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PackIndexViewTables {
    /// Version 1 layout: a single table of entries.
    V1 {
        /// Range covering the entry table.
        entry_table: Range<usize>,
    },
    /// Version 2 layout: separate, parallel tables.
    V2 {
        /// Object ids, one hash-length slot per object.
        oid_table: Range<usize>,
        /// CRC32 values, 4 bytes per object.
        crc_table: Range<usize>,
        /// 4-byte offsets; a set high bit marks an entry whose offset lives
        /// in the large-offset table.
        small_offset_table: Range<usize>,
        /// 8-byte offsets for entries flagged in the small-offset table.
        large_offset_table: Range<usize>,
    },
}
319
/// Decoded pack reverse index.
// NOTE(review): parser not visible here; field notes are inferred from names.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackReverseIndex {
    /// File format version.
    pub version: u32,
    /// Object format of the checksums.
    pub format: ObjectFormat,
    /// Presumably index positions ordered by pack offset — confirm.
    pub positions: Vec<u32>,
    /// Checksum of the associated pack.
    pub pack_checksum: ObjectId,
    /// Checksum of this file.
    pub index_checksum: ObjectId,
}
328
/// Decoded per-object modification-time table for a pack.
// NOTE(review): parser not visible here; field notes are inferred from names.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackMtimes {
    /// File format version.
    pub version: u32,
    /// Object format of the checksums.
    pub format: ObjectFormat,
    /// One 32-bit timestamp per object; ordering presumably follows the pack index — confirm.
    pub mtimes: Vec<u32>,
    /// Checksum of the associated pack.
    pub pack_checksum: ObjectId,
    /// Checksum of this file.
    pub index_checksum: ObjectId,
}
337
/// Decoded pack bitmap index.
// NOTE(review): parser not visible here; field notes are inferred from names.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapIndex {
    /// File format version.
    pub version: u16,
    /// Object format of the checksums.
    pub format: ObjectFormat,
    /// Raw option flags from the header.
    pub options: u16,
    /// Checksum of the associated pack.
    pub pack_checksum: ObjectId,
    /// Checksum of this file.
    pub index_checksum: ObjectId,
    /// One bitmap per object type.
    pub type_bitmaps: PackBitmapTypeBitmaps,
    /// Stored bitmap entries.
    pub entries: Vec<PackBitmapEntry>,
    /// Optional name-hash cache, when present in the file.
    pub name_hash_cache: Option<Vec<u32>>,
}
349
/// The four per-type bitmaps of a bitmap index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapTypeBitmaps {
    /// Bitmap for commits.
    pub commits: EwahBitmap,
    /// Bitmap for trees.
    pub trees: EwahBitmap,
    /// Bitmap for blobs.
    pub blobs: EwahBitmap,
    /// Bitmap for tags.
    pub tags: EwahBitmap,
}
357
/// One stored bitmap of a bitmap index.
// NOTE(review): parser not visible here; field notes are inferred from names.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapEntry {
    /// Position of the object this bitmap belongs to.
    pub object_position: u32,
    /// Presumably the distance back to the entry this bitmap is XOR-ed against — confirm.
    pub xor_offset: u8,
    /// Raw per-entry flags.
    pub flags: u8,
    /// The bitmap payload.
    pub bitmap: EwahBitmap,
}
371
/// Raw fields of a serialized EWAH bitmap.
// NOTE(review): encoder/decoder not visible here; field notes are inferred from names.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EwahBitmap {
    /// Number of bits the bitmap represents.
    pub bit_size: u32,
    /// The 64-bit words of the compressed bitmap.
    pub words: Vec<u64>,
    /// Presumably the position of the last running-length word — confirm.
    pub rlw_position: u32,
}
378
/// Fully decoded multi-pack index.
// NOTE(review): parser not visible here; field notes are inferred from names.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndex {
    /// File format version.
    pub version: u8,
    /// Object format of the ids.
    pub format: ObjectFormat,
    /// Number of packs covered.
    pub pack_count: u32,
    /// Names of the covered packs.
    pub pack_names: Vec<String>,
    /// Number of objects covered.
    pub object_count: u32,
    /// 256-entry fanout table keyed by the first byte of the object id.
    pub fanout: [u32; 256],
    /// One entry per object.
    pub objects: Vec<MultiPackIndexEntry>,
    /// Optional reverse index, when present.
    pub reverse_index: Option<Vec<u32>>,
    /// Optional bitmapped-pack table, when present.
    pub bitmapped_packs: Option<Vec<MultiPackBitmapPack>>,
    /// Chunks found in the file.
    pub chunks: Vec<MultiPackIndexChunk>,
    /// Trailing checksum of the file.
    pub checksum: ObjectId,
}
393
/// Lookup state over shared multi-pack-index bytes: header values plus the
/// byte positions of the tables needed to resolve an object id.
// NOTE(review): construction and lookup code are not visible here; field
// notes are inferred from names.
#[derive(Debug, Clone)]
pub struct MultiPackIndexOidLookup {
    /// Object format of the ids.
    format: ObjectFormat,
    /// Number of packs covered.
    pack_count: u32,
    /// Names of the covered packs.
    pack_names: Vec<String>,
    /// 256-entry fanout table keyed by the first byte of the object id.
    fanout: [u32; 256],
    /// Number of objects covered.
    object_count: usize,
    /// Start of the object-id lookup table inside `bytes`.
    oid_lookup_offset: usize,
    /// Start of the object-offsets table inside `bytes`.
    object_offsets_offset: usize,
    /// Start of the large-offsets table, when one exists.
    large_offsets_offset: Option<usize>,
    /// Length of the large-offsets table.
    // NOTE(review): unit (bytes vs. entries) not established here — confirm.
    large_offsets_len: usize,
    /// Shared backing storage for the file bytes.
    bytes: Arc<dyn PackIndexByteSource>,
}
407
/// One object row of a multi-pack index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndexEntry {
    /// Object id.
    pub oid: ObjectId,
    /// Numeric id of the pack holding the object.
    pub pack_int_id: u32,
    /// Offset of the object inside that pack.
    pub offset: u64,
    /// NOTE(review): presumably forces the offset into the large-offset
    /// table even when it would fit in 31 bits — confirm with the writer.
    pub force_large_offset: bool,
}
415
/// One row of the multi-pack index bitmapped-packs table.
// NOTE(review): field notes are inferred from names — confirm with the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackBitmapPack {
    /// Presumably the first bitmap position belonging to the pack.
    pub bitmap_pos: u32,
    /// Presumably the number of bitmap positions belonging to the pack.
    pub bitmap_nr: u32,
}
421
/// Location of one chunk inside a multi-pack index file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndexChunk {
    /// Four-byte chunk identifier.
    pub id: [u8; 4],
    /// Offset of the chunk.
    pub offset: u64,
    /// Length of the chunk.
    pub len: u64,
}
428
/// Kind of a pack entry as stated by its header: one of the four object
/// types, or one of the two delta encodings.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PackObjectKind {
    /// A commit stored whole.
    Commit,
    /// A tree stored whole.
    Tree,
    /// A blob stored whole.
    Blob,
    /// A tag stored whole.
    Tag,
    /// Delta whose base is identified by a pack offset (written as kind 6).
    OfsDelta,
    /// Delta whose base is identified by object id (written as kind 7).
    RefDelta,
}
438
/// A pack entry after the first parsing pass, before delta resolution.
#[derive(Debug, Clone, PartialEq, Eq)]
enum ParsedPackEntry {
    /// A non-delta entry whose object and id are already known.
    Resolved(PackObject),
    /// A delta entry that still has to be applied to its base.
    Delta {
        /// How the base object is identified.
        base: DeltaBase,
        /// Compressed bytes consumed from the pack for the delta payload.
        compressed_size: u64,
        /// Size of the inflated delta as declared by the entry header.
        delta_size: u64,
        /// Byte offset of the entry header in the pack.
        offset: u64,
        /// The inflated delta instructions.
        delta: Vec<u8>,
    },
}
450
/// How a delta entry names its base.
#[derive(Debug, Clone, PartialEq, Eq)]
enum DeltaBase {
    /// Pack offset of the base entry (compared directly against entry offsets).
    Offset(u64),
    /// Object id of the base.
    Ref(ObjectId),
}
456
/// Raw facts about one entry as it sits in the pack, gathered by
/// `verify_pack_stats` without resolving deltas.
struct OnDiskEntry {
    /// Byte offset of the entry header in the pack.
    offset: u64,
    /// Base reference for delta entries; `None` for whole objects.
    base: Option<DeltaBase>,
    /// Size declared in the entry header.
    stream_size: u64,
}
465
466impl PackFile {
467 pub fn parse_sha1(bytes: &[u8]) -> Result<Self> {
468 Self::parse(bytes, ObjectFormat::Sha1)
469 }
470
471 pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
472 Self::parse_with_base(bytes, format, |_| Ok(None))
473 }
474
475 pub fn parse_bundle(bundle: &Bundle) -> Result<Self> {
476 Self::parse(&bundle.pack, bundle.format)
477 }
478
479 pub fn index_pack(bytes: &[u8], format: ObjectFormat) -> Result<PackWrite> {
480 let PackIndexBuild {
481 index,
482 pack_checksum,
483 entries,
484 } = PackIndex::write_v2_for_pack(bytes, format)?;
485 Ok(PackWrite {
486 pack: bytes.to_vec(),
487 index,
488 checksum: pack_checksum,
489 entries,
490 delta_count: 0,
491 })
492 }
493
494 pub fn parse_thin<F>(bytes: &[u8], format: ObjectFormat, external_base: F) -> Result<Self>
495 where
496 F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
497 {
498 Self::parse_with_base(bytes, format, external_base)
499 }
500
501 fn parse_with_base<F>(bytes: &[u8], format: ObjectFormat, mut external_base: F) -> Result<Self>
502 where
503 F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
504 {
505 let trailer_len = format.raw_len();
506 if bytes.len() < 12 + trailer_len {
507 return Err(GitError::InvalidFormat("pack file too short".into()));
508 }
509 let trailer_offset = bytes.len() - trailer_len;
510 let checksum = sley_core::digest_bytes(format, &bytes[..trailer_offset])?;
511 let expected = ObjectId::from_raw(format, &bytes[trailer_offset..])?;
512 if checksum != expected {
513 return Err(GitError::InvalidFormat(format!(
514 "pack checksum mismatch: expected {expected}, got {checksum}"
515 )));
516 }
517
518 if &bytes[..4] != b"PACK" {
519 return Err(GitError::InvalidFormat("missing PACK signature".into()));
520 }
521 let version = u32_be(&bytes[4..8]);
522 if version != 2 && version != 3 {
523 return Err(GitError::Unsupported(format!("pack version {version}")));
524 }
525 let count = u32_be(&bytes[8..12]) as usize;
526 let mut offset = 12usize;
527 let mut entries = Vec::with_capacity(count);
528 for _ in 0..count {
529 let entry_offset = offset;
530 let header = parse_entry_header(bytes, &mut offset)?;
531 let base =
532 match header.kind {
533 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
534 parse_ofs_delta_base_offset(bytes, &mut offset, entry_offset as u64)?,
535 )),
536 PackObjectKind::RefDelta => {
537 let hash_len = format.raw_len();
538 if offset + hash_len > trailer_offset {
539 return Err(GitError::InvalidFormat(
540 "truncated ref-delta base object id".into(),
541 ));
542 }
543 let oid = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
544 offset += hash_len;
545 Some(DeltaBase::Ref(oid))
546 }
547 _ => None,
548 };
549 let mut body = Vec::new();
550 let consumed = inflate_into(
551 &bytes[offset..trailer_offset],
552 &mut body,
553 header.size.min(usize::MAX as u64) as usize,
554 )?;
555 if body.len() as u64 != header.size {
556 return Err(GitError::InvalidObject(format!(
557 "pack object declared {} bytes, decoded {}",
558 header.size,
559 body.len()
560 )));
561 }
562 if consumed == 0 {
563 return Err(GitError::InvalidFormat(
564 "empty compressed pack entry".into(),
565 ));
566 }
567 offset = offset
568 .checked_add(consumed)
569 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
570 if offset > trailer_offset {
571 return Err(GitError::InvalidFormat(
572 "pack entry extends past checksum".into(),
573 ));
574 }
575 if let Some(base) = base {
576 entries.push(ParsedPackEntry::Delta {
577 base,
578 compressed_size: consumed as u64,
579 delta_size: header.size,
580 offset: entry_offset as u64,
581 delta: body,
582 });
583 } else {
584 let object_type = match header.kind {
585 PackObjectKind::Commit => ObjectType::Commit,
586 PackObjectKind::Tree => ObjectType::Tree,
587 PackObjectKind::Blob => ObjectType::Blob,
588 PackObjectKind::Tag => ObjectType::Tag,
589 PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
590 };
591 let object = EncodedObject::new(object_type, body);
592 let oid = object.object_id(format)?;
593 entries.push(ParsedPackEntry::Resolved(PackObject {
594 entry: PackEntry {
595 oid,
596 compressed_size: consumed as u64,
597 uncompressed_size: header.size,
598 offset: entry_offset as u64,
599 },
600 object,
601 }));
602 }
603 }
604 if offset != trailer_offset {
605 return Err(GitError::InvalidFormat(format!(
606 "pack has {} trailing bytes before checksum",
607 trailer_offset - offset
608 )));
609 }
610 Ok(Self {
611 version,
612 entries: resolve_pack_entries(entries, format, &mut external_base)?,
613 checksum,
614 })
615 }
616
617 pub fn verify_pack_stats(bytes: &[u8], format: ObjectFormat) -> Result<PackVerifyStats> {
628 let pack = Self::parse(bytes, format)?;
632
633 let trailer_len = format.raw_len();
637 let trailer_offset = bytes.len() - trailer_len;
638 let count = u32_be(&bytes[8..12]) as usize;
639 let mut offset = 12usize;
640 let mut on_disk: Vec<OnDiskEntry> = Vec::with_capacity(count);
646 for _ in 0..count {
647 let entry_offset = offset as u64;
648 let header = parse_entry_header(bytes, &mut offset)?;
649 let stream_size = header.size;
650 let base =
651 match header.kind {
652 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
653 parse_ofs_delta_base_offset(bytes, &mut offset, entry_offset)?,
654 )),
655 PackObjectKind::RefDelta => {
656 let hash_len = format.raw_len();
657 if offset + hash_len > trailer_offset {
658 return Err(GitError::InvalidFormat(
659 "truncated ref-delta base object id".into(),
660 ));
661 }
662 let oid = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
663 offset += hash_len;
664 Some(DeltaBase::Ref(oid))
665 }
666 _ => None,
667 };
668 let mut body = Vec::new();
670 let consumed = inflate_into(
671 &bytes[offset..trailer_offset],
672 &mut body,
673 header.size.min(usize::MAX as u64) as usize,
674 )?;
675 offset = offset
676 .checked_add(consumed)
677 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
678 on_disk.push(OnDiskEntry {
679 offset: entry_offset,
680 base,
681 stream_size,
682 });
683 }
684
685 let mut resolved_by_offset: HashMap<u64, &PackObject> =
687 HashMap::with_capacity(pack.entries.len());
688 for object in &pack.entries {
689 resolved_by_offset.insert(object.entry.offset, object);
690 }
691 let mut oid_by_offset: HashMap<u64, ObjectId> = HashMap::with_capacity(on_disk.len());
693 for entry in &on_disk {
694 if let Some(object) = resolved_by_offset.get(&entry.offset) {
695 oid_by_offset.insert(entry.offset, object.entry.oid);
696 }
697 }
698 let mut index_by_offset: HashMap<u64, usize> = HashMap::with_capacity(on_disk.len());
700 for (idx, entry) in on_disk.iter().enumerate() {
701 index_by_offset.insert(entry.offset, idx);
702 }
703
704 let mut sorted_offsets: Vec<u64> = on_disk.iter().map(|entry| entry.offset).collect();
707 sorted_offsets.sort_unstable();
708 let mut next_offset: HashMap<u64, u64> = HashMap::with_capacity(sorted_offsets.len());
709 for window in sorted_offsets.windows(2) {
710 next_offset.insert(window[0], window[1]);
711 }
712 if let Some(last) = sorted_offsets.last() {
713 next_offset.insert(*last, trailer_offset as u64);
714 }
715
716 let mut depth = vec![None; on_disk.len()];
722 fn resolve_depth(
723 idx: usize,
724 on_disk: &[OnDiskEntry],
725 index_by_offset: &HashMap<u64, usize>,
726 offset_of_oid: &HashMap<ObjectId, u64>,
727 depth: &mut [Option<u32>],
728 ) -> u32 {
729 if let Some(d) = depth[idx] {
730 return d;
731 }
732 let computed = match &on_disk[idx].base {
733 None => 0,
734 Some(base) => {
735 let base_idx = match base {
736 DeltaBase::Offset(off) => index_by_offset.get(off).copied(),
737 DeltaBase::Ref(oid) => offset_of_oid
738 .get(oid)
739 .and_then(|off| index_by_offset.get(off).copied()),
740 };
741 match base_idx {
742 Some(bi) => {
743 resolve_depth(bi, on_disk, index_by_offset, offset_of_oid, depth) + 1
744 }
745 None => 1,
747 }
748 }
749 };
750 depth[idx] = Some(computed);
751 computed
752 }
753 let mut offset_of_oid: HashMap<ObjectId, u64> = HashMap::with_capacity(oid_by_offset.len());
754 for (off, oid) in &oid_by_offset {
755 offset_of_oid.insert(*oid, *off);
756 }
757 for idx in 0..on_disk.len() {
758 resolve_depth(idx, &on_disk, &index_by_offset, &offset_of_oid, &mut depth);
759 }
760
761 let mut stats = Vec::with_capacity(on_disk.len());
762 for (idx, entry) in on_disk.iter().enumerate() {
763 let off = entry.offset;
764 let object = resolved_by_offset.get(&off).ok_or_else(|| {
765 GitError::InvalidFormat("pack offset missing from resolved set".into())
766 })?;
767 let size_in_pack = next_offset
768 .get(&off)
769 .copied()
770 .unwrap_or(trailer_offset as u64)
771 .saturating_sub(off);
772 let base_oid = match &entry.base {
773 None => None,
774 Some(DeltaBase::Offset(base_off)) => oid_by_offset.get(base_off).copied(),
775 Some(DeltaBase::Ref(oid)) => Some(*oid),
776 };
777 stats.push(PackVerifyStat {
778 oid: object.entry.oid,
779 object_type: object.object.object_type,
780 size: entry.stream_size,
783 size_in_pack,
784 offset: off,
785 delta_depth: depth[idx].unwrap_or(0),
786 base_oid,
787 });
788 }
789 stats.sort_by_key(|stat| stat.offset);
791
792 Ok(PackVerifyStats {
793 objects: stats,
794 checksum: pack.checksum,
795 })
796 }
797
798 pub fn write_undeltified_sha1<T>(objects: &[T]) -> Result<PackWrite>
799 where
800 T: Borrow<EncodedObject>,
801 {
802 Self::write_undeltified(objects, ObjectFormat::Sha1)
803 }
804
805 pub fn write_undeltified<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
811 where
812 T: Borrow<EncodedObject>,
813 {
814 let options = PackWriteOptions::new().with_depth(0).with_reorder(false);
815 Self::write_packed_impl(objects, format, &options)
816 }
817
818 pub fn write_packed<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
827 where
828 T: Borrow<EncodedObject>,
829 {
830 Self::write_packed_with_options(objects, format, &PackWriteOptions::new())
831 }
832
833 pub fn write_packed_with_options<T>(
837 objects: &[T],
838 format: ObjectFormat,
839 options: &PackWriteOptions,
840 ) -> Result<PackWrite>
841 where
842 T: Borrow<EncodedObject>,
843 {
844 Self::write_packed_impl(objects, format, options)
845 }
846
847 pub fn write_packed_with_known_ids(
856 inputs: &[PackInput<'_>],
857 format: ObjectFormat,
858 ) -> Result<PackWrite> {
859 Self::write_packed_with_known_ids_and_options(inputs, format, &PackWriteOptions::new())
860 }
861
862 pub fn write_packed_with_known_ids_and_options(
865 inputs: &[PackInput<'_>],
866 format: ObjectFormat,
867 options: &PackWriteOptions,
868 ) -> Result<PackWrite> {
869 if inputs.len() > u32::MAX as usize {
870 return Err(GitError::InvalidFormat("too many pack objects".into()));
871 }
872 let mut objects = Vec::with_capacity(inputs.len());
873 let mut object_ids = Vec::with_capacity(inputs.len());
874 for input in inputs {
875 if input.oid.format() != format {
876 return Err(GitError::InvalidObjectId(format!(
877 "pack object id {} uses {}, pack uses {}",
878 input.oid,
879 input.oid.format().name(),
880 format.name()
881 )));
882 }
883 objects.push(input.object);
884 object_ids.push(*input.oid);
885 }
886 Self::write_packed_from_parts(objects, object_ids, format, options)
887 }
888
889 pub fn write_packed_with_known_ids_to_writer<W>(
890 inputs: &[PackInput<'_>],
891 format: ObjectFormat,
892 options: &PackWriteOptions,
893 writer: &mut W,
894 ) -> Result<PackWriteSummary>
895 where
896 W: Write,
897 {
898 if inputs.len() > u32::MAX as usize {
899 return Err(GitError::InvalidFormat("too many pack objects".into()));
900 }
901 let mut objects = Vec::with_capacity(inputs.len());
902 let mut object_ids = Vec::with_capacity(inputs.len());
903 for input in inputs {
904 if input.oid.format() != format {
905 return Err(GitError::InvalidObjectId(format!(
906 "pack object id {} uses {}, pack uses {}",
907 input.oid,
908 input.oid.format().name(),
909 format.name()
910 )));
911 }
912 objects.push(input.object);
913 object_ids.push(*input.oid);
914 }
915 Self::write_packed_from_parts_to_writer(objects, object_ids, format, options, writer)
916 }
917
918 pub fn write_thin<T>(
927 objects: &[T],
928 format: ObjectFormat,
929 external_bases: HashMap<ObjectId, EncodedObject>,
930 ) -> Result<PackWrite>
931 where
932 T: Borrow<EncodedObject>,
933 {
934 let options = PackWriteOptions::new().with_thin_bases(external_bases);
935 Self::write_packed_impl(objects, format, &options)
936 }
937
938 fn write_packed_impl<T>(
939 objects: &[T],
940 format: ObjectFormat,
941 options: &PackWriteOptions,
942 ) -> Result<PackWrite>
943 where
944 T: Borrow<EncodedObject>,
945 {
946 if objects.len() > u32::MAX as usize {
947 return Err(GitError::InvalidFormat("too many pack objects".into()));
948 }
949 let objects: Vec<&EncodedObject> = objects.iter().map(Borrow::borrow).collect();
950
951 let mut object_ids: Vec<ObjectId> = Vec::with_capacity(objects.len());
954 for object in &objects {
955 object_ids.push(object.object_id(format)?);
956 }
957 Self::write_packed_from_parts(objects, object_ids, format, options)
958 }
959
960 fn write_packed_from_parts(
961 objects: Vec<&EncodedObject>,
962 object_ids: Vec<ObjectId>,
963 format: ObjectFormat,
964 options: &PackWriteOptions,
965 ) -> Result<PackWrite> {
966 let mut seen = HashSet::with_capacity(object_ids.len());
967 for oid in &object_ids {
968 if !seen.insert(oid) {
969 return Err(GitError::InvalidFormat(format!(
970 "pack contains duplicate object id {oid}"
971 )));
972 }
973 }
974
975 for oid in options.thin_bases.keys() {
977 if oid.format() != format {
978 return Err(GitError::InvalidObjectId(
979 "thin pack base object id format does not match pack format".into(),
980 ));
981 }
982 }
983
984 let (plan, order) = plan_pack_deltas(&objects, &object_ids, options)?;
990
991 let mut pack = Vec::new();
992 pack.extend_from_slice(b"PACK");
993 pack.extend_from_slice(&2u32.to_be_bytes());
994 pack.extend_from_slice(&(objects.len() as u32).to_be_bytes());
995
996 let mut index_entries = Vec::with_capacity(objects.len());
997 let mut delta_count = 0u32;
998 let mut written_offsets: Vec<Option<u64>> = vec![None; objects.len()];
1001
1002 let compressed_payloads =
1003 compress_planned_payloads(&objects, &plan, &order, options.compression_level)?;
1004
1005 for (order_pos, &idx) in order.iter().enumerate() {
1006 let offset = pack.len() as u64;
1007 let mut entry_bytes = Vec::new();
1008 match &plan[idx].base {
1009 PlannedBase::None => {
1010 write_entry_header(
1011 &mut entry_bytes,
1012 objects[idx].object_type,
1013 objects[idx].body.len() as u64,
1014 );
1015 }
1016 PlannedBase::InPack { base_idx, delta } => {
1017 delta_count += 1;
1018 let base_offset = written_offsets[*base_idx].ok_or_else(|| {
1019 GitError::InvalidFormat(
1020 "in-pack delta base emitted after dependent object".into(),
1021 )
1022 })?;
1023 if options.prefer_ofs_delta {
1024 write_pack_entry_header_kind(&mut entry_bytes, 6, delta.len() as u64);
1025 let relative = offset.checked_sub(base_offset).ok_or_else(|| {
1026 GitError::InvalidFormat("ofs-delta base offset is after delta".into())
1027 })?;
1028 write_ofs_delta_offset(&mut entry_bytes, relative)?;
1029 } else {
1030 write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
1031 entry_bytes.extend_from_slice(object_ids[*base_idx].as_bytes());
1032 }
1033 }
1034 PlannedBase::External { base_oid, delta } => {
1035 delta_count += 1;
1036 write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
1037 entry_bytes.extend_from_slice(base_oid.as_bytes());
1038 }
1039 }
1040 entry_bytes.extend_from_slice(&compressed_payloads[order_pos]);
1041 let crc32 = crc32fast::hash(&entry_bytes);
1042 pack.extend_from_slice(&entry_bytes);
1043 written_offsets[idx] = Some(offset);
1044 index_entries.push(PackIndexEntry {
1045 oid: object_ids[idx].clone(),
1046 crc32,
1047 offset,
1048 });
1049 }
1050
1051 let checksum = sley_core::digest_bytes(format, &pack)?;
1052 pack.extend_from_slice(checksum.as_bytes());
1053 let index = PackIndex::write_v2(format, &index_entries, &checksum)?;
1054 Ok(PackWrite {
1055 pack,
1056 index,
1057 checksum,
1058 entries: index_entries,
1059 delta_count,
1060 })
1061 }
1062
1063 fn write_packed_from_parts_to_writer<W>(
1064 objects: Vec<&EncodedObject>,
1065 object_ids: Vec<ObjectId>,
1066 format: ObjectFormat,
1067 options: &PackWriteOptions,
1068 writer: &mut W,
1069 ) -> Result<PackWriteSummary>
1070 where
1071 W: Write,
1072 {
1073 let mut seen = HashSet::with_capacity(object_ids.len());
1074 for oid in &object_ids {
1075 if !seen.insert(oid) {
1076 return Err(GitError::InvalidFormat(format!(
1077 "pack contains duplicate object id {oid}"
1078 )));
1079 }
1080 }
1081
1082 for oid in options.thin_bases.keys() {
1083 if oid.format() != format {
1084 return Err(GitError::InvalidObjectId(
1085 "thin pack base object id format does not match pack format".into(),
1086 ));
1087 }
1088 }
1089
1090 let (plan, order) = plan_pack_deltas(&objects, &object_ids, options)?;
1091 let mut output = PackDigestWriter::new(writer, format);
1092 output.write_pack_bytes(b"PACK")?;
1093 output.write_pack_bytes(&2u32.to_be_bytes())?;
1094 output.write_pack_bytes(&(objects.len() as u32).to_be_bytes())?;
1095
1096 let mut index_entries = Vec::with_capacity(objects.len());
1097 let mut delta_count = 0u32;
1098 let mut written_offsets: Vec<Option<u64>> = vec![None; objects.len()];
1099
1100 for &idx in &order {
1101 let offset = output.position();
1102 let mut entry_header = Vec::new();
1103 match &plan[idx].base {
1104 PlannedBase::None => {
1105 write_entry_header(
1106 &mut entry_header,
1107 objects[idx].object_type,
1108 objects[idx].body.len() as u64,
1109 );
1110 }
1111 PlannedBase::InPack { base_idx, delta } => {
1112 delta_count += 1;
1113 let base_offset = written_offsets[*base_idx].ok_or_else(|| {
1114 GitError::InvalidFormat(
1115 "in-pack delta base emitted after dependent object".into(),
1116 )
1117 })?;
1118 if options.prefer_ofs_delta {
1119 write_pack_entry_header_kind(&mut entry_header, 6, delta.len() as u64);
1120 let relative = offset.checked_sub(base_offset).ok_or_else(|| {
1121 GitError::InvalidFormat("ofs-delta base offset is after delta".into())
1122 })?;
1123 write_ofs_delta_offset(&mut entry_header, relative)?;
1124 } else {
1125 write_pack_entry_header_kind(&mut entry_header, 7, delta.len() as u64);
1126 entry_header.extend_from_slice(object_ids[*base_idx].as_bytes());
1127 }
1128 }
1129 PlannedBase::External { base_oid, delta } => {
1130 delta_count += 1;
1131 write_pack_entry_header_kind(&mut entry_header, 7, delta.len() as u64);
1132 entry_header.extend_from_slice(base_oid.as_bytes());
1133 }
1134 }
1135 let compressed_payload = compressed_payload(
1136 planned_payload(&objects, &plan, idx),
1137 options.compression_level,
1138 )?;
1139 let mut crc32 = crc32fast::Hasher::new();
1140 crc32.update(&entry_header);
1141 crc32.update(&compressed_payload);
1142 output.write_pack_bytes(&entry_header)?;
1143 output.write_pack_bytes(&compressed_payload)?;
1144 written_offsets[idx] = Some(offset);
1145 index_entries.push(PackIndexEntry {
1146 oid: object_ids[idx],
1147 crc32: crc32.finalize(),
1148 offset,
1149 });
1150 }
1151
1152 let (checksum, pack_size) = output.finish()?;
1153 let index = PackIndex::write_v2(format, &index_entries, &checksum)?;
1154 Ok(PackWriteSummary {
1155 index,
1156 checksum,
1157 entries: index_entries,
1158 delta_count,
1159 pack_size,
1160 })
1161 }
1162}
1163
/// Writer adapter that forwards pack bytes to `writer` while feeding the
/// same bytes to a running digest and counting them.
struct PackDigestWriter<'a, W> {
    /// Destination for the pack bytes.
    writer: &'a mut W,
    /// Running digest over everything written so far.
    digest: StreamingDigest,
    /// Number of bytes written so far.
    position: u64,
}
1169
1170impl<'a, W> PackDigestWriter<'a, W>
1171where
1172 W: Write,
1173{
1174 fn new(writer: &'a mut W, format: ObjectFormat) -> Self {
1175 Self {
1176 writer,
1177 digest: StreamingDigest::new(format),
1178 position: 0,
1179 }
1180 }
1181
1182 fn position(&self) -> u64 {
1183 self.position
1184 }
1185
1186 fn write_pack_bytes(&mut self, bytes: &[u8]) -> Result<()> {
1187 self.writer.write_all(bytes)?;
1188 self.digest.update(bytes);
1189 self.position = self
1190 .position
1191 .checked_add(bytes.len() as u64)
1192 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
1193 Ok(())
1194 }
1195
1196 fn finish(mut self) -> Result<(ObjectId, u64)> {
1197 let checksum = self.digest.finalize()?;
1198 self.writer.write_all(checksum.as_bytes())?;
1199 self.position = self
1200 .position
1201 .checked_add(checksum.as_bytes().len() as u64)
1202 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
1203 Ok((checksum, self.position))
1204 }
1205}
1206
1207impl<'a> PackIndexView<'a> {
1208 pub fn parse_v2_sha1(bytes: &'a [u8]) -> Result<Self> {
1209 Self::parse(bytes, ObjectFormat::Sha1)
1210 }
1211
1212 pub fn parse(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
1213 Self::parse_impl(bytes, format, true, true)
1214 }
1215
1216 pub fn parse_without_checksum(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
1220 Self::parse_impl(bytes, format, false, true)
1221 }
1222
1223 pub fn parse_trusted_without_checksum(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
1230 Self::parse_impl(bytes, format, false, false)
1231 }
1232
/// Number of objects in the index.
pub fn count(&self) -> usize {
    self.count
}
1236
/// The 256-entry fanout table, keyed by the first byte of the object id.
pub fn fanout(&self) -> &[u32; 256] {
    &self.fanout
}
1240
1241 pub fn find(&self, oid: &ObjectId) -> Option<PackIndexLookup> {
1242 if oid.format() != self.format {
1243 return None;
1244 }
1245 let bucket = usize::from(oid.as_bytes()[0]);
1246 let mut start = if bucket == 0 {
1247 0
1248 } else {
1249 self.fanout[bucket - 1] as usize
1250 };
1251 let mut end = self.fanout[bucket] as usize;
1252 let target = oid.as_bytes();
1253
1254 while start < end {
1255 let mid = start + (end - start) / 2;
1256 match self.oid_bytes_at(mid).cmp(target) {
1257 std::cmp::Ordering::Less => start = mid + 1,
1258 std::cmp::Ordering::Equal => return self.lookup_at(mid),
1259 std::cmp::Ordering::Greater => end = mid,
1260 }
1261 }
1262 None
1263 }
1264
    /// Shared parser behind the public `parse*` constructors.
    ///
    /// Buffers that do not begin with the `\xfftOc` magic are handed to
    /// `parse_v1_impl`; otherwise only version 2 is accepted.
    ///
    /// * `verify_checksum` - recompute the digest of everything before the
    ///   trailing index checksum and reject the index on mismatch.
    /// * `validate_entries` - run `validate_v2_entries` over the parsed tables.
    ///
    /// # Errors
    ///
    /// `GitError::InvalidFormat` for truncated or inconsistent input,
    /// `GitError::Unsupported` for a version other than 2, plus anything the
    /// helpers it calls report.
    fn parse_impl(
        bytes: &'a [u8],
        format: ObjectFormat,
        verify_checksum: bool,
        validate_entries: bool,
    ) -> Result<Self> {
        let hash_len = format.raw_len();
        if bytes.len() < 4 {
            return Err(GitError::InvalidFormat("pack index too short".into()));
        }
        // No version-2 magic: treat the buffer as a version-1 index.
        if bytes[..4] != [0xff, b't', b'O', b'c'] {
            return Self::parse_v1_impl(bytes, format, verify_checksum, validate_entries);
        }
        // Minimum size: magic + version, the fanout, and the two trailing hashes.
        if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
            return Err(GitError::InvalidFormat("pack index too short".into()));
        }
        let version = u32_be(&bytes[4..8]);
        if version != 2 {
            return Err(GitError::Unsupported(format!(
                "pack index version {version}"
            )));
        }
        // The last `hash_len` bytes are the checksum of everything before them.
        let index_checksum_offset = bytes.len() - hash_len;
        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
        if verify_checksum {
            let actual_index_checksum =
                sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
            if actual_index_checksum != index_checksum {
                return Err(GitError::InvalidFormat(format!(
                    "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
                )));
            }
        }

        // The fanout starts right after the 4-byte magic and 4-byte version.
        let mut offset = 8usize;
        let fanout = read_pack_index_fanout(bytes, &mut offset)?;
        // The last fanout slot holds the total object count.
        let count = fanout[255] as usize;
        let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
        offset = oid_table.end;
        let crc_table = checked_range(offset, count, 4, bytes.len())?;
        offset = crc_table.end;
        let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
        offset = small_offset_table.end;

        // Offset words with the top bit set refer to the large offset table;
        // counting them gives the expected size of that table.
        let large_offset_count = (0..count)
            .filter(|idx| {
                let start = small_offset_table.start + idx * 4;
                u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
            })
            .count();
        let mut large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
        offset = large_offset_table.end;

        // Two hashes close the file: the pack checksum, then the index checksum.
        let expected_trailer_offset = bytes.len() - hash_len * 2;
        if offset != expected_trailer_offset {
            if !verify_checksum && offset < expected_trailer_offset {
                // Lenient mode: without checksum verification, extra bytes in
                // front of the trailer are folded into the large offset table
                // instead of being rejected.
                large_offset_table = large_offset_table.start..expected_trailer_offset;
                offset = expected_trailer_offset;
            } else {
                // NOTE: when the tables run past the trailer the reported
                // byte count saturates to 0.
                return Err(GitError::InvalidFormat(format!(
                    "pack index has {} unexpected bytes before trailer",
                    expected_trailer_offset.saturating_sub(offset)
                )));
            }
        }
        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;

        let view = Self {
            version,
            count,
            fanout,
            pack_checksum,
            index_checksum,
            bytes,
            format,
            tables: PackIndexViewTables::V2 {
                oid_table,
                crc_table,
                small_offset_table,
                large_offset_table,
            },
        };
        if validate_entries {
            view.validate_v2_entries()?;
        }
        Ok(view)
    }
1352
    /// Parses a version-1 pack index: a fanout table at offset 0, followed by
    /// one `(4-byte offset, object id)` record per object and the two
    /// trailing hashes.
    ///
    /// `verify_checksum` and `validate_entries` have the same meaning as in
    /// `parse_impl`; validation is delegated to `validate_v1_entries`.
    ///
    /// # Errors
    ///
    /// `GitError::InvalidFormat` for truncated or inconsistent input, plus
    /// anything the helpers it calls report.
    fn parse_v1_impl(
        bytes: &'a [u8],
        format: ObjectFormat,
        verify_checksum: bool,
        validate_entries: bool,
    ) -> Result<Self> {
        let hash_len = format.raw_len();
        // Minimum size: the fanout plus the two trailing hashes.
        if bytes.len() < 256 * 4 + 2 * hash_len {
            return Err(GitError::InvalidFormat("pack index too short".into()));
        }
        let index_checksum_offset = bytes.len() - hash_len;
        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
        if verify_checksum {
            let actual_index_checksum =
                sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
            if actual_index_checksum != index_checksum {
                return Err(GitError::InvalidFormat(format!(
                    "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
                )));
            }
        }

        // Version 1 has no header: the fanout is the first thing in the file.
        let mut offset = 0usize;
        let fanout = read_pack_index_fanout(bytes, &mut offset)?;
        let count = fanout[255] as usize;
        // Each record is a 4-byte pack offset followed by the object id.
        let entry_len = hash_len
            .checked_add(4)
            .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
        let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
        offset = entry_table.end;
        // The records must end exactly where the two trailing hashes begin.
        let expected_trailer_offset = bytes.len() - hash_len * 2;
        if offset != expected_trailer_offset {
            return Err(GitError::InvalidFormat(format!(
                "pack index has {} unexpected bytes before trailer",
                expected_trailer_offset.saturating_sub(offset)
            )));
        }
        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;

        let view = Self {
            version: 1,
            count,
            fanout,
            pack_checksum,
            index_checksum,
            bytes,
            format,
            tables: PackIndexViewTables::V1 { entry_table },
        };
        if validate_entries {
            view.validate_v1_entries()?;
        }
        Ok(view)
    }
1407
1408 fn validate_v2_entries(&self) -> Result<()> {
1409 let PackIndexViewTables::V2 {
1410 oid_table,
1411 small_offset_table,
1412 large_offset_table,
1413 ..
1414 } = &self.tables
1415 else {
1416 unreachable!("v2 validation only runs for v2 views");
1417 };
1418 let oid_table = self.slice(oid_table.clone());
1419 let small_offset_table = self.slice(small_offset_table.clone());
1420 let large_offset_table = self.slice(large_offset_table.clone());
1421 let hash_len = self.format.raw_len();
1422 for idx in 0..self.count {
1423 let oid_start = idx * hash_len;
1424 let oid_bytes = &oid_table[oid_start..oid_start + hash_len];
1425 if idx > 0 && oid_bytes <= &oid_table[oid_start - hash_len..oid_start] {
1426 return Err(GitError::InvalidFormat(
1427 "pack index object ids are not strictly ascending".into(),
1428 ));
1429 }
1430 validate_pack_index_oid_fanout(idx, oid_bytes, &self.fanout)?;
1431
1432 let offset_start = idx * 4;
1433 let raw_offset = u32_be(&small_offset_table[offset_start..offset_start + 4]);
1434 pack_index_v2_offset(raw_offset, large_offset_table)?;
1435 }
1436 Ok(())
1437 }
1438
1439 fn validate_v1_entries(&self) -> Result<()> {
1440 let PackIndexViewTables::V1 { entry_table } = &self.tables else {
1441 unreachable!("v1 validation only runs for v1 views");
1442 };
1443 let entry_table = self.slice(entry_table.clone());
1444 let hash_len = self.format.raw_len();
1445 let entry_len = hash_len
1446 .checked_add(4)
1447 .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1448 for idx in 0..self.count {
1449 let start = idx * entry_len;
1450 let oid_start = start + 4;
1451 let oid_bytes = &entry_table[oid_start..start + entry_len];
1452 if idx > 0 {
1453 let previous_oid_start = oid_start - entry_len;
1454 let previous_oid = &entry_table[previous_oid_start..previous_oid_start + hash_len];
1455 if previous_oid >= oid_bytes {
1456 return Err(GitError::InvalidFormat(
1457 "pack index object ids are not strictly sorted".into(),
1458 ));
1459 }
1460 }
1461 validate_pack_index_oid_fanout(idx, oid_bytes, &self.fanout)?;
1462 }
1463 Ok(())
1464 }
1465
1466 fn oid_bytes_at(&self, idx: usize) -> &'a [u8] {
1467 let hash_len = self.format.raw_len();
1468 match &self.tables {
1469 PackIndexViewTables::V1 { entry_table } => {
1470 let entry_table = self.slice(entry_table.clone());
1471 let entry_len = hash_len + 4;
1472 let start = idx * entry_len + 4;
1473 &entry_table[start..start + hash_len]
1474 }
1475 PackIndexViewTables::V2 { oid_table, .. } => {
1476 let oid_table = self.slice(oid_table.clone());
1477 let start = idx * hash_len;
1478 &oid_table[start..start + hash_len]
1479 }
1480 }
1481 }
1482
1483 fn lookup_at(&self, idx: usize) -> Option<PackIndexLookup> {
1484 if idx >= self.count {
1485 return None;
1486 }
1487 let hash_len = self.format.raw_len();
1488 match &self.tables {
1489 PackIndexViewTables::V1 { entry_table } => {
1490 let entry_table = self.slice(entry_table.clone());
1491 let entry_len = hash_len + 4;
1492 let start = idx * entry_len;
1493 Some(PackIndexLookup {
1494 crc32: 0,
1495 offset: u64::from(u32_be(&entry_table[start..start + 4])),
1496 })
1497 }
1498 PackIndexViewTables::V2 {
1499 crc_table,
1500 small_offset_table,
1501 large_offset_table,
1502 ..
1503 } => {
1504 let crc_table = self.slice(crc_table.clone());
1505 let small_offset_table = self.slice(small_offset_table.clone());
1506 let large_offset_table = self.slice(large_offset_table.clone());
1507 let crc_start = idx * 4;
1508 let raw_offset = u32_be(&small_offset_table[crc_start..crc_start + 4]);
1509 Some(PackIndexLookup {
1510 crc32: u32_be(&crc_table[crc_start..crc_start + 4]),
1511 offset: pack_index_v2_offset(raw_offset, large_offset_table).ok()?,
1512 })
1513 }
1514 }
1515 }
1516
    /// Borrows `range` from the underlying index bytes with the view's
    /// lifetime. Panics if `range` is out of bounds.
    fn slice(&self, range: Range<usize>) -> &'a [u8] {
        &self.bytes[range]
    }
1520}
1521
/// Owning counterpart of `PackIndexView`: keeps the index bytes alive behind
/// an `Arc` and stores the parsed header fields and table ranges next to them.
impl PackIndexViewData {
    /// Parses shared in-memory index bytes with checksum verification and
    /// entry validation enabled.
    pub fn parse(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
        Self::parse_source(Arc::new(SharedIndexBytes(bytes)), format)
    }

    /// Parses shared in-memory index bytes without recomputing the index
    /// checksum; entries are still validated.
    pub fn parse_without_checksum(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
        Self::parse_source_without_checksum(Arc::new(SharedIndexBytes(bytes)), format)
    }

    /// Parses shared in-memory index bytes with both checksum verification
    /// and entry validation disabled.
    pub fn parse_trusted_without_checksum(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
        Self::parse_trusted_source_without_checksum(Arc::new(SharedIndexBytes(bytes)), format)
    }

    /// Parses an arbitrary byte source with checksum verification and entry
    /// validation enabled.
    pub fn parse_source(bytes: Arc<dyn PackIndexByteSource>, format: ObjectFormat) -> Result<Self> {
        Self::parse_impl(bytes, format, true, true)
    }

    /// Parses an arbitrary byte source without recomputing the index
    /// checksum; entries are still validated.
    pub fn parse_source_without_checksum(
        bytes: Arc<dyn PackIndexByteSource>,
        format: ObjectFormat,
    ) -> Result<Self> {
        Self::parse_impl(bytes, format, false, true)
    }

    /// Parses an arbitrary byte source with both checksum verification and
    /// entry validation disabled.
    pub fn parse_trusted_source_without_checksum(
        bytes: Arc<dyn PackIndexByteSource>,
        format: ObjectFormat,
    ) -> Result<Self> {
        Self::parse_impl(bytes, format, false, false)
    }

    /// Returns the number of objects recorded in the index.
    pub fn count(&self) -> usize {
        self.count
    }

    /// Returns the 256-slot fanout table read from the index.
    pub fn fanout(&self) -> &[u32; 256] {
        &self.fanout
    }

    /// Looks up `oid` by delegating to a temporary borrowed view.
    pub fn find(&self, oid: &ObjectId) -> Option<PackIndexLookup> {
        self.as_view().find(oid)
    }

    /// Builds a borrowed `PackIndexView` over the stored bytes.
    ///
    /// No parsing happens here: the already-parsed fields are copied and the
    /// table ranges are cloned.
    pub fn as_view(&self) -> PackIndexView<'_> {
        PackIndexView {
            version: self.version,
            count: self.count,
            fanout: self.fanout,
            pack_checksum: self.pack_checksum,
            index_checksum: self.index_checksum,
            bytes: self.bytes.as_bytes(),
            format: self.format,
            tables: self.tables.clone(),
        }
    }

    /// Runs `PackIndexView::parse_impl` over the source's bytes and keeps the
    /// parsed fields together with the source itself.
    fn parse_impl(
        bytes: Arc<dyn PackIndexByteSource>,
        format: ObjectFormat,
        verify_checksum: bool,
        validate_entries: bool,
    ) -> Result<Self> {
        // The temporary view borrows `bytes`; copy the owned parts out inside
        // a nested scope so the borrow ends before `bytes` is moved into `Self`.
        let (version, count, fanout, pack_checksum, index_checksum, tables) = {
            let view = PackIndexView::parse_impl(
                bytes.as_bytes(),
                format,
                verify_checksum,
                validate_entries,
            )?;
            (
                view.version,
                view.count,
                view.fanout,
                view.pack_checksum,
                view.index_checksum,
                view.tables,
            )
        };
        Ok(Self {
            version,
            count,
            fanout,
            pack_checksum,
            index_checksum,
            bytes,
            format,
            tables,
        })
    }
}
1617
1618impl PackIndex {
    /// Builds a version-2 index for an in-memory SHA-1 pack.
    ///
    /// Shorthand for [`Self::write_v2_for_pack`] with [`ObjectFormat::Sha1`].
    pub fn write_v2_for_pack_sha1(pack_bytes: &[u8]) -> Result<PackIndexBuild> {
        Self::write_v2_for_pack(pack_bytes, ObjectFormat::Sha1)
    }
1622
1623 pub fn write_v2_for_pack(pack_bytes: &[u8], format: ObjectFormat) -> Result<PackIndexBuild> {
1624 let trailer_len = format.raw_len();
1625 if pack_bytes.len() < 12 + trailer_len {
1626 return Err(GitError::InvalidFormat("pack file too short".into()));
1627 }
1628 let trailer_offset = pack_bytes.len() - trailer_len;
1629 let pack_checksum = sley_core::digest_bytes(format, &pack_bytes[..trailer_offset])?;
1630 let expected = ObjectId::from_raw(format, &pack_bytes[trailer_offset..])?;
1631 if pack_checksum != expected {
1632 return Err(GitError::InvalidFormat(format!(
1633 "pack checksum mismatch: expected {expected}, got {pack_checksum}"
1634 )));
1635 }
1636
1637 if &pack_bytes[..4] != b"PACK" {
1638 return Err(GitError::InvalidFormat("missing PACK signature".into()));
1639 }
1640 let version = u32_be(&pack_bytes[4..8]);
1641 if version != 2 && version != 3 {
1642 return Err(GitError::Unsupported(format!("pack version {version}")));
1643 }
1644 let count = u32_be(&pack_bytes[8..12]) as usize;
1645 let mut offset = 12usize;
1646 let mut parsed_entries = Vec::with_capacity(count);
1647 let mut raw_entries = Vec::with_capacity(count);
1648 for _ in 0..count {
1649 let entry_offset = offset;
1650 let header = parse_entry_header(pack_bytes, &mut offset)?;
1651 let base = match header.kind {
1652 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
1653 pack_bytes,
1654 &mut offset,
1655 entry_offset as u64,
1656 )?)),
1657 PackObjectKind::RefDelta => {
1658 let hash_len = format.raw_len();
1659 if offset + hash_len > trailer_offset {
1660 return Err(GitError::InvalidFormat(
1661 "truncated ref-delta base object id".into(),
1662 ));
1663 }
1664 let oid = ObjectId::from_raw(format, &pack_bytes[offset..offset + hash_len])?;
1665 offset += hash_len;
1666 Some(DeltaBase::Ref(oid))
1667 }
1668 _ => None,
1669 };
1670 let mut body = Vec::new();
1671 let consumed = inflate_into(
1672 &pack_bytes[offset..trailer_offset],
1673 &mut body,
1674 header.size.min(usize::MAX as u64) as usize,
1675 )?;
1676 if body.len() as u64 != header.size {
1677 return Err(GitError::InvalidObject(format!(
1678 "pack object declared {} bytes, decoded {}",
1679 header.size,
1680 body.len()
1681 )));
1682 }
1683 if consumed == 0 {
1684 return Err(GitError::InvalidFormat(
1685 "empty compressed pack entry".into(),
1686 ));
1687 }
1688 offset = offset
1689 .checked_add(consumed)
1690 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
1691 if offset > trailer_offset {
1692 return Err(GitError::InvalidFormat(
1693 "pack entry extends past checksum".into(),
1694 ));
1695 }
1696 raw_entries.push((
1697 entry_offset as u64,
1698 crc32fast::hash(&pack_bytes[entry_offset..offset]),
1699 ));
1700 if let Some(base) = base {
1701 parsed_entries.push(ParsedPackEntry::Delta {
1702 base,
1703 compressed_size: consumed as u64,
1704 delta_size: header.size,
1705 offset: entry_offset as u64,
1706 delta: body,
1707 });
1708 } else {
1709 let object_type = match header.kind {
1710 PackObjectKind::Commit => ObjectType::Commit,
1711 PackObjectKind::Tree => ObjectType::Tree,
1712 PackObjectKind::Blob => ObjectType::Blob,
1713 PackObjectKind::Tag => ObjectType::Tag,
1714 PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
1715 };
1716 let object = EncodedObject::new(object_type, body);
1717 let oid = object.object_id(format)?;
1718 parsed_entries.push(ParsedPackEntry::Resolved(PackObject {
1719 entry: PackEntry {
1720 oid,
1721 compressed_size: consumed as u64,
1722 uncompressed_size: header.size,
1723 offset: entry_offset as u64,
1724 },
1725 object,
1726 }));
1727 }
1728 }
1729 if offset != trailer_offset {
1730 return Err(GitError::InvalidFormat(format!(
1731 "pack has {} trailing bytes before checksum",
1732 trailer_offset - offset
1733 )));
1734 }
1735
1736 let resolved = resolve_pack_entries(parsed_entries, format, &mut |_| Ok(None))?;
1737 let entries = resolved
1738 .iter()
1739 .zip(raw_entries)
1740 .map(|(object, (offset, crc32))| PackIndexEntry {
1741 oid: object.entry.oid,
1742 crc32,
1743 offset,
1744 })
1745 .collect::<Vec<_>>();
1746 let index = PackIndex::write_v2(format, &entries, &pack_checksum)?;
1747 Ok(PackIndexBuild {
1748 index,
1749 pack_checksum,
1750 entries,
1751 })
1752 }
1753
1754 pub fn write_v2_for_pack_reader<R>(
1761 reader: &mut R,
1762 format: ObjectFormat,
1763 ) -> Result<PackStreamIndexBuild>
1764 where
1765 R: Read + Seek,
1766 {
1767 let start = reader.stream_position()?;
1768 let end = reader.seek(SeekFrom::End(0))?;
1769 let pack_len = end
1770 .checked_sub(start)
1771 .ok_or_else(|| GitError::InvalidFormat("pack stream position overflow".into()))?;
1772 reader.seek(SeekFrom::Start(start))?;
1773 index_pack_from_reader(reader, format, pack_len)
1774 }
1775
    /// Builds a version-2 index for a pack read from a non-seekable source
    /// whose total length is not known in advance.
    ///
    /// Delegates to `index_pack_from_reader_to_trailer`, which streams the
    /// pack without being told its length.
    pub fn write_v2_for_pack_reader_to_trailer<R>(
        reader: &mut R,
        format: ObjectFormat,
    ) -> Result<PackStreamIndexBuild>
    where
        R: Read,
    {
        index_pack_from_reader_to_trailer(reader, format)
    }
1791
    /// Builds a version-2 index for a pack read from `reader` when the caller
    /// already knows the pack is exactly `pack_len` bytes long (trailer
    /// included), so no seeking is required.
    pub fn write_v2_for_pack_reader_with_len<R>(
        reader: &mut R,
        format: ObjectFormat,
        pack_len: u64,
    ) -> Result<PackStreamIndexBuild>
    where
        R: Read,
    {
        index_pack_from_reader(reader, format, pack_len)
    }
1802
    /// Opens the pack file at `path` and builds a version-2 index for it via
    /// [`Self::write_v2_for_pack_reader`].
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be opened, or with any error reported while
    /// indexing its contents.
    pub fn write_v2_for_pack_path(
        path: impl AsRef<Path>,
        format: ObjectFormat,
    ) -> Result<PackStreamIndexBuild> {
        let mut file = File::open(path)?;
        Self::write_v2_for_pack_reader(&mut file, format)
    }
1812
    /// Parses `bytes` as a pack index whose object ids use the SHA-1 format.
    ///
    /// Shorthand for [`Self::parse`] with [`ObjectFormat::Sha1`]. Despite the
    /// name, a buffer without the version-2 magic is parsed as version 1.
    pub fn parse_v2_sha1(bytes: &[u8]) -> Result<Self> {
        Self::parse(bytes, ObjectFormat::Sha1)
    }
1816
    /// Parses `bytes` into an owned index for `format`, verifying the
    /// trailing index checksum.
    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
        Self::parse_impl(bytes, format, true)
    }
1820
    /// Parses `bytes` into an owned index for `format` without recomputing
    /// the trailing index checksum.
    pub fn parse_without_checksum(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
        Self::parse_impl(bytes, format, false)
    }
1824
1825 fn parse_impl(bytes: &[u8], format: ObjectFormat, verify_checksum: bool) -> Result<Self> {
1826 let hash_len = format.raw_len();
1827 if bytes.len() < 4 {
1828 return Err(GitError::InvalidFormat("pack index too short".into()));
1829 }
1830 if bytes[..4] != [0xff, b't', b'O', b'c'] {
1831 return Self::parse_v1_impl(bytes, format, verify_checksum);
1832 }
1833 if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
1834 return Err(GitError::InvalidFormat("pack index too short".into()));
1835 }
1836 let version = u32_be(&bytes[4..8]);
1837 if version != 2 {
1838 return Err(GitError::Unsupported(format!(
1839 "pack index version {version}"
1840 )));
1841 }
1842 let index_checksum_offset = bytes.len() - hash_len;
1843 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1844 if verify_checksum {
1845 let actual_index_checksum =
1846 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1847 if actual_index_checksum != index_checksum {
1848 return Err(GitError::InvalidFormat(format!(
1849 "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1850 )));
1851 }
1852 }
1853
1854 let mut offset = 8usize;
1855 let mut fanout = [0u32; 256];
1856 let mut previous = 0u32;
1857 for slot in &mut fanout {
1858 *slot = u32_be(&bytes[offset..offset + 4]);
1859 if *slot < previous {
1860 return Err(GitError::InvalidFormat(
1861 "pack index fanout is not monotonic".into(),
1862 ));
1863 }
1864 previous = *slot;
1865 offset += 4;
1866 }
1867 let count = fanout[255] as usize;
1868 let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
1869 offset = oid_table.end;
1870 let crc_table = checked_range(offset, count, 4, bytes.len())?;
1871 offset = crc_table.end;
1872 let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
1873 offset = small_offset_table.end;
1874
1875 let large_offset_count = (0..count)
1876 .filter(|idx| {
1877 let start = small_offset_table.start + idx * 4;
1878 u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
1879 })
1880 .count();
1881 let mut large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
1882 offset = large_offset_table.end;
1883
1884 let expected_trailer_offset = bytes.len() - hash_len * 2;
1885 if offset != expected_trailer_offset {
1886 if !verify_checksum && offset < expected_trailer_offset {
1887 large_offset_table = large_offset_table.start..expected_trailer_offset;
1888 offset = expected_trailer_offset;
1889 } else {
1890 return Err(GitError::InvalidFormat(format!(
1891 "pack index has {} unexpected bytes before trailer",
1892 expected_trailer_offset.saturating_sub(offset)
1893 )));
1894 }
1895 }
1896 let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1897
1898 let mut entries = Vec::with_capacity(count);
1899 for idx in 0..count {
1900 let oid_start = oid_table.start + idx * hash_len;
1901 let crc_start = crc_table.start + idx * 4;
1902 let offset_start = small_offset_table.start + idx * 4;
1903 let oid_bytes = &bytes[oid_start..oid_start + hash_len];
1904 if idx > 0 && oid_bytes <= &bytes[oid_start - hash_len..oid_start] {
1908 return Err(GitError::InvalidFormat(
1909 "pack index object ids are not strictly ascending".into(),
1910 ));
1911 }
1912 let expected_min = if oid_bytes[0] == 0 {
1913 0
1914 } else {
1915 fanout[usize::from(oid_bytes[0] - 1)]
1916 };
1917 if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
1918 return Err(GitError::InvalidFormat(
1919 "pack index object id is outside its fanout bucket".into(),
1920 ));
1921 }
1922 let raw_offset = u32_be(&bytes[offset_start..offset_start + 4]);
1923 let offset = if raw_offset & 0x8000_0000 == 0 {
1924 u64::from(raw_offset)
1925 } else {
1926 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
1927 let large_start = large_offset_table.start + large_idx * 8;
1928 if large_idx >= large_offset_table.len() / 8 {
1929 return Err(GitError::InvalidFormat(
1930 "pack index large offset points past table".into(),
1931 ));
1932 }
1933 u64_be(&bytes[large_start..large_start + 8])
1934 };
1935 entries.push(PackIndexEntry {
1936 oid: ObjectId::from_raw(format, oid_bytes)?,
1937 crc32: u32_be(&bytes[crc_start..crc_start + 4]),
1938 offset,
1939 });
1940 }
1941 Ok(Self {
1942 version,
1943 fanout,
1944 entries,
1945 pack_checksum,
1946 index_checksum,
1947 })
1948 }
1949
1950 fn parse_v1_impl(bytes: &[u8], format: ObjectFormat, verify_checksum: bool) -> Result<Self> {
1951 let hash_len = format.raw_len();
1952 if bytes.len() < 256 * 4 + 2 * hash_len {
1953 return Err(GitError::InvalidFormat("pack index too short".into()));
1954 }
1955 let index_checksum_offset = bytes.len() - hash_len;
1956 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1957 if verify_checksum {
1958 let actual_index_checksum =
1959 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1960 if actual_index_checksum != index_checksum {
1961 return Err(GitError::InvalidFormat(format!(
1962 "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1963 )));
1964 }
1965 }
1966
1967 let mut offset = 0usize;
1968 let mut fanout = [0u32; 256];
1969 let mut previous = 0u32;
1970 for slot in &mut fanout {
1971 *slot = u32_be(&bytes[offset..offset + 4]);
1972 if *slot < previous {
1973 return Err(GitError::InvalidFormat(
1974 "pack index fanout is not monotonic".into(),
1975 ));
1976 }
1977 previous = *slot;
1978 offset += 4;
1979 }
1980 let count = fanout[255] as usize;
1981 let entry_len = hash_len
1982 .checked_add(4)
1983 .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1984 let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
1985 offset = entry_table.end;
1986 let expected_trailer_offset = bytes.len() - hash_len * 2;
1987 if offset != expected_trailer_offset {
1988 return Err(GitError::InvalidFormat(format!(
1989 "pack index has {} unexpected bytes before trailer",
1990 expected_trailer_offset.saturating_sub(offset)
1991 )));
1992 }
1993 let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1994
1995 let mut entries = Vec::with_capacity(count);
1996 let mut previous_oid: Option<ObjectId> = None;
1997 for idx in 0..count {
1998 let start = entry_table.start + idx * entry_len;
1999 let oid = ObjectId::from_raw(format, &bytes[start + 4..start + entry_len])?;
2000 if let Some(previous) = &previous_oid
2001 && previous.as_bytes() >= oid.as_bytes()
2002 {
2003 return Err(GitError::InvalidFormat(
2004 "pack index object ids are not strictly sorted".into(),
2005 ));
2006 }
2007 previous_oid = Some(oid);
2008 entries.push(PackIndexEntry {
2009 oid,
2010 crc32: 0,
2011 offset: u64::from(u32_be(&bytes[start..start + 4])),
2012 });
2013 }
2014 Ok(Self {
2015 version: 1,
2016 fanout,
2017 entries,
2018 pack_checksum,
2019 index_checksum,
2020 })
2021 }
2022
2023 pub fn find(&self, oid: &ObjectId) -> Option<&PackIndexEntry> {
2024 self.entries
2025 .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
2026 .ok()
2027 .map(|idx| &self.entries[idx])
2028 }
2029
    /// Serializes a version-2 index for SHA-1 object ids.
    ///
    /// Shorthand for [`Self::write_v2`] with [`ObjectFormat::Sha1`].
    pub fn write_v2_sha1(entries: &[PackIndexEntry], pack_checksum: &ObjectId) -> Result<Vec<u8>> {
        Self::write_v2(ObjectFormat::Sha1, entries, pack_checksum)
    }
2033
2034 pub fn write_v2(
2035 format: ObjectFormat,
2036 entries: &[PackIndexEntry],
2037 pack_checksum: &ObjectId,
2038 ) -> Result<Vec<u8>> {
2039 if pack_checksum.format() != format {
2040 return Err(GitError::InvalidObjectId(
2041 "pack checksum format does not match index format".into(),
2042 ));
2043 }
2044 let mut entries = entries.iter().collect::<Vec<_>>();
2045 entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
2046 for pair in entries.windows(2) {
2047 if pair[0].oid.as_bytes() == pair[1].oid.as_bytes() {
2048 return Err(GitError::InvalidFormat(format!(
2049 "pack index contains duplicate object id {}",
2050 pair[0].oid
2051 )));
2052 }
2053 }
2054 let mut fanout = [0u32; 256];
2055 for entry in &entries {
2056 if entry.oid.format() != format {
2057 return Err(GitError::InvalidObjectId(
2058 "pack index entry format does not match index format".into(),
2059 ));
2060 }
2061 let first = entry.oid.as_bytes()[0] as usize;
2062 fanout[first] = fanout[first]
2063 .checked_add(1)
2064 .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
2065 }
2066 let mut running = 0u32;
2067 for slot in &mut fanout {
2068 running = running
2069 .checked_add(*slot)
2070 .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
2071 *slot = running;
2072 }
2073
2074 let mut index = Vec::new();
2075 index.extend_from_slice(&[0xff, b't', b'O', b'c']);
2076 index.extend_from_slice(&2u32.to_be_bytes());
2077 for count in fanout {
2078 index.extend_from_slice(&count.to_be_bytes());
2079 }
2080 for entry in &entries {
2081 index.extend_from_slice(entry.oid.as_bytes());
2082 }
2083 for entry in &entries {
2084 index.extend_from_slice(&entry.crc32.to_be_bytes());
2085 }
2086
2087 let mut large_offsets = Vec::new();
2088 for entry in &entries {
2089 if entry.offset < 0x8000_0000 {
2090 index.extend_from_slice(&(entry.offset as u32).to_be_bytes());
2091 } else {
2092 if large_offsets.len() > 0x7fff_ffff {
2093 return Err(GitError::InvalidFormat(
2094 "too many large pack offsets".into(),
2095 ));
2096 }
2097 let large_idx = large_offsets.len() as u32;
2098 index.extend_from_slice(&(0x8000_0000 | large_idx).to_be_bytes());
2099 large_offsets.push(entry.offset);
2100 }
2101 }
2102 for offset in large_offsets {
2103 index.extend_from_slice(&offset.to_be_bytes());
2104 }
2105 index.extend_from_slice(pack_checksum.as_bytes());
2106 let index_checksum = sley_core::digest_bytes(format, &index)?;
2107 index.extend_from_slice(index_checksum.as_bytes());
2108 Ok(index)
2109 }
2110
2111 pub fn write_v1(
2117 format: ObjectFormat,
2118 entries: &[PackIndexEntry],
2119 pack_checksum: &ObjectId,
2120 ) -> Result<Vec<u8>> {
2121 if pack_checksum.format() != format {
2122 return Err(GitError::InvalidObjectId(
2123 "pack checksum format does not match index format".into(),
2124 ));
2125 }
2126 let mut entries = entries.iter().collect::<Vec<_>>();
2127 entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
2128 for pair in entries.windows(2) {
2129 if pair[0].oid.as_bytes() == pair[1].oid.as_bytes() {
2130 return Err(GitError::InvalidFormat(format!(
2131 "pack index contains duplicate object id {}",
2132 pair[0].oid
2133 )));
2134 }
2135 }
2136 let mut fanout = [0u32; 256];
2137 for entry in &entries {
2138 if entry.oid.format() != format {
2139 return Err(GitError::InvalidObjectId(
2140 "pack index entry format does not match index format".into(),
2141 ));
2142 }
2143 if entry.offset > 0xffff_ffff {
2144 return Err(GitError::InvalidFormat(
2145 "pack offset too large for a version-1 index".into(),
2146 ));
2147 }
2148 let first = entry.oid.as_bytes()[0] as usize;
2149 fanout[first] = fanout[first]
2150 .checked_add(1)
2151 .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
2152 }
2153 let mut running = 0u32;
2154 for slot in &mut fanout {
2155 running = running
2156 .checked_add(*slot)
2157 .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
2158 *slot = running;
2159 }
2160
2161 let mut index = Vec::new();
2162 for count in fanout {
2163 index.extend_from_slice(&count.to_be_bytes());
2164 }
2165 for entry in &entries {
2166 index.extend_from_slice(&(entry.offset as u32).to_be_bytes());
2167 index.extend_from_slice(entry.oid.as_bytes());
2168 }
2169 index.extend_from_slice(pack_checksum.as_bytes());
2170 let index_checksum = sley_core::digest_bytes(format, &index)?;
2171 index.extend_from_slice(index_checksum.as_bytes());
2172 Ok(index)
2173 }
2174}
2175
2176fn index_pack_from_reader<R>(
2177 reader: &mut R,
2178 format: ObjectFormat,
2179 pack_len: u64,
2180) -> Result<PackStreamIndexBuild>
2181where
2182 R: Read,
2183{
2184 index_pack_from_stream(PackReadStream::new(reader, format, Some(pack_len))?, format)
2185}
2186
2187fn index_pack_from_reader_to_trailer<R>(
2188 reader: &mut R,
2189 format: ObjectFormat,
2190) -> Result<PackStreamIndexBuild>
2191where
2192 R: Read,
2193{
2194 index_pack_from_stream(PackReadStream::new(reader, format, None)?, format)
2195}
2196
2197fn index_pack_from_stream<R>(
2198 mut stream: PackReadStream<'_, R>,
2199 format: ObjectFormat,
2200) -> Result<PackStreamIndexBuild>
2201where
2202 R: Read,
2203{
2204 let mut header = [0u8; 12];
2205 stream.read_pack_bytes(&mut header)?;
2206 if &header[..4] != b"PACK" {
2207 return Err(GitError::InvalidFormat("missing PACK signature".into()));
2208 }
2209 let version = u32_be(&header[4..8]);
2210 if version != 2 && version != 3 {
2211 return Err(GitError::Unsupported(format!("pack version {version}")));
2212 }
2213 let count = u32_be(&header[8..12]) as usize;
2214 let mut parsed_entries = Vec::with_capacity(count);
2215 let mut raw_entries = Vec::with_capacity(count);
2216 for _ in 0..count {
2217 let entry_offset = stream.pack_offset();
2218 let mut entry_crc = crc32fast::Hasher::new();
2219 let header = parse_entry_header_from_stream(&mut stream, &mut entry_crc)?;
2220 let base = match header.kind {
2221 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
2222 parse_ofs_delta_base_offset_from_stream(&mut stream, &mut entry_crc, entry_offset)?,
2223 )),
2224 PackObjectKind::RefDelta => {
2225 let mut raw = vec![0u8; format.raw_len()];
2226 stream.read_entry_bytes(&mut raw, &mut entry_crc)?;
2227 Some(DeltaBase::Ref(ObjectId::from_raw(format, &raw)?))
2228 }
2229 _ => None,
2230 };
2231 let (body, consumed) = inflate_entry_from_stream(
2232 &mut stream,
2233 &mut entry_crc,
2234 header.size.min(usize::MAX as u64) as usize,
2235 )?;
2236 if body.len() as u64 != header.size {
2237 return Err(GitError::InvalidObject(format!(
2238 "pack object declared {} bytes, decoded {}",
2239 header.size,
2240 body.len()
2241 )));
2242 }
2243 if consumed == 0 {
2244 return Err(GitError::InvalidFormat(
2245 "empty compressed pack entry".into(),
2246 ));
2247 }
2248 raw_entries.push((entry_offset, entry_crc.finalize()));
2249 if let Some(base) = base {
2250 parsed_entries.push(ParsedPackEntry::Delta {
2251 base,
2252 compressed_size: consumed as u64,
2253 delta_size: header.size,
2254 offset: entry_offset,
2255 delta: body,
2256 });
2257 } else {
2258 let object_type = pack_object_kind_to_object_type(header.kind)?;
2259 let object = EncodedObject::new(object_type, body);
2260 let oid = object.object_id(format)?;
2261 parsed_entries.push(ParsedPackEntry::Resolved(PackObject {
2262 entry: PackEntry {
2263 oid,
2264 compressed_size: consumed as u64,
2265 uncompressed_size: header.size,
2266 offset: entry_offset,
2267 },
2268 object,
2269 }));
2270 }
2271 }
2272 if stream.pack_offset() != stream.trailer_pack_offset() {
2273 return Err(GitError::InvalidFormat(format!(
2274 "pack has {} trailing bytes before checksum",
2275 stream.trailer_pack_offset() - stream.pack_offset()
2276 )));
2277 }
2278 let expected = stream.read_trailer_oid()?;
2279 let pack_checksum = stream.finish_digest()?;
2280 if pack_checksum != expected {
2281 return Err(GitError::InvalidFormat(format!(
2282 "pack checksum mismatch: expected {expected}, got {pack_checksum}"
2283 )));
2284 }
2285
2286 let resolved = resolve_pack_entries(parsed_entries, format, &mut |_| Ok(None))?;
2287 let entries = resolved
2288 .iter()
2289 .zip(raw_entries)
2290 .map(|(object, (offset, crc32))| PackIndexEntry {
2291 oid: object.entry.oid,
2292 crc32,
2293 offset,
2294 })
2295 .collect::<Vec<_>>();
2296 let objects = resolved
2297 .iter()
2298 .map(|object| PackIndexedObject {
2299 oid: object.entry.oid,
2300 object_type: object.object.object_type,
2301 size: object.object.body.len() as u64,
2302 offset: object.entry.offset,
2303 })
2304 .collect::<Vec<_>>();
2305 let index = PackIndex::write_v2(format, &entries, &pack_checksum)?;
2306 Ok(PackStreamIndexBuild {
2307 index,
2308 pack_checksum,
2309 entries,
2310 objects,
2311 })
2312}
2313
2314fn pack_object_kind_to_object_type(kind: PackObjectKind) -> Result<ObjectType> {
2315 match kind {
2316 PackObjectKind::Commit => Ok(ObjectType::Commit),
2317 PackObjectKind::Tree => Ok(ObjectType::Tree),
2318 PackObjectKind::Blob => Ok(ObjectType::Blob),
2319 PackObjectKind::Tag => Ok(ObjectType::Tag),
2320 PackObjectKind::OfsDelta | PackObjectKind::RefDelta => Err(GitError::InvalidFormat(
2321 "delta entry cannot be used as an object type".into(),
2322 )),
2323 }
2324}
2325
/// Sequential reader over a pack byte stream that tracks the current pack
/// offset and accumulates the running pack digest.
struct PackReadStream<'a, R> {
    /// Underlying byte source.
    reader: &'a mut R,
    /// Pack offset of the next byte to hand out; moved back again when
    /// over-read compressed bytes are pushed back.
    position: u64,
    /// Total pack length in bytes, when the caller knows it up front.
    pack_len: Option<u64>,
    /// Offset at which the trailing checksum starts (`pack_len` minus the
    /// hash length); `None` when `pack_len` is unknown.
    trailer_position: Option<u64>,
    /// Running digest fed with header/entry bytes and accepted compressed bytes.
    digest: StreamingDigest,
    /// Object format; determines the length of the trailing checksum.
    format: ObjectFormat,
    /// Bytes already pulled from `reader` but handed back by the inflater;
    /// they are served before any further read from `reader`.
    pending: VecDeque<u8>,
}
2335
2336impl<'a, R> PackReadStream<'a, R>
2337where
2338 R: Read,
2339{
2340 fn new(reader: &'a mut R, format: ObjectFormat, pack_len: Option<u64>) -> Result<Self> {
2341 let trailer_len = format.raw_len() as u64;
2342 let trailer_position = pack_len
2343 .map(|pack_len| {
2344 if pack_len < 12 + trailer_len {
2345 return Err(GitError::InvalidFormat("pack file too short".into()));
2346 }
2347 Ok(pack_len - trailer_len)
2348 })
2349 .transpose()?;
2350 Ok(Self {
2351 reader,
2352 position: 0,
2353 pack_len,
2354 trailer_position,
2355 digest: StreamingDigest::new(format),
2356 format,
2357 pending: VecDeque::new(),
2358 })
2359 }
2360
2361 fn pack_offset(&self) -> u64 {
2362 self.position
2363 }
2364
2365 fn trailer_pack_offset(&self) -> u64 {
2366 self.trailer_position.unwrap_or(self.position)
2367 }
2368
2369 fn read_pack_bytes(&mut self, bytes: &mut [u8]) -> Result<()> {
2370 let end = self
2371 .position
2372 .checked_add(bytes.len() as u64)
2373 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
2374 if self
2375 .trailer_position
2376 .is_some_and(|trailer_position| end > trailer_position)
2377 {
2378 return Err(GitError::InvalidFormat(
2379 "pack entry extends past checksum".into(),
2380 ));
2381 }
2382 self.read_exact_raw(bytes)?;
2383 self.position = end;
2384 self.digest.update(bytes);
2385 Ok(())
2386 }
2387
2388 fn read_exact_raw(&mut self, bytes: &mut [u8]) -> Result<()> {
2389 let mut written = 0usize;
2390 while written < bytes.len() {
2391 if let Some(byte) = self.pending.pop_front() {
2392 bytes[written] = byte;
2393 written += 1;
2394 continue;
2395 }
2396 self.reader.read_exact(&mut bytes[written..])?;
2397 break;
2398 }
2399 Ok(())
2400 }
2401
2402 fn read_entry_bytes(&mut self, bytes: &mut [u8], crc: &mut crc32fast::Hasher) -> Result<()> {
2403 self.read_pack_bytes(bytes)?;
2404 crc.update(bytes);
2405 Ok(())
2406 }
2407
2408 fn read_entry_byte(&mut self, crc: &mut crc32fast::Hasher) -> Result<u8> {
2409 let mut byte = [0u8; 1];
2410 self.read_entry_bytes(&mut byte, crc)?;
2411 Ok(byte[0])
2412 }
2413
2414 fn read_compressed_chunk(&mut self, bytes: &mut [u8]) -> Result<usize> {
2415 let len = if let Some(trailer_position) = self.trailer_position {
2416 if self.position >= trailer_position {
2417 return Ok(0);
2418 }
2419 let remaining = trailer_position - self.position;
2420 if remaining < bytes.len() as u64 {
2421 remaining as usize
2422 } else {
2423 bytes.len()
2424 }
2425 } else {
2426 bytes.len()
2427 };
2428 let mut read = 0usize;
2429 while read < len {
2430 let Some(byte) = self.pending.pop_front() else {
2431 break;
2432 };
2433 bytes[read] = byte;
2434 read += 1;
2435 }
2436 if read < len {
2437 read += self.reader.read(&mut bytes[read..len])?;
2438 }
2439 self.position = self
2440 .position
2441 .checked_add(read as u64)
2442 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
2443 Ok(read)
2444 }
2445
2446 fn accept_compressed_bytes(&mut self, bytes: &[u8], crc: &mut crc32fast::Hasher) {
2447 self.digest.update(bytes);
2448 crc.update(bytes);
2449 }
2450
2451 fn push_back_compressed_bytes(&mut self, bytes: &[u8]) -> Result<()> {
2452 if bytes.is_empty() {
2453 return Ok(());
2454 }
2455 self.position = self
2456 .position
2457 .checked_sub(bytes.len() as u64)
2458 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
2459 for byte in bytes.iter().rev() {
2460 self.pending.push_front(*byte);
2461 }
2462 Ok(())
2463 }
2464
2465 fn read_trailer_oid(&mut self) -> Result<ObjectId> {
2466 let mut raw = vec![0u8; self.format.raw_len()];
2467 self.read_exact_raw(&mut raw)?;
2468 self.position = self
2469 .position
2470 .checked_add(raw.len() as u64)
2471 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
2472 if let Some(pack_len) = self.pack_len
2473 && self.position != pack_len
2474 {
2475 return Err(GitError::InvalidFormat(format!(
2476 "pack has {} trailing bytes after checksum",
2477 pack_len - self.position
2478 )));
2479 }
2480 if self.pack_len.is_none() && !self.pending.is_empty() {
2481 return Err(GitError::InvalidFormat(
2482 "pack has trailing bytes after checksum".into(),
2483 ));
2484 }
2485 ObjectId::from_raw(self.format, &raw)
2486 }
2487
2488 fn finish_digest(self) -> Result<ObjectId> {
2489 self.digest.finalize()
2490 }
2491}
2492
/// Size of the scratch buffer used to pull compressed input from the stream.
const STREAM_INFLATE_CHUNK: usize = 32 * 1024;

/// Inflates one zlib stream read from `stream` and returns the decompressed
/// bytes together with the number of compressed bytes the stream occupied.
///
/// Only the compressed bytes the decompressor actually consumed are fed into
/// the pack digest and `crc`; any bytes read past the end of the zlib stream
/// are pushed back onto `stream`. `size_hint` only influences the initial
/// output reservation.
///
/// # Errors
/// Returns `GitError::InvalidObject` when input runs out before the zlib
/// stream ends, when the decompressor stops making progress, or when zlib
/// reports an error.
fn inflate_entry_from_stream<R>(
    stream: &mut PackReadStream<'_, R>,
    crc: &mut crc32fast::Hasher,
    size_hint: usize,
) -> Result<(Vec<u8>, usize)>
where
    R: Read,
{
    // NOTE(review): `INFLATE` is defined elsewhere in the file; presumably a
    // thread-local decompressor reused across entries — confirm.
    INFLATE.with(|cell| {
        let mut decompress = cell.borrow_mut();
        decompress.reset(true);
        let mut out = Vec::with_capacity(bounded_inflate_reserve(size_hint, STREAM_INFLATE_CHUNK));
        let mut compressed_total = 0usize;
        let mut input = [0u8; STREAM_INFLATE_CHUNK];
        loop {
            let read = stream.read_compressed_chunk(&mut input)?;
            if read == 0 {
                // No more input but the zlib stream has not ended yet.
                return Err(GitError::InvalidObject("truncated zlib stream".into()));
            }
            let mut cursor = 0usize;
            while cursor < read {
                // `decompress_vec` writes into spare capacity only, so make
                // sure there is some before every call.
                if out.len() == out.capacity() {
                    out.reserve(out.len().max(64));
                }
                let before_in = decompress.total_in();
                let before_out = decompress.total_out();
                let status = decompress
                    .decompress_vec(
                        &input[cursor..read],
                        &mut out,
                        flate2::FlushDecompress::None,
                    )
                    .map_err(|err| {
                        GitError::InvalidObject(format!("zlib inflate failed: {err}"))
                    })?;
                let consumed = (decompress.total_in() - before_in) as usize;
                let produced = decompress.total_out() - before_out;
                if consumed > 0 {
                    // Account only for the bytes zlib really used.
                    let consumed_end = cursor + consumed;
                    stream.accept_compressed_bytes(&input[cursor..consumed_end], crc);
                    compressed_total = compressed_total
                        .checked_add(consumed)
                        .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
                    cursor = consumed_end;
                }
                match status {
                    flate2::Status::StreamEnd => {
                        // Whatever follows the zlib stream is handed back to the stream.
                        stream.push_back_compressed_bytes(&input[cursor..read])?;
                        return Ok((out, compressed_total));
                    }
                    // No input consumed and no output produced: the
                    // decompressor is stuck, so bail out instead of spinning.
                    _ if consumed == 0 && produced == 0 => {
                        return Err(GitError::InvalidObject("truncated zlib stream".into()));
                    }
                    _ => {}
                }
            }
        }
    })
}
2554
2555fn parse_entry_header_from_stream<R>(
2556 stream: &mut PackReadStream<'_, R>,
2557 crc: &mut crc32fast::Hasher,
2558) -> Result<EntryHeader>
2559where
2560 R: Read,
2561{
2562 let first = stream.read_entry_byte(crc)?;
2563 let mut size = u64::from(first & 0x0f);
2564 let kind = match (first >> 4) & 0x07 {
2565 1 => PackObjectKind::Commit,
2566 2 => PackObjectKind::Tree,
2567 3 => PackObjectKind::Blob,
2568 4 => PackObjectKind::Tag,
2569 6 => PackObjectKind::OfsDelta,
2570 7 => PackObjectKind::RefDelta,
2571 other => {
2572 return Err(GitError::InvalidFormat(format!(
2573 "invalid pack object type {other}"
2574 )));
2575 }
2576 };
2577 let mut shift = 4;
2578 let mut byte = first;
2579 while byte & 0x80 != 0 {
2580 byte = stream.read_entry_byte(crc)?;
2581 let part = u64::from(byte & 0x7f);
2582 size = size
2583 .checked_add(
2584 part.checked_shl(shift)
2585 .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?,
2586 )
2587 .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?;
2588 shift += 7;
2589 }
2590 Ok(EntryHeader { kind, size })
2591}
2592
2593fn parse_ofs_delta_base_offset_from_stream<R>(
2594 stream: &mut PackReadStream<'_, R>,
2595 crc: &mut crc32fast::Hasher,
2596 entry_offset: u64,
2597) -> Result<u64>
2598where
2599 R: Read,
2600{
2601 let mut byte = stream.read_entry_byte(crc)?;
2602 let mut relative = u64::from(byte & 0x7f);
2603 while byte & 0x80 != 0 {
2604 byte = stream.read_entry_byte(crc)?;
2605 relative = relative
2606 .checked_add(1)
2607 .and_then(|value| value.checked_shl(7))
2608 .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
2609 .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
2610 }
2611 entry_offset
2612 .checked_sub(relative)
2613 .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
2614}
2615
2616pub fn pack_order_index_positions(entries: &[PackIndexEntry]) -> Vec<u32> {
2621 let mut oid_sorted: Vec<usize> = (0..entries.len()).collect();
2622 oid_sorted.sort_by(|&a, &b| entries[a].oid.as_bytes().cmp(entries[b].oid.as_bytes()));
2623 let mut index_position = vec![0u32; entries.len()];
2624 for (position, &entry) in oid_sorted.iter().enumerate() {
2625 index_position[entry] = position as u32;
2626 }
2627 let mut by_offset: Vec<usize> = (0..entries.len()).collect();
2628 by_offset.sort_by_key(|&entry| entries[entry].offset);
2629 by_offset
2630 .into_iter()
2631 .map(|entry| index_position[entry])
2632 .collect()
2633}
2634
2635impl PackReverseIndex {
2636 pub fn write(
2637 format: ObjectFormat,
2638 positions: &[u32],
2639 pack_checksum: &ObjectId,
2640 ) -> Result<Vec<u8>> {
2641 if pack_checksum.format() != format {
2642 return Err(GitError::InvalidObjectId(
2643 "pack checksum format does not match reverse index format".into(),
2644 ));
2645 }
2646 validate_position_permutation(positions)?;
2647
2648 let mut out = Vec::new();
2649 out.extend_from_slice(b"RIDX");
2650 out.extend_from_slice(&1u32.to_be_bytes());
2651 out.extend_from_slice(&hash_function_id(format).to_be_bytes());
2652 for position in positions {
2653 out.extend_from_slice(&position.to_be_bytes());
2654 }
2655 out.extend_from_slice(pack_checksum.as_bytes());
2656 let checksum = sley_core::digest_bytes(format, &out)?;
2657 out.extend_from_slice(checksum.as_bytes());
2658 Ok(out)
2659 }
2660
2661 pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
2662 let hash_len = format.raw_len();
2663 let table_len = object_count
2664 .checked_mul(4)
2665 .ok_or_else(|| GitError::InvalidFormat("reverse index table overflow".into()))?;
2666 let min_len = 12usize
2667 .checked_add(table_len)
2668 .and_then(|len| len.checked_add(hash_len * 2))
2669 .ok_or_else(|| GitError::InvalidFormat("reverse index length overflow".into()))?;
2670 if bytes.len() < min_len {
2671 return Err(GitError::InvalidFormat("reverse index too short".into()));
2672 }
2673 if bytes.len() != min_len {
2674 return Err(GitError::InvalidFormat(format!(
2675 "reverse index has {} trailing bytes",
2676 bytes.len() - min_len
2677 )));
2678 }
2679 if &bytes[..4] != b"RIDX" {
2680 return Err(GitError::InvalidFormat(
2681 "missing reverse index signature".into(),
2682 ));
2683 }
2684 let version = u32_be(&bytes[4..8]);
2685 if version != 1 {
2686 return Err(GitError::Unsupported(format!(
2687 "reverse index version {version}"
2688 )));
2689 }
2690 let hash_id = u32_be(&bytes[8..12]);
2691 if hash_id != hash_function_id(format) {
2692 return Err(GitError::InvalidFormat(format!(
2693 "reverse index hash id {hash_id} does not match {}",
2694 format.name()
2695 )));
2696 }
2697
2698 let index_checksum_offset = bytes.len() - hash_len;
2699 let actual_index_checksum =
2700 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
2701 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
2702 if actual_index_checksum != index_checksum {
2703 return Err(GitError::InvalidFormat(format!(
2704 "reverse index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2705 )));
2706 }
2707
2708 let pack_checksum_offset = index_checksum_offset - hash_len;
2709 let pack_checksum =
2710 ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
2711 let mut positions = Vec::with_capacity(object_count);
2712 let mut offset = 12usize;
2713 for _ in 0..object_count {
2714 let position = u32_be(&bytes[offset..offset + 4]);
2715 positions.push(position);
2716 offset += 4;
2717 }
2718 validate_position_permutation(&positions)?;
2719
2720 Ok(Self {
2721 version,
2722 format,
2723 positions,
2724 pack_checksum,
2725 index_checksum,
2726 })
2727 }
2728}
2729
2730impl PackMtimes {
2731 pub fn write(
2732 format: ObjectFormat,
2733 mtimes: &[u32],
2734 pack_checksum: &ObjectId,
2735 ) -> Result<Vec<u8>> {
2736 if pack_checksum.format() != format {
2737 return Err(GitError::InvalidObjectId(
2738 "pack checksum format does not match mtimes format".into(),
2739 ));
2740 }
2741
2742 let mut out = Vec::new();
2743 out.extend_from_slice(b"MTME");
2744 out.extend_from_slice(&1u32.to_be_bytes());
2745 out.extend_from_slice(&hash_function_id(format).to_be_bytes());
2746 for mtime in mtimes {
2747 out.extend_from_slice(&mtime.to_be_bytes());
2748 }
2749 out.extend_from_slice(pack_checksum.as_bytes());
2750 let checksum = sley_core::digest_bytes(format, &out)?;
2751 out.extend_from_slice(checksum.as_bytes());
2752 Ok(out)
2753 }
2754
2755 pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
2756 let hash_len = format.raw_len();
2757 let table_len = object_count
2758 .checked_mul(4)
2759 .ok_or_else(|| GitError::InvalidFormat("mtimes table overflow".into()))?;
2760 let expected_len = 12usize
2761 .checked_add(table_len)
2762 .and_then(|len| len.checked_add(hash_len * 2))
2763 .ok_or_else(|| GitError::InvalidFormat("mtimes length overflow".into()))?;
2764 if bytes.len() < expected_len {
2765 return Err(GitError::InvalidFormat("mtimes file too short".into()));
2766 }
2767 if bytes.len() != expected_len {
2768 return Err(GitError::InvalidFormat(format!(
2769 "mtimes file has {} trailing bytes",
2770 bytes.len() - expected_len
2771 )));
2772 }
2773 if &bytes[..4] != b"MTME" {
2774 return Err(GitError::InvalidFormat("missing mtimes signature".into()));
2775 }
2776 let version = u32_be(&bytes[4..8]);
2777 if version != 1 {
2778 return Err(GitError::Unsupported(format!("mtimes version {version}")));
2779 }
2780 let hash_id = u32_be(&bytes[8..12]);
2781 if hash_id != hash_function_id(format) {
2782 return Err(GitError::InvalidFormat(format!(
2783 "mtimes hash id {hash_id} does not match {}",
2784 format.name()
2785 )));
2786 }
2787
2788 let index_checksum_offset = bytes.len() - hash_len;
2789 let actual_index_checksum =
2790 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
2791 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
2792 if actual_index_checksum != index_checksum {
2793 return Err(GitError::InvalidFormat(format!(
2794 "mtimes checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2795 )));
2796 }
2797
2798 let pack_checksum_offset = index_checksum_offset - hash_len;
2799 let pack_checksum =
2800 ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
2801 let mut mtimes = Vec::with_capacity(object_count);
2802 let mut offset = 12usize;
2803 for _ in 0..object_count {
2804 mtimes.push(u32_be(&bytes[offset..offset + 4]));
2805 offset += 4;
2806 }
2807
2808 Ok(Self {
2809 version,
2810 format,
2811 mtimes,
2812 pack_checksum,
2813 index_checksum,
2814 })
2815 }
2816}
2817
2818impl PackBitmapIndex {
2819 pub const OPTION_FULL_DAG: u16 = 0x0001;
2820 pub const OPTION_HASH_CACHE: u16 = 0x0004;
2821
2822 pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
2823 let hash_len = format.raw_len();
2824 let min_len = 12usize
2825 .checked_add(hash_len * 2)
2826 .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
2827 if bytes.len() < min_len {
2828 return Err(GitError::InvalidFormat("bitmap index too short".into()));
2829 }
2830 if &bytes[..4] != b"BITM" {
2831 return Err(GitError::InvalidFormat(
2832 "missing bitmap index signature".into(),
2833 ));
2834 }
2835 let version = u16_be(&bytes[4..6]);
2836 if version != 1 {
2837 return Err(GitError::Unsupported(format!(
2838 "bitmap index version {version}"
2839 )));
2840 }
2841 let options = u16_be(&bytes[6..8]);
2842 let known_options = Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE;
2843 if options & !known_options != 0 {
2844 return Err(GitError::Unsupported(format!(
2845 "bitmap index options {:#06x}",
2846 options & !known_options
2847 )));
2848 }
2849 let entry_count = u32_be(&bytes[8..12]) as usize;
2850 let checksum_offset = bytes.len() - hash_len;
2851 let actual_index_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
2852 let index_checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
2853 if actual_index_checksum != index_checksum {
2854 return Err(GitError::InvalidFormat(format!(
2855 "bitmap index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2856 )));
2857 }
2858
2859 let pack_checksum_end = 12usize
2860 .checked_add(hash_len)
2861 .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
2862 let pack_checksum = ObjectId::from_raw(format, &bytes[12..pack_checksum_end])?;
2863 let mut offset = pack_checksum_end;
2864 let commits = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2865 let trees = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2866 let blobs = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2867 let tags = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2868
2869 let mut entries = Vec::with_capacity(entry_count);
2870 for idx in 0..entry_count {
2871 if checksum_offset.saturating_sub(offset) < 6 {
2872 return Err(GitError::InvalidFormat(
2873 "truncated bitmap index entry".into(),
2874 ));
2875 }
2876 let object_position = u32_be(&bytes[offset..offset + 4]);
2877 offset += 4;
2878 if object_position as usize >= object_count {
2879 return Err(GitError::InvalidFormat(
2880 "bitmap index entry points past object table".into(),
2881 ));
2882 }
2883 let xor_offset = bytes[offset];
2884 offset += 1;
2885 if xor_offset as usize > idx || xor_offset > 160 {
2886 return Err(GitError::InvalidFormat(
2887 "bitmap index entry has invalid XOR offset".into(),
2888 ));
2889 }
2890 let flags = bytes[offset];
2891 offset += 1;
2892 let bitmap = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2893 entries.push(PackBitmapEntry {
2894 object_position,
2895 xor_offset,
2896 flags,
2897 bitmap,
2898 });
2899 }
2900
2901 let name_hash_cache = if options & Self::OPTION_HASH_CACHE != 0 {
2902 let cache_len = object_count
2903 .checked_mul(4)
2904 .ok_or_else(|| GitError::InvalidFormat("bitmap hash cache overflow".into()))?;
2905 if checksum_offset.saturating_sub(offset) < cache_len {
2906 return Err(GitError::InvalidFormat(
2907 "truncated bitmap hash cache".into(),
2908 ));
2909 }
2910 let mut cache = Vec::with_capacity(object_count);
2911 for _ in 0..object_count {
2912 cache.push(u32_be(&bytes[offset..offset + 4]));
2913 offset += 4;
2914 }
2915 Some(cache)
2916 } else {
2917 None
2918 };
2919
2920 if offset != checksum_offset {
2921 return Err(GitError::InvalidFormat(format!(
2922 "bitmap index has {} trailing bytes",
2923 checksum_offset - offset
2924 )));
2925 }
2926
2927 Ok(Self {
2928 version,
2929 format,
2930 options,
2931 pack_checksum,
2932 index_checksum,
2933 type_bitmaps: PackBitmapTypeBitmaps {
2934 commits,
2935 trees,
2936 blobs,
2937 tags,
2938 },
2939 entries,
2940 name_hash_cache,
2941 })
2942 }
2943
2944 pub fn entry_for_index_position(&self, position: u32) -> Option<&PackBitmapEntry> {
2947 self.entries
2948 .iter()
2949 .find(|entry| entry.object_position == position)
2950 }
2951}
2952
2953fn parse_bitmap_ewah(
2954 bytes: &[u8],
2955 offset: &mut usize,
2956 checksum_offset: usize,
2957 _object_count: usize,
2958) -> Result<EwahBitmap> {
2959 if checksum_offset.saturating_sub(*offset) < 12 {
2960 return Err(GitError::InvalidFormat("truncated EWAH bitmap".into()));
2961 }
2962 let bit_size = u32_be(&bytes[*offset..*offset + 4]);
2963 *offset += 4;
2964 let word_count = u32_be(&bytes[*offset..*offset + 4]) as usize;
2965 *offset += 4;
2966 let words_len = word_count
2967 .checked_mul(8)
2968 .ok_or_else(|| GitError::InvalidFormat("EWAH word table overflow".into()))?;
2969 if checksum_offset.saturating_sub(*offset) < words_len + 4 {
2970 return Err(GitError::InvalidFormat("truncated EWAH word table".into()));
2971 }
2972 let mut words = Vec::with_capacity(word_count);
2973 for _ in 0..word_count {
2974 words.push(u64_be(&bytes[*offset..*offset + 8]));
2975 *offset += 8;
2976 }
2977 let rlw_position = u32_be(&bytes[*offset..*offset + 4]);
2978 *offset += 4;
2979 validate_ewah_words(bit_size, &words, rlw_position)?;
2980 Ok(EwahBitmap {
2981 bit_size,
2982 words,
2983 rlw_position,
2984 })
2985}
2986
2987fn validate_ewah_words(bit_size: u32, words: &[u64], rlw_position: u32) -> Result<()> {
2988 if words.is_empty() {
2989 if rlw_position != 0 || bit_size != 0 {
2990 return Err(GitError::InvalidFormat(
2991 "EWAH bitmap has invalid empty RLW".into(),
2992 ));
2993 }
2994 return Ok(());
2995 }
2996 if rlw_position as usize >= words.len() {
2997 return Err(GitError::InvalidFormat(
2998 "EWAH RLW position points past word table".into(),
2999 ));
3000 }
3001 let mut word_idx = 0usize;
3002 let mut decoded_words = 0u64;
3003 while word_idx < words.len() {
3004 let rlw = words[word_idx];
3005 let run_words = (rlw >> 1) & 0xffff_ffff;
3006 let literal_words = (rlw >> 33) as usize;
3007 word_idx += 1;
3008 word_idx = word_idx
3009 .checked_add(literal_words)
3010 .ok_or_else(|| GitError::InvalidFormat("EWAH literal word overflow".into()))?;
3011 if word_idx > words.len() {
3012 return Err(GitError::InvalidFormat(
3013 "EWAH literal words extend past word table".into(),
3014 ));
3015 }
3016 decoded_words = decoded_words
3017 .checked_add(run_words)
3018 .and_then(|value| value.checked_add(literal_words as u64))
3019 .ok_or_else(|| GitError::InvalidFormat("EWAH decoded size overflow".into()))?;
3020 }
3021 let decoded_bits = decoded_words
3022 .checked_mul(64)
3023 .ok_or_else(|| GitError::InvalidFormat("EWAH decoded bit size overflow".into()))?;
3024 if decoded_bits < u64::from(bit_size) {
3025 return Err(GitError::InvalidFormat(
3026 "EWAH bitmap decodes fewer bits than declared".into(),
3027 ));
3028 }
3029 Ok(())
3030}
3031
impl MultiPackIndex {
    /// Serializes a multi-pack-index without the optional `RIDX` and `BTMP`
    /// chunks. Delegates to [`Self::write_with_reverse_index`] with no
    /// preferred pack.
    pub fn write(
        format: ObjectFormat,
        version: u8,
        pack_names: &[String],
        objects: &[MultiPackIndexEntry],
    ) -> Result<Vec<u8>> {
        Self::write_with_reverse_index(format, version, pack_names, objects, None)
    }

    /// Serializes a multi-pack-index; when `preferred_pack` is `Some`, a
    /// `RIDX` chunk is emitted as well. Delegates to
    /// [`Self::write_with_bitmap_packs`] without bitmapped-pack information.
    pub fn write_with_reverse_index(
        format: ObjectFormat,
        version: u8,
        pack_names: &[String],
        objects: &[MultiPackIndexEntry],
        preferred_pack: Option<u32>,
    ) -> Result<Vec<u8>> {
        Self::write_with_bitmap_packs(format, version, pack_names, objects, preferred_pack, None)
    }

    /// Serializes a multi-pack-index with the `PNAM`, `OIDF`, `OIDL` and
    /// `OOFF` chunks, plus `LOFF` when large offsets were produced, `RIDX`
    /// when `preferred_pack` is given and `BTMP` when `bitmapped_packs` is
    /// given.
    ///
    /// `objects` may be passed in any order; they are sorted by object id
    /// before being written.
    ///
    /// # Errors
    /// `Unsupported` for versions other than 1 and 2. `InvalidFormat` for an
    /// out-of-range preferred pack, too many packs or objects, inconsistent
    /// `BTMP` data, invalid or (for version 1) unsorted pack names, duplicate
    /// object ids, or objects referring to a pack that is not in
    /// `pack_names`. `InvalidObjectId` when an object id uses a different
    /// object format.
    pub fn write_with_bitmap_packs(
        format: ObjectFormat,
        version: u8,
        pack_names: &[String],
        objects: &[MultiPackIndexEntry],
        preferred_pack: Option<u32>,
        bitmapped_packs: Option<&[MultiPackBitmapPack]>,
    ) -> Result<Vec<u8>> {
        if let Some(preferred) = preferred_pack
            && preferred as usize >= pack_names.len()
        {
            return Err(GitError::InvalidFormat(format!(
                "preferred pack {preferred} out of range for {} packs",
                pack_names.len()
            )));
        }
        if version != 1 && version != 2 {
            return Err(GitError::Unsupported(format!(
                "multi-pack-index version {version}"
            )));
        }
        // Pack and object counts are later narrowed to u32.
        if pack_names.len() > u32::MAX as usize {
            return Err(GitError::InvalidFormat(
                "too many multi-pack-index packs".into(),
            ));
        }
        if objects.len() > u32::MAX as usize {
            return Err(GitError::InvalidFormat(
                "too many multi-pack-index objects".into(),
            ));
        }
        if let Some(bitmapped_packs) = bitmapped_packs {
            // One BTMP record per pack, each covering a range inside the object table.
            if bitmapped_packs.len() != pack_names.len() {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index BTMP pack count mismatch".into(),
                ));
            }
            for pack in bitmapped_packs {
                let bitmap_end = u64::from(pack.bitmap_pos)
                    .checked_add(u64::from(pack.bitmap_nr))
                    .ok_or_else(|| {
                        GitError::InvalidFormat("multi-pack-index BTMP range overflow".into())
                    })?;
                if bitmap_end > objects.len() as u64 {
                    return Err(GitError::InvalidFormat(
                        "multi-pack-index BTMP range points past object table".into(),
                    ));
                }
            }
        }
        validate_midx_pack_names(pack_names)?;
        if version == 1 && pack_names.windows(2).any(|pair| pair[0] > pair[1]) {
            return Err(GitError::InvalidFormat(
                "multi-pack-index v1 pack names must be sorted".into(),
            ));
        }

        // Work on references sorted by object id; sorting also makes
        // duplicates adjacent so a single pass can detect them.
        let mut objects = objects.iter().collect::<Vec<_>>();
        objects.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
        let mut previous_oid: Option<&ObjectId> = None;
        for object in &objects {
            if object.oid.format() != format {
                return Err(GitError::InvalidObjectId(
                    "multi-pack-index object format does not match index format".into(),
                ));
            }
            if let Some(previous) = previous_oid
                && previous.as_bytes() == object.oid.as_bytes()
            {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index contains duplicate object ids".into(),
                ));
            }
            if object.pack_int_id as usize >= pack_names.len() {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index object points past pack table".into(),
                ));
            }
            previous_oid = Some(&object.oid);
        }

        // `write_midx_object_offsets` fills `large_offsets` as a side output;
        // the LOFF chunk is only emitted when it ends up non-empty.
        let mut large_offsets = Vec::new();
        let mut chunks = vec![
            (*b"PNAM", write_midx_pack_names(pack_names)),
            (*b"OIDF", write_midx_oid_fanout(&objects)?),
            (*b"OIDL", write_midx_oid_lookup(&objects)),
            (
                *b"OOFF",
                write_midx_object_offsets(&objects, &mut large_offsets)?,
            ),
        ];
        if !large_offsets.is_empty() {
            chunks.push((*b"LOFF", large_offsets));
        }
        if let Some(preferred) = preferred_pack {
            // Order the object positions with the preferred pack first, then
            // by pack id, then by offset within the pack.
            let mut pseudo: Vec<u32> = (0..objects.len() as u32).collect();
            pseudo.sort_by_key(|&midx_pos| {
                let object = objects[midx_pos as usize];
                (
                    object.pack_int_id != preferred,
                    object.pack_int_id,
                    object.offset,
                )
            });
            let mut ridx = Vec::with_capacity(pseudo.len() * 4);
            for midx_pos in pseudo {
                ridx.extend_from_slice(&midx_pos.to_be_bytes());
            }
            chunks.push((*b"RIDX", ridx));
        }
        if let Some(bitmapped_packs) = bitmapped_packs {
            // Two big-endian u32 values per pack: bitmap_pos, bitmap_nr.
            let mut btmp = Vec::with_capacity(bitmapped_packs.len() * 8);
            for pack in bitmapped_packs {
                btmp.extend_from_slice(&pack.bitmap_pos.to_be_bytes());
                btmp.extend_from_slice(&pack.bitmap_nr.to_be_bytes());
            }
            chunks.push((*b"BTMP", btmp));
        }
        write_multi_pack_index_chunks(format, version, pack_names.len() as u32, &chunks)
    }

    /// Parses a multi-pack-index and verifies its trailing checksum.
    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
        Self::parse_impl(bytes, format, true)
    }

    /// Parses a multi-pack-index without verifying its trailing checksum; the
    /// stored checksum is still read and kept in the result.
    pub fn parse_without_checksum(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
        Self::parse_impl(bytes, format, false)
    }

    /// Shared parser behind [`Self::parse`] and
    /// [`Self::parse_without_checksum`].
    ///
    /// Reads the 12-byte header (`MIDX`, version, hash id, chunk count, base
    /// count, pack count), the chunk lookup table with its zero-id
    /// terminator, and then the individual chunks via the `parse_midx_*`
    /// helpers.
    ///
    /// # Errors
    /// `Unsupported` for versions other than 1 and 2 or a non-zero base
    /// count; `InvalidFormat` for truncated data, a wrong signature or hash
    /// id, a checksum mismatch (only when `verify_checksum` is set) or an
    /// inconsistent chunk table; errors from the chunk parsers are propagated.
    fn parse_impl(bytes: &[u8], format: ObjectFormat, verify_checksum: bool) -> Result<Self> {
        let hash_len = format.raw_len();
        // Header (12) + at least the terminating lookup row (12) + checksum.
        if bytes.len() < 12 + 12 + hash_len {
            return Err(GitError::InvalidFormat(
                "multi-pack-index file too short".into(),
            ));
        }
        if &bytes[..4] != b"MIDX" {
            return Err(GitError::InvalidFormat(
                "missing multi-pack-index signature".into(),
            ));
        }
        let version = bytes[4];
        if version != 1 && version != 2 {
            return Err(GitError::Unsupported(format!(
                "multi-pack-index version {version}"
            )));
        }
        let hash_id = bytes[5];
        if u32::from(hash_id) != hash_function_id(format) {
            return Err(GitError::InvalidFormat(format!(
                "multi-pack-index hash id {hash_id} does not match {}",
                format.name()
            )));
        }
        let chunk_count = bytes[6] as usize;
        let base_midx_count = bytes[7];
        if base_midx_count != 0 {
            return Err(GitError::Unsupported(format!(
                "multi-pack-index base count {base_midx_count}"
            )));
        }
        let pack_count = u32_be(&bytes[8..12]);
        // The lookup table has one 12-byte row per chunk plus a terminator row.
        let lookup_len = (chunk_count + 1)
            .checked_mul(12)
            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
        let data_start = 12usize
            .checked_add(lookup_len)
            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
        let checksum_offset = bytes.len() - hash_len;
        if data_start > checksum_offset {
            return Err(GitError::InvalidFormat(
                "truncated multi-pack-index chunk lookup".into(),
            ));
        }

        let checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
        if verify_checksum {
            let actual_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
            if actual_checksum != checksum {
                return Err(GitError::InvalidFormat(format!(
                    "multi-pack-index checksum mismatch: expected {checksum}, got {actual_checksum}"
                )));
            }
        }

        // Each lookup row is a 4-byte chunk id followed by a u64 file offset.
        let mut entries = Vec::with_capacity(chunk_count + 1);
        let mut offset = 12usize;
        for _ in 0..=chunk_count {
            let id = [
                bytes[offset],
                bytes[offset + 1],
                bytes[offset + 2],
                bytes[offset + 3],
            ];
            let chunk_offset = u64_be(&bytes[offset + 4..offset + 12]);
            entries.push((id, chunk_offset));
            offset += 12;
        }
        let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
            return Err(GitError::InvalidFormat(
                "multi-pack-index chunk lookup is empty".into(),
            ));
        };
        if terminator_id != [0, 0, 0, 0] {
            return Err(GitError::InvalidFormat(
                "multi-pack-index chunk lookup missing terminator".into(),
            ));
        }
        if terminator_offset != checksum_offset as u64 {
            return Err(GitError::InvalidFormat(
                "multi-pack-index terminator does not point at checksum".into(),
            ));
        }

        // A chunk's length is the distance to the next row's offset, so walk
        // the rows pairwise.
        let mut chunks = Vec::with_capacity(chunk_count);
        let mut previous_offset = data_start as u64;
        let mut reported_unaligned = false;
        for pair in entries.windows(2) {
            let (id, chunk_offset) = pair[0];
            let (_next_id, next_offset) = pair[1];
            if id == [0, 0, 0, 0] {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index chunk id is zero before terminator".into(),
                ));
            }
            if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index chunk offsets are not monotonic".into(),
                ));
            }
            // Misalignment is reported on stderr (at most once) but does not
            // abort parsing.
            if chunk_offset % 4 != 0 && !reported_unaligned {
                eprintln!(
                    "error: chunk id {:08x} not 4-byte aligned",
                    u32::from_be_bytes(id)
                );
                reported_unaligned = true;
            }
            if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index chunk length is invalid".into(),
                ));
            }
            chunks.push(MultiPackIndexChunk {
                id,
                offset: chunk_offset,
                len: next_offset - chunk_offset,
            });
            previous_offset = chunk_offset;
        }

        let pack_names = parse_midx_pack_names(bytes, &chunks, pack_count as usize, version)?;
        let (fanout, object_count) = parse_midx_oid_fanout(bytes, &chunks)?;
        let object_ids = parse_midx_object_ids(bytes, &chunks, format, object_count, &fanout)?;
        let objects = parse_midx_object_offsets(bytes, &chunks, object_ids, pack_count)?;
        let reverse_index = parse_midx_reverse_index(bytes, &chunks, object_count)?;
        let bitmapped_packs =
            parse_midx_bitmapped_packs(bytes, &chunks, pack_count as usize, object_count)?;

        Ok(Self {
            version,
            format,
            pack_count,
            pack_names,
            object_count: object_count as u32,
            fanout,
            objects,
            reverse_index,
            bitmapped_packs,
            chunks,
            checksum,
        })
    }

    /// Looks up `oid` by binary search over `self.objects`, which therefore
    /// has to be sorted by object id bytes.
    pub fn find(&self, oid: &ObjectId) -> Option<&MultiPackIndexEntry> {
        self.objects
            .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
            .ok()
            .map(|idx| &self.objects[idx])
    }
}
3342
3343impl MultiPackIndexOidLookup {
3344 pub fn parse(bytes: Arc<dyn PackIndexByteSource>, format: ObjectFormat) -> Result<Self> {
3345 let raw = bytes.as_bytes();
3346 let hash_len = format.raw_len();
3347 if raw.len() < 12 + 12 + hash_len {
3348 return Err(GitError::InvalidFormat(
3349 "multi-pack-index file too short".into(),
3350 ));
3351 }
3352 if &raw[..4] != b"MIDX" {
3353 return Err(GitError::InvalidFormat(
3354 "missing multi-pack-index signature".into(),
3355 ));
3356 }
3357 let version = raw[4];
3358 if version != 1 && version != 2 {
3359 return Err(GitError::Unsupported(format!(
3360 "multi-pack-index version {version}"
3361 )));
3362 }
3363 let hash_id = raw[5];
3364 if u32::from(hash_id) != hash_function_id(format) {
3365 return Err(GitError::InvalidFormat(format!(
3366 "multi-pack-index hash id {hash_id} does not match {}",
3367 format.name()
3368 )));
3369 }
3370 let chunk_count = raw[6] as usize;
3371 let base_midx_count = raw[7];
3372 if base_midx_count != 0 {
3373 return Err(GitError::Unsupported(format!(
3374 "multi-pack-index base count {base_midx_count}"
3375 )));
3376 }
3377 let pack_count = u32_be(&raw[8..12]);
3378 let lookup_len = (chunk_count + 1)
3379 .checked_mul(12)
3380 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
3381 let data_start = 12usize
3382 .checked_add(lookup_len)
3383 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
3384 let checksum_offset = raw.len() - hash_len;
3385 if data_start > checksum_offset {
3386 return Err(GitError::InvalidFormat(
3387 "truncated multi-pack-index chunk lookup".into(),
3388 ));
3389 }
3390
3391 let mut entries = Vec::with_capacity(chunk_count + 1);
3392 let mut offset = 12usize;
3393 for _ in 0..=chunk_count {
3394 let id = [
3395 raw[offset],
3396 raw[offset + 1],
3397 raw[offset + 2],
3398 raw[offset + 3],
3399 ];
3400 let chunk_offset = u64_be(&raw[offset + 4..offset + 12]);
3401 entries.push((id, chunk_offset));
3402 offset += 12;
3403 }
3404 let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
3405 return Err(GitError::InvalidFormat(
3406 "multi-pack-index chunk lookup is empty".into(),
3407 ));
3408 };
3409 if terminator_id != [0, 0, 0, 0] {
3410 return Err(GitError::InvalidFormat(
3411 "multi-pack-index chunk lookup missing terminator".into(),
3412 ));
3413 }
3414 if terminator_offset != checksum_offset as u64 {
3415 return Err(GitError::InvalidFormat(
3416 "multi-pack-index terminator does not point at checksum".into(),
3417 ));
3418 }
3419
3420 let mut chunks = Vec::with_capacity(chunk_count);
3421 let mut previous_offset = data_start as u64;
3422 let mut reported_unaligned = false;
3423 for pair in entries.windows(2) {
3424 let (id, chunk_offset) = pair[0];
3425 let (_next_id, next_offset) = pair[1];
3426 if id == [0, 0, 0, 0] {
3427 return Err(GitError::InvalidFormat(
3428 "multi-pack-index chunk id is zero before terminator".into(),
3429 ));
3430 }
3431 if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
3432 return Err(GitError::InvalidFormat(
3433 "multi-pack-index chunk offsets are not monotonic".into(),
3434 ));
3435 }
3436 if chunk_offset % 4 != 0 && !reported_unaligned {
3437 eprintln!(
3438 "error: chunk id {:08x} not 4-byte aligned",
3439 u32::from_be_bytes(id)
3440 );
3441 reported_unaligned = true;
3442 }
3443 if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
3444 return Err(GitError::InvalidFormat(
3445 "multi-pack-index chunk length is invalid".into(),
3446 ));
3447 }
3448 chunks.push(MultiPackIndexChunk {
3449 id,
3450 offset: chunk_offset,
3451 len: next_offset - chunk_offset,
3452 });
3453 previous_offset = chunk_offset;
3454 }
3455
3456 let pack_names = parse_midx_pack_names(raw, &chunks, pack_count as usize, version)?;
3457 let (fanout, object_count) = parse_midx_oid_fanout(raw, &chunks)?;
3458 let oid_lookup = midx_chunk_data(raw, &chunks, *b"OIDL", true)?
3459 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
3460 let expected_len = object_count.checked_mul(hash_len).ok_or_else(|| {
3461 GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into())
3462 })?;
3463 if oid_lookup.len() != expected_len {
3464 return Err(GitError::InvalidFormat(
3465 "error: multi-pack-index OID lookup chunk is the wrong size\nfatal: multi-pack-index required OID lookup chunk missing or corrupted".into(),
3466 ));
3467 }
3468 let object_offsets = midx_chunk_data(raw, &chunks, *b"OOFF", true)?
3469 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
3470 let expected_offsets_len = object_count.checked_mul(8).ok_or_else(|| {
3471 GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into())
3472 })?;
3473 if object_offsets.len() != expected_offsets_len {
3474 return Err(GitError::InvalidFormat(
3475 "error: multi-pack-index object offset chunk is the wrong size\nfatal: multi-pack-index required object offsets chunk missing or corrupted".into(),
3476 ));
3477 }
3478 let large_offsets = midx_chunk_data(raw, &chunks, *b"LOFF", false)?;
3479 if let Some(large_offsets) = large_offsets
3480 && large_offsets.len() % 8 != 0
3481 {
3482 return Err(GitError::InvalidFormat(
3483 "multi-pack-index LOFF chunk has invalid length".into(),
3484 ));
3485 }
3486 let oid_lookup_offset = oid_lookup.as_ptr() as usize - raw.as_ptr() as usize;
3487 let object_offsets_offset = object_offsets.as_ptr() as usize - raw.as_ptr() as usize;
3488 let (large_offsets_offset, large_offsets_len) = match large_offsets {
3489 Some(large_offsets) => (
3490 Some(large_offsets.as_ptr() as usize - raw.as_ptr() as usize),
3491 large_offsets.len(),
3492 ),
3493 None => (None, 0),
3494 };
3495 Ok(Self {
3496 format,
3497 pack_count,
3498 pack_names,
3499 fanout,
3500 object_count,
3501 oid_lookup_offset,
3502 object_offsets_offset,
3503 large_offsets_offset,
3504 large_offsets_len,
3505 bytes,
3506 })
3507 }
3508
3509 pub fn contains(&self, oid: &ObjectId) -> bool {
3510 self.find_position(oid).is_some()
3511 }
3512
3513 pub fn find(&self, oid: &ObjectId) -> Result<Option<MultiPackIndexEntry>> {
3514 let Some(position) = self.find_position(oid) else {
3515 return Ok(None);
3516 };
3517 let bytes = self.bytes.as_bytes();
3518 let hash_len = self.format.raw_len();
3519 let oid_start = self
3520 .oid_lookup_offset
3521 .checked_add(position * hash_len)
3522 .ok_or_else(|| {
3523 GitError::InvalidFormat("multi-pack-index OIDL offset overflow".into())
3524 })?;
3525 let oid = ObjectId::from_raw(self.format, &bytes[oid_start..oid_start + hash_len])?;
3526 let offset_start = self
3527 .object_offsets_offset
3528 .checked_add(position * 8)
3529 .ok_or_else(|| {
3530 GitError::InvalidFormat("multi-pack-index OOFF offset overflow".into())
3531 })?;
3532 let data = &bytes[offset_start..offset_start + 8];
3533 let pack_int_id = u32_be(&data[..4]);
3534 if pack_int_id >= self.pack_count {
3535 return Err(GitError::InvalidFormat(
3536 "multi-pack-index object points past pack table".into(),
3537 ));
3538 }
3539 let raw_offset = u32_be(&data[4..8]);
3540 let offset = if raw_offset & 0x8000_0000 == 0 {
3541 u64::from(raw_offset)
3542 } else {
3543 let Some(large_offsets_offset) = self.large_offsets_offset else {
3544 return Err(GitError::InvalidFormat(
3545 "multi-pack-index large offset missing LOFF chunk".into(),
3546 ));
3547 };
3548 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
3549 let large_start = large_idx.checked_mul(8).ok_or_else(|| {
3550 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
3551 })?;
3552 let large_end = large_start.checked_add(8).ok_or_else(|| {
3553 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
3554 })?;
3555 if large_end > self.large_offsets_len {
3556 return Err(GitError::InvalidFormat(
3557 "fatal: multi-pack-index large offset out of bounds".into(),
3558 ));
3559 }
3560 let start = large_offsets_offset + large_start;
3561 u64_be(&bytes[start..start + 8])
3562 };
3563 Ok(Some(MultiPackIndexEntry {
3564 oid,
3565 pack_int_id,
3566 offset,
3567 force_large_offset: raw_offset & 0x8000_0000 != 0,
3568 }))
3569 }
3570
3571 pub fn pack_name(&self, pack_int_id: u32) -> Option<&str> {
3572 self.pack_names
3573 .get(pack_int_id as usize)
3574 .map(String::as_str)
3575 }
3576
3577 fn find_position(&self, oid: &ObjectId) -> Option<usize> {
3578 if oid.format() != self.format || self.object_count == 0 {
3579 return None;
3580 }
3581 let first = oid.as_bytes()[0] as usize;
3582 let start = if first == 0 {
3583 0
3584 } else {
3585 self.fanout[first - 1] as usize
3586 };
3587 let end = self.fanout[first] as usize;
3588 if start >= end || end > self.object_count {
3589 return None;
3590 }
3591 let hash_len = self.format.raw_len();
3592 let table_start = self.oid_lookup_offset;
3593 let table_end = table_start + self.object_count * hash_len;
3594 let bytes = self.bytes.as_bytes();
3595 let table = &bytes[table_start..table_end];
3596 let needle = oid.as_bytes();
3597 let mut low = start;
3598 let mut high = end;
3599 while low < high {
3600 let mid = low + (high - low) / 2;
3601 let raw = &table[mid * hash_len..(mid + 1) * hash_len];
3602 match raw.cmp(needle) {
3603 std::cmp::Ordering::Less => low = mid + 1,
3604 std::cmp::Ordering::Equal => return Some(mid),
3605 std::cmp::Ordering::Greater => high = mid,
3606 }
3607 }
3608 None
3609 }
3610}
3611
3612fn validate_midx_pack_names(pack_names: &[String]) -> Result<()> {
3613 for name in pack_names {
3614 if name.is_empty() {
3615 return Err(GitError::InvalidFormat(
3616 "multi-pack-index pack name is empty".into(),
3617 ));
3618 }
3619 if name
3620 .bytes()
3621 .any(|byte| byte == 0 || matches!(byte, b'/' | b'\\'))
3622 {
3623 return Err(GitError::InvalidFormat(
3624 "multi-pack-index pack name contains an invalid byte".into(),
3625 ));
3626 }
3627 }
3628 Ok(())
3629}
3630
/// Serializes pack names into a single chunk body.
///
/// Each name is written as its bytes followed by one NUL terminator; the
/// result is then padded with NUL bytes up to the next multiple of four.
/// An empty input yields an empty buffer.
fn write_midx_pack_names(pack_names: &[String]) -> Vec<u8> {
    let mut chunk: Vec<u8> = pack_names
        .iter()
        .flat_map(|name| name.bytes().chain(std::iter::once(0u8)))
        .collect();
    // Round the length up to a 4-byte boundary.
    let padding = (4 - chunk.len() % 4) % 4;
    chunk.resize(chunk.len() + padding, 0);
    chunk
}
3642
3643fn write_midx_oid_fanout(objects: &[&MultiPackIndexEntry]) -> Result<Vec<u8>> {
3644 let mut counts = [0u32; 256];
3645 for object in objects {
3646 let first = object.oid.as_bytes()[0] as usize;
3647 counts[first] = counts[first]
3648 .checked_add(1)
3649 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
3650 }
3651 let mut running = 0u32;
3652 let mut out = Vec::with_capacity(256 * 4);
3653 for count in counts {
3654 running = running
3655 .checked_add(count)
3656 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
3657 out.extend_from_slice(&running.to_be_bytes());
3658 }
3659 Ok(out)
3660}
3661
3662fn write_midx_oid_lookup(objects: &[&MultiPackIndexEntry]) -> Vec<u8> {
3663 let mut out = Vec::new();
3664 for object in objects {
3665 out.extend_from_slice(object.oid.as_bytes());
3666 }
3667 out
3668}
3669
3670fn write_midx_object_offsets(
3671 objects: &[&MultiPackIndexEntry],
3672 large_offsets: &mut Vec<u8>,
3673) -> Result<Vec<u8>> {
3674 let mut out = Vec::new();
3675 for object in objects {
3676 out.extend_from_slice(&object.pack_int_id.to_be_bytes());
3677 if object.offset < 0x8000_0000 && !object.force_large_offset {
3678 out.extend_from_slice(&(object.offset as u32).to_be_bytes());
3679 } else {
3680 let large_idx = large_offsets.len() / 8;
3681 if large_idx > 0x7fff_ffff {
3682 return Err(GitError::InvalidFormat(
3683 "too many multi-pack-index large offsets".into(),
3684 ));
3685 }
3686 out.extend_from_slice(&(0x8000_0000 | large_idx as u32).to_be_bytes());
3687 large_offsets.extend_from_slice(&object.offset.to_be_bytes());
3688 }
3689 }
3690 Ok(out)
3691}
3692
3693fn write_multi_pack_index_chunks(
3694 format: ObjectFormat,
3695 version: u8,
3696 pack_count: u32,
3697 chunks: &[([u8; 4], Vec<u8>)],
3698) -> Result<Vec<u8>> {
3699 if chunks.len() > u8::MAX as usize {
3700 return Err(GitError::InvalidFormat(
3701 "too many multi-pack-index chunks".into(),
3702 ));
3703 }
3704 let lookup_len = (chunks.len() + 1)
3705 .checked_mul(12)
3706 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
3707 let mut out = Vec::new();
3708 out.extend_from_slice(b"MIDX");
3709 out.push(version);
3710 out.push(hash_function_id(format) as u8);
3711 out.push(chunks.len() as u8);
3712 out.push(0);
3713 out.extend_from_slice(&pack_count.to_be_bytes());
3714 let mut chunk_offset = (12usize)
3715 .checked_add(lookup_len)
3716 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?
3717 as u64;
3718 for (id, data) in chunks {
3719 out.extend_from_slice(id);
3720 out.extend_from_slice(&chunk_offset.to_be_bytes());
3721 chunk_offset = chunk_offset
3722 .checked_add(data.len() as u64)
3723 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index size overflow".into()))?;
3724 }
3725 out.extend_from_slice(&[0, 0, 0, 0]);
3726 out.extend_from_slice(&chunk_offset.to_be_bytes());
3727 for (_id, data) in chunks {
3728 out.extend_from_slice(data);
3729 }
3730 let checksum = sley_core::digest_bytes(format, &out)?;
3731 out.extend_from_slice(checksum.as_bytes());
3732 Ok(out)
3733}
3734
/// Decoded header of one pack entry, as returned by `parse_entry_header`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct EntryHeader {
    /// Entry kind, taken from the 3-bit type field of the first header byte.
    kind: PackObjectKind,
    /// Value of the header's variable-length size field. The readers in this
    /// file compare it against the length of the inflated entry body.
    size: u64,
}
3740
/// Cache of fully resolved pack objects, keyed by the entry's offset in the
/// pack.
///
/// `read_object_at_with_cache_arc` consults the cache before decoding an
/// entry and stores every object it resolves, including the bases it visits
/// while following ofs-delta chains. Both methods take `&self`, so an
/// implementation that actually stores anything needs interior mutability.
pub trait PackDeltaCache {
    /// Returns the cached object for the entry at `offset`, if any.
    fn get(&self, offset: u64) -> Option<Arc<EncodedObject>>;
    /// Records `object` as the resolved form of the entry at `offset`.
    fn insert(&self, offset: u64, object: Arc<EncodedObject>);
}
3760
/// `PackDeltaCache` that never stores anything; used by `read_object_at_arc`
/// so the uncached entry point can share the cached implementation.
struct NoopDeltaCache;

impl PackDeltaCache for NoopDeltaCache {
    /// Always a miss.
    fn get(&self, _offset: u64) -> Option<Arc<EncodedObject>> {
        None
    }
    /// Discards the object.
    fn insert(&self, _offset: u64, _object: Arc<EncodedObject>) {}
}
3771
// One zlib decompressor per thread, shared by `inflate_into` and
// `inflate_prefix`. Both reset it before use, so no state carries over from
// one stream to the next. It is constructed with `true`, i.e. it expects a
// zlib header. Access goes through `RefCell::borrow_mut`, so using it
// re-entrantly on the same thread would panic.
thread_local! {
    static INFLATE: RefCell<flate2::Decompress> = RefCell::new(flate2::Decompress::new(true));
}
3780
/// Factor by which a compressed input is assumed to be able to expand at
/// most; used to cap size hints that come from untrusted headers.
// NOTE(review): 1032 presumably reflects zlib's documented maximum
// compression ratio (about 1032:1) — confirm.
const MAX_INFLATE_EXPANSION: usize = 1032;

/// Largest up-front reservation (64 MiB) made on the strength of a size hint.
const MAX_INFLATE_RESERVE: usize = 64 * 1024 * 1024;

/// Picks how many bytes to reserve before inflating `compressed_len` bytes
/// whose declared output size is `size_hint`.
///
/// The hint is capped by what the input could plausibly expand to
/// (`compressed_len * MAX_INFLATE_EXPANSION`, saturating), and the result is
/// then kept within `64..=MAX_INFLATE_RESERVE`.
fn bounded_inflate_reserve(size_hint: usize, compressed_len: usize) -> usize {
    let input_ceiling = compressed_len.saturating_mul(MAX_INFLATE_EXPANSION);
    let plausible = if input_ceiling < size_hint {
        input_ceiling
    } else {
        size_hint
    };
    if plausible < 64 {
        64
    } else if plausible > MAX_INFLATE_RESERVE {
        MAX_INFLATE_RESERVE
    } else {
        plausible
    }
}
3812
3813fn inflate_into(compressed: &[u8], out: &mut Vec<u8>, size_hint: usize) -> Result<usize> {
3822 INFLATE.with(|cell| {
3823 let mut decompress = cell.borrow_mut();
3824 decompress.reset(true);
3825 out.reserve(bounded_inflate_reserve(size_hint, compressed.len()));
3826 let mut input = compressed;
3827 let mut consumed_total = 0usize;
3828 loop {
3829 if out.len() == out.capacity() {
3832 out.reserve(out.len().max(64));
3833 }
3834 let before_in = decompress.total_in();
3835 let before_out = decompress.total_out();
3836 let status = decompress
3837 .decompress_vec(input, out, flate2::FlushDecompress::None)
3838 .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
3839 let consumed = (decompress.total_in() - before_in) as usize;
3840 let produced = decompress.total_out() - before_out;
3841 input = &input[consumed..];
3842 consumed_total += consumed;
3843 match status {
3844 flate2::Status::StreamEnd => return Ok(consumed_total),
3845 _ if consumed == 0 && produced == 0 => {
3846 return Err(GitError::InvalidObject("truncated zlib stream".into()));
3847 }
3848 _ => {}
3849 }
3850 }
3851 })
3852}
3853
3854fn inflate_prefix(compressed: &[u8], max_out: usize, out: &mut Vec<u8>) -> Result<()> {
3858 INFLATE.with(|cell| {
3859 let mut decompress = cell.borrow_mut();
3860 decompress.reset(true);
3861 out.reserve(max_out.max(16));
3862 let mut input = compressed;
3863 while out.len() < max_out {
3864 if out.len() == out.capacity() {
3865 out.reserve(out.len().max(16));
3866 }
3867 let before_in = decompress.total_in();
3868 let before_out = decompress.total_out();
3869 let status = decompress
3870 .decompress_vec(input, out, flate2::FlushDecompress::None)
3871 .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
3872 let consumed = (decompress.total_in() - before_in) as usize;
3873 let produced = decompress.total_out() - before_out;
3874 input = &input[consumed..];
3875 if status == flate2::Status::StreamEnd || (consumed == 0 && produced == 0) {
3876 break;
3877 }
3878 }
3879 Ok(())
3880 })
3881}
3882
3883pub fn read_object_at_arc<F>(
3891 pack_bytes: &[u8],
3892 offset: u64,
3893 format: ObjectFormat,
3894 resolve_ref_base: F,
3895) -> Result<Arc<EncodedObject>>
3896where
3897 F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
3898{
3899 read_object_at_with_cache_arc(
3900 pack_bytes,
3901 offset,
3902 format,
3903 resolve_ref_base,
3904 &NoopDeltaCache,
3905 )
3906}
3907
3908pub fn read_object_at_with_cache_arc<F, C>(
3917 pack_bytes: &[u8],
3918 offset: u64,
3919 format: ObjectFormat,
3920 mut resolve_ref_base: F,
3921 cache: &C,
3922) -> Result<Arc<EncodedObject>>
3923where
3924 F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
3925 C: PackDeltaCache + ?Sized,
3926{
3927 read_object_at_inner(pack_bytes, offset, format, &mut resolve_ref_base, cache)
3928}
3929
3930fn read_object_at_inner<F, C>(
3931 pack_bytes: &[u8],
3932 offset: u64,
3933 format: ObjectFormat,
3934 resolve_ref_base: &mut F,
3935 cache: &C,
3936) -> Result<Arc<EncodedObject>>
3937where
3938 F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
3939 C: PackDeltaCache + ?Sized,
3940{
3941 if let Some(object) = cache.get(offset) {
3944 return Ok(object);
3945 }
3946 let trailer_offset = pack_bytes
3947 .len()
3948 .checked_sub(format.raw_len())
3949 .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
3950 let mut cursor = usize::try_from(offset)
3951 .ok()
3952 .filter(|&value| value < trailer_offset)
3953 .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
3954 let header = parse_entry_header(pack_bytes, &mut cursor)?;
3955 let base = match header.kind {
3956 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
3957 pack_bytes,
3958 &mut cursor,
3959 offset,
3960 )?)),
3961 PackObjectKind::RefDelta => {
3962 let hash_len = format.raw_len();
3963 if cursor + hash_len > trailer_offset {
3964 return Err(GitError::InvalidFormat(
3965 "truncated ref-delta base object id".into(),
3966 ));
3967 }
3968 let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
3969 cursor += hash_len;
3970 Some(DeltaBase::Ref(oid))
3971 }
3972 _ => None,
3973 };
3974 let mut body = Vec::new();
3975 inflate_into(
3976 &pack_bytes[cursor..trailer_offset],
3977 &mut body,
3978 header.size.min(usize::MAX as u64) as usize,
3979 )?;
3980 if body.len() as u64 != header.size {
3981 return Err(GitError::InvalidObject(format!(
3982 "pack object declared {} bytes, decoded {}",
3983 header.size,
3984 body.len()
3985 )));
3986 }
3987 let object = match base {
3988 None => {
3989 let object_type = match header.kind {
3990 PackObjectKind::Commit => ObjectType::Commit,
3991 PackObjectKind::Tree => ObjectType::Tree,
3992 PackObjectKind::Blob => ObjectType::Blob,
3993 PackObjectKind::Tag => ObjectType::Tag,
3994 PackObjectKind::OfsDelta | PackObjectKind::RefDelta => {
3995 return Err(GitError::InvalidFormat(
3996 "delta pack entry decoded without a base".into(),
3997 ));
3998 }
3999 };
4000 Arc::new(EncodedObject::new(object_type, body))
4001 }
4002 Some(DeltaBase::Offset(base_offset)) => {
4003 let base =
4004 read_object_at_inner(pack_bytes, base_offset, format, resolve_ref_base, cache)?;
4005 let resolved = apply_pack_delta(&base.body, &body)?;
4006 Arc::new(EncodedObject::new(base.object_type, resolved))
4007 }
4008 Some(DeltaBase::Ref(base_oid)) => {
4009 let base = resolve_ref_base(&base_oid)?
4010 .ok_or_else(|| GitError::not_found(format!("ref-delta base object {base_oid}")))?;
4011 let resolved = apply_pack_delta(&base.body, &body)?;
4012 Arc::new(EncodedObject::new(base.object_type, resolved))
4013 }
4014 };
4015 cache.insert(offset, Arc::clone(&object));
4019 Ok(object)
4020}
4021
4022pub fn read_object_header_at<F>(
4032 pack_bytes: &[u8],
4033 offset: u64,
4034 format: ObjectFormat,
4035 mut resolve_ref_base_type: F,
4036) -> Result<(ObjectType, u64)>
4037where
4038 F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
4039{
4040 read_object_header_at_inner(
4041 pack_bytes,
4042 offset,
4043 format,
4044 &mut resolve_ref_base_type,
4045 &mut NoopHeaderTypeCache,
4046 )
4047}
4048
/// Cache of resolved `(type, size)` headers, keyed by the entry's offset in
/// the pack.
///
/// `read_object_header_at_with_cache` checks it before decoding an entry,
/// and the header reader stores every header it resolves, including those of
/// ofs-delta bases it visits.
pub trait HeaderTypeCache {
    /// Returns the cached header for the entry at `pack_offset`, if any.
    fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)>;
    /// Records `header` as the resolved header of the entry at `pack_offset`.
    fn put(&mut self, pack_offset: u64, header: (ObjectType, u64));
}
4071
/// `HeaderTypeCache` that never stores anything; used by
/// `read_object_header_at` so the uncached entry point can share the cached
/// implementation.
struct NoopHeaderTypeCache;

impl HeaderTypeCache for NoopHeaderTypeCache {
    /// Always a miss.
    fn get(&self, _pack_offset: u64) -> Option<(ObjectType, u64)> {
        None
    }
    /// Discards the header.
    fn put(&mut self, _pack_offset: u64, _header: (ObjectType, u64)) {}
}
4080
4081pub fn read_object_header_at_with_cache<F, C>(
4087 pack_bytes: &[u8],
4088 offset: u64,
4089 format: ObjectFormat,
4090 mut resolve_ref_base_type: F,
4091 type_cache: &mut C,
4092) -> Result<(ObjectType, u64)>
4093where
4094 F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
4095 C: HeaderTypeCache + ?Sized,
4096{
4097 if let Some(header) = type_cache.get(offset) {
4098 return Ok(header);
4099 }
4100 read_object_header_at_inner(
4101 pack_bytes,
4102 offset,
4103 format,
4104 &mut resolve_ref_base_type,
4105 type_cache,
4106 )
4107}
4108
4109fn read_object_header_at_inner<F, C>(
4110 pack_bytes: &[u8],
4111 offset: u64,
4112 format: ObjectFormat,
4113 resolve_ref_base_type: &mut F,
4114 type_cache: &mut C,
4115) -> Result<(ObjectType, u64)>
4116where
4117 F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
4118 C: HeaderTypeCache + ?Sized,
4119{
4120 let trailer_offset = pack_bytes
4121 .len()
4122 .checked_sub(format.raw_len())
4123 .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
4124 let mut cursor = usize::try_from(offset)
4125 .ok()
4126 .filter(|&value| value < trailer_offset)
4127 .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
4128 let header = parse_entry_header(pack_bytes, &mut cursor)?;
4129 let resolved = match header.kind {
4130 PackObjectKind::Commit => (ObjectType::Commit, header.size),
4131 PackObjectKind::Tree => (ObjectType::Tree, header.size),
4132 PackObjectKind::Blob => (ObjectType::Blob, header.size),
4133 PackObjectKind::Tag => (ObjectType::Tag, header.size),
4134 PackObjectKind::OfsDelta => {
4135 let base_offset = parse_ofs_delta_base_offset(pack_bytes, &mut cursor, offset)?;
4136 let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
4137 let base_type = match type_cache.get(base_offset) {
4140 Some((base_type, _)) => base_type,
4141 None => {
4142 let (base_type, _) = read_object_header_at_inner(
4143 pack_bytes,
4144 base_offset,
4145 format,
4146 resolve_ref_base_type,
4147 type_cache,
4148 )?;
4149 base_type
4150 }
4151 };
4152 (base_type, size)
4153 }
4154 PackObjectKind::RefDelta => {
4155 let hash_len = format.raw_len();
4156 if cursor + hash_len > trailer_offset {
4157 return Err(GitError::InvalidFormat(
4158 "truncated ref-delta base object id".into(),
4159 ));
4160 }
4161 let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
4162 cursor += hash_len;
4163 let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
4164 let base_type = resolve_ref_base_type(&oid)?
4165 .ok_or_else(|| GitError::not_found(format!("ref-delta base object {oid}")))?;
4166 (base_type, size)
4167 }
4168 };
4169 type_cache.put(offset, resolved);
4172 Ok(resolved)
4173}
4174
4175const DELTA_HEADER_PREFIX_LEN: usize = 32;
4179
4180fn delta_result_size_from_stream(compressed: &[u8]) -> Result<u64> {
4183 let mut prefix = Vec::new();
4184 inflate_prefix(compressed, DELTA_HEADER_PREFIX_LEN, &mut prefix)?;
4185 decoded_delta_result_size(&prefix)
4186}
4187
4188fn parse_entry_header(bytes: &[u8], offset: &mut usize) -> Result<EntryHeader> {
4189 let first = next_byte(bytes, offset)?;
4190 let mut size = u64::from(first & 0x0f);
4191 let kind = match (first >> 4) & 0x07 {
4192 1 => PackObjectKind::Commit,
4193 2 => PackObjectKind::Tree,
4194 3 => PackObjectKind::Blob,
4195 4 => PackObjectKind::Tag,
4196 6 => PackObjectKind::OfsDelta,
4197 7 => PackObjectKind::RefDelta,
4198 other => {
4199 return Err(GitError::InvalidFormat(format!(
4200 "invalid pack object type {other}"
4201 )));
4202 }
4203 };
4204 let mut shift = 4;
4205 let mut byte = first;
4206 while byte & 0x80 != 0 {
4207 byte = next_byte(bytes, offset)?;
4208 let part = u64::from(byte & 0x7f);
4209 size = size
4210 .checked_add(
4211 part.checked_shl(shift)
4212 .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?,
4213 )
4214 .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?;
4215 shift += 7;
4216 }
4217 Ok(EntryHeader { kind, size })
4218}
4219
4220fn parse_ofs_delta_base_offset(bytes: &[u8], offset: &mut usize, entry_offset: u64) -> Result<u64> {
4221 let mut byte = next_byte(bytes, offset)?;
4222 let mut relative = u64::from(byte & 0x7f);
4223 while byte & 0x80 != 0 {
4224 byte = next_byte(bytes, offset)?;
4225 relative = relative
4226 .checked_add(1)
4227 .and_then(|value| value.checked_shl(7))
4228 .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
4229 .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
4230 }
4231 entry_offset
4232 .checked_sub(relative)
4233 .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
4234}
4235
4236fn resolve_pack_entries<F>(
4237 parsed: Vec<ParsedPackEntry>,
4238 format: ObjectFormat,
4239 external_base: &mut F,
4240) -> Result<Vec<PackObject>>
4241where
4242 F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
4243{
4244 let mut offset_to_index = HashMap::with_capacity(parsed.len());
4245 for (idx, entry) in parsed.iter().enumerate() {
4246 offset_to_index.insert(parsed_entry_offset(entry), idx);
4247 }
4248
4249 let mut resolved = vec![None; parsed.len()];
4250 let mut oid_to_index = HashMap::new();
4251 let mut unresolved = 0usize;
4252 for (idx, entry) in parsed.iter().enumerate() {
4253 match entry {
4254 ParsedPackEntry::Resolved(object) => {
4255 oid_to_index.insert(object.entry.oid, idx);
4256 resolved[idx] = Some(object.clone());
4257 }
4258 ParsedPackEntry::Delta { .. } => unresolved += 1,
4259 }
4260 }
4261
4262 while unresolved != 0 {
4263 let mut progress = false;
4264 for idx in 0..parsed.len() {
4265 if resolved[idx].is_some() {
4266 continue;
4267 }
4268 let ParsedPackEntry::Delta {
4269 base,
4270 compressed_size,
4271 delta_size,
4272 offset,
4273 delta,
4274 } = &parsed[idx]
4275 else {
4276 continue;
4277 };
4278 let Some(base_object) = delta_base_object(
4279 base,
4280 &offset_to_index,
4281 &oid_to_index,
4282 &resolved,
4283 external_base,
4284 )?
4285 else {
4286 continue;
4287 };
4288 let body = apply_pack_delta(base_object.body(), delta)?;
4289 let object = EncodedObject::new(base_object.object_type(), body);
4290 let oid = object.object_id(format)?;
4291 let pack_object = PackObject {
4292 entry: PackEntry {
4293 oid,
4294 compressed_size: *compressed_size,
4295 uncompressed_size: object.body.len() as u64,
4296 offset: *offset,
4297 },
4298 object,
4299 };
4300 if pack_object.entry.uncompressed_size != decoded_delta_result_size(delta)? {
4301 return Err(GitError::InvalidObject(
4302 "resolved delta size does not match delta header".into(),
4303 ));
4304 }
4305 if *delta_size != delta.len() as u64 {
4306 return Err(GitError::InvalidObject(format!(
4307 "pack delta declared {delta_size} bytes, decoded {}",
4308 delta.len()
4309 )));
4310 }
4311 oid_to_index.insert(oid, idx);
4312 resolved[idx] = Some(pack_object);
4313 unresolved -= 1;
4314 progress = true;
4315 }
4316 if !progress {
4317 return Err(GitError::Unsupported("unresolved delta base".into()));
4318 }
4319 }
4320
4321 resolved
4322 .into_iter()
4323 .map(|entry| entry.ok_or_else(|| GitError::InvalidFormat("unresolved pack entry".into())))
4324 .collect()
4325}
4326
4327fn parsed_entry_offset(entry: &ParsedPackEntry) -> u64 {
4328 match entry {
4329 ParsedPackEntry::Resolved(object) => object.entry.offset,
4330 ParsedPackEntry::Delta { offset, .. } => *offset,
4331 }
4332}
4333
/// Base object handed to delta application: either borrowed from the
/// already resolved entries of the pack being processed, or owned because
/// it was fetched through the external-base callback.
enum DeltaBaseObject<'a> {
    /// Base that lives in the caller's slice of resolved pack objects.
    Borrowed(&'a EncodedObject),
    /// Base returned by value from the external lookup.
    Owned(EncodedObject),
}
4338
4339impl DeltaBaseObject<'_> {
4340 fn object_type(&self) -> ObjectType {
4341 match self {
4342 Self::Borrowed(object) => object.object_type,
4343 Self::Owned(object) => object.object_type,
4344 }
4345 }
4346
4347 fn body(&self) -> &[u8] {
4348 match self {
4349 Self::Borrowed(object) => &object.body,
4350 Self::Owned(object) => &object.body,
4351 }
4352 }
4353}
4354
4355fn delta_base_object<'a, F>(
4356 base: &DeltaBase,
4357 offset_to_index: &HashMap<u64, usize>,
4358 oid_to_index: &HashMap<ObjectId, usize>,
4359 resolved: &'a [Option<PackObject>],
4360 external_base: &mut F,
4361) -> Result<Option<DeltaBaseObject<'a>>>
4362where
4363 F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
4364{
4365 match base {
4366 DeltaBase::Offset(offset) => {
4367 let Some(index) = offset_to_index.get(offset).copied() else {
4368 return Err(GitError::InvalidFormat(format!(
4369 "ofs-delta base offset {offset} not found"
4370 )));
4371 };
4372 Ok(resolved[index]
4373 .as_ref()
4374 .map(|object| DeltaBaseObject::Borrowed(&object.object)))
4375 }
4376 DeltaBase::Ref(oid) => {
4377 if let Some(index) = oid_to_index.get(oid).copied() {
4378 return Ok(resolved[index]
4379 .as_ref()
4380 .map(|object| DeltaBaseObject::Borrowed(&object.object)));
4381 }
4382 external_base(oid).map(|object| object.map(DeltaBaseObject::Owned))
4383 }
4384 }
4385}
4386
4387fn apply_pack_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
4388 let mut cursor = 0usize;
4389 let base_size = read_delta_varint(delta, &mut cursor)?;
4390 if base_size != base.len() as u64 {
4391 return Err(GitError::InvalidObject(format!(
4392 "delta base size mismatch: expected {base_size}, got {}",
4393 base.len()
4394 )));
4395 }
4396 let result_size = read_delta_varint(delta, &mut cursor)?;
4397 let result_size_hint = usize::try_from(result_size).unwrap_or(usize::MAX);
4406 let mut result = Vec::with_capacity(bounded_inflate_reserve(result_size_hint, delta.len()));
4407 while cursor < delta.len() {
4408 let command = delta[cursor];
4409 cursor += 1;
4410 if command & 0x80 != 0 {
4411 let copy_offset =
4412 read_delta_copy_value(delta, &mut cursor, command, &[0x01, 0x02, 0x04, 0x08])?;
4413 let mut copy_size =
4414 read_delta_copy_value(delta, &mut cursor, command, &[0x10, 0x20, 0x40])?;
4415 if copy_size == 0 {
4416 copy_size = 0x10000;
4417 }
4418 let start = usize::try_from(copy_offset)
4419 .map_err(|_| GitError::InvalidObject("delta copy offset overflows usize".into()))?;
4420 let len = usize::try_from(copy_size)
4421 .map_err(|_| GitError::InvalidObject("delta copy size overflows usize".into()))?;
4422 let end = start
4423 .checked_add(len)
4424 .ok_or_else(|| GitError::InvalidObject("delta copy range overflow".into()))?;
4425 let Some(slice) = base.get(start..end) else {
4426 return Err(GitError::InvalidObject(
4427 "delta copy range exceeds base object".into(),
4428 ));
4429 };
4430 result.extend_from_slice(slice);
4431 } else if command != 0 {
4432 let len = usize::from(command);
4433 let end = cursor
4434 .checked_add(len)
4435 .ok_or_else(|| GitError::InvalidObject("delta insert range overflow".into()))?;
4436 let Some(slice) = delta.get(cursor..end) else {
4437 return Err(GitError::InvalidObject(
4438 "delta insert range exceeds delta data".into(),
4439 ));
4440 };
4441 result.extend_from_slice(slice);
4442 cursor = end;
4443 } else {
4444 return Err(GitError::InvalidObject(
4445 "delta contains reserved zero command".into(),
4446 ));
4447 }
4448 }
4449 if result.len() as u64 != result_size {
4450 return Err(GitError::InvalidObject(format!(
4451 "delta result size mismatch: expected {result_size}, got {}",
4452 result.len()
4453 )));
4454 }
4455 Ok(result)
4456}
4457
4458fn decoded_delta_result_size(delta: &[u8]) -> Result<u64> {
4459 let mut cursor = 0usize;
4460 let _ = read_delta_varint(delta, &mut cursor)?;
4461 read_delta_varint(delta, &mut cursor)
4462}
4463
/// Number of bytes in each block that is hashed when indexing a delta base
/// and when probing a target for matches.
const DELTA_BLOCK_SIZE: usize = 16;

/// Distance in bytes between consecutive indexed anchors; equal to the block
/// size, so the regularly spaced indexed blocks do not overlap.
const DELTA_INDEX_STRIDE: usize = DELTA_BLOCK_SIZE;

/// Number of low hash bits used to select a bucket in the delta index.
const DELTA_BUCKET_BITS: usize = 12;
/// Total number of buckets (`2^DELTA_BUCKET_BITS`).
const DELTA_BUCKET_COUNT: usize = 1 << DELTA_BUCKET_BITS;
/// Mask that reduces a block hash to its bucket number.
const DELTA_BUCKET_MASK: usize = DELTA_BUCKET_COUNT - 1;
4479
/// Block-hash index over a delta base, used to find copyable ranges when
/// encoding a target against that base.
struct DeltaIndex<'a> {
    /// The base bytes the index was built from.
    base: &'a [u8],
    /// Indexed anchors, grouped so that all anchors of one bucket are adjacent.
    blocks: Vec<DeltaBlock>,
    /// Start offsets into `blocks`, one per bucket plus a trailing end marker:
    /// bucket `b` occupies `blocks[buckets[b]..buckets[b + 1]]`.
    buckets: Vec<usize>,
}
4491
/// One indexed anchor of a delta base.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct DeltaBlock {
    /// `block_hash` of the `DELTA_BLOCK_SIZE` bytes starting at `offset`.
    hash: u32,
    /// Byte offset of the block within the base.
    offset: usize,
}
4497
4498impl<'a> DeltaIndex<'a> {
4499 fn new(base: &'a [u8]) -> Self {
4500 let mut buckets = vec![0usize; DELTA_BUCKET_COUNT + 1];
4501 let mut anchors = Vec::with_capacity(delta_anchor_count(base.len()));
4502 for_each_delta_anchor(base.len(), |offset| {
4503 let hash = block_hash(&base[offset..offset + DELTA_BLOCK_SIZE]);
4504 buckets[delta_bucket(hash) + 1] += 1;
4505 anchors.push(DeltaBlock { hash, offset });
4506 });
4507 for idx in 1..buckets.len() {
4508 buckets[idx] += buckets[idx - 1];
4509 }
4510
4511 let mut next_offsets = buckets[..DELTA_BUCKET_COUNT].to_vec();
4512 let mut blocks = vec![DeltaBlock { hash: 0, offset: 0 }; anchors.len()];
4513 for anchor in anchors {
4514 let bucket = delta_bucket(anchor.hash);
4515 let next = &mut next_offsets[bucket];
4516 blocks[*next] = anchor;
4517 *next += 1;
4518 }
4519
4520 Self {
4521 base,
4522 blocks,
4523 buckets,
4524 }
4525 }
4526
4527 fn candidate_blocks(&self, hash: u32) -> impl Iterator<Item = &DeltaBlock> {
4528 let bucket = delta_bucket(hash);
4529 let start = self.buckets[bucket];
4530 let end = self.buckets[bucket + 1];
4531 self.blocks[start..end]
4532 .iter()
4533 .filter(move |block| block.hash == hash)
4534 }
4535
4536 fn has_hash(&self, hash: u32) -> bool {
4537 self.candidate_blocks(hash).next().is_some()
4538 }
4539
4540 fn has_shared_anchor(&self, target: &[u8]) -> bool {
4541 if target.len() < DELTA_BLOCK_SIZE || self.blocks.is_empty() {
4542 return false;
4543 }
4544 let last = target.len() - DELTA_BLOCK_SIZE;
4545 for offset in (0..=last).step_by(DELTA_INDEX_STRIDE) {
4546 let hash = block_hash(&target[offset..offset + DELTA_BLOCK_SIZE]);
4547 if self.has_hash(hash) {
4548 return true;
4549 }
4550 }
4551 if !last.is_multiple_of(DELTA_INDEX_STRIDE) {
4552 let hash = block_hash(&target[last..last + DELTA_BLOCK_SIZE]);
4553 if self.has_hash(hash) {
4554 return true;
4555 }
4556 }
4557 false
4558 }
4559
4560 fn delta(&self, target: &[u8]) -> Option<Vec<u8>> {
4562 if !self.has_shared_anchor(target) {
4563 return None;
4564 }
4565 let base = self.base;
4566 let mut delta = Vec::new();
4567 write_delta_varint(&mut delta, base.len() as u64);
4568 write_delta_varint(&mut delta, target.len() as u64);
4569
4570 let mut pending_insert_start = 0usize;
4571 let mut pos = 0usize;
4572 while pos < target.len() {
4573 let mut best_len = 0usize;
4574 let mut best_offset = 0usize;
4575 if pos + DELTA_BLOCK_SIZE <= target.len() {
4576 let hash = block_hash(&target[pos..pos + DELTA_BLOCK_SIZE]);
4577 for candidate in self.candidate_blocks(hash).take(DELTA_MAX_CHAIN) {
4578 let candidate = candidate.offset;
4581 let max_len = (base.len() - candidate).min(target.len() - pos);
4582 let mut len = 0usize;
4583 while len < max_len && base[candidate + len] == target[pos + len] {
4584 len += 1;
4585 }
4586 if len > best_len {
4587 best_len = len;
4588 best_offset = candidate;
4589 }
4590 }
4591 }
4592
4593 if best_len >= DELTA_BLOCK_SIZE {
4594 if pending_insert_start < pos {
4595 write_delta_insert(&mut delta, &target[pending_insert_start..pos]);
4596 }
4597 write_delta_copy(&mut delta, best_offset as u64, best_len as u64);
4598 pos += best_len;
4599 pending_insert_start = pos;
4600 } else {
4601 pos += 1;
4602 }
4603 }
4604 if pending_insert_start < target.len() {
4605 write_delta_insert(&mut delta, &target[pending_insert_start..]);
4606 }
4607 Some(delta)
4608 }
4609}
4610
4611fn for_each_delta_anchor(mut len: usize, mut visit: impl FnMut(usize)) {
4612 if len < DELTA_BLOCK_SIZE {
4613 return;
4614 }
4615 len -= DELTA_BLOCK_SIZE;
4616 for offset in (0..=len).step_by(DELTA_INDEX_STRIDE) {
4617 visit(offset);
4618 }
4619 if !len.is_multiple_of(DELTA_INDEX_STRIDE) {
4620 visit(len);
4621 }
4622}
4623
4624fn delta_anchor_count(len: usize) -> usize {
4625 if len < DELTA_BLOCK_SIZE {
4626 return 0;
4627 }
4628 let last = len - DELTA_BLOCK_SIZE;
4629 (last / DELTA_INDEX_STRIDE) + 1 + usize::from(!last.is_multiple_of(DELTA_INDEX_STRIDE))
4630}
4631
4632fn delta_bucket(hash: u32) -> usize {
4633 (hash as usize) & DELTA_BUCKET_MASK
4634}
4635
/// Upper bound on how many same-hash candidate blocks `DeltaIndex::delta`
/// examines for a single target position.
const DELTA_MAX_CHAIN: usize = 64;
4639
/// Hashes a block of bytes: starting from zero, each byte multiplies the
/// running value by `0x0100_0193` (wrapping) and is then XORed in.
fn block_hash(block: &[u8]) -> u32 {
    block.iter().fold(0u32, |state, &byte| {
        state.wrapping_mul(0x0100_0193) ^ u32::from(byte)
    })
}
4652
/// How a planned pack entry is stored.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PlannedBase {
    /// Stored whole, without a delta.
    None,
    /// Stored as `delta` against another object of the same pack, identified
    /// by its index in the object list.
    InPack { base_idx: usize, delta: Vec<u8> },
    /// Stored as `delta` against a base identified by object id rather than by
    /// an index into the object list.
    External { base_oid: ObjectId, delta: Vec<u8> },
}
4665
/// Delta-planning result for one object.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PlannedEntry {
    /// Chosen representation: whole object or delta against some base.
    base: PlannedBase,
}
4670
4671fn compress_planned_payloads(
4672 objects: &[&EncodedObject],
4673 plan: &[PlannedEntry],
4674 order: &[usize],
4675 compression_level: u32,
4676) -> Result<Vec<Vec<u8>>> {
4677 if order.is_empty() {
4678 return Ok(Vec::new());
4679 }
4680
4681 let worker_count = std::thread::available_parallelism()
4682 .map(|threads| threads.get())
4683 .unwrap_or(1)
4684 .min(PACK_PARALLEL_COMPRESSION_MAX_THREADS)
4685 .min(order.len());
4686 if worker_count <= 1 || order.len() < PACK_PARALLEL_COMPRESSION_MIN_OBJECTS {
4687 let mut payloads = Vec::with_capacity(order.len());
4688 for &idx in order {
4689 payloads.push(compressed_payload(
4690 planned_payload(objects, plan, idx),
4691 compression_level,
4692 )?);
4693 }
4694 return Ok(payloads);
4695 }
4696
4697 let chunk_len = order.len().div_ceil(worker_count);
4698 let mut payloads: Vec<Vec<u8>> = std::iter::repeat_with(Vec::new).take(order.len()).collect();
4699 std::thread::scope(|scope| {
4700 let mut handles = Vec::new();
4701 for (chunk_idx, chunk) in order.chunks(chunk_len).enumerate() {
4702 let chunk_start = chunk_idx * chunk_len;
4703 handles.push(scope.spawn(move || -> Result<Vec<(usize, Vec<u8>)>> {
4704 let mut chunk_payloads = Vec::with_capacity(chunk.len());
4705 for (offset, &idx) in chunk.iter().enumerate() {
4706 chunk_payloads.push((
4707 chunk_start + offset,
4708 compressed_payload(planned_payload(objects, plan, idx), compression_level)?,
4709 ));
4710 }
4711 Ok(chunk_payloads)
4712 }));
4713 }
4714
4715 let mut first_error = None;
4716 for handle in handles {
4717 match handle.join() {
4718 Ok(Ok(chunk_payloads)) => {
4719 if first_error.is_none() {
4720 for (pos, payload) in chunk_payloads {
4721 payloads[pos] = payload;
4722 }
4723 }
4724 }
4725 Ok(Err(err)) => {
4726 first_error.get_or_insert(err);
4727 }
4728 Err(_) => {
4729 first_error.get_or_insert_with(|| {
4730 GitError::InvalidObject("pack compression worker panicked".into())
4731 });
4732 }
4733 }
4734 }
4735
4736 match first_error {
4737 Some(err) => Err(err),
4738 None => Ok(()),
4739 }
4740 })?;
4741 Ok(payloads)
4742}
4743
4744fn planned_payload<'a>(
4745 objects: &'a [&'a EncodedObject],
4746 plan: &'a [PlannedEntry],
4747 idx: usize,
4748) -> &'a [u8] {
4749 match &plan[idx].base {
4750 PlannedBase::None => &objects[idx].body,
4751 PlannedBase::InPack { delta, .. } | PlannedBase::External { delta, .. } => delta,
4752 }
4753}
4754
4755fn compressed_payload(body: &[u8], compression_level: u32) -> Result<Vec<u8>> {
4756 let mut out = Vec::new();
4757 write_compressed_payload(&mut out, body, compression_level)?;
4758 Ok(out)
4759}
4760
/// Maximum number of external (thin-pack) bases tried for each target while
/// planning deltas.
const DELTA_MAX_EXTERNAL_BASES: usize = 64;
4764
/// One candidate base held in the delta-planning window.
struct DeltaWindowEntry<'a> {
    /// Index of the object in the object list being planned.
    idx: usize,
    /// Block-hash index built over that object's body.
    index: DeltaIndex<'a>,
}
4769
4770fn delta_type_rank(object_type: ObjectType) -> u8 {
4773 match object_type {
4774 ObjectType::Commit => 0,
4775 ObjectType::Tree => 1,
4776 ObjectType::Blob => 2,
4777 ObjectType::Tag => 3,
4778 }
4779}
4780
4781fn plan_pack_deltas(
4811 objects: &[&EncodedObject],
4812 object_ids: &[ObjectId],
4813 options: &PackWriteOptions,
4814) -> Result<(Vec<PlannedEntry>, Vec<usize>)> {
4815 let count = objects.len();
4816 let mut plan: Vec<PlannedEntry> = (0..count)
4817 .map(|_| PlannedEntry {
4818 base: PlannedBase::None,
4819 })
4820 .collect();
4821
4822 let mut order: Vec<usize> = (0..count).collect();
4826 if options.reorder && options.depth > 0 {
4827 order.sort_by(|&left, &right| {
4828 delta_type_rank(objects[left].object_type)
4829 .cmp(&delta_type_rank(objects[right].object_type))
4830 .then_with(|| objects[right].body.len().cmp(&objects[left].body.len()))
4831 .then_with(|| {
4832 object_ids[left]
4833 .as_bytes()
4834 .cmp(object_ids[right].as_bytes())
4835 })
4836 });
4837 }
4838
4839 if options.depth == 0 {
4840 return Ok((plan, order));
4841 }
4842
4843 let mut external_indexes: Vec<(ObjectId, ObjectType, DeltaIndex<'_>)> =
4846 Vec::with_capacity(options.thin_bases.len());
4847 for (oid, object) in &options.thin_bases {
4848 external_indexes.push((*oid, object.object_type, DeltaIndex::new(&object.body)));
4849 }
4850
4851 let mut depth = vec![0usize; count];
4854 let mut window: std::collections::VecDeque<DeltaWindowEntry<'_>> =
4856 std::collections::VecDeque::new();
4857
4858 for &idx in &order {
4859 let target = &objects[idx].body;
4860 let target_type = objects[idx].object_type;
4861
4862 let mut best_delta: Option<Vec<u8>> = None;
4863 let mut best_base = PlannedBase::None;
4864
4865 for base_entry in window.iter().rev() {
4867 let base_idx = base_entry.idx;
4868 if objects[base_idx].object_type != target_type {
4869 continue;
4870 }
4871 if depth[base_idx] + 1 > options.depth {
4874 continue;
4875 }
4876 let Some(delta) = base_entry.index.delta(target) else {
4877 continue;
4878 };
4879 if !delta_is_acceptable(&delta, target.len()) {
4880 continue;
4881 }
4882 if best_delta
4883 .as_ref()
4884 .is_none_or(|current| delta.len() < current.len())
4885 {
4886 best_delta = Some(delta);
4887 best_base = PlannedBase::InPack {
4888 base_idx,
4889 delta: Vec::new(),
4890 };
4891 }
4892 }
4893
4894 for (base_oid, base_type, base_index) in
4897 external_indexes.iter().take(DELTA_MAX_EXTERNAL_BASES)
4898 {
4899 if *base_type != target_type {
4900 continue;
4901 }
4902 let Some(delta) = base_index.delta(target) else {
4903 continue;
4904 };
4905 if !delta_is_acceptable(&delta, target.len()) {
4906 continue;
4907 }
4908 if best_delta
4909 .as_ref()
4910 .is_none_or(|current| delta.len() < current.len())
4911 {
4912 best_delta = Some(delta);
4913 best_base = PlannedBase::External {
4914 base_oid: *base_oid,
4915 delta: Vec::new(),
4916 };
4917 }
4918 }
4919
4920 if let Some(delta) = best_delta {
4921 match best_base {
4922 PlannedBase::InPack { base_idx, .. } => {
4923 depth[idx] = depth[base_idx] + 1;
4924 plan[idx].base = PlannedBase::InPack { base_idx, delta };
4925 }
4926 PlannedBase::External { base_oid, .. } => {
4927 depth[idx] = 1;
4928 plan[idx].base = PlannedBase::External { base_oid, delta };
4929 }
4930 PlannedBase::None => {}
4931 }
4932 }
4933
4934 window.push_back(DeltaWindowEntry {
4936 idx,
4937 index: DeltaIndex::new(&objects[idx].body),
4938 });
4939 while window.len() > options.window {
4940 window.pop_front();
4941 }
4942 }
4943
4944 Ok((plan, order))
4945}
4946
/// A delta is worth storing only when it is non-empty and strictly smaller
/// than the target it encodes.
fn delta_is_acceptable(delta: &[u8], target_len: usize) -> bool {
    (1..target_len).contains(&delta.len())
}
4954
/// Appends `value` as a little-endian base-128 varint: seven payload bits per
/// byte, with the high bit set on every byte except the last.
fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
    while value >= 0x80 {
        // More groups follow: emit the low seven bits with the continuation bit.
        out.push((value as u8) | 0x80);
        value >>= 7;
    }
    out.push(value as u8);
}
4968
/// Appends copy commands covering `size` bytes of the base starting at
/// `offset`.
///
/// Ranges longer than `0x10000` bytes are split into several commands. Each
/// command byte has the high bit set; bits 0-3 flag which of the four
/// little-endian offset bytes follow and bits 4-6 flag which of the three
/// size bytes follow. Zero bytes are omitted, and a full `0x10000` chunk is
/// encoded as size zero.
fn write_delta_copy(out: &mut Vec<u8>, mut offset: u64, mut size: u64) {
    const MAX_CHUNK: u64 = 0x10000;

    while size != 0 {
        let chunk = size.min(MAX_CHUNK);
        // Only a full chunk wraps to zero here.
        let encoded_size = chunk % MAX_CHUNK;
        let offset_le = offset.to_le_bytes();
        let size_le = encoded_size.to_le_bytes();

        // Reserve the command byte, then set its flag bits while appending
        // the operand bytes that are actually present.
        let command_at = out.len();
        out.push(0x80);
        for (bit, &operand) in offset_le[..4].iter().chain(&size_le[..3]).enumerate() {
            if operand != 0 {
                out[command_at] |= 1u8 << bit;
                out.push(operand);
            }
        }

        offset += chunk;
        size -= chunk;
    }
}
4998
/// Appends insert commands for `bytes`: each command is a length byte
/// (at most `0x7f`) followed by that many literal bytes.
fn write_delta_insert(out: &mut Vec<u8>, bytes: &[u8]) {
    for literal in bytes.chunks(0x7f) {
        out.push(literal.len() as u8);
        out.extend_from_slice(literal);
    }
}
5007
5008fn read_delta_varint(delta: &[u8], cursor: &mut usize) -> Result<u64> {
5009 let mut value = 0u64;
5010 let mut shift = 0u32;
5011 loop {
5012 let Some(byte) = delta.get(*cursor).copied() else {
5013 return Err(GitError::InvalidObject("truncated delta size".into()));
5014 };
5015 *cursor += 1;
5016 value = value
5017 .checked_add(
5018 u64::from(byte & 0x7f)
5019 .checked_shl(shift)
5020 .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?,
5021 )
5022 .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
5023 if byte & 0x80 == 0 {
5024 return Ok(value);
5025 }
5026 shift = shift
5027 .checked_add(7)
5028 .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
5029 }
5030}
5031
5032fn read_delta_copy_value(
5033 delta: &[u8],
5034 cursor: &mut usize,
5035 command: u8,
5036 masks: &[u8],
5037) -> Result<u64> {
5038 let mut value = 0u64;
5039 for (shift, mask) in masks.iter().enumerate() {
5040 if command & mask != 0 {
5041 let Some(byte) = delta.get(*cursor).copied() else {
5042 return Err(GitError::InvalidObject(
5043 "truncated delta copy command".into(),
5044 ));
5045 };
5046 *cursor += 1;
5047 value |= u64::from(byte) << (shift * 8);
5048 }
5049 }
5050 Ok(value)
5051}
5052
5053fn write_compressed_payload(out: &mut Vec<u8>, body: &[u8], compression_level: u32) -> Result<()> {
5054 let mut compressor = Compress::new(Compression::new(compression_level.min(9)), true);
5055 out.reserve(zlib_compress_bound(body.len()));
5056 let status = compressor
5057 .compress_vec(body, out, FlushCompress::Finish)
5058 .map_err(|err| GitError::InvalidObject(format!("zlib compression failed: {err}")))?;
5059 if status != Status::StreamEnd || compressor.total_in() != body.len() as u64 {
5060 return Err(GitError::InvalidObject(
5061 "zlib compression did not finish pack entry".into(),
5062 ));
5063 }
5064 Ok(())
5065}
5066
/// Estimates the largest compressed size for `len` input bytes:
/// `len + len/2^12 + len/2^14 + len/2^25 + 13`, saturating at `usize::MAX`.
fn zlib_compress_bound(len: usize) -> usize {
    [12u32, 14, 25]
        .into_iter()
        .map(|shift| len >> shift)
        .fold(len, usize::saturating_add)
        .saturating_add(13)
}
5073
5074fn write_entry_header(out: &mut Vec<u8>, object_type: ObjectType, size: u64) {
5075 let type_code = match object_type {
5076 ObjectType::Commit => 1,
5077 ObjectType::Tree => 2,
5078 ObjectType::Blob => 3,
5079 ObjectType::Tag => 4,
5080 };
5081 write_pack_entry_header_kind(out, type_code, size);
5082}
5083
5084fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, mut size: u64) {
5085 let mut byte = (type_code << 4) | ((size as u8) & 0x0f);
5086 size >>= 4;
5087 if size != 0 {
5088 byte |= 0x80;
5089 }
5090 out.push(byte);
5091 while size != 0 {
5092 let mut byte = (size as u8) & 0x7f;
5093 size >>= 7;
5094 if size != 0 {
5095 byte |= 0x80;
5096 }
5097 out.push(byte);
5098 }
5099}
5100
5101fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: u64) -> Result<()> {
5102 if relative == 0 {
5103 return Err(GitError::InvalidFormat(
5104 "ofs-delta relative offset cannot be zero".into(),
5105 ));
5106 }
5107 let mut value = relative;
5108 let mut bytes = vec![(value & 0x7f) as u8];
5109 value >>= 7;
5110 while value != 0 {
5111 value -= 1;
5112 bytes.push(((value & 0x7f) as u8) | 0x80);
5113 value >>= 7;
5114 }
5115 bytes.reverse();
5116 out.extend_from_slice(&bytes);
5117 Ok(())
5118}
5119
5120fn next_byte(bytes: &[u8], offset: &mut usize) -> Result<u8> {
5121 let Some(byte) = bytes.get(*offset).copied() else {
5122 return Err(GitError::InvalidFormat(
5123 "truncated pack entry header".into(),
5124 ));
5125 };
5126 *offset += 1;
5127 Ok(byte)
5128}
5129
/// Decodes a big-endian `u16` from the first two bytes of `bytes`.
///
/// Panics when fewer than two bytes are supplied.
fn u16_be(bytes: &[u8]) -> u16 {
    (u16::from(bytes[0]) << 8) | u16::from(bytes[1])
}
5133
/// Decodes a big-endian `u32` from the first four bytes of `bytes`.
///
/// Panics when fewer than four bytes are supplied.
fn u32_be(bytes: &[u8]) -> u32 {
    bytes[..4]
        .iter()
        .fold(0u32, |value, &byte| (value << 8) | u32::from(byte))
}
5137
/// Decodes a big-endian `u64` from the first eight bytes of `bytes`.
///
/// Panics when fewer than eight bytes are supplied.
fn u64_be(bytes: &[u8]) -> u64 {
    bytes[..8]
        .iter()
        .fold(0u64, |value, &byte| (value << 8) | u64::from(byte))
}
5143
5144fn read_pack_index_fanout(bytes: &[u8], offset: &mut usize) -> Result<[u32; 256]> {
5145 let mut fanout = [0u32; 256];
5146 let mut previous = 0u32;
5147 for slot in &mut fanout {
5148 *slot = u32_be(&bytes[*offset..*offset + 4]);
5149 if *slot < previous {
5150 return Err(GitError::InvalidFormat(
5151 "pack index fanout is not monotonic".into(),
5152 ));
5153 }
5154 previous = *slot;
5155 *offset += 4;
5156 }
5157 Ok(fanout)
5158}
5159
5160fn validate_pack_index_oid_fanout(idx: usize, oid_bytes: &[u8], fanout: &[u32; 256]) -> Result<()> {
5161 let expected_min = if oid_bytes[0] == 0 {
5162 0
5163 } else {
5164 fanout[usize::from(oid_bytes[0] - 1)]
5165 };
5166 if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
5167 return Err(GitError::InvalidFormat(
5168 "pack index object id is outside its fanout bucket".into(),
5169 ));
5170 }
5171 Ok(())
5172}
5173
5174fn pack_index_v2_offset(raw_offset: u32, large_offset_table: &[u8]) -> Result<u64> {
5175 if raw_offset & 0x8000_0000 == 0 {
5176 return Ok(u64::from(raw_offset));
5177 }
5178 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
5179 let large_start = large_idx
5180 .checked_mul(8)
5181 .ok_or_else(|| GitError::InvalidFormat("pack index large offset overflow".into()))?;
5182 let large_end = large_start
5183 .checked_add(8)
5184 .ok_or_else(|| GitError::InvalidFormat("pack index large offset overflow".into()))?;
5185 if large_end > large_offset_table.len() {
5186 return Err(GitError::InvalidFormat(
5187 "pack index large offset points past table".into(),
5188 ));
5189 }
5190 Ok(u64_be(&large_offset_table[large_start..large_end]))
5191}
5192
5193fn checked_range(
5194 start: usize,
5195 count: usize,
5196 width: usize,
5197 total: usize,
5198) -> Result<std::ops::Range<usize>> {
5199 let len = count
5200 .checked_mul(width)
5201 .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
5202 let end = start
5203 .checked_add(len)
5204 .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
5205 if end > total {
5206 return Err(GitError::InvalidFormat("truncated pack index table".into()));
5207 }
5208 Ok(start..end)
5209}
5210
5211fn validate_position_permutation(positions: &[u32]) -> Result<()> {
5212 let mut seen = vec![false; positions.len()];
5213 for position in positions {
5214 let idx = *position as usize;
5215 if idx >= positions.len() {
5216 return Err(GitError::InvalidFormat(
5217 "reverse index position points past object table".into(),
5218 ));
5219 }
5220 if seen[idx] {
5221 return Err(GitError::InvalidFormat(
5222 "reverse index position is duplicated".into(),
5223 ));
5224 }
5225 seen[idx] = true;
5226 }
5227 Ok(())
5228}
5229
5230fn parse_midx_pack_names(
5231 bytes: &[u8],
5232 chunks: &[MultiPackIndexChunk],
5233 pack_count: usize,
5234 version: u8,
5235) -> Result<Vec<String>> {
5236 let data = midx_chunk_data(bytes, chunks, *b"PNAM", true)?
5237 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing PNAM chunk".into()))?;
5238 let mut names = Vec::with_capacity(pack_count);
5239 let mut offset = 0usize;
5240 while names.len() < pack_count {
5241 let Some(relative_end) = data[offset..].iter().position(|byte| *byte == 0) else {
5242 return Err(GitError::InvalidFormat(
5243 "fatal: multi-pack-index pack-name chunk is too short".into(),
5244 ));
5245 };
5246 let name_bytes = &data[offset..offset + relative_end];
5247 if name_bytes.is_empty() {
5248 return Err(GitError::InvalidFormat(
5249 "multi-pack-index PNAM entry is empty".into(),
5250 ));
5251 }
5252 let name = std::str::from_utf8(name_bytes)
5253 .map_err(|err| GitError::InvalidFormat(err.to_string()))?;
5254 if name.bytes().any(|byte| matches!(byte, b'/' | b'\\')) {
5255 return Err(GitError::InvalidFormat(
5256 "multi-pack-index PNAM entry contains a path separator".into(),
5257 ));
5258 }
5259 names.push(name.to_string());
5260 offset += relative_end + 1;
5261 }
5262 let padding = &data[offset..];
5263 if padding.len() > 3 || padding.iter().any(|byte| *byte != 0) {
5264 return Err(GitError::InvalidFormat(
5265 "multi-pack-index PNAM padding is invalid".into(),
5266 ));
5267 }
5268 if version == 1 && names.windows(2).any(|pair| pair[0] > pair[1]) {
5269 return Err(GitError::InvalidFormat(
5270 "multi-pack-index v1 PNAM entries are not sorted".into(),
5271 ));
5272 }
5273 Ok(names)
5274}
5275
5276fn parse_midx_oid_fanout(
5277 bytes: &[u8],
5278 chunks: &[MultiPackIndexChunk],
5279) -> Result<([u32; 256], usize)> {
5280 let data = midx_chunk_data(bytes, chunks, *b"OIDF", true)?
5281 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDF chunk".into()))?;
5282 if data.len() != 256 * 4 {
5283 return Err(GitError::InvalidFormat(
5284 "error: multi-pack-index OID fanout is of the wrong size\nfatal: multi-pack-index required OID fanout chunk missing or corrupted".into(),
5285 ));
5286 }
5287 let mut fanout = [0u32; 256];
5288 let mut previous = 0u32;
5289 for (idx, slot) in fanout.iter_mut().enumerate() {
5290 let start = idx * 4;
5291 *slot = u32_be(&data[start..start + 4]);
5292 if *slot < previous {
5293 return Err(GitError::InvalidFormat(format!(
5294 "error: oid fanout out of order: fanout[{}] = {:x} > {:x} = fanout[{idx}]\nfatal: multi-pack-index required OID fanout chunk missing or corrupted",
5295 idx - 1,
5296 previous,
5297 *slot
5298 )));
5299 }
5300 previous = *slot;
5301 }
5302 Ok((fanout, fanout[255] as usize))
5303}
5304
5305fn parse_midx_object_ids(
5306 bytes: &[u8],
5307 chunks: &[MultiPackIndexChunk],
5308 format: ObjectFormat,
5309 object_count: usize,
5310 fanout: &[u32; 256],
5311) -> Result<Vec<ObjectId>> {
5312 let data = midx_chunk_data(bytes, chunks, *b"OIDL", true)?
5313 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
5314 let expected_len = object_count
5315 .checked_mul(format.raw_len())
5316 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into()))?;
5317 if data.len() != expected_len {
5318 return Err(GitError::InvalidFormat(
5319 "error: multi-pack-index OID lookup chunk is the wrong size\nfatal: multi-pack-index required OID lookup chunk missing or corrupted".into(),
5320 ));
5321 }
5322
5323 let mut ids = Vec::with_capacity(object_count);
5324 let mut counts = [0u32; 256];
5325 let mut previous_oid: Option<ObjectId> = None;
5326 for idx in 0..object_count {
5327 let start = idx * format.raw_len();
5328 let oid = ObjectId::from_raw(format, &data[start..start + format.raw_len()])?;
5329 if let Some(previous) = &previous_oid
5330 && previous.as_bytes() >= oid.as_bytes()
5331 {
5332 return Err(GitError::InvalidFormat(
5333 "multi-pack-index OIDL object ids are not strictly sorted".into(),
5334 ));
5335 }
5336 counts[oid.as_bytes()[0] as usize] = counts[oid.as_bytes()[0] as usize]
5337 .checked_add(1)
5338 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
5339 previous_oid = Some(oid);
5340 ids.push(oid);
5341 }
5342
5343 let mut running = 0u32;
5344 for (idx, count) in counts.iter().enumerate() {
5345 running = running
5346 .checked_add(*count)
5347 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
5348 if fanout[idx] != running {
5349 return Err(GitError::InvalidFormat(
5350 "multi-pack-index OIDF fanout does not match OIDL".into(),
5351 ));
5352 }
5353 }
5354 Ok(ids)
5355}
5356
5357fn parse_midx_object_offsets(
5358 bytes: &[u8],
5359 chunks: &[MultiPackIndexChunk],
5360 object_ids: Vec<ObjectId>,
5361 pack_count: u32,
5362) -> Result<Vec<MultiPackIndexEntry>> {
5363 let data = midx_chunk_data(bytes, chunks, *b"OOFF", true)?
5364 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
5365 let expected_len = object_ids
5366 .len()
5367 .checked_mul(8)
5368 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into()))?;
5369 if data.len() != expected_len {
5370 return Err(GitError::InvalidFormat(
5371 "error: multi-pack-index object offset chunk is the wrong size\nfatal: multi-pack-index required object offsets chunk missing or corrupted".into(),
5372 ));
5373 }
5374 let large_offsets = midx_chunk_data(bytes, chunks, *b"LOFF", false)?;
5375 if let Some(large_offsets) = large_offsets
5376 && large_offsets.len() % 8 != 0
5377 {
5378 return Err(GitError::InvalidFormat(
5379 "multi-pack-index LOFF chunk has invalid length".into(),
5380 ));
5381 }
5382
5383 let mut entries = Vec::with_capacity(object_ids.len());
5384 for (idx, oid) in object_ids.into_iter().enumerate() {
5385 let start = idx * 8;
5386 let pack_int_id = u32_be(&data[start..start + 4]);
5387 if pack_int_id >= pack_count {
5388 return Err(GitError::InvalidFormat(
5389 "multi-pack-index object points past pack table".into(),
5390 ));
5391 }
5392 let raw_offset = u32_be(&data[start + 4..start + 8]);
5393 let offset = if raw_offset & 0x8000_0000 == 0 {
5394 u64::from(raw_offset)
5395 } else {
5396 let Some(large_offsets) = large_offsets else {
5397 return Err(GitError::InvalidFormat(
5398 "multi-pack-index large offset missing LOFF chunk".into(),
5399 ));
5400 };
5401 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
5402 let large_start = large_idx.checked_mul(8).ok_or_else(|| {
5403 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
5404 })?;
5405 let large_end = large_start.checked_add(8).ok_or_else(|| {
5406 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
5407 })?;
5408 if large_end > large_offsets.len() {
5409 return Err(GitError::InvalidFormat(
5410 "fatal: multi-pack-index large offset out of bounds".into(),
5411 ));
5412 }
5413 u64_be(&large_offsets[large_start..large_end])
5414 };
5415 entries.push(MultiPackIndexEntry {
5416 oid,
5417 pack_int_id,
5418 offset,
5419 force_large_offset: raw_offset & 0x8000_0000 != 0,
5420 });
5421 }
5422 Ok(entries)
5423}
5424
5425fn parse_midx_reverse_index(
5426 bytes: &[u8],
5427 chunks: &[MultiPackIndexChunk],
5428 object_count: usize,
5429) -> Result<Option<Vec<u32>>> {
5430 let Some(data) = midx_chunk_data(bytes, chunks, *b"RIDX", false)? else {
5431 return Ok(None);
5432 };
5433 let expected_len = object_count
5434 .checked_mul(4)
5435 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index RIDX chunk overflow".into()))?;
5436 if data.len() != expected_len {
5437 return Err(GitError::InvalidFormat(
5438 "multi-pack-index reverse-index chunk is the wrong size".into(),
5439 ));
5440 }
5441 let mut positions = Vec::with_capacity(object_count);
5442 for idx in 0..object_count {
5443 let start = idx * 4;
5444 positions.push(u32_be(&data[start..start + 4]));
5445 }
5446 validate_position_permutation(&positions)?;
5447 Ok(Some(positions))
5448}
5449
5450fn parse_midx_bitmapped_packs(
5451 bytes: &[u8],
5452 chunks: &[MultiPackIndexChunk],
5453 pack_count: usize,
5454 object_count: usize,
5455) -> Result<Option<Vec<MultiPackBitmapPack>>> {
5456 let Some(data) = midx_chunk_data(bytes, chunks, *b"BTMP", false)? else {
5457 return Ok(None);
5458 };
5459 let expected_len = pack_count
5460 .checked_mul(8)
5461 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index BTMP chunk overflow".into()))?;
5462 if data.len() != expected_len {
5463 return Err(GitError::InvalidFormat(
5464 "multi-pack-index BTMP chunk has invalid length".into(),
5465 ));
5466 }
5467 let mut entries = Vec::with_capacity(pack_count);
5468 for idx in 0..pack_count {
5469 let start = idx * 8;
5470 let bitmap_pos = u32_be(&data[start..start + 4]);
5471 let bitmap_nr = u32_be(&data[start + 4..start + 8]);
5472 let bitmap_end = u64::from(bitmap_pos)
5473 .checked_add(u64::from(bitmap_nr))
5474 .ok_or_else(|| {
5475 GitError::InvalidFormat("multi-pack-index BTMP range overflow".into())
5476 })?;
5477 if bitmap_end > object_count as u64 {
5478 return Err(GitError::InvalidFormat(
5479 "multi-pack-index BTMP range points past object table".into(),
5480 ));
5481 }
5482 entries.push(MultiPackBitmapPack {
5483 bitmap_pos,
5484 bitmap_nr,
5485 });
5486 }
5487 Ok(Some(entries))
5488}
5489
5490fn midx_chunk_data<'a>(
5491 bytes: &'a [u8],
5492 chunks: &[MultiPackIndexChunk],
5493 id: [u8; 4],
5494 required: bool,
5495) -> Result<Option<&'a [u8]>> {
5496 let Some(chunk) = chunks.iter().find(|chunk| chunk.id == id) else {
5497 if required {
5498 return Err(GitError::InvalidFormat(format!(
5499 "multi-pack-index missing {} chunk",
5500 std::str::from_utf8(&id).unwrap_or("required")
5501 )));
5502 }
5503 return Ok(None);
5504 };
5505 let start = usize::try_from(chunk.offset)
5506 .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk offset overflow".into()))?;
5507 let len = usize::try_from(chunk.len)
5508 .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk length overflow".into()))?;
5509 let end = start
5510 .checked_add(len)
5511 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index chunk range overflow".into()))?;
5512 let Some(data) = bytes.get(start..end) else {
5513 return Err(GitError::InvalidFormat(
5514 "multi-pack-index chunk extends past file".into(),
5515 ));
5516 };
5517 Ok(Some(data))
5518}
5519
5520fn hash_function_id(format: ObjectFormat) -> u32 {
5521 match format {
5522 ObjectFormat::Sha1 => 1,
5523 ObjectFormat::Sha256 => 2,
5524 }
5525}
5526
/// Largest run length one EWAH run-length word (RLW) can record. It doubles as
/// the mask for the 32-bit running-length field, which sits directly above the
/// run bit (bit 0) of the RLW.
const EWAH_MAX_RUNNING_LEN: u64 = 0xffff_ffff;

/// Largest literal-word count one RLW can record. It doubles as the mask for
/// the 31-bit literal-length field, which starts at bit 33 of the RLW.
const EWAH_MAX_LITERAL_LEN: u64 = 0x7fff_ffff;

/// A 64-bit word with every bit set; such words are encoded as part of a run
/// of ones instead of being stored as literals.
const EWAH_ALL_ONES: u64 = u64::MAX;
5537
5538impl EwahBitmap {
5539 pub fn from_words(bit_size: u32, words: &[u64]) -> Result<Self> {
5553 let required_words = bit_size.div_ceil(64) as usize;
5554 if required_words > words.len() {
5555 return Err(GitError::InvalidFormat(format!(
5556 "EWAH bit_size {bit_size} requires {required_words} words but only {} supplied",
5557 words.len()
5558 )));
5559 }
5560 let significant = &words[..required_words];
5563 let mut builder = EwahBuilder::new(bit_size);
5564 for &word in significant {
5565 if word == 0 {
5566 builder.add_empty_words(false, 1);
5567 } else if word == EWAH_ALL_ONES {
5568 builder.add_empty_words(true, 1);
5569 } else {
5570 builder.add_literal(word);
5571 }
5572 }
5573 builder.finish()
5574 }
5575
5576 pub fn from_positions(bit_size: u32, positions: &[u32]) -> Result<Self> {
5582 let word_count = bit_size.div_ceil(64) as usize;
5583 let mut words = vec![0u64; word_count];
5584 for &position in positions {
5585 if position >= bit_size {
5586 return Err(GitError::InvalidFormat(format!(
5587 "EWAH bit position {position} out of range for bit_size {bit_size}"
5588 )));
5589 }
5590 let word_index = (position / 64) as usize;
5591 let bit_index = position % 64;
5592 words[word_index] |= 1u64 << bit_index;
5593 }
5594 Self::from_words(bit_size, &words)
5595 }
5596
5597 pub fn empty() -> Self {
5600 Self {
5601 bit_size: 0,
5602 words: Vec::new(),
5603 rlw_position: 0,
5604 }
5605 }
5606
5607 pub fn to_words(&self) -> Result<Vec<u64>> {
5613 let mut out = Vec::new();
5614 let mut word_idx = 0usize;
5615 while word_idx < self.words.len() {
5616 let rlw = self.words[word_idx];
5617 let run_bit = rlw & 1;
5618 let run_words = (rlw >> 1) & EWAH_MAX_RUNNING_LEN;
5619 let literal_words = (rlw >> 33) as usize;
5620 word_idx += 1;
5621 let fill = if run_bit == 1 { EWAH_ALL_ONES } else { 0 };
5622 for _ in 0..run_words {
5623 out.push(fill);
5624 }
5625 let literal_end = word_idx
5626 .checked_add(literal_words)
5627 .filter(|end| *end <= self.words.len())
5628 .ok_or_else(|| {
5629 GitError::InvalidFormat("EWAH literal words extend past word table".into())
5630 })?;
5631 out.extend_from_slice(&self.words[word_idx..literal_end]);
5632 word_idx = literal_end;
5633 }
5634 let required_words = (self.bit_size as usize).div_ceil(64);
5635 if out.len() < required_words {
5636 out.resize(required_words, 0);
5637 }
5638 out.truncate(required_words);
5639 Ok(out)
5640 }
5641
5642 pub fn to_positions(&self) -> Result<Vec<u32>> {
5644 let words = self.to_words()?;
5645 let mut positions = Vec::new();
5646 for (word_index, word) in words.iter().enumerate() {
5647 let mut remaining = *word;
5648 while remaining != 0 {
5649 let bit = remaining.trailing_zeros();
5650 let position = (word_index as u64) * 64 + u64::from(bit);
5651 if position < u64::from(self.bit_size) {
5652 positions.push(position as u32);
5654 }
5655 remaining &= remaining - 1;
5656 }
5657 }
5658 Ok(positions)
5659 }
5660
5661 pub fn to_bytes(&self) -> Vec<u8> {
5665 let mut out = Vec::with_capacity(12 + self.words.len() * 8);
5666 self.append_bytes(&mut out);
5667 out
5668 }
5669
5670 fn append_bytes(&self, out: &mut Vec<u8>) {
5671 out.extend_from_slice(&self.bit_size.to_be_bytes());
5672 out.extend_from_slice(&(self.words.len() as u32).to_be_bytes());
5673 for word in &self.words {
5674 out.extend_from_slice(&word.to_be_bytes());
5675 }
5676 out.extend_from_slice(&self.rlw_position.to_be_bytes());
5677 }
5678}
5679
/// Incremental encoder that produces the compressed word stream of an
/// [`EwahBitmap`].
struct EwahBuilder {
    /// Number of bits in the bitmap being built; copied unchanged into the result.
    bit_size: u32,
    /// Compressed stream so far: run-length words (RLWs) interleaved with the
    /// literal words they announce.
    words: Vec<u64>,
    /// Index into `words` of the RLW currently being filled in.
    rlw_position: usize,
}
5692
5693impl EwahBuilder {
5694 fn new(bit_size: u32) -> Self {
5695 Self {
5697 bit_size,
5698 words: vec![0u64],
5699 rlw_position: 0,
5700 }
5701 }
5702
5703 fn rlw(&self) -> u64 {
5704 self.words[self.rlw_position]
5705 }
5706
5707 fn set_rlw(&mut self, value: u64) {
5708 self.words[self.rlw_position] = value;
5709 }
5710
5711 fn rlw_running_len(&self) -> u64 {
5712 (self.rlw() >> 1) & EWAH_MAX_RUNNING_LEN
5713 }
5714
5715 fn rlw_running_bit(&self) -> bool {
5716 self.rlw() & 1 == 1
5717 }
5718
5719 fn rlw_literal_len(&self) -> u64 {
5720 self.rlw() >> 33
5721 }
5722
5723 fn set_running_bit(&mut self, bit: bool) {
5724 let mut value = self.rlw();
5725 value &= !1;
5726 value |= u64::from(bit);
5727 self.set_rlw(value);
5728 }
5729
5730 fn set_running_len(&mut self, len: u64) {
5731 let mut value = self.rlw();
5732 value &= !(EWAH_MAX_RUNNING_LEN << 1);
5733 value |= (len & EWAH_MAX_RUNNING_LEN) << 1;
5734 self.set_rlw(value);
5735 }
5736
5737 fn set_literal_len(&mut self, len: u64) {
5738 let mut value = self.rlw();
5739 value &= (1u64 << 33) - 1;
5740 value |= (len & EWAH_MAX_LITERAL_LEN) << 33;
5741 self.set_rlw(value);
5742 }
5743
5744 fn push_rlw(&mut self) {
5746 self.rlw_position = self.words.len();
5747 self.words.push(0);
5748 }
5749
5750 fn add_empty_words(&mut self, value: bool, mut number: u64) {
5758 while number > 0 {
5759 let can_extend = self.rlw_literal_len() == 0
5763 && (self.rlw_running_len() == 0 || self.rlw_running_bit() == value)
5764 && self.rlw_running_len() < EWAH_MAX_RUNNING_LEN;
5765 if !can_extend {
5766 self.push_rlw();
5767 }
5768 if self.rlw_running_len() == 0 {
5769 self.set_running_bit(value);
5770 }
5771 let available = EWAH_MAX_RUNNING_LEN - self.rlw_running_len();
5772 let take = available.min(number);
5773 self.set_running_len(self.rlw_running_len() + take);
5774 number -= take;
5775 }
5776 }
5777
5778 fn add_literal(&mut self, word: u64) {
5781 if self.rlw_literal_len() >= EWAH_MAX_LITERAL_LEN {
5782 self.push_rlw();
5783 }
5784 let literal_len = self.rlw_literal_len();
5785 self.set_literal_len(literal_len + 1);
5786 self.words.push(word);
5787 }
5788
5789 fn finish(self) -> Result<EwahBitmap> {
5790 let rlw_position = u32::try_from(self.rlw_position)
5791 .map_err(|_| GitError::InvalidFormat("EWAH RLW position overflow".into()))?;
5792 if self.words.len() > u32::MAX as usize {
5793 return Err(GitError::InvalidFormat("EWAH word table overflow".into()));
5794 }
5795 Ok(EwahBitmap {
5796 bit_size: self.bit_size,
5797 words: self.words,
5798 rlw_position,
5799 })
5800 }
5801}
5802
#[derive(Debug, Clone)]
/// Collects the inputs for a pack bitmap index and turns them into a
/// [`PackBitmapIndex`] (or its serialised bytes).
pub struct PackBitmapWriter {
    /// Hash format of the index; must match the format of `pack_checksum`.
    format: ObjectFormat,
    /// Checksum of the pack this bitmap describes.
    pack_checksum: ObjectId,
    /// Number of objects in the pack; every position must be below this.
    object_count: u32,
    /// Positions (indices into the `object_types` slice given to `new`) of
    /// commit objects, in ascending order.
    commit_positions: Vec<u32>,
    /// Positions of tree objects, in ascending order.
    tree_positions: Vec<u32>,
    /// Positions of blob objects, in ascending order.
    blob_positions: Vec<u32>,
    /// Positions of tag objects, in ascending order.
    tag_positions: Vec<u32>,
    /// Optional name-hash cache with one entry per object.
    name_hash_cache: Option<Vec<u32>>,
    /// Commits registered through `add_commit`, in insertion order.
    selected: Vec<SelectedCommit>,
}
5827
#[derive(Debug, Clone)]
/// One commit selected to receive its own reachability bitmap.
struct SelectedCommit {
    /// Value written as the entry's `object_position` in the built index.
    commit_index_position: u32,
    /// Flags byte stored with the entry.
    flags: u8,
    /// Positions of the objects marked in this commit's bitmap; includes the
    /// commit's own position.
    reachable: Vec<u32>,
}
5837
5838impl PackBitmapWriter {
5839 pub const FLAG_NONE: u8 = 0;
5843
5844 pub fn new(
5851 format: ObjectFormat,
5852 pack_checksum: ObjectId,
5853 object_types: &[ObjectType],
5854 ) -> Result<Self> {
5855 if object_types.len() > u32::MAX as usize {
5856 return Err(GitError::InvalidFormat(
5857 "too many objects for a pack bitmap".into(),
5858 ));
5859 }
5860 if pack_checksum.format() != format {
5861 return Err(GitError::InvalidObjectId(
5862 "pack checksum format does not match bitmap format".into(),
5863 ));
5864 }
5865 let object_count = object_types.len() as u32;
5866 let mut commit_positions = Vec::new();
5867 let mut tree_positions = Vec::new();
5868 let mut blob_positions = Vec::new();
5869 let mut tag_positions = Vec::new();
5870 for (index, object_type) in object_types.iter().enumerate() {
5871 let position = index as u32;
5872 match object_type {
5873 ObjectType::Commit => commit_positions.push(position),
5874 ObjectType::Tree => tree_positions.push(position),
5875 ObjectType::Blob => blob_positions.push(position),
5876 ObjectType::Tag => tag_positions.push(position),
5877 }
5878 }
5879 Ok(Self {
5880 format,
5881 pack_checksum,
5882 object_count,
5883 commit_positions,
5884 tree_positions,
5885 blob_positions,
5886 tag_positions,
5887 name_hash_cache: None,
5888 selected: Vec::new(),
5889 })
5890 }
5891
5892 pub fn with_name_hash_cache(mut self, cache: Vec<u32>) -> Result<Self> {
5898 if cache.len() != self.object_count as usize {
5899 return Err(GitError::InvalidFormat(format!(
5900 "name hash cache has {} entries but pack has {} objects",
5901 cache.len(),
5902 self.object_count
5903 )));
5904 }
5905 self.name_hash_cache = Some(cache);
5906 Ok(self)
5907 }
5908
5909 pub fn add_commit(
5921 &mut self,
5922 commit_position: u32,
5923 commit_index_position: u32,
5924 reachable: &[u32],
5925 ) -> Result<()> {
5926 if commit_position >= self.object_count {
5927 return Err(GitError::InvalidFormat(format!(
5928 "commit position {commit_position} out of range for {} objects",
5929 self.object_count
5930 )));
5931 }
5932 if commit_index_position >= self.object_count {
5933 return Err(GitError::InvalidFormat(format!(
5934 "commit index position {commit_index_position} out of range for {} objects",
5935 self.object_count
5936 )));
5937 }
5938 if !self.commit_positions.contains(&commit_position) {
5939 return Err(GitError::InvalidFormat(format!(
5940 "bitmap commit position {commit_position} is not a commit object"
5941 )));
5942 }
5943 for &position in reachable {
5944 if position >= self.object_count {
5945 return Err(GitError::InvalidFormat(format!(
5946 "reachable position {position} out of range for {} objects",
5947 self.object_count
5948 )));
5949 }
5950 }
5951 let mut reachable = reachable.to_vec();
5952 reachable.push(commit_position);
5953 self.selected.push(SelectedCommit {
5954 commit_index_position,
5955 flags: Self::FLAG_NONE,
5956 reachable,
5957 });
5958 Ok(())
5959 }
5960
5961 pub fn build(&self) -> Result<PackBitmapIndex> {
5968 let commits = EwahBitmap::from_positions(self.object_count, &self.commit_positions)?;
5969 let trees = EwahBitmap::from_positions(self.object_count, &self.tree_positions)?;
5970 let blobs = EwahBitmap::from_positions(self.object_count, &self.blob_positions)?;
5971 let tags = EwahBitmap::from_positions(self.object_count, &self.tag_positions)?;
5972
5973 let mut entries = Vec::with_capacity(self.selected.len());
5974 for selected in &self.selected {
5975 let bitmap = EwahBitmap::from_positions(self.object_count, &selected.reachable)?;
5976 entries.push(PackBitmapEntry {
5977 object_position: selected.commit_index_position,
5978 xor_offset: 0,
5979 flags: selected.flags,
5980 bitmap,
5981 });
5982 }
5983
5984 let mut options = PackBitmapIndex::OPTION_FULL_DAG;
5985 if self.name_hash_cache.is_some() {
5986 options |= PackBitmapIndex::OPTION_HASH_CACHE;
5987 }
5988
5989 let placeholder_checksum = ObjectId::null(self.format);
5994 Ok(PackBitmapIndex {
5995 version: 1,
5996 format: self.format,
5997 options,
5998 pack_checksum: self.pack_checksum.clone(),
5999 index_checksum: placeholder_checksum,
6000 type_bitmaps: PackBitmapTypeBitmaps {
6001 commits,
6002 trees,
6003 blobs,
6004 tags,
6005 },
6006 entries,
6007 name_hash_cache: self.name_hash_cache.clone(),
6008 })
6009 }
6010
6011 pub fn write(&self) -> Result<Vec<u8>> {
6014 self.build()?.write()
6015 }
6016}
6017
6018impl PackBitmapIndex {
6019 pub fn write(&self) -> Result<Vec<u8>> {
6033 if self.version != 1 {
6034 return Err(GitError::Unsupported(format!(
6035 "bitmap index version {}",
6036 self.version
6037 )));
6038 }
6039 let known_options = Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE;
6040 if self.options & !known_options != 0 {
6041 return Err(GitError::Unsupported(format!(
6042 "bitmap index options {:#06x}",
6043 self.options & !known_options
6044 )));
6045 }
6046 if self.pack_checksum.format() != self.format {
6047 return Err(GitError::InvalidObjectId(
6048 "bitmap pack checksum format does not match index format".into(),
6049 ));
6050 }
6051 if self.entries.len() > u32::MAX as usize {
6052 return Err(GitError::InvalidFormat(
6053 "too many bitmap index entries".into(),
6054 ));
6055 }
6056 let want_cache = self.options & Self::OPTION_HASH_CACHE != 0;
6057 match (&self.name_hash_cache, want_cache) {
6058 (Some(_), false) => {
6059 return Err(GitError::InvalidFormat(
6060 "name hash cache present without OPTION_HASH_CACHE".into(),
6061 ));
6062 }
6063 (None, true) => {
6064 return Err(GitError::InvalidFormat(
6065 "OPTION_HASH_CACHE set without a name hash cache".into(),
6066 ));
6067 }
6068 _ => {}
6069 }
6070
6071 let mut out = Vec::new();
6072 out.extend_from_slice(b"BITM");
6073 out.extend_from_slice(&self.version.to_be_bytes());
6074 out.extend_from_slice(&self.options.to_be_bytes());
6075 out.extend_from_slice(&(self.entries.len() as u32).to_be_bytes());
6076 out.extend_from_slice(self.pack_checksum.as_bytes());
6077
6078 self.type_bitmaps.commits.append_bytes(&mut out);
6079 self.type_bitmaps.trees.append_bytes(&mut out);
6080 self.type_bitmaps.blobs.append_bytes(&mut out);
6081 self.type_bitmaps.tags.append_bytes(&mut out);
6082
6083 for (idx, entry) in self.entries.iter().enumerate() {
6084 if entry.xor_offset as usize > idx {
6085 return Err(GitError::InvalidFormat(
6086 "bitmap index entry has invalid XOR offset".into(),
6087 ));
6088 }
6089 out.extend_from_slice(&entry.object_position.to_be_bytes());
6090 out.push(entry.xor_offset);
6091 out.push(entry.flags);
6092 entry.bitmap.append_bytes(&mut out);
6093 }
6094
6095 if let Some(cache) = &self.name_hash_cache {
6096 for value in cache {
6097 out.extend_from_slice(&value.to_be_bytes());
6098 }
6099 }
6100
6101 let checksum = sley_core::digest_bytes(self.format, &out)?;
6102 out.extend_from_slice(checksum.as_bytes());
6103 Ok(out)
6104 }
6105}
6106
6107pub fn write_bitmap(
6116 format: ObjectFormat,
6117 pack_checksum: ObjectId,
6118 object_types: &[ObjectType],
6119 commits: &[(u32, u32, Vec<u32>)],
6120 name_hash_cache: Option<Vec<u32>>,
6121) -> Result<Vec<u8>> {
6122 let mut writer = PackBitmapWriter::new(format, pack_checksum, object_types)?;
6123 if let Some(cache) = name_hash_cache {
6124 writer = writer.with_name_hash_cache(cache)?;
6125 }
6126 for (commit_position, commit_index_position, reachable) in commits {
6127 writer.add_commit(*commit_position, *commit_index_position, reachable)?;
6128 }
6129 writer.write()
6130}
6131
6132#[cfg(test)]
6133mod tests {
6134 use super::*;
6135 use flate2::Compression;
6136 use flate2::read::ZlibDecoder;
6137 use flate2::write::ZlibEncoder;
6138 use std::fs;
6139 use std::io::Read;
6140 use std::io::Write;
6141 use std::path::{Path, PathBuf};
6142 use std::process::Command;
6143 use std::time::{SystemTime, UNIX_EPOCH};
6144
6145 fn delta_pack_options(prefer_ofs_delta: bool) -> PackWriteOptions {
6146 PackWriteOptions::new()
6147 .with_prefer_ofs_delta(prefer_ofs_delta)
6148 .with_reorder(false)
6149 }
6150
6151 #[test]
6152 fn parses_single_blob_pack() {
6153 let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
6154 let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
6155 assert_eq!(parsed.version, 2);
6156 assert_eq!(parsed.entries.len(), 1);
6157 let object = &parsed.entries[0].object;
6158 assert_eq!(object.object_type, ObjectType::Blob);
6159 assert_eq!(object.body, b"hello\n");
6160 assert_eq!(
6161 parsed.entries[0].entry.oid.to_hex(),
6162 "ce013625030ba8dba906f756967f9e9ca394464a"
6163 );
6164 }
6165
6166 #[test]
6167 fn parses_single_blob_pack_sha256() {
6168 let pack = single_object_pack(ObjectFormat::Sha256, ObjectType::Blob, b"hello\n");
6169 let parsed =
6170 PackFile::parse(&pack, ObjectFormat::Sha256).expect("test operation should succeed");
6171 assert_eq!(parsed.version, 2);
6172 assert_eq!(parsed.entries.len(), 1);
6173 let object = &parsed.entries[0].object;
6174 assert_eq!(object.object_type, ObjectType::Blob);
6175 assert_eq!(object.body, b"hello\n");
6176 assert_eq!(
6177 parsed.entries[0].entry.oid,
6178 object
6179 .object_id(ObjectFormat::Sha256)
6180 .expect("test operation should succeed")
6181 );
6182 }
6183
6184 #[test]
6185 fn parses_bundle_pack_payload_with_bundle_format() {
6186 let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
6187 let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"bundle\n")
6188 .expect("test operation should succeed");
6189 let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
6190 .into_bytes()
6191 .into_iter()
6192 .chain(pack)
6193 .collect::<Vec<_>>();
6194 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6195 .expect("test operation should succeed");
6196
6197 let parsed = PackFile::parse_bundle(&bundle).expect("test operation should succeed");
6198 assert_eq!(parsed.entries.len(), 1);
6199 assert_eq!(parsed.entries[0].object.object_type, ObjectType::Blob);
6200 assert_eq!(parsed.entries[0].object.body, b"bundle\n");
6201 }
6202
    /// Builds a one-entry pack whose entry header declares `declared_size`
    /// while the zlib stream actually contains `real_body`.
    ///
    /// The pack has a valid trailing checksum for `format`, so only the size
    /// mismatch is wrong.
    fn lying_size_blob_pack(format: ObjectFormat, declared_size: u64, real_body: &[u8]) -> Vec<u8> {
        let mut pack = Vec::new();
        // Pack header: signature, version 2, one object.
        pack.extend_from_slice(b"PACK");
        pack.extend_from_slice(&2u32.to_be_bytes());
        pack.extend_from_slice(&1u32.to_be_bytes());
        // Kind 3 with the (possibly bogus) declared size; presumably the blob
        // type code of the pack format, matching this helper's name.
        write_pack_entry_header_kind(&mut pack, 3, declared_size);
        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
        encoder
            .write_all(real_body)
            .expect("test operation should succeed");
        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
        // Trailer: digest over everything written so far.
        let checksum =
            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
        pack.extend_from_slice(checksum.as_bytes());
        pack
    }
6225
6226 #[test]
6239 fn rejects_decompression_bomb_header_without_oom() {
6240 for &declared in &[u64::MAX, 100 * 1024 * 1024 * 1024, u64::from(u32::MAX) * 4] {
6241 let pack = lying_size_blob_pack(ObjectFormat::Sha1, declared, b"tiny\n");
6242 let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
6243 let result = handle.join();
6244 assert!(
6246 result.is_ok(),
6247 "parsing a bomb header (declared={declared}) panicked instead of erroring cleanly"
6248 );
6249 let parse_result = result.expect("parse thread should not panic on a bomb header");
6251 assert!(
6252 parse_result.is_err(),
6253 "bomb header (declared={declared}) should be rejected as invalid"
6254 );
6255 }
6256 }
6257
    /// Builds a two-entry pack (a base blob plus a delta against it) whose
    /// delta header declares `declared_result_size` as the result size, while
    /// its instructions really produce `"hello world"`.
    ///
    /// `delta_kind` selects whether the delta names its base by pack offset or
    /// by object id. The pack carries a valid trailing checksum for `format`.
    fn lying_result_size_delta_pack(
        format: ObjectFormat,
        declared_result_size: u64,
        delta_kind: DeltaKind,
    ) -> Vec<u8> {
        let base = b"hello";
        let result = b"hello world";
        // Delta header: base size, then the (possibly bogus) result size.
        let mut delta = Vec::new();
        write_delta_varint(&mut delta, base.len() as u64);
        write_delta_varint(&mut delta, declared_result_size);
        let suffix = &result[base.len()..];
        // Presumably a copy instruction (0x90 followed by one size byte, no
        // offset bytes) covering the whole base — confirm against the
        // pack-format delta encoding.
        delta.push(0x90);
        delta.push(base.len() as u8);
        // Insert instruction: a length byte followed by the literal bytes.
        delta.push(suffix.len() as u8);
        delta.extend_from_slice(suffix);

        // Pack header: signature, version 2, two objects.
        let mut pack = Vec::new();
        pack.extend_from_slice(b"PACK");
        pack.extend_from_slice(&2u32.to_be_bytes());
        pack.extend_from_slice(&2u32.to_be_bytes());

        // Entry 1: the base blob, stored whole.
        let base_offset = pack.len();
        write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
        encoder
            .write_all(base)
            .expect("test operation should succeed");
        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));

        // Entry 2: the delta; kind 6 for an offset delta, 7 for a ref delta.
        let delta_offset = pack.len();
        write_pack_entry_header_kind(
            &mut pack,
            match delta_kind {
                DeltaKind::Offset => 6,
                DeltaKind::Ref => 7,
            },
            delta.len() as u64,
        );
        // Base reference: distance back to the base entry, or the base's object id.
        match delta_kind {
            DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
            DeltaKind::Ref => {
                let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
                    .expect("test operation should succeed");
                pack.extend_from_slice(base_oid.as_bytes());
            }
        }
        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
        encoder
            .write_all(&delta)
            .expect("test operation should succeed");
        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));

        // Trailer: digest over everything written so far.
        let checksum =
            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
        pack.extend_from_slice(checksum.as_bytes());
        pack
    }
6324
6325 #[test]
6335 fn rejects_delta_result_size_bomb_without_oom() {
6336 let bombs: &[u64] = &[u64::MAX, 1024 * 1024 * 1024 * 1024];
6337 for &declared in bombs {
6338 for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
6339 let pack = lying_result_size_delta_pack(ObjectFormat::Sha1, declared, delta_kind);
6340 let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
6341 let join_result = handle.join();
6342 assert!(
6343 join_result.is_ok(),
6344 "delta bomb (declared={declared}, kind={delta_kind:?}) panicked/aborted \
6345 instead of erroring cleanly"
6346 );
6347 let parse_result =
6348 join_result.expect("parse thread should not panic on a delta bomb");
6349 assert!(
6350 parse_result.is_err(),
6351 "delta bomb (declared={declared}, kind={delta_kind:?}) should be rejected \
6352 as invalid (result.len() != declared)"
6353 );
6354 }
6355 }
6356 }
6357
6358 #[test]
6362 fn applies_legitimate_delta_after_result_size_bound() {
6363 for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
6364 let base = b"hello";
6365 let result = b"hello world";
6366 let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, delta_kind);
6367 let parsed = PackFile::parse_sha1(&pack).expect("legitimate delta should resolve");
6368 assert_eq!(parsed.entries.len(), 2);
6369 assert_eq!(parsed.entries[0].object.body, base);
6370 assert_eq!(parsed.entries[1].object.body, result);
6371 }
6372 }
6373
6374 #[test]
6375 fn bounded_inflate_reserve_caps_attacker_declared_size() {
6376 assert_eq!(bounded_inflate_reserve(u64::MAX as usize, 10), 10 * 1032);
6378 assert_eq!(
6380 bounded_inflate_reserve(usize::MAX, usize::MAX),
6381 MAX_INFLATE_RESERVE
6382 );
6383 assert_eq!(bounded_inflate_reserve(1000, 500), 1000);
6387 assert_eq!(bounded_inflate_reserve(0, 0), 64);
6389 }
6390
6391 #[test]
6392 fn rejects_bundle_pack_payload_with_wrong_object_format() {
6393 let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
6394 let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha256, "blob", b"bundle\n")
6395 .expect("test operation should succeed");
6396 let bundle_bytes =
6397 format!("# v3 git bundle\n@object-format=sha256\n{oid} refs/heads/main\n\n")
6398 .into_bytes()
6399 .into_iter()
6400 .chain(pack)
6401 .collect::<Vec<_>>();
6402 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6403 .expect("test operation should succeed");
6404
6405 assert!(PackFile::parse_bundle(&bundle).is_err());
6406 }
6407
6408 fn assert_pack_index_view_matches_owned(index: &[u8], format: ObjectFormat) {
6409 let owned = PackIndex::parse(index, format).expect("test operation should succeed");
6410 let view = PackIndexView::parse(index, format).expect("test operation should succeed");
6411 let owned_view =
6412 PackIndexViewData::parse(Arc::from(index.to_vec().into_boxed_slice()), format)
6413 .expect("test operation should succeed");
6414
6415 assert_eq!(view.version, owned.version);
6416 assert_eq!(view.count, owned.entries.len());
6417 assert_eq!(view.count(), owned.entries.len());
6418 assert_eq!(view.fanout(), &owned.fanout);
6419 assert_eq!(view.pack_checksum, owned.pack_checksum);
6420 assert_eq!(view.index_checksum, owned.index_checksum);
6421 assert_eq!(owned_view.version, owned.version);
6422 assert_eq!(owned_view.count(), owned.entries.len());
6423 assert_eq!(owned_view.fanout(), &owned.fanout);
6424 assert_eq!(owned_view.pack_checksum, owned.pack_checksum);
6425 assert_eq!(owned_view.index_checksum, owned.index_checksum);
6426 for entry in &owned.entries {
6427 let owned_found = owned
6428 .find(&entry.oid)
6429 .expect("test operation should succeed");
6430 let expected = Some(PackIndexLookup {
6431 crc32: owned_found.crc32,
6432 offset: owned_found.offset,
6433 });
6434 assert_eq!(view.find(&entry.oid), expected);
6435 assert_eq!(owned_view.find(&entry.oid), expected);
6436 }
6437 }
6438
6439 #[test]
6440 fn writes_pack_and_index_that_round_trip() {
6441 let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
6442 let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
6443 .expect("test operation should succeed");
6444 let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
6445 let index =
6446 PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
6447 let oid = object
6448 .object_id(ObjectFormat::Sha1)
6449 .expect("test operation should succeed");
6450 assert_eq!(pack.entries[0].object, object);
6451 assert_eq!(index.pack_checksum, pack.checksum);
6452 assert_eq!(
6453 index
6454 .find(&oid)
6455 .expect("test operation should succeed")
6456 .offset,
6457 12
6458 );
6459 }
6460
6461 #[test]
6462 fn pack_index_view_matches_owned_index_for_generated_sha1_pack() {
6463 let objects = (0..8)
6464 .map(|idx| {
6465 EncodedObject::new(
6466 ObjectType::Blob,
6467 format!("borrowed pack index view sha1 object {idx}\n").into_bytes(),
6468 )
6469 })
6470 .collect::<Vec<_>>();
6471 let written = PackFile::write_packed(&objects, ObjectFormat::Sha1)
6472 .expect("test operation should succeed");
6473
6474 assert_pack_index_view_matches_owned(&written.index, ObjectFormat::Sha1);
6475
6476 let view =
6477 PackIndexView::parse_v2_sha1(&written.index).expect("test operation should succeed");
6478 let missing = sley_core::object_id_for_bytes(
6479 ObjectFormat::Sha1,
6480 "blob",
6481 b"not present in borrowed index\n",
6482 )
6483 .expect("test operation should succeed");
6484 assert_eq!(view.find(&missing), None);
6485 }
6486
6487 #[test]
6488 fn writes_sha256_pack_and_index_that_round_trip() {
6489 let object = EncodedObject::new(ObjectType::Blob, b"hello sha256\n".to_vec());
6490 let written =
6491 PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
6492 .expect("test operation should succeed");
6493 let pack = PackFile::parse(&written.pack, ObjectFormat::Sha256)
6494 .expect("test operation should succeed");
6495 let index = PackIndex::parse(&written.index, ObjectFormat::Sha256)
6496 .expect("test operation should succeed");
6497 let oid = object
6498 .object_id(ObjectFormat::Sha256)
6499 .expect("test operation should succeed");
6500 assert_eq!(pack.entries[0].object, object);
6501 assert_eq!(index.pack_checksum, pack.checksum);
6502 assert_eq!(index.pack_checksum.format(), ObjectFormat::Sha256);
6503 assert_eq!(index.index_checksum.format(), ObjectFormat::Sha256);
6504 assert_eq!(
6505 index
6506 .find(&oid)
6507 .expect("test operation should succeed")
6508 .offset,
6509 12
6510 );
6511 }
6512
6513 #[test]
6514 fn pack_index_view_matches_owned_index_for_generated_sha256_pack() {
6515 let objects = (0..4)
6516 .map(|idx| {
6517 EncodedObject::new(
6518 ObjectType::Blob,
6519 format!("borrowed pack index view sha256 object {idx}\n").into_bytes(),
6520 )
6521 })
6522 .collect::<Vec<_>>();
6523 let written = PackFile::write_undeltified(&objects, ObjectFormat::Sha256)
6524 .expect("test operation should succeed");
6525
6526 assert_pack_index_view_matches_owned(&written.index, ObjectFormat::Sha256);
6527 }
6528
6529 #[test]
6530 fn indexes_existing_sha256_pack_bytes() {
6531 let object = EncodedObject::new(ObjectType::Blob, b"index raw sha256 pack\n".to_vec());
6532 let written =
6533 PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
6534 .expect("test operation should succeed");
6535
6536 let indexed = PackIndex::write_v2_for_pack(&written.pack, ObjectFormat::Sha256)
6537 .expect("test operation should succeed");
6538 let index = PackIndex::parse(&indexed.index, ObjectFormat::Sha256)
6539 .expect("test operation should succeed");
6540
6541 assert_eq!(indexed.pack_checksum, written.checksum);
6542 assert_eq!(indexed.entries, written.entries);
6543 assert_eq!(index.pack_checksum, written.checksum);
6544 assert_eq!(index.entries, written.entries);
6545 }
6546
6547 #[test]
6548 fn indexes_existing_delta_pack_bytes() {
6549 let (base, changed) = similar_blob_objects();
6550 let options = delta_pack_options(true);
6551 let written = PackFile::write_packed_with_options(
6552 &[base, changed.clone()],
6553 ObjectFormat::Sha1,
6554 &options,
6555 )
6556 .expect("test operation should succeed");
6557
6558 let indexed = PackIndex::write_v2_for_pack_sha1(&written.pack)
6559 .expect("test operation should succeed");
6560 let index =
6561 PackIndex::parse_v2_sha1(&indexed.index).expect("test operation should succeed");
6562 let changed_oid = changed
6563 .object_id(ObjectFormat::Sha1)
6564 .expect("test operation should succeed");
6565
6566 assert_eq!(indexed.pack_checksum, written.checksum);
6567 assert_eq!(indexed.entries, written.entries);
6568 assert_eq!(
6569 index
6570 .find(&changed_oid)
6571 .expect("test operation should succeed")
6572 .offset,
6573 written.entries[1].offset
6574 );
6575 assert_eq!(
6576 index
6577 .find(&changed_oid)
6578 .expect("test operation should succeed")
6579 .crc32,
6580 written.entries[1].crc32
6581 );
6582 }
6583
6584 #[test]
6585 fn writes_ref_delta_pack_and_index_that_round_trip() {
6586 let (base, changed) = similar_blob_objects();
6587 let options = delta_pack_options(false);
6588 let written = PackFile::write_packed_with_options(
6589 &[base.clone(), changed.clone()],
6590 ObjectFormat::Sha1,
6591 &options,
6592 )
6593 .expect("test operation should succeed");
6594 let mut second_offset = written.entries[1].offset as usize;
6595 let header = parse_entry_header(&written.pack, &mut second_offset)
6596 .expect("test operation should succeed");
6597 assert_eq!(header.kind, PackObjectKind::RefDelta);
6598
6599 let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
6600 let index =
6601 PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
6602 let oid = changed
6603 .object_id(ObjectFormat::Sha1)
6604 .expect("test operation should succeed");
6605 assert_eq!(pack.entries[0].object, base);
6606 assert_eq!(pack.entries[1].object, changed);
6607 assert_eq!(index.pack_checksum, pack.checksum);
6608 assert_eq!(
6609 index
6610 .find(&oid)
6611 .expect("test operation should succeed")
6612 .offset,
6613 written.entries[1].offset
6614 );
6615 }
6616
6617 #[test]
6618 fn read_object_at_matches_full_parse_for_ofs_delta_pack() {
6619 let (base, changed) = similar_blob_objects();
6620 let options = delta_pack_options(true);
6621 let written = PackFile::write_packed_with_options(
6622 &[base, changed.clone()],
6623 ObjectFormat::Sha1,
6624 &options,
6625 )
6626 .expect("test operation should succeed");
6627 let mut second = written.entries[1].offset as usize;
6629 assert_eq!(
6630 parse_entry_header(&written.pack, &mut second)
6631 .expect("test operation should succeed")
6632 .kind,
6633 PackObjectKind::OfsDelta
6634 );
6635 let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
6637 for po in &parsed.entries {
6638 let got =
6639 read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |_| {
6640 Ok(None)
6641 })
6642 .expect("test operation should succeed");
6643 assert_eq!(*got, po.object, "offset {}", po.entry.offset);
6644 }
6645 }
6646
6647 #[derive(Default)]
6650 struct MapHeaderTypeCache(HashMap<u64, (ObjectType, u64)>);
6651
6652 impl HeaderTypeCache for MapHeaderTypeCache {
6653 fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
6654 self.0.get(&pack_offset).copied()
6655 }
6656 fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
6657 self.0.insert(pack_offset, header);
6658 }
6659 }
6660
/// Header lookups that go through a `HeaderTypeCache` must agree with the
/// uncached lookup, both while the cache is being filled and once it is
/// reused.
#[test]
fn read_object_header_at_cached_matches_uncached_cold_and_warm_for_ofs_delta() {
    let (base, changed) = similar_blob_objects();
    let written = PackFile::write_packed_with_options(
        &[base, changed],
        ObjectFormat::Sha1,
        &delta_pack_options(true),
    )
    .expect("test operation should succeed");

    // Guard: the second entry has to be an offset delta for this test to
    // exercise a delta chain at all.
    let mut cursor = written.entries[1].offset as usize;
    let second_header = parse_entry_header(&written.pack, &mut cursor)
        .expect("test operation should succeed");
    assert_eq!(second_header.kind, PackObjectKind::OfsDelta);

    let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
    let mut cache = MapHeaderTypeCache::default();

    // First pass: the cache starts empty, so each cached call is a cold one.
    for po in &parsed.entries {
        let expected = (po.object.object_type, po.object.body.len() as u64);
        let uncached =
            read_object_header_at(&written.pack, po.entry.offset, ObjectFormat::Sha1, |_| {
                Ok(None)
            })
            .expect("test operation should succeed");
        assert_eq!(
            uncached, expected,
            "uncached header at offset {}",
            po.entry.offset
        );

        let cold = read_object_header_at_with_cache(
            &written.pack,
            po.entry.offset,
            ObjectFormat::Sha1,
            |_| Ok(None),
            &mut cache,
        )
        .expect("test operation should succeed");
        assert_eq!(cold, uncached, "cold cache at offset {}", po.entry.offset);
    }

    // Second pass: the callback panics, so the test fails loudly if a warm
    // lookup ever invokes it.
    for po in &parsed.entries {
        let expected = (po.object.object_type, po.object.body.len() as u64);
        let warm = read_object_header_at_with_cache(
            &written.pack,
            po.entry.offset,
            ObjectFormat::Sha1,
            |_| panic!("warm cache must not re-walk the chain"),
            &mut cache,
        )
        .expect("test operation should succeed");
        assert_eq!(warm, expected, "warm cache at offset {}", po.entry.offset);
    }
}
6722
/// Reading each entry of a pack written with `delta_pack_options(false)` by
/// its offset must yield the same object as a full parse, when the lookup
/// callback can supply objects by id.
#[test]
fn read_object_at_matches_full_parse_for_ref_delta_pack() {
    let (base, changed) = similar_blob_objects();
    let options = delta_pack_options(false);
    // Neither object is needed after the write, so both are moved into the
    // input slice instead of being cloned.
    let written =
        PackFile::write_packed_with_options(&[base, changed], ObjectFormat::Sha1, &options)
            .expect("test operation should succeed");
    let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");

    // Every parsed object, keyed by id, so the callback can answer lookups.
    let by_oid: HashMap<ObjectId, Arc<EncodedObject>> = parsed
        .entries
        .iter()
        .map(|po| (po.entry.oid, Arc::new(po.object.clone())))
        .collect();

    for po in &parsed.entries {
        let got =
            read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |oid| {
                Ok(by_oid.get(oid).cloned())
            })
            .expect("test operation should succeed");
        assert_eq!(*got, po.object);
    }
}
6748
6749 #[derive(Default)]
6753 struct CountingDeltaCache {
6754 map: std::cell::RefCell<HashMap<u64, Arc<EncodedObject>>>,
6755 hits: std::cell::Cell<usize>,
6756 inserts: std::cell::Cell<usize>,
6757 }
6758
6759 impl PackDeltaCache for CountingDeltaCache {
6760 fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
6761 let hit = self.map.borrow().get(&offset).cloned();
6762 if hit.is_some() {
6763 self.hits.set(self.hits.get() + 1);
6764 }
6765 hit
6766 }
6767 fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
6768 self.inserts.set(self.inserts.get() + 1);
6769 self.map.borrow_mut().insert(offset, object);
6770 }
6771 }
6772
/// Cached reads must return the same objects as a full parse across two
/// passes over the pack, and the cache must report at least one hit.
#[test]
fn read_object_at_with_cache_matches_uncached_and_reuses_bases() {
    // Eight blobs sharing a 4096-byte prefix and differing only in a short
    // trailing line.
    let objects: Vec<EncodedObject> = (0..8u32)
        .map(|idx| {
            let mut body = vec![b'x'; 4096];
            body.extend_from_slice(format!("\nvariant {idx}\n").as_bytes());
            EncodedObject::new(ObjectType::Blob, body)
        })
        .collect();

    let written = PackFile::write_packed_with_options(
        &objects,
        ObjectFormat::Sha1,
        &delta_pack_options(true),
    )
    .expect("test operation should succeed");
    let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");

    let cache = CountingDeltaCache::default();
    // Two passes share one cache, so the second runs against a populated one.
    for _pass in 0..2 {
        for po in &parsed.entries {
            let got = read_object_at_with_cache_arc(
                &written.pack,
                po.entry.offset,
                ObjectFormat::Sha1,
                |_| Ok(None),
                &cache,
            )
            .expect("test operation should succeed");
            assert_eq!(*got, po.object, "offset {}", po.entry.offset);
        }
    }

    let hit_count = cache.hits.get();
    assert!(hit_count > 0, "cache never served a warm object");
}
6808
/// A pack written with `delta_pack_options(true)` and its index must parse
/// back to the original objects, share one pack checksum, and index the
/// second object at the offset the writer reported.
#[test]
fn writes_ofs_delta_pack_and_index_that_round_trip() {
    let (base, changed) = similar_blob_objects();
    // Clones are required here: both objects are compared against the parsed
    // pack further down.
    let written = PackFile::write_packed_with_options(
        &[base.clone(), changed.clone()],
        ObjectFormat::Sha1,
        &delta_pack_options(true),
    )
    .expect("test operation should succeed");

    let mut cursor = written.entries[1].offset as usize;
    let second_header = parse_entry_header(&written.pack, &mut cursor)
        .expect("test operation should succeed");
    assert_eq!(second_header.kind, PackObjectKind::OfsDelta);

    let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
    let index =
        PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
    let changed_oid = changed
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    assert_eq!(pack.entries[0].object, base);
    assert_eq!(pack.entries[1].object, changed);
    assert_eq!(index.pack_checksum, pack.checksum);

    let indexed = index
        .find(&changed_oid)
        .expect("test operation should succeed");
    assert_eq!(indexed.offset, written.entries[1].offset);
}
6841
/// A two-object pack built with `DeltaKind::Offset` must parse into the base
/// body and the reconstructed result body, with the result's id equal to the
/// blob id of its bytes.
#[test]
fn resolves_ofs_delta_pack_entry() {
    let base = b"hello";
    let result = b"hello world";
    let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, DeltaKind::Offset);

    let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
    assert_eq!(parsed.entries.len(), 2);

    let (first, second) = (&parsed.entries[0], &parsed.entries[1]);
    assert_eq!(first.object.body, base);
    assert_eq!(second.object.body, result);

    let expected_oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
        .expect("test operation should succeed");
    assert_eq!(second.entry.oid, expected_oid);
}
6857
/// A two-object pack built with `DeltaKind::Ref` must parse into the base
/// body and the reconstructed result body, with the result's id equal to the
/// blob id of its bytes.
#[test]
fn resolves_ref_delta_pack_entry() {
    let base = b"hello";
    let result = b"hello world";
    let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, DeltaKind::Ref);

    let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
    assert_eq!(parsed.entries.len(), 2);

    let (first, second) = (&parsed.entries[0], &parsed.entries[1]);
    assert_eq!(first.object.body, base);
    assert_eq!(second.object.body, result);

    let expected_oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
        .expect("test operation should succeed");
    assert_eq!(second.entry.oid, expected_oid);
}
6873
/// A thin pack must fail a plain parse, and must parse through `parse_thin`
/// once the callback supplies the base object for the base's id.
#[test]
fn resolves_thin_ref_delta_pack_entry_with_external_base() {
    let base = b"hello";
    let result = b"hello world";
    let pack = thin_ref_delta_pack(ObjectFormat::Sha1, base, result);

    // Without a way to obtain the base, a plain parse has to fail.
    let plain = PackFile::parse_sha1(&pack);
    assert!(plain.is_err());

    let base_oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", base)
        .expect("test operation should succeed");
    // The callback answers only for the base id; any other id is unknown.
    let parsed = PackFile::parse_thin(&pack, ObjectFormat::Sha1, |oid| {
        Ok((oid == &base_oid).then(|| EncodedObject::new(ObjectType::Blob, base.to_vec())))
    })
    .expect("test operation should succeed");

    assert_eq!(parsed.entries.len(), 1);
    let only = &parsed.entries[0];
    assert_eq!(only.object.body, result);

    let expected_oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
        .expect("test operation should succeed");
    assert_eq!(only.entry.oid, expected_oid);
}
6899
/// Flipping one bit in the last byte of a single-object pack must make
/// `PackFile::parse_sha1` fail.
#[test]
fn rejects_bad_pack_checksum() {
    let mut pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
    let tail = pack.len() - 1;
    pack[tail] ^= 1;

    let outcome = PackFile::parse_sha1(&pack);
    assert!(outcome.is_err());
}
6907
/// Flipping one bit in the last byte of a single-object pack must make
/// `PackIndex::write_v2_for_pack_sha1` fail.
#[test]
fn raw_pack_index_rejects_bad_pack_checksum() {
    let mut pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
    let tail = pack.len() - 1;
    pack[tail] ^= 1;

    let outcome = PackIndex::write_v2_for_pack_sha1(&pack);
    assert!(outcome.is_err());
}
6915
/// `PackIndex::write_v2` must fail when two entries carry the same object id.
#[test]
fn pack_index_writer_rejects_duplicate_object_ids() {
    let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"same\n")
        .expect("test operation should succeed");
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");

    // Same id twice; only the CRC and offset differ between the entries.
    let duplicates: Vec<PackIndexEntry> = vec![(1, 12), (2, 24)]
        .into_iter()
        .map(|(crc32, offset)| PackIndexEntry { oid, crc32, offset })
        .collect();

    let outcome = PackIndex::write_v2(ObjectFormat::Sha1, &duplicates, &pack_checksum);
    assert!(outcome.is_err());
}
6936
/// A one-entry index from `single_entry_index` must parse as version 2 with
/// the given pack checksum, and lookup must return the entry's offset and
/// CRC.
#[test]
fn parses_single_entry_pack_index() {
    let oid = ObjectId::from_hex(
        ObjectFormat::Sha1,
        "ce013625030ba8dba906f756967f9e9ca394464a",
    )
    .expect("test operation should succeed");
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");
    let index = single_entry_index(
        ObjectFormat::Sha1,
        oid,
        0x1234_5678,
        12,
        pack_checksum.clone(),
    );

    let parsed = PackIndex::parse_v2_sha1(&index).expect("test operation should succeed");
    assert_eq!(parsed.version, 2);
    assert_eq!(parsed.pack_checksum, pack_checksum);
    assert_eq!(parsed.entries.len(), 1);

    let entry = parsed.find(&oid).expect("test operation should succeed");
    assert_eq!(entry.offset, 12);
    assert_eq!(entry.crc32, 0x1234_5678);

    assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
}
6973
/// A one-entry index from `single_entry_index_v1` must parse as version 1;
/// lookup must return offset `0x1234_5678` and a CRC of zero.
#[test]
fn parses_single_entry_pack_index_v1() {
    let oid = ObjectId::from_hex(
        ObjectFormat::Sha1,
        "ce013625030ba8dba906f756967f9e9ca394464a",
    )
    .expect("test operation should succeed");
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");
    let index =
        single_entry_index_v1(ObjectFormat::Sha1, oid, 0x1234_5678, pack_checksum.clone());

    let parsed =
        PackIndex::parse(&index, ObjectFormat::Sha1).expect("test operation should succeed");
    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.pack_checksum, pack_checksum);
    assert_eq!(parsed.entries.len(), 1);

    let entry = parsed.find(&oid).expect("test operation should succeed");
    assert_eq!(entry.offset, 0x1234_5678);
    assert_eq!(entry.crc32, 0);

    assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
}
7006
/// Flipping one bit in the last byte of a v1 index must make
/// `PackIndex::parse` fail.
#[test]
fn rejects_bad_pack_index_v1_checksum() {
    let oid = ObjectId::from_hex(
        ObjectFormat::Sha1,
        "ce013625030ba8dba906f756967f9e9ca394464a",
    )
    .expect("test operation should succeed");
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");

    let mut index = single_entry_index_v1(ObjectFormat::Sha1, oid, 12, pack_checksum);
    let tail = index.len() - 1;
    index[tail] ^= 1;

    let outcome = PackIndex::parse(&index, ObjectFormat::Sha1);
    assert!(outcome.is_err());
}
7021
/// A v2 index holding offsets `0x8000_0000` and `0x1_0000_0042` must be
/// readable through `PackIndexView`, returning each entry's exact CRC and
/// offset.
#[test]
fn pack_index_view_reads_v2_large_offsets() {
    let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"large offset a\n")
        .expect("test operation should succeed");
    let second =
        sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"large offset b\n")
            .expect("test operation should succeed");
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");

    let entries = vec![
        PackIndexEntry {
            oid: first,
            crc32: 0x1111_2222,
            offset: 0x8000_0000,
        },
        PackIndexEntry {
            oid: second,
            crc32: 0x3333_4444,
            offset: 0x1_0000_0042,
        },
    ];
    let index = PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack_checksum)
        .expect("test operation should succeed");

    assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);

    let view = PackIndexView::parse(&index, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    for PackIndexEntry { oid, crc32, offset } in entries {
        let expected = Some(PackIndexLookup { crc32, offset });
        assert_eq!(view.find(&oid), expected);
    }
}
7059
/// With the last byte of the index corrupted, `PackIndexView::parse` must
/// fail while both checksum-skipping parsers still succeed and resolve the
/// entry.
#[test]
fn pack_index_view_default_parse_checks_index_checksum() {
    let oid = ObjectId::from_hex(
        ObjectFormat::Sha1,
        "ce013625030ba8dba906f756967f9e9ca394464a",
    )
    .expect("test operation should succeed");
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");

    let mut index = single_entry_index(ObjectFormat::Sha1, oid, 0x1234_5678, 12, pack_checksum);
    let tail = index.len() - 1;
    index[tail] ^= 1;

    let checked = PackIndexView::parse(&index, ObjectFormat::Sha1);
    assert!(checked.is_err());

    let view = PackIndexView::parse_without_checksum(&index, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    // `index` is still used through `view` below, so the trusted parser gets
    // its own copy of the bytes.
    let trusted_view = PackIndexViewData::parse_trusted_without_checksum(
        Arc::from(index.clone().into_boxed_slice()),
        ObjectFormat::Sha1,
    )
    .expect("test operation should succeed");

    let expected = Some(PackIndexLookup {
        crc32: 0x1234_5678,
        offset: 12,
    });
    assert_eq!(view.find(&oid), expected);
    assert_eq!(trusted_view.find(&oid), expected);
}
7096
/// A reverse index written for positions `[2, 0, 1]` must parse back to the
/// same fields, and re-writing the parsed fields must reproduce the same
/// bytes.
#[test]
fn parses_pack_reverse_index() {
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");
    let reverse_index = PackReverseIndex::write(ObjectFormat::Sha1, &[2, 0, 1], &pack_checksum)
        .expect("test operation should succeed");

    let parsed = PackReverseIndex::parse(&reverse_index, ObjectFormat::Sha1, 3)
        .expect("test operation should succeed");
    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.format, ObjectFormat::Sha1);
    assert_eq!(parsed.positions, vec![2, 0, 1]);
    assert_eq!(parsed.pack_checksum, pack_checksum);

    // Round trip: serialising the parsed fields gives back the input bytes.
    let rewritten =
        PackReverseIndex::write(ObjectFormat::Sha1, &parsed.positions, &parsed.pack_checksum)
            .expect("test operation should succeed");
    assert_eq!(rewritten, reverse_index);
}
7115
/// Flipping one bit in the last byte of a reverse index must make
/// `PackReverseIndex::parse` fail.
#[test]
fn rejects_bad_pack_reverse_index_checksum() {
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");
    let mut reverse_index = PackReverseIndex::write(ObjectFormat::Sha1, &[0], &pack_checksum)
        .expect("test operation should succeed");

    let tail = reverse_index.len() - 1;
    reverse_index[tail] ^= 1;

    let outcome = PackReverseIndex::parse(&reverse_index, ObjectFormat::Sha1, 1);
    assert!(outcome.is_err());
}
7126
/// Position tables `[0, 0]` and `[0, 2]` for a two-object pack must be
/// rejected by both `PackReverseIndex::parse` and `PackReverseIndex::write`.
#[test]
fn rejects_bad_pack_reverse_index_positions() {
    let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");

    // Parser side: the raw fixtures come from the `pack_reverse_index` helper.
    let duplicate = pack_reverse_index(ObjectFormat::Sha1, &[0, 0], checksum.clone());
    let parsed_duplicate = PackReverseIndex::parse(&duplicate, ObjectFormat::Sha1, 2);
    assert!(parsed_duplicate.is_err());

    let out_of_range = pack_reverse_index(ObjectFormat::Sha1, &[0, 2], checksum.clone());
    let parsed_out_of_range = PackReverseIndex::parse(&out_of_range, ObjectFormat::Sha1, 2);
    assert!(parsed_out_of_range.is_err());

    // Writer side: the same two tables must be refused up front.
    let written_duplicate = PackReverseIndex::write(ObjectFormat::Sha1, &[0, 0], &checksum);
    assert!(written_duplicate.is_err());
    let written_out_of_range = PackReverseIndex::write(ObjectFormat::Sha1, &[0, 2], &checksum);
    assert!(written_out_of_range.is_err());
}
7140
/// An mtimes file written for `[1, 1_700_000_000, u32::MAX]` must parse back
/// to the same fields, and re-writing the parsed fields must reproduce the
/// same bytes.
#[test]
fn parses_pack_mtimes() {
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");
    let mtimes = PackMtimes::write(
        ObjectFormat::Sha1,
        &[1, 1_700_000_000, u32::MAX],
        &pack_checksum,
    )
    .expect("test operation should succeed");

    let parsed = PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 3)
        .expect("test operation should succeed");
    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.format, ObjectFormat::Sha1);
    assert_eq!(parsed.mtimes, vec![1, 1_700_000_000, u32::MAX]);
    assert_eq!(parsed.pack_checksum, pack_checksum);

    // Round trip: serialising the parsed fields gives back the input bytes.
    let rewritten = PackMtimes::write(ObjectFormat::Sha1, &parsed.mtimes, &parsed.pack_checksum)
        .expect("test operation should succeed");
    assert_eq!(rewritten, mtimes);
}
7163
/// Flipping one bit in the last byte of an mtimes file must make
/// `PackMtimes::parse` fail.
#[test]
fn rejects_bad_pack_mtimes_checksum() {
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");
    let mut mtimes = PackMtimes::write(ObjectFormat::Sha1, &[1], &pack_checksum)
        .expect("test operation should succeed");

    let tail = mtimes.len() - 1;
    mtimes[tail] ^= 1;

    let outcome = PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 1);
    assert!(outcome.is_err());
}
7174
/// `PackMtimes::parse` must reject a file holding more mtimes than the
/// expected object count, and a file whose byte 11 was changed to `2` even
/// though its trailing checksum was recomputed to match.
#[test]
fn rejects_bad_pack_mtimes_shape() {
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");

    // Two mtimes on disk, but the caller expects a single object.
    let two_mtimes = pack_mtimes(ObjectFormat::Sha1, &[1, 2], pack_checksum.clone());
    let count_mismatch = PackMtimes::parse(&two_mtimes, ObjectFormat::Sha1, 1);
    assert!(count_mismatch.is_err());

    // NOTE(review): byte 11 is presumably part of the header's hash-id field;
    // confirm against the `pack_mtimes` fixture builder.
    let mut wrong_hash = pack_mtimes(ObjectFormat::Sha1, &[1], pack_checksum);
    wrong_hash[11] = 2;

    // Recompute the trailer so the checksum cannot be the reason for failure.
    let body_len = wrong_hash.len() - ObjectFormat::Sha1.raw_len();
    let resealed = sley_core::digest_bytes(ObjectFormat::Sha1, &wrong_hash[..body_len])
        .expect("test operation should succeed");
    wrong_hash[body_len..].copy_from_slice(resealed.as_bytes());

    let header_mismatch = PackMtimes::parse(&wrong_hash, ObjectFormat::Sha1, 1);
    assert!(header_mismatch.is_err());
}
7190
/// A two-pack, two-object multi-pack index must parse with the expected
/// header fields, pack names, per-object pack id and offset, and chunk table
/// layout.
#[test]
fn parses_multi_pack_index_header_and_chunk_lookup() {
    let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
        .expect("test operation should succeed");
    let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
        .expect("test operation should succeed");

    // `ObjectId` is `Copy`, so the ids go into the fixture by value without
    // an explicit `.clone()`.
    let chunks = midx_chunks_with_pack_names(
        ObjectFormat::Sha1,
        b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
        &[(first, 0, 12), (second, 1, 0x1_0000_0000)],
    );
    let midx = multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks);
    let parsed = MultiPackIndex::parse(&midx, ObjectFormat::Sha1)
        .expect("test operation should succeed");

    assert_eq!(parsed.version, 2);
    assert_eq!(parsed.format, ObjectFormat::Sha1);
    assert_eq!(parsed.pack_count, 2);
    assert_eq!(parsed.pack_names, vec!["pack-a.idx", "pack-b.idx"]);
    assert_eq!(parsed.object_count, 2);
    assert_eq!(parsed.objects.len(), 2);

    // Look each object up once and check both fields on the result.
    let first_entry = parsed.find(&first).expect("test operation should succeed");
    assert_eq!(first_entry.pack_int_id, 0);
    assert_eq!(first_entry.offset, 12);

    let second_entry = parsed
        .find(&second)
        .expect("test operation should succeed");
    assert_eq!(second_entry.pack_int_id, 1);
    assert_eq!(second_entry.offset, 0x1_0000_0000);

    assert_eq!(parsed.reverse_index, None);
    assert_eq!(parsed.bitmapped_packs, None);

    // Chunk table: five chunks, the first two being PNAM then OIDF.
    assert_eq!(parsed.chunks.len(), 5);
    assert_eq!(parsed.chunks[0].id, *b"PNAM");
    assert_eq!(parsed.chunks[0].offset, 84);
    assert_eq!(parsed.chunks[0].len, 24);
    assert_eq!(parsed.chunks[1].id, *b"OIDF");
    assert_eq!(parsed.chunks[1].offset, 108);
    assert_eq!(parsed.chunks[1].len, 1024);
}
7249
/// `MultiPackIndexOidLookup` must report membership correctly and resolve
/// each present object to its pack name and offset, returning `None` for an
/// object that is absent.
#[test]
fn raw_multi_pack_index_lookup_finds_pack_and_offset() {
    let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
        .expect("test operation should succeed");
    let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
        .expect("test operation should succeed");
    let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
        .expect("test operation should succeed");

    // `ObjectId` is `Copy`, so the ids go into the fixture by value without
    // an explicit `.clone()`.
    let chunks = midx_chunks_with_pack_names(
        ObjectFormat::Sha1,
        b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
        &[(first, 0, 12), (second, 1, 0x1_0000_0000)],
    );
    let midx = Arc::new(multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks));
    let lookup = MultiPackIndexOidLookup::parse(midx, ObjectFormat::Sha1)
        .expect("test operation should succeed");

    assert!(lookup.contains(&first));
    assert!(lookup.contains(&second));
    assert!(!lookup.contains(&missing));

    // `find` is fallible and optional: the first `expect` unwraps the
    // `Result`, the second the `Option`.
    let first_entry = lookup
        .find(&first)
        .expect("test operation should succeed")
        .expect("object should be present");
    assert_eq!(
        lookup.pack_name(first_entry.pack_int_id),
        Some("pack-a.idx")
    );
    assert_eq!(first_entry.offset, 12);

    let second_entry = lookup
        .find(&second)
        .expect("test operation should succeed")
        .expect("object should be present");
    assert_eq!(
        lookup.pack_name(second_entry.pack_int_id),
        Some("pack-b.idx")
    );
    assert_eq!(second_entry.offset, 0x1_0000_0000);

    assert!(
        lookup
            .find(&missing)
            .expect("test operation should succeed")
            .is_none()
    );
}
7297
/// Flipping one bit in the last byte of a multi-pack index must make
/// `MultiPackIndex::parse` fail.
#[test]
fn rejects_bad_multi_pack_index_checksum() {
    let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
    let mut midx = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);

    let tail = midx.len() - 1;
    midx[tail] ^= 1;

    let outcome = MultiPackIndex::parse(&midx, ObjectFormat::Sha1);
    assert!(outcome.is_err());
}
7306
/// `MultiPackIndex::parse` must reject three multi-pack indexes whose bytes
/// were tampered with and whose trailing checksum was then recomputed, so
/// the checksum cannot be the cause of the failure.
#[test]
fn rejects_bad_multi_pack_index_shape() {
    // Overwrites the trailing digest with one computed over all preceding
    // bytes.
    fn reseal(bytes: &mut [u8]) {
        let checksum_offset = bytes.len() - ObjectFormat::Sha1.raw_len();
        let (payload, trailer) = bytes.split_at_mut(checksum_offset);
        let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &*payload)
            .expect("test operation should succeed");
        trailer.copy_from_slice(checksum.as_bytes());
    }

    let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);

    // NOTE(review): byte 5 is presumably the header's hash-id field; confirm
    // against the `multi_pack_index` fixture builder.
    let mut wrong_hash = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
    wrong_hash[5] = 2;
    reseal(&mut wrong_hash);
    assert!(MultiPackIndex::parse(&wrong_hash, ObjectFormat::Sha1).is_err());

    // NOTE(review): byte 12 presumably falls inside the chunk lookup table;
    // the variable name suggests this breaks the table terminator — confirm.
    let mut missing_terminator = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
    missing_terminator[12] = b'B';
    reseal(&mut missing_terminator);
    assert!(MultiPackIndex::parse(&missing_terminator, ObjectFormat::Sha1).is_err());

    // Bytes 16..24 are zeroed as a big-endian u64; the variable name suggests
    // this is a chunk offset.
    let mut bad_offset = multi_pack_index(
        ObjectFormat::Sha1,
        2,
        0,
        &midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]),
    );
    bad_offset[16..24].copy_from_slice(&0u64.to_be_bytes());
    reseal(&mut bad_offset);
    assert!(MultiPackIndex::parse(&bad_offset, ObjectFormat::Sha1).is_err());
}
7340
/// Checks how `MultiPackIndex::parse` validates the pack-name chunk: no
/// chunks at all, too few names, bad padding, and out-of-order names (an
/// error for version 1, accepted for version 2).
#[test]
fn rejects_bad_multi_pack_index_pack_names() {
    // No chunks at all.
    let missing = multi_pack_index(ObjectFormat::Sha1, 2, 1, &[]);
    assert!(MultiPackIndex::parse(&missing, ObjectFormat::Sha1).is_err());

    // Header declares two packs but only one name is stored.
    let one_name =
        midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0".to_vec(), &[]);
    let too_few = multi_pack_index(ObjectFormat::Sha1, 2, 2, &one_name);
    assert!(MultiPackIndex::parse(&too_few, ObjectFormat::Sha1).is_err());

    // Bytes after the name's terminator are not NUL.
    let padded_with_x =
        midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0xxxx".to_vec(), &[]);
    let bad_padding = multi_pack_index(ObjectFormat::Sha1, 2, 1, &padded_with_x);
    assert!(MultiPackIndex::parse(&bad_padding, ObjectFormat::Sha1).is_err());

    // The same out-of-order name list is used for both versions below.
    let swapped_names = || {
        midx_chunks_with_pack_names(
            ObjectFormat::Sha1,
            b"pack-b.idx\0pack-a.idx\0".to_vec(),
            &[],
        )
    };

    let unsorted_v1 = multi_pack_index(ObjectFormat::Sha1, 1, 2, &swapped_names());
    assert!(MultiPackIndex::parse(&unsorted_v1, ObjectFormat::Sha1).is_err());

    // Version 2 accepts the same list and keeps the stored order.
    let unsorted_v2 = multi_pack_index(ObjectFormat::Sha1, 2, 2, &swapped_names());
    let parsed = MultiPackIndex::parse(&unsorted_v2, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
}
7388
/// `MultiPackIndex::parse` must reject inconsistent object tables: a missing
/// OIDF chunk, an all-zero fanout, an unsorted OIDL, a pack id out of range,
/// a large offset without an LOFF chunk, and a malformed LOFF chunk.
#[test]
fn rejects_bad_multi_pack_index_object_tables() {
    let oid_a = ObjectId::from_hex(
        ObjectFormat::Sha1,
        "1111111111111111111111111111111111111111",
    )
    .expect("test operation should succeed");
    let oid_b = ObjectId::from_hex(
        ObjectFormat::Sha1,
        "2222222222222222222222222222222222222222",
    )
    .expect("test operation should succeed");

    // Only a PNAM chunk: the object tables are absent entirely.
    let missing_oidf = multi_pack_index(
        ObjectFormat::Sha1,
        2,
        1,
        &[(*b"PNAM", b"pack-a.idx\0\0".to_vec())],
    );
    assert!(MultiPackIndex::parse(&missing_oidf, ObjectFormat::Sha1).is_err());

    // Fanout is all zeroes although OIDL holds one object id.
    let bad_fanout = vec![
        (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
        (*b"OIDF", vec![0; 256 * 4]),
        (*b"OIDL", oid_a.as_bytes().to_vec()),
        (*b"OOFF", midx_ooff_entries(&[(0, 12)], &mut Vec::new())),
    ];
    let bad_fanout = multi_pack_index(ObjectFormat::Sha1, 2, 1, &bad_fanout);
    assert!(MultiPackIndex::parse(&bad_fanout, ObjectFormat::Sha1).is_err());

    // OIDL lists `oid_b` before `oid_a`, i.e. in descending order.
    let mut oid_lookup = Vec::new();
    oid_lookup.extend_from_slice(oid_b.as_bytes());
    oid_lookup.extend_from_slice(oid_a.as_bytes());
    // `ObjectId` is `Copy`, so the fanout helper receives the ids by value.
    let unsorted = vec![
        (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
        (*b"OIDF", midx_oid_fanout(&[oid_a, oid_b])),
        (*b"OIDL", oid_lookup),
        (
            *b"OOFF",
            midx_ooff_entries(&[(0, 12), (0, 24)], &mut Vec::new()),
        ),
    ];
    let unsorted = multi_pack_index(ObjectFormat::Sha1, 2, 1, &unsorted);
    assert!(MultiPackIndex::parse(&unsorted, ObjectFormat::Sha1).is_err());

    // The object refers to pack id 1 while only one pack name is stored.
    let bad_pack = multi_pack_index(
        ObjectFormat::Sha1,
        2,
        1,
        &midx_chunks_with_pack_names(
            ObjectFormat::Sha1,
            b"pack-a.idx\0\0".to_vec(),
            &[(oid_a, 1, 12)],
        ),
    );
    assert!(MultiPackIndex::parse(&bad_pack, ObjectFormat::Sha1).is_err());

    // Offset 0x1_0000_0000 is encoded in OOFF, but no LOFF chunk is emitted;
    // the collected large offsets are deliberately dropped.
    let mut large_offsets = Vec::new();
    let missing_loff = vec![
        (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
        (*b"OIDF", midx_oid_fanout(std::slice::from_ref(&oid_a))),
        (*b"OIDL", oid_a.as_bytes().to_vec()),
        (
            *b"OOFF",
            midx_ooff_entries(&[(0, 0x1_0000_0000)], &mut large_offsets),
        ),
    ];
    let missing_loff = multi_pack_index(ObjectFormat::Sha1, 2, 1, &missing_loff);
    assert!(MultiPackIndex::parse(&missing_loff, ObjectFormat::Sha1).is_err());

    // An LOFF chunk consisting of a single byte.
    let mut bad_loff =
        midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0\0".to_vec(), &[]);
    bad_loff.push((*b"LOFF", vec![0]));
    let bad_loff = multi_pack_index(ObjectFormat::Sha1, 2, 1, &bad_loff);
    assert!(MultiPackIndex::parse(&bad_loff, ObjectFormat::Sha1).is_err());
}
7464
/// A multi-pack index carrying RIDX and BTMP chunks must expose them as
/// `reverse_index` and `bitmapped_packs` after parsing.
#[test]
fn parses_multi_pack_index_bitmap_chunks() {
    let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
        .expect("test operation should succeed");
    let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
        .expect("test operation should succeed");

    let mut chunks = midx_chunks_with_pack_names(
        ObjectFormat::Sha1,
        b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
        &[(first, 0, 12), (second, 1, 24)],
    );
    // Append the two optional chunks after the mandatory ones.
    chunks.extend([
        (*b"RIDX", midx_u32_table(&[1, 0])),
        (*b"BTMP", midx_bitmap_packs(&[(0, 1), (1, 1)])),
    ]);
    let midx = multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks);

    let parsed = MultiPackIndex::parse(&midx, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    assert_eq!(parsed.reverse_index, Some(vec![1, 0]));

    let expected_packs = vec![
        MultiPackBitmapPack {
            bitmap_pos: 0,
            bitmap_nr: 1,
        },
        MultiPackBitmapPack {
            bitmap_pos: 1,
            bitmap_nr: 1,
        },
    ];
    assert_eq!(parsed.bitmapped_packs, Some(expected_packs));
}
7497
/// A multi-pack index produced by `MultiPackIndex::write` must parse back
/// with the same version, pack names, and per-object pack id and offset, and
/// must contain an LOFF chunk for the offset `0x1_0000_0000`.
#[test]
fn writes_multi_pack_index_that_round_trip() {
    let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
        .expect("test operation should succeed");
    let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
        .expect("test operation should succeed");

    // `ObjectId` is `Copy`, so the entries take the ids by value without an
    // explicit `.clone()`.
    let bytes = MultiPackIndex::write(
        ObjectFormat::Sha1,
        2,
        &["pack-b.idx".into(), "pack-a.idx".into()],
        &[
            MultiPackIndexEntry {
                oid: second,
                pack_int_id: 0,
                offset: 0x1_0000_0000,
                force_large_offset: false,
            },
            MultiPackIndexEntry {
                oid: first,
                pack_int_id: 1,
                offset: 12,
                force_large_offset: false,
            },
        ],
    )
    .expect("test operation should succeed");

    let parsed = MultiPackIndex::parse(&bytes, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    assert_eq!(parsed.version, 2);
    assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
    assert_eq!(parsed.object_count, 2);

    // Look each object up once and check both fields on the result.
    let first_entry = parsed.find(&first).expect("test operation should succeed");
    assert_eq!(first_entry.pack_int_id, 1);
    assert_eq!(first_entry.offset, 12);

    let second_entry = parsed
        .find(&second)
        .expect("test operation should succeed");
    assert_eq!(second_entry.pack_int_id, 0);
    assert_eq!(second_entry.offset, 0x1_0000_0000);

    assert!(parsed.chunks.iter().any(|chunk| chunk.id == *b"LOFF"));
}
7560
/// `MultiPackIndex::write` must return an error for each of five invalid
/// argument combinations.
#[test]
fn write_multi_pack_index_rejects_invalid_inputs() {
    let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"object\n")
        .expect("test operation should succeed");
    // Builds an entry for `oid`; only the pack id and offset vary per case.
    let entry = |pack_int_id, offset| MultiPackIndexEntry {
        oid,
        pack_int_id,
        offset,
        force_large_offset: false,
    };

    // Second argument 3 (the sibling round-trip test passes 2 there).
    let with_three = MultiPackIndex::write(ObjectFormat::Sha1, 3, &["pack-a.idx".into()], &[]);
    assert!(with_three.is_err());

    // Second argument 1 with pack names in descending order.
    let descending_names = MultiPackIndex::write(
        ObjectFormat::Sha1,
        1,
        &["pack-b.idx".into(), "pack-a.idx".into()],
        &[],
    );
    assert!(descending_names.is_err());

    // Pack name containing a `/`.
    let slash_in_name =
        MultiPackIndex::write(ObjectFormat::Sha1, 2, &["pack/a.idx".into()], &[]);
    assert!(slash_in_name.is_err());

    // Entry refers to pack id 1 while only one pack name is given.
    let pack_id_out_of_range = MultiPackIndex::write(
        ObjectFormat::Sha1,
        2,
        &["pack-a.idx".into()],
        &[entry(1, 12)],
    );
    assert!(pack_id_out_of_range.is_err());

    // Two entries for the same object id.
    let duplicate_oid = MultiPackIndex::write(
        ObjectFormat::Sha1,
        2,
        &["pack-a.idx".into()],
        &[entry(0, 12), entry(0, 24)],
    );
    assert!(duplicate_oid.is_err());
}
7613
/// Parsing must fail for multi-pack indexes whose `RIDX` or `BTMP` chunk is inconsistent.
#[test]
fn rejects_bad_multi_pack_index_bitmap_chunks() {
    let format = ObjectFormat::Sha1;
    let oid_a = ObjectId::from_hex(format, "1111111111111111111111111111111111111111")
        .expect("test operation should succeed");
    let oid_b = ObjectId::from_hex(format, "2222222222222222222222222222222222222222")
        .expect("test operation should succeed");

    // RIDX table that lists position 0 twice.
    let duplicate_ridx = {
        let mut chunks = midx_chunks_with_pack_names(
            format,
            b"pack-a.idx\0\0".to_vec(),
            &[(oid_a.clone(), 0, 12), (oid_b.clone(), 0, 24)],
        );
        chunks.push((*b"RIDX", midx_u32_table(&[0, 0])));
        multi_pack_index(format, 2, 1, &chunks)
    };
    assert!(MultiPackIndex::parse(&duplicate_ridx, format).is_err());

    // BTMP chunk with a single record although two packs are declared.
    let short_btmp = {
        let mut chunks = midx_chunks_with_pack_names(
            format,
            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
            &[(oid_a.clone(), 0, 12), (oid_b.clone(), 1, 24)],
        );
        chunks.push((*b"BTMP", midx_bitmap_packs(&[(0, 1)])));
        multi_pack_index(format, 2, 2, &chunks)
    };
    assert!(MultiPackIndex::parse(&short_btmp, format).is_err());

    // BTMP record (1, 2) for an index that holds only two objects.
    let out_of_range_btmp = {
        let mut chunks = midx_chunks_with_pack_names(
            format,
            b"pack-a.idx\0\0".to_vec(),
            &[(oid_a, 0, 12), (oid_b, 0, 24)],
        );
        chunks.push((*b"BTMP", midx_bitmap_packs(&[(1, 2)])));
        multi_pack_index(format, 2, 1, &chunks)
    };
    assert!(MultiPackIndex::parse(&out_of_range_btmp, format).is_err());
}
7654
/// A SHA-1 bitmap index written with the hash-cache option parses back into the expected fields.
#[test]
fn parses_pack_bitmap_index_with_hash_cache() {
    let format = ObjectFormat::Sha1;
    let options = PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let bitmap = pack_bitmap_index(
        format,
        3,
        options,
        &pack_checksum,
        &[(2, 0, 1, &[0b101])],
        Some(&[0x1111_1111, 0x2222_2222, 0x3333_3333]),
    );

    let parsed =
        PackBitmapIndex::parse(&bitmap, format, 3).expect("test operation should succeed");

    // Header fields.
    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.format, ObjectFormat::Sha1);
    assert_eq!(parsed.options, options);
    assert_eq!(parsed.pack_checksum, pack_checksum);

    // Type bitmaps are written with a bit size equal to the object count.
    assert_eq!(parsed.type_bitmaps.commits.bit_size, 3);
    assert_eq!(parsed.type_bitmaps.trees.bit_size, 3);

    // The single bitmap entry, looked up by its index position.
    assert_eq!(parsed.entries.len(), 1);
    let entry = parsed
        .entry_for_index_position(2)
        .expect("test operation should succeed");
    assert_eq!(entry.xor_offset, 0);
    assert_eq!(entry.flags, 1);
    assert_eq!(entry.bitmap.words, ewah_literal_words(&[0b101]));

    // The name-hash cache that follows the entries.
    assert_eq!(
        parsed.name_hash_cache,
        Some(vec![0x1111_1111, 0x2222_2222, 0x3333_3333])
    );
}
7691
/// A SHA-256 bitmap index without a hash cache parses, and its checksums use SHA-256.
#[test]
fn parses_pack_bitmap_index_sha256() {
    let format = ObjectFormat::Sha256;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let bitmap = pack_bitmap_index(
        format,
        2,
        PackBitmapIndex::OPTION_FULL_DAG,
        &pack_checksum,
        &[(0, 0, 0, &[0b11])],
        None,
    );

    let parsed =
        PackBitmapIndex::parse(&bitmap, format, 2).expect("test operation should succeed");

    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.format, ObjectFormat::Sha256);
    assert_eq!(parsed.pack_checksum, pack_checksum);
    assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
    assert_eq!(parsed.entries[0].object_position, 0);
    // No hash-cache option was set, so no cache is expected.
    assert_eq!(parsed.name_hash_cache, None);
}
7714
/// Parses a `.bitmap` file produced by the `git` executable found on `PATH`.
///
/// A scratch repository with two empty commits is repacked with `git repack -adb`;
/// the resulting `.idx` and `.bitmap` files are then parsed and cross-checked.
#[test]
fn parses_upstream_git_written_pack_bitmap_index() {
    let root = unique_temp_dir("git-pack-bitmap-upstream");
    fs::create_dir_all(&root).expect("test operation should succeed");
    {
        run_git_success(&root, &["init", "-q", "-b", "main"]);
        // Two empty commits, identical apart from their message.
        for message in ["one", "two"] {
            run_git_success(
                &root,
                &[
                    "-c",
                    "user.name=Example User",
                    "-c",
                    "user.email=example@example.invalid",
                    "commit",
                    "--allow-empty",
                    "-q",
                    "-m",
                    message,
                ],
            );
        }
        run_git_success(&root, &["repack", "-adb"]);

        let mut pack_dir = root.clone();
        pack_dir.extend([".git", "objects", "pack"]);
        let idx_path = single_path_with_extension(&pack_dir, "idx");
        let bitmap_path = single_path_with_extension(&pack_dir, "bitmap");

        let idx_bytes = fs::read(idx_path).expect("test operation should succeed");
        let index = PackIndex::parse(&idx_bytes, ObjectFormat::Sha1)
            .expect("test operation should succeed");

        let bitmap_bytes = fs::read(bitmap_path).expect("test operation should succeed");
        let bitmap =
            PackBitmapIndex::parse(&bitmap_bytes, ObjectFormat::Sha1, index.entries.len())
                .expect("test operation should succeed");

        assert_eq!(bitmap.pack_checksum, index.pack_checksum);
        assert!(!bitmap.entries.is_empty());
    };
    // Best-effort cleanup; a failure to remove the scratch directory is ignored.
    let _ = fs::remove_dir_all(&root);
}
7769
/// Corrupting the signature, version, option bits or trailing checksum must make parsing fail.
#[test]
fn rejects_bad_pack_bitmap_index_header_and_checksum() {
    let format = ObjectFormat::Sha1;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let bitmap = pack_bitmap_index(
        format,
        1,
        PackBitmapIndex::OPTION_FULL_DAG,
        &pack_checksum,
        &[(0, 0, 0, &[1])],
        None,
    );
    let rejected = |bytes: &[u8]| PackBitmapIndex::parse(bytes, format, 1).is_err();

    // Byte 0 belongs to the "BITM" signature; the checksum is deliberately not refreshed.
    let mut bad_signature = bitmap.clone();
    bad_signature[0] = b'X';
    assert!(rejected(&bad_signature));

    // Byte 5 is the low byte of the big-endian version field.
    let mut bad_version = bitmap.clone();
    bad_version[5] = 2;
    refresh_trailing_checksum(format, &mut bad_version);
    assert!(rejected(&bad_version));

    // Byte 7 is the low byte of the big-endian options field.
    let mut bad_option = bitmap.clone();
    bad_option[7] = 0x20;
    refresh_trailing_checksum(format, &mut bad_option);
    assert!(rejected(&bad_option));

    // Flip one bit in the final byte of the trailing checksum.
    let mut bad_checksum = bitmap;
    let last = bad_checksum.len() - 1;
    bad_checksum[last] ^= 1;
    assert!(rejected(&bad_checksum));
}
7802
/// Truncated data, an out-of-range object position and an invalid XOR offset must all be rejected.
#[test]
fn rejects_bad_pack_bitmap_index_ewah_and_entries() {
    let format = ObjectFormat::Sha1;
    let full_dag = PackBitmapIndex::OPTION_FULL_DAG;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");

    // Drop the trailer plus one more byte, then re-stamp a checksum over what is left.
    let bitmap = pack_bitmap_index(
        format,
        2,
        full_dag,
        &pack_checksum,
        &[(0, 0, 0, &[0b01]), (1, 1, 0, &[0b11])],
        None,
    );
    let mut truncated = bitmap.clone();
    let shortened_len = truncated.len() - format.raw_len() - 1;
    truncated.truncate(shortened_len);
    refresh_trailing_checksum(format, &mut truncated);
    assert!(PackBitmapIndex::parse(&truncated, format, 2).is_err());

    // Entry position 2 with only two objects; checked before and after a checksum refresh.
    let mut out_of_range_position =
        pack_bitmap_index(format, 2, full_dag, &pack_checksum, &[(2, 0, 0, &[0b01])], None);
    assert!(PackBitmapIndex::parse(&out_of_range_position, format, 2).is_err());
    refresh_trailing_checksum(format, &mut out_of_range_position);
    assert!(PackBitmapIndex::parse(&out_of_range_position, format, 2).is_err());

    // The first (and only) entry claims an XOR offset of 1.
    let invalid_xor =
        pack_bitmap_index(format, 2, full_dag, &pack_checksum, &[(0, 1, 0, &[0b01])], None);
    assert!(PackBitmapIndex::parse(&invalid_xor, format, 2).is_err());
}
7843
/// A hand-built version-2 SHA-256 index with one entry parses and exposes that entry.
#[test]
fn parses_single_entry_pack_index_sha256() {
    let format = ObjectFormat::Sha256;
    let oid = sley_core::object_id_for_bytes(format, "blob", b"hello sha256\n")
        .expect("test operation should succeed");
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let index = single_entry_index(format, oid, 0x1234_5678, 12, pack_checksum.clone());

    let parsed = PackIndex::parse(&index, format).expect("test operation should succeed");
    assert_eq!(parsed.version, 2);
    assert_eq!(parsed.pack_checksum, pack_checksum);
    assert_eq!(parsed.entries.len(), 1);

    // Look the entry up once and check both recorded fields.
    let found = parsed.find(&oid).expect("test operation should succeed");
    assert_eq!(found.offset, 12);
    assert_eq!(found.crc32, 0x1234_5678);

    assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
    assert_pack_index_view_matches_owned(&index, format);
}
7879
/// Runs the shared deltify-and-round-trip scenario with SHA-1 object ids.
#[test]
fn write_packed_deltifies_similar_blobs_and_round_trips_sha1() {
    let format = ObjectFormat::Sha1;
    write_packed_deltifies_similar_blobs_and_round_trips(format);
}
7884
/// Runs the shared deltify-and-round-trip scenario with SHA-256 object ids.
#[test]
fn write_packed_deltifies_similar_blobs_and_round_trips_sha256() {
    let format = ObjectFormat::Sha256;
    write_packed_deltifies_similar_blobs_and_round_trips(format);
}
7889
/// Packing the same blob twice in one call must fail.
#[test]
fn write_packed_rejects_duplicate_objects() {
    let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
    let duplicated = [object.clone(), object];
    let outcome = PackFile::write_packed(&duplicated, ObjectFormat::Sha1);
    assert!(outcome.is_err());
}
7895
/// Caller-supplied ids are still validated: duplicates and ids of another hash format are errors.
#[test]
fn write_packed_with_known_ids_validates_ids_before_trusting_them() {
    let format = ObjectFormat::Sha1;
    let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
    let sha1 = object
        .object_id(format)
        .expect("test operation should succeed");
    let sha256 = object
        .object_id(ObjectFormat::Sha256)
        .expect("test operation should succeed");

    // The same (id, object) pair supplied twice.
    let duplicate = [
        PackInput {
            oid: &sha1,
            object: &object,
        },
        PackInput {
            oid: &sha1,
            object: &object,
        },
    ];
    let duplicate_outcome = PackFile::write_packed_with_known_ids(&duplicate, format);
    assert!(duplicate_outcome.is_err());

    // A SHA-256 id handed to a SHA-1 pack write.
    let wrong_format = [PackInput {
        oid: &sha256,
        object: &object,
    }];
    let wrong_format_outcome = PackFile::write_packed_with_known_ids(&wrong_format, format);
    assert!(wrong_format_outcome.is_err());
}
7923
/// The writer-based pack writer must produce the same bytes and metadata as the in-memory one.
#[test]
fn write_packed_with_known_ids_to_writer_matches_in_memory_pack() {
    let format = ObjectFormat::Sha1;
    let objects = similar_blob_family(6);

    let mut object_ids = Vec::with_capacity(objects.len());
    for object in &objects {
        object_ids.push(
            object
                .object_id(format)
                .expect("test operation should succeed"),
        );
    }
    let mut inputs = Vec::with_capacity(objects.len());
    for (object, oid) in objects.iter().zip(&object_ids) {
        inputs.push(PackInput { oid, object });
    }

    let options = PackWriteOptions::new();
    let in_memory = PackFile::write_packed_with_known_ids_and_options(&inputs, format, &options)
        .expect("test operation should succeed");

    let mut written = Vec::new();
    let streamed =
        PackFile::write_packed_with_known_ids_to_writer(&inputs, format, &options, &mut written)
            .expect("test operation should succeed");

    // Bytes pushed through the writer equal the in-memory pack...
    assert_eq!(written, in_memory.pack);
    // ...and so does every piece of reported metadata.
    assert_eq!(streamed.index, in_memory.index);
    assert_eq!(streamed.checksum, in_memory.checksum);
    assert_eq!(streamed.entries, in_memory.entries);
    assert_eq!(streamed.delta_count, in_memory.delta_count);
    assert_eq!(streamed.pack_size, in_memory.pack.len() as u64);
}
7963
7964 fn write_packed_deltifies_similar_blobs_and_round_trips(format: ObjectFormat) {
7965 let objects = similar_blob_family(8);
7966 let packed =
7967 PackFile::write_packed(&objects, format).expect("test operation should succeed");
7968 let undeltified =
7969 PackFile::write_undeltified(&objects, format).expect("test operation should succeed");
7970
7971 assert!(
7974 packed.pack.len() < undeltified.pack.len(),
7975 "expected delta pack ({}) smaller than undeltified pack ({})",
7976 packed.pack.len(),
7977 undeltified.pack.len()
7978 );
7979
7980 let kinds = pack_entry_kinds(&packed.pack, format);
7982 let delta_count = kinds
7983 .iter()
7984 .filter(|kind| matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta))
7985 .count();
7986 assert!(
7987 delta_count >= 1,
7988 "expected at least one delta entry, found kinds {kinds:?}"
7989 );
7990
7991 let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
7993 assert_eq!(parsed.entries.len(), objects.len());
7994 for object in &objects {
7995 let oid = object
7996 .object_id(format)
7997 .expect("test operation should succeed");
7998 let found = parsed
7999 .entries
8000 .iter()
8001 .find(|entry| entry.entry.oid == oid)
8002 .unwrap_or_else(|| panic!("object {oid} missing from parsed pack"));
8003 assert_eq!(&found.object, object, "object {oid} did not round-trip");
8004 }
8005
8006 let index = PackIndex::parse(&packed.index, format).expect("test operation should succeed");
8008 assert_eq!(index.pack_checksum, packed.checksum);
8009 for object in &objects {
8010 let oid = object
8011 .object_id(format)
8012 .expect("test operation should succeed");
8013 assert!(index.find(&oid).is_some(), "index missing {oid}");
8014 }
8015 }
8016
/// With default options the writer uses ofs-delta entries and no ref-delta entries.
#[test]
fn write_packed_emits_ofs_delta_by_default() {
    let format = ObjectFormat::Sha1;
    let objects = similar_blob_family(6);
    let packed =
        PackFile::write_packed(&objects, format).expect("test operation should succeed");
    let kinds = pack_entry_kinds(&packed.pack, format);

    let has_ofs_delta = kinds
        .iter()
        .any(|kind| matches!(kind, PackObjectKind::OfsDelta));
    assert!(
        has_ofs_delta,
        "expected an ofs-delta entry by default, found {kinds:?}"
    );

    let has_ref_delta = kinds
        .iter()
        .any(|kind| matches!(kind, PackObjectKind::RefDelta));
    assert!(
        !has_ref_delta,
        "default self-contained pack must not use ref-delta, found {kinds:?}"
    );

    // The pack must parse without any external bases.
    assert!(PackFile::parse(&packed.pack, format).is_ok());
}
8034
/// With `prefer_ofs_delta` disabled the writer emits ref-delta entries and no ofs-delta entries.
#[test]
fn write_packed_can_emit_ref_delta() {
    let format = ObjectFormat::Sha1;
    let objects = similar_blob_family(6);
    let options = PackWriteOptions::new().with_prefer_ofs_delta(false);
    let packed = PackFile::write_packed_with_options(&objects, format, &options)
        .expect("test operation should succeed");
    let kinds = pack_entry_kinds(&packed.pack, format);

    let has_ref_delta = kinds
        .iter()
        .any(|kind| matches!(kind, PackObjectKind::RefDelta));
    assert!(has_ref_delta, "expected a ref-delta entry, found {kinds:?}");

    let has_ofs_delta = kinds
        .iter()
        .any(|kind| matches!(kind, PackObjectKind::OfsDelta));
    assert!(
        !has_ofs_delta,
        "ref-delta mode must not emit ofs-delta, found {kinds:?}"
    );

    // All bases are inside the pack, so a plain parse succeeds.
    let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
    assert_eq!(parsed.entries.len(), objects.len());
}
8057
/// The observed delta-chain depth never exceeds the configured `depth`, and objects still round-trip.
#[test]
fn write_packed_bounds_delta_chain_depth() {
    let format = ObjectFormat::Sha1;
    let objects = incremental_blob_chain(20);

    for max_depth in [1usize, 2, 5] {
        let options = PackWriteOptions::new()
            .with_window(20)
            .with_depth(max_depth);
        let packed = PackFile::write_packed_with_options(&objects, format, &options)
            .expect("test operation should succeed");

        // Deepest chain seen in the pack; 0 when the pack has no entries.
        let observed = pack_entry_depths(&packed.pack, format)
            .iter()
            .fold(0, |deepest, &depth| deepest.max(depth));
        assert!(
            observed <= max_depth,
            "max chain depth {observed} exceeded bound {max_depth}"
        );

        let parsed =
            PackFile::parse(&packed.pack, format).expect("test operation should succeed");
        for object in &objects {
            let oid = object
                .object_id(format)
                .expect("test operation should succeed");
            let found = parsed
                .entries
                .iter()
                .find(|entry| entry.entry.oid == oid)
                .expect("test operation should succeed");
            assert_eq!(&found.object, object);
        }
    }
}
8096
/// A depth of zero must yield a pack without any delta entries.
#[test]
fn write_packed_depth_zero_stores_everything_undeltified() {
    let format = ObjectFormat::Sha1;
    let objects = similar_blob_family(5);
    let options = PackWriteOptions::new().with_depth(0);
    let packed = PackFile::write_packed_with_options(&objects, format, &options)
        .expect("test operation should succeed");
    let kinds = pack_entry_kinds(&packed.pack, format);

    let has_delta = kinds
        .iter()
        .any(|kind| matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta));
    assert!(!has_delta, "depth 0 must disable deltas, found {kinds:?}");
}
8111
/// Runs the shared thin-pack scenario with SHA-1 object ids.
#[test]
fn write_thin_uses_external_base_and_round_trips_sha1() {
    let format = ObjectFormat::Sha1;
    write_thin_uses_external_base_and_round_trips(format);
}
8116
/// Runs the shared thin-pack scenario with SHA-256 object ids.
#[test]
fn write_thin_uses_external_base_and_round_trips_sha256() {
    let format = ObjectFormat::Sha256;
    write_thin_uses_external_base_and_round_trips(format);
}
8121
8122 fn write_thin_uses_external_base_and_round_trips(format: ObjectFormat) {
8123 let base = blob_with_marker("EXTERNAL-BASE");
8126 let target = blob_with_marker("EXTERNAL-TARGET");
8127 let base_oid = base
8128 .object_id(format)
8129 .expect("test operation should succeed");
8130
8131 let mut external = HashMap::new();
8132 external.insert(base_oid, base.clone());
8133 let packed = PackFile::write_thin(std::slice::from_ref(&target), format, external)
8134 .expect("test operation should succeed");
8135
8136 let kinds = pack_entry_kinds(&packed.pack, format);
8138 assert_eq!(kinds, vec![PackObjectKind::RefDelta]);
8139
8140 let mut offset = 12usize;
8142 let header =
8143 parse_entry_header(&packed.pack, &mut offset).expect("test operation should succeed");
8144 assert_eq!(header.kind, PackObjectKind::RefDelta);
8145 let referenced =
8146 ObjectId::from_raw(format, &packed.pack[offset..offset + format.raw_len()])
8147 .expect("test operation should succeed");
8148 assert_eq!(referenced, base_oid);
8149
8150 assert!(PackFile::parse(&packed.pack, format).is_err());
8152
8153 let parsed = PackFile::parse_thin(&packed.pack, format, |oid| {
8155 if oid == &base_oid {
8156 Ok(Some(base.clone()))
8157 } else {
8158 Ok(None)
8159 }
8160 })
8161 .expect("test operation should succeed");
8162 assert_eq!(parsed.entries.len(), 1);
8163 assert_eq!(parsed.entries[0].object, target);
8164 }
8165
/// Objects of different types with unrelated content all survive a write/parse cycle.
#[test]
fn write_packed_preserves_distinct_objects_with_no_similarity() {
    let format = ObjectFormat::Sha1;
    let blob = EncodedObject::new(ObjectType::Blob, b"alpha distinct\n".to_vec());
    let empty_tree = EncodedObject::new(ObjectType::Tree, Vec::<u8>::new());
    let commit = EncodedObject::new(ObjectType::Commit, b"tree 0000\n".to_vec());
    let objects = vec![blob, empty_tree, commit];

    let packed =
        PackFile::write_packed(&objects, format).expect("test operation should succeed");
    let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
    assert_eq!(parsed.entries.len(), objects.len());

    for object in &objects {
        let oid = object
            .object_id(format)
            .expect("test operation should succeed");
        let present = parsed.entries.iter().any(|entry| entry.entry.oid == oid);
        assert!(present);
    }
}
8187
8188 fn similar_blob_family(count: usize) -> Vec<EncodedObject> {
8192 let mut common_head = Vec::new();
8193 for _ in 0..200 {
8194 common_head.extend_from_slice(b"shared header line for delta testing\n");
8195 }
8196 let mut common_tail = Vec::new();
8197 for _ in 0..200 {
8198 common_tail.extend_from_slice(b"shared trailer line for delta testing\n");
8199 }
8200 (0..count)
8201 .map(|idx| {
8202 let mut body = common_head.clone();
8203 body.extend_from_slice(format!("UNIQUE MIDDLE MARKER NUMBER {idx}\n").as_bytes());
8204 body.extend_from_slice(&common_tail);
8205 EncodedObject::new(ObjectType::Blob, body)
8206 })
8207 .collect()
8208 }
8209
8210 fn incremental_blob_chain(count: usize) -> Vec<EncodedObject> {
8213 let mut body = Vec::new();
8214 for _ in 0..100 {
8215 body.extend_from_slice(b"baseline content shared across the whole chain\n");
8216 }
8217 let mut objects = Vec::with_capacity(count);
8218 for idx in 0..count {
8219 body.extend_from_slice(format!("appended unique line {idx}\n").as_bytes());
8220 objects.push(EncodedObject::new(ObjectType::Blob, body.clone()));
8221 }
8222 objects
8223 }
8224
8225 fn blob_with_marker(marker: &str) -> EncodedObject {
8226 let mut body = Vec::new();
8227 for _ in 0..150 {
8228 body.extend_from_slice(b"common body shared between base and target\n");
8229 }
8230 body.extend_from_slice(marker.as_bytes());
8231 body.push(b'\n');
8232 for _ in 0..150 {
8233 body.extend_from_slice(b"more common body shared between objects\n");
8234 }
8235 EncodedObject::new(ObjectType::Blob, body)
8236 }
8237
8238 fn pack_entry_kinds(pack: &[u8], format: ObjectFormat) -> Vec<PackObjectKind> {
8240 pack_entry_descriptors(pack, format)
8241 .into_iter()
8242 .map(|descriptor| descriptor.kind)
8243 .collect()
8244 }
8245
8246 fn pack_entry_depths(pack: &[u8], format: ObjectFormat) -> Vec<usize> {
8250 let descriptors = pack_entry_descriptors(pack, format);
8251 let mut depth_by_offset: HashMap<u64, usize> = HashMap::new();
8252 let mut depths = Vec::with_capacity(descriptors.len());
8253 for descriptor in &descriptors {
8254 let depth = match &descriptor.base {
8255 EntryBase::None => 0,
8256 EntryBase::Offset(base_offset) => {
8257 depth_by_offset.get(base_offset).copied().unwrap_or(0) + 1
8258 }
8259 EntryBase::Ref => 1,
8263 };
8264 depth_by_offset.insert(descriptor.offset, depth);
8265 depths.push(depth);
8266 }
8267 depths
8268 }
8269
/// Summary of one pack entry as collected by `pack_entry_descriptors`.
struct EntryDescriptor {
    /// Byte offset within the pack at which the entry header starts.
    offset: u64,
    /// Entry kind taken from the parsed entry header.
    kind: PackObjectKind,
    /// How the entry refers to its delta base, if it is a delta at all.
    base: EntryBase,
}
8275
/// Delta-base reference of a pack entry, as classified by `pack_entry_descriptors`.
enum EntryBase {
    /// The entry is neither an ofs-delta nor a ref-delta.
    None,
    /// Ofs-delta: the value produced by `parse_ofs_delta_base_offset` for the entry.
    Offset(u64),
    /// Ref-delta: the base is named by object id; the id itself is not kept.
    Ref,
}
8281
8282 fn pack_entry_descriptors(pack: &[u8], format: ObjectFormat) -> Vec<EntryDescriptor> {
8283 let trailer_offset = pack.len() - format.raw_len();
8284 let count = u32_be(&pack[8..12]) as usize;
8285 let mut offset = 12usize;
8286 let mut descriptors = Vec::with_capacity(count);
8287 for _ in 0..count {
8288 let entry_offset = offset as u64;
8289 let header =
8290 parse_entry_header(pack, &mut offset).expect("test operation should succeed");
8291 let base = match header.kind {
8292 PackObjectKind::OfsDelta => {
8293 let base_offset = parse_ofs_delta_base_offset(pack, &mut offset, entry_offset)
8294 .expect("test operation should succeed");
8295 EntryBase::Offset(base_offset)
8296 }
8297 PackObjectKind::RefDelta => {
8298 offset += format.raw_len();
8299 EntryBase::Ref
8300 }
8301 _ => EntryBase::None,
8302 };
8303 let mut decoder = ZlibDecoder::new(&pack[offset..trailer_offset]);
8304 let mut body = Vec::new();
8305 decoder
8306 .read_to_end(&mut body)
8307 .expect("test operation should succeed");
8308 offset += decoder.total_in() as usize;
8309 descriptors.push(EntryDescriptor {
8310 offset: entry_offset,
8311 kind: header.kind,
8312 base,
8313 });
8314 }
8315 descriptors
8316 }
8317
8318 fn similar_blob_objects() -> (EncodedObject, EncodedObject) {
8319 let mut base = Vec::new();
8320 for _ in 0..300 {
8321 base.extend_from_slice(b"common payload\n");
8322 }
8323 base.extend_from_slice(b"base\n");
8324 let mut changed = Vec::new();
8325 for _ in 0..300 {
8326 changed.extend_from_slice(b"common payload\n");
8327 }
8328 changed.extend_from_slice(b"changed\n");
8329 (
8330 EncodedObject::new(ObjectType::Blob, base),
8331 EncodedObject::new(ObjectType::Blob, changed),
8332 )
8333 }
8334
8335 fn single_object_pack(format: ObjectFormat, object_type: ObjectType, body: &[u8]) -> Vec<u8> {
8336 let mut pack = Vec::new();
8337 pack.extend_from_slice(b"PACK");
8338 pack.extend_from_slice(&2u32.to_be_bytes());
8339 pack.extend_from_slice(&1u32.to_be_bytes());
8340 write_entry_header(&mut pack, object_type, body.len() as u64);
8341 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
8342 encoder
8343 .write_all(body)
8344 .expect("test operation should succeed");
8345 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
8346 let checksum =
8347 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
8348 pack.extend_from_slice(checksum.as_bytes());
8349 pack
8350 }
8351
/// Selects how `two_object_delta_pack` encodes the reference to the delta base.
#[derive(Clone, Copy, Debug)]
enum DeltaKind {
    /// Entry type code 6, followed by the relative offset back to the base entry.
    Offset,
    /// Entry type code 7, followed by the raw object id of the base.
    Ref,
}
8357
8358 fn two_object_delta_pack(
8359 format: ObjectFormat,
8360 base: &[u8],
8361 result: &[u8],
8362 delta_kind: DeltaKind,
8363 ) -> Vec<u8> {
8364 let mut pack = Vec::new();
8365 pack.extend_from_slice(b"PACK");
8366 pack.extend_from_slice(&2u32.to_be_bytes());
8367 pack.extend_from_slice(&2u32.to_be_bytes());
8368
8369 let base_offset = pack.len();
8370 write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
8371 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
8372 encoder
8373 .write_all(base)
8374 .expect("test operation should succeed");
8375 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
8376
8377 let delta = append_suffix_delta(base, result);
8378 let delta_offset = pack.len();
8379 write_pack_entry_header_kind(
8380 &mut pack,
8381 match delta_kind {
8382 DeltaKind::Offset => 6,
8383 DeltaKind::Ref => 7,
8384 },
8385 delta.len() as u64,
8386 );
8387 match delta_kind {
8388 DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
8389 DeltaKind::Ref => {
8390 let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
8391 .expect("test operation should succeed");
8392 pack.extend_from_slice(base_oid.as_bytes());
8393 }
8394 }
8395 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
8396 encoder
8397 .write_all(&delta)
8398 .expect("test operation should succeed");
8399 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
8400
8401 let checksum =
8402 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
8403 pack.extend_from_slice(checksum.as_bytes());
8404 pack
8405 }
8406
8407 fn thin_ref_delta_pack(format: ObjectFormat, base: &[u8], result: &[u8]) -> Vec<u8> {
8408 let mut pack = Vec::new();
8409 pack.extend_from_slice(b"PACK");
8410 pack.extend_from_slice(&2u32.to_be_bytes());
8411 pack.extend_from_slice(&1u32.to_be_bytes());
8412
8413 let delta = append_suffix_delta(base, result);
8414 write_pack_entry_header_kind(&mut pack, 7, delta.len() as u64);
8415 let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
8416 .expect("test operation should succeed");
8417 pack.extend_from_slice(base_oid.as_bytes());
8418 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
8419 encoder
8420 .write_all(&delta)
8421 .expect("test operation should succeed");
8422 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
8423
8424 let checksum =
8425 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
8426 pack.extend_from_slice(checksum.as_bytes());
8427 pack
8428 }
8429
/// Returns a path under the system temp directory named from `name`, the process id
/// and the current time in nanoseconds since the Unix epoch. The directory is not created.
fn unique_temp_dir(name: &str) -> PathBuf {
    let since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("test operation should succeed");
    let nanos = since_epoch.as_nanos();
    let pid = std::process::id();

    let mut dir = std::env::temp_dir();
    dir.push(format!("sley-{name}-{}-{nanos}", pid));
    dir
}
8437
/// Runs `git` with `args` in `cwd` and panics unless the process starts and exits successfully.
///
/// On a non-zero exit the panic message includes the exit code plus captured stdout and stderr.
fn run_git_success(cwd: &Path, args: &[&str]) {
    let mut command = Command::new("git");
    command.current_dir(cwd).args(args);
    let output = match command.output() {
        Ok(output) => output,
        Err(err) => panic!("failed to run git {args:?}: {err}"),
    };

    let succeeded = output.status.success();
    assert!(
        succeeded,
        "git {args:?} failed with status {:?}\nstdout:\n{}\nstderr:\n{}",
        output.status.code(),
        String::from_utf8_lossy(&output.stdout),
        String::from_utf8_lossy(&output.stderr)
    );
}
8452
/// Returns the one path directly inside `dir` whose extension equals `extension`.
///
/// Panics if the directory cannot be read or if the number of matching files is not exactly one.
fn single_path_with_extension(dir: &Path, extension: &str) -> PathBuf {
    let mut paths = Vec::new();
    for entry in fs::read_dir(dir).expect("test operation should succeed") {
        let path = entry.expect("test operation should succeed").path();
        let matches_extension =
            path.extension().and_then(|ext| ext.to_str()) == Some(extension);
        if matches_extension {
            paths.push(path);
        }
    }
    assert_eq!(paths.len(), 1, "expected one .{extension} file");
    paths.remove(0)
}
8462
8463 fn pack_bitmap_index(
8464 format: ObjectFormat,
8465 object_count: u32,
8466 options: u16,
8467 pack_checksum: &ObjectId,
8468 entries: &[(u32, u8, u8, &[u64])],
8469 name_hash_cache: Option<&[u32]>,
8470 ) -> Vec<u8> {
8471 let mut out = Vec::new();
8472 out.extend_from_slice(b"BITM");
8473 out.extend_from_slice(&1u16.to_be_bytes());
8474 out.extend_from_slice(&options.to_be_bytes());
8475 out.extend_from_slice(&(entries.len() as u32).to_be_bytes());
8476 out.extend_from_slice(pack_checksum.as_bytes());
8477 write_test_ewah(&mut out, object_count, &[0b001]);
8478 write_test_ewah(&mut out, object_count, &[0b010]);
8479 write_test_ewah(&mut out, object_count, &[0b100]);
8480 write_test_ewah(&mut out, object_count, &[0]);
8481 for (position, xor_offset, flags, words) in entries {
8482 out.extend_from_slice(&position.to_be_bytes());
8483 out.push(*xor_offset);
8484 out.push(*flags);
8485 write_test_ewah(&mut out, object_count, words);
8486 }
8487 if let Some(cache) = name_hash_cache {
8488 for value in cache {
8489 out.extend_from_slice(&value.to_be_bytes());
8490 }
8491 }
8492 let checksum =
8493 sley_core::digest_bytes(format, &out).expect("test operation should succeed");
8494 out.extend_from_slice(checksum.as_bytes());
8495 out
8496 }
8497
8498 fn write_test_ewah(out: &mut Vec<u8>, bit_size: u32, literals: &[u64]) {
8499 out.extend_from_slice(&bit_size.to_be_bytes());
8500 let words = ewah_literal_words(literals);
8501 out.extend_from_slice(&(words.len() as u32).to_be_bytes());
8502 for word in words {
8503 out.extend_from_slice(&word.to_be_bytes());
8504 }
8505 out.extend_from_slice(&0u32.to_be_bytes());
8506 }
8507
/// Returns a header word followed by `literals` unchanged.
///
/// The header word is the number of literals shifted left by 33 bits, with all lower bits zero.
fn ewah_literal_words(literals: &[u64]) -> Vec<u64> {
    let mut words = Vec::with_capacity(literals.len() + 1);
    words.push((literals.len() as u64) << 33);
    words.extend(literals.iter().copied());
    words
}
8514
8515 fn refresh_trailing_checksum(format: ObjectFormat, bytes: &mut [u8]) {
8516 let checksum_offset = bytes.len() - format.raw_len();
8517 let checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])
8518 .expect("test operation should succeed");
8519 bytes[checksum_offset..].copy_from_slice(checksum.as_bytes());
8520 }
8521
/// Builds a delta that rebuilds `result` as "all of `base`, then a literal suffix".
///
/// The delta consists of the base size and result size as varints, a copy instruction
/// covering the whole base from offset 0, and an insert instruction carrying the suffix.
/// The copy size is encoded in as many size bytes as it needs (one or two), so every base
/// length admitted by the guard below is represented exactly. An empty base emits no copy
/// instruction (a copy size of 0 is read as 0x10000), and an empty suffix emits no insert
/// instruction (a `0x00` opcode is reserved).
///
/// # Panics
///
/// Panics if `result` does not start with `base`, if `base` is 0x10000 bytes or longer,
/// or if the suffix is 0x80 bytes or longer.
fn append_suffix_delta(base: &[u8], result: &[u8]) -> Vec<u8> {
    assert!(result.starts_with(base));
    let suffix = &result[base.len()..];
    assert!(base.len() < 0x10000);
    assert!(suffix.len() < 0x80);

    let mut delta = Vec::new();
    write_delta_varint(&mut delta, base.len() as u64);
    write_delta_varint(&mut delta, result.len() as u64);

    if !base.is_empty() {
        // Copy instruction: bit 0x80 marks a copy; 0x10 and 0x20 flag which size bytes
        // follow. No offset flags are set, so the copy starts at offset 0.
        let [size_low, size_high] = (base.len() as u16).to_le_bytes();
        let mut command = 0x80u8;
        if size_low != 0 {
            command |= 0x10;
        }
        if size_high != 0 {
            command |= 0x20;
        }
        delta.push(command);
        if size_low != 0 {
            delta.push(size_low);
        }
        if size_high != 0 {
            delta.push(size_high);
        }
    }

    if !suffix.is_empty() {
        // Insert instruction: the length byte (1..=0x7f) followed by the literal bytes.
        delta.push(suffix.len() as u8);
        delta.extend_from_slice(suffix);
    }
    delta
}

/// Appends `value` to `out` as a little-endian base-128 varint:
/// seven bits per byte, with the high bit set on every byte except the last.
fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
    loop {
        let mut byte = (value as u8) & 0x7f;
        value >>= 7;
        if value != 0 {
            byte |= 0x80;
        }
        out.push(byte);
        if value == 0 {
            break;
        }
    }
}
8550
/// Appends a pack entry header for a raw `type_code` and `size`.
///
/// The first byte holds `type_code` shifted into bits 4..7 and the low four bits of `size`;
/// the remaining size bits follow seven at a time, least significant first.
/// Bit 0x80 is set on every byte that is followed by another size byte.
fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, mut size: u64) {
    let low_nibble = (size as u8) & 0x0f;
    size >>= 4;
    let continuation = if size != 0 { 0x80 } else { 0x00 };
    out.push((type_code << 4) | low_nibble | continuation);

    while size != 0 {
        let chunk = (size as u8) & 0x7f;
        size >>= 7;
        out.push(if size != 0 { chunk | 0x80 } else { chunk });
    }
}
8567
/// Appends the ofs-delta base distance `relative` to `out`.
///
/// The value is written most-significant group first, seven bits per byte, with bit 0x80 set
/// on every byte except the last. Following the pack format's offset encoding, each
/// continuation step stores the remaining value minus one. Distances below 0x80 still encode
/// as the single byte `relative`, and larger distances no longer panic.
fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: usize) {
    // Produce the bytes least-significant first, then append them reversed.
    let mut remaining = relative;
    let mut encoded = vec![(remaining & 0x7f) as u8];
    remaining >>= 7;
    while remaining != 0 {
        remaining -= 1;
        encoded.push(0x80 | (remaining & 0x7f) as u8);
        remaining >>= 7;
    }
    out.extend(encoded.iter().rev());
}
8572
8573 fn single_entry_index(
8574 format: ObjectFormat,
8575 oid: ObjectId,
8576 crc32: u32,
8577 offset: u32,
8578 pack_checksum: ObjectId,
8579 ) -> Vec<u8> {
8580 let mut index = Vec::new();
8581 index.extend_from_slice(&[0xff, b't', b'O', b'c']);
8582 index.extend_from_slice(&2u32.to_be_bytes());
8583 for idx in 0..256 {
8584 let count = if idx >= usize::from(oid.as_bytes()[0]) {
8585 1u32
8586 } else {
8587 0u32
8588 };
8589 index.extend_from_slice(&count.to_be_bytes());
8590 }
8591 index.extend_from_slice(oid.as_bytes());
8592 index.extend_from_slice(&crc32.to_be_bytes());
8593 index.extend_from_slice(&offset.to_be_bytes());
8594 index.extend_from_slice(pack_checksum.as_bytes());
8595 let checksum =
8596 sley_core::digest_bytes(format, &index).expect("test operation should succeed");
8597 index.extend_from_slice(checksum.as_bytes());
8598 index
8599 }
8600
8601 fn single_entry_index_v1(
8602 format: ObjectFormat,
8603 oid: ObjectId,
8604 offset: u32,
8605 pack_checksum: ObjectId,
8606 ) -> Vec<u8> {
8607 let mut index = Vec::new();
8608 for idx in 0..256 {
8609 let count = if idx >= usize::from(oid.as_bytes()[0]) {
8610 1u32
8611 } else {
8612 0u32
8613 };
8614 index.extend_from_slice(&count.to_be_bytes());
8615 }
8616 index.extend_from_slice(&offset.to_be_bytes());
8617 index.extend_from_slice(oid.as_bytes());
8618 index.extend_from_slice(pack_checksum.as_bytes());
8619 let checksum =
8620 sley_core::digest_bytes(format, &index).expect("test operation should succeed");
8621 index.extend_from_slice(checksum.as_bytes());
8622 index
8623 }
8624
8625 fn pack_reverse_index(
8626 format: ObjectFormat,
8627 positions: &[u32],
8628 pack_checksum: ObjectId,
8629 ) -> Vec<u8> {
8630 let mut reverse_index = Vec::new();
8631 reverse_index.extend_from_slice(b"RIDX");
8632 reverse_index.extend_from_slice(&1u32.to_be_bytes());
8633 reverse_index.extend_from_slice(&hash_function_id(format).to_be_bytes());
8634 for position in positions {
8635 reverse_index.extend_from_slice(&position.to_be_bytes());
8636 }
8637 reverse_index.extend_from_slice(pack_checksum.as_bytes());
8638 let checksum =
8639 sley_core::digest_bytes(format, &reverse_index).expect("test operation should succeed");
8640 reverse_index.extend_from_slice(checksum.as_bytes());
8641 reverse_index
8642 }
8643
8644 fn pack_mtimes(format: ObjectFormat, mtimes: &[u32], pack_checksum: ObjectId) -> Vec<u8> {
8645 let mut out = Vec::new();
8646 out.extend_from_slice(b"MTME");
8647 out.extend_from_slice(&1u32.to_be_bytes());
8648 out.extend_from_slice(&hash_function_id(format).to_be_bytes());
8649 for mtime in mtimes {
8650 out.extend_from_slice(&mtime.to_be_bytes());
8651 }
8652 out.extend_from_slice(pack_checksum.as_bytes());
8653 let checksum =
8654 sley_core::digest_bytes(format, &out).expect("test operation should succeed");
8655 out.extend_from_slice(checksum.as_bytes());
8656 out
8657 }
8658
8659 fn midx_chunks_with_pack_names(
8660 _format: ObjectFormat,
8661 pack_names: Vec<u8>,
8662 entries: &[(ObjectId, u32, u64)],
8663 ) -> Vec<([u8; 4], Vec<u8>)> {
8664 let mut entries = entries.to_vec();
8665 entries.sort_by(|left, right| left.0.as_bytes().cmp(right.0.as_bytes()));
8666 let object_ids: Vec<ObjectId> = entries.iter().map(|entry| entry.0).collect();
8667 let mut large_offsets = Vec::new();
8668 let mut chunks = vec![
8669 (*b"PNAM", pack_names),
8670 (*b"OIDF", midx_oid_fanout(&object_ids)),
8671 (*b"OIDL", midx_oid_lookup(&object_ids)),
8672 (
8673 *b"OOFF",
8674 midx_ooff_entries(
8675 &entries
8676 .iter()
8677 .map(|(_oid, pack_int_id, offset)| (*pack_int_id, *offset))
8678 .collect::<Vec<_>>(),
8679 &mut large_offsets,
8680 ),
8681 ),
8682 ];
8683 if !large_offsets.is_empty() {
8684 chunks.push((*b"LOFF", large_offsets));
8685 }
8686 chunks
8687 }
8688
8689 fn midx_oid_fanout(object_ids: &[ObjectId]) -> Vec<u8> {
8690 let mut counts = [0u32; 256];
8691 for oid in object_ids {
8692 counts[oid.as_bytes()[0] as usize] += 1;
8693 }
8694 let mut running = 0u32;
8695 let mut out = Vec::new();
8696 for count in counts {
8697 running += count;
8698 out.extend_from_slice(&running.to_be_bytes());
8699 }
8700 out
8701 }
8702
8703 fn midx_oid_lookup(object_ids: &[ObjectId]) -> Vec<u8> {
8704 let mut out = Vec::new();
8705 for oid in object_ids {
8706 out.extend_from_slice(oid.as_bytes());
8707 }
8708 out
8709 }
8710
/// Serializes `(pack id, offset)` pairs as 8-byte object-offset records.
///
/// Each record is the big-endian pack id followed by a big-endian u32. Offsets
/// below 0x8000_0000 are stored directly. Larger ones are appended to
/// `large_offsets` as big-endian u64 values, and the record stores
/// `0x8000_0000 | slot`, where `slot` is that value's index in `large_offsets`.
fn midx_ooff_entries(entries: &[(u32, u64)], large_offsets: &mut Vec<u8>) -> Vec<u8> {
    const LARGE_OFFSET_FLAG: u32 = 0x8000_0000;

    let mut records = Vec::with_capacity(entries.len() * 8);
    for &(pack_int_id, offset) in entries {
        records.extend_from_slice(&pack_int_id.to_be_bytes());
        let field = if offset < u64::from(LARGE_OFFSET_FLAG) {
            offset as u32
        } else {
            // The slot index is taken before the new 8-byte value is appended.
            let slot = (large_offsets.len() / 8) as u32;
            large_offsets.extend_from_slice(&offset.to_be_bytes());
            LARGE_OFFSET_FLAG | slot
        };
        records.extend_from_slice(&field.to_be_bytes());
    }
    records
}
8725
/// Serializes `values` as consecutive big-endian u32 words.
fn midx_u32_table(values: &[u32]) -> Vec<u8> {
    values
        .iter()
        .flat_map(|value| value.to_be_bytes())
        .collect()
}
8733
/// Serializes `(bitmap_pos, bitmap_nr)` pairs as two big-endian u32 words each,
/// position first.
fn midx_bitmap_packs(entries: &[(u32, u32)]) -> Vec<u8> {
    let mut table = Vec::with_capacity(entries.len() * 8);
    for &(bitmap_pos, bitmap_nr) in entries {
        for field in [bitmap_pos, bitmap_nr] {
            table.extend_from_slice(&field.to_be_bytes());
        }
    }
    table
}
8742
8743 fn multi_pack_index(
8744 format: ObjectFormat,
8745 version: u8,
8746 pack_count: u32,
8747 chunks: &[([u8; 4], Vec<u8>)],
8748 ) -> Vec<u8> {
8749 let lookup_len = (chunks.len() + 1) * 12;
8750 let mut out = Vec::new();
8751 out.extend_from_slice(b"MIDX");
8752 out.push(version);
8753 out.push(hash_function_id(format) as u8);
8754 out.push(chunks.len() as u8);
8755 out.push(0);
8756 out.extend_from_slice(&pack_count.to_be_bytes());
8757 let mut chunk_offset = (12 + lookup_len) as u64;
8758 for (id, data) in chunks {
8759 out.extend_from_slice(id);
8760 out.extend_from_slice(&chunk_offset.to_be_bytes());
8761 chunk_offset += data.len() as u64;
8762 }
8763 out.extend_from_slice(&[0, 0, 0, 0]);
8764 out.extend_from_slice(&chunk_offset.to_be_bytes());
8765 for (_id, data) in chunks {
8766 out.extend_from_slice(data);
8767 }
8768 let checksum =
8769 sley_core::digest_bytes(format, &out).expect("test operation should succeed");
8770 out.extend_from_slice(checksum.as_bytes());
8771 out
8772 }
8773
8774 fn pack_checksum_sha1() -> ObjectId {
8777 sley_core::digest_bytes(ObjectFormat::Sha1, b"pack").expect("test operation should succeed")
8778 }
8779
8780 fn parse_ewah_bytes(bytes: &[u8]) -> EwahBitmap {
8781 let mut offset = 0usize;
8784 let checksum_offset = bytes.len();
8785 parse_bitmap_ewah(bytes, &mut offset, checksum_offset, 0)
8786 .expect("test operation should succeed")
8787 }
8788
/// One non-zero word must encode as a run-length word plus that literal, exactly
/// as `ewah_literal_words` builds it.
#[test]
fn ewah_encodes_single_literal_word_matching_helper() {
    let literal = 0b101u64;
    let bitmap = EwahBitmap::from_words(64, &[literal]).expect("test operation should succeed");
    let expected_words = ewah_literal_words(&[literal]);
    assert_eq!(bitmap.words, expected_words);
    assert_eq!(bitmap.rlw_position, 0);
    assert_eq!(bitmap.bit_size, 64);
}
8799
/// Pins the serialized byte layout of a one-literal bitmap: every field is
/// written big-endian.
#[test]
fn ewah_byte_layout_is_big_endian() {
    let literal = 0x0102_0304_0506_0708u64;
    let bitmap = EwahBitmap::from_words(64, &[literal]).expect("test operation should succeed");
    let serialized = bitmap.to_bytes();

    let mut expected = Vec::with_capacity(28);
    // u32 matching the bit size passed above.
    expected.extend(64u32.to_be_bytes());
    // Word count: one run-length word plus one literal.
    expected.extend(2u32.to_be_bytes());
    // Run-length word announcing a single literal.
    expected.extend((1u64 << 33).to_be_bytes());
    expected.extend(literal.to_be_bytes());
    // Trailing u32, zero here (presumably the RLW position).
    expected.extend(0u32.to_be_bytes());
    assert_eq!(serialized, expected);
}
8813
/// The empty bitmap serializes to twelve zero bytes and parses back to an equal,
/// position-free bitmap.
#[test]
fn ewah_empty_bitmap_serialises_like_git() {
    let empty = EwahBitmap::empty();
    let serialized = empty.to_bytes();
    assert_eq!(serialized, vec![0u8; 12]);

    let reparsed = parse_ewah_bytes(&serialized);
    assert_eq!(reparsed, empty);
    let positions = reparsed
        .to_positions()
        .expect("test operation should succeed");
    assert!(positions.is_empty());
}
8830
/// Three all-zero words followed by one literal collapse into a single
/// run-length word (zero run of length 3, one literal) plus that literal.
#[test]
fn ewah_compresses_clean_zero_run() {
    let bitmap =
        EwahBitmap::from_words(256, &[0, 0, 0, 0b1]).expect("test operation should succeed");
    assert_eq!(bitmap.words.len(), 2, "expected one RLW plus one literal");

    let header = bitmap.words[0];
    let run_bit = header & 1;
    let run_length = (header >> 1) & 0xffff_ffff;
    let literal_count = header >> 33;
    assert_eq!(run_bit, 0, "run bit should be zero");
    assert_eq!(run_length, 3, "run length should be 3");
    assert_eq!(literal_count, 1, "literal length should be 1");
    assert_eq!(bitmap.words[1], 0b1);
}
8844
/// Three all-ones words collapse into one run-length word: run bit set, run
/// length 3, no literals.
#[test]
fn ewah_compresses_clean_ones_run() {
    let bitmap =
        EwahBitmap::from_words(192, &[u64::MAX; 3]).expect("test operation should succeed");
    assert_eq!(bitmap.words.len(), 1);

    let header = bitmap.words[0];
    let run_bit = header & 1;
    let run_length = (header >> 1) & 0xffff_ffff;
    let literal_count = header >> 33;
    assert_eq!(run_bit, 1, "run bit should be one");
    assert_eq!(run_length, 3, "run length should be 3");
    assert_eq!(literal_count, 0, "no literals");
}
8856
/// A mix of zero runs, literals and a ones run must decode back to the exact
/// input words.
#[test]
fn ewah_run_then_literal_then_run_roundtrips() {
    let input = vec![0, 0, 0xdead_beef, u64::MAX, u64::MAX, 0, 0xabc];
    let total_bits = (input.len() * 64) as u32;
    let bitmap = EwahBitmap::from_words(total_bits, &input).expect("test operation should succeed");
    let decoded = bitmap.to_words().expect("test operation should succeed");
    assert_eq!(decoded, input);
}
8867
/// With a bit size of 1, the all-zero words after the first one are not kept:
/// decoding yields only the first word.
#[test]
fn ewah_drops_trailing_clean_zero_words() {
    let padded = vec![0b1, 0, 0, 0];
    let bitmap = EwahBitmap::from_words(1, &padded).expect("test operation should succeed");
    assert_eq!(bitmap.bit_size, 1);
    let decoded = bitmap.to_words().expect("test operation should succeed");
    assert_eq!(decoded, vec![0b1]);
}
8881
/// Positions spread over several words (including word boundaries) survive a
/// `from_positions` / `to_positions` round trip.
#[test]
fn ewah_from_positions_roundtrips_via_positions() {
    let positions = [0u32, 1, 63, 64, 65, 200, 511];
    let bitmap =
        EwahBitmap::from_positions(512, &positions).expect("test operation should succeed");
    let mut roundtripped = bitmap
        .to_positions()
        .expect("test operation should succeed");
    roundtripped.sort_unstable();
    assert_eq!(roundtripped, positions);
}
8891
/// Repeated, unordered input positions come back deduplicated and ascending.
#[test]
fn ewah_from_positions_dedupes_and_orders() {
    let noisy = [100, 5, 100, 5, 5];
    let bitmap = EwahBitmap::from_positions(128, &noisy).expect("test operation should succeed");
    let decoded = bitmap
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(decoded, vec![5, 100]);
}
8901
/// A zero run longer than one run-length word can count (0xffff_ffff words plus
/// five more) is split across two run-length words.
#[test]
fn ewah_huge_zero_run_spans_multiple_rlws() {
    let mut builder = EwahBuilder::new(0);
    for run in [0xffff_ffff, 5] {
        builder.add_empty_words(false, run);
    }
    let bitmap = builder.finish().expect("test operation should succeed");
    assert_eq!(bitmap.words.len(), 2, "run split across two RLWs");

    // Extracts the 32-bit run length stored above the run bit.
    let run_length = |word: u64| (word >> 1) & 0xffff_ffff;
    let (first, second) = (bitmap.words[0], bitmap.words[1]);
    assert_eq!(run_length(first), 0xffff_ffff);
    assert_eq!(second & 1, 0);
    assert_eq!(run_length(second), 5);
    assert_eq!(bitmap.rlw_position, 1);
}
8918
/// A bit size of 65 cannot be backed by the single word supplied, so
/// construction must fail.
#[test]
fn ewah_from_words_rejects_oversized_bit_size() {
    let outcome = EwahBitmap::from_words(65, &[0]);
    assert!(outcome.is_err());
}
8924
/// Position 64 lies outside a 64-bit bitmap (valid positions are 0..=63), so
/// construction must fail.
#[test]
fn ewah_from_positions_rejects_out_of_range() {
    let outcome = EwahBitmap::from_positions(64, &[64]);
    assert!(outcome.is_err());
}
8929
/// Serializing a mixed bitmap and parsing the bytes again yields an equal bitmap
/// that decodes to the original words.
#[test]
fn ewah_serialised_bytes_reparse_to_equal_bitmap() {
    let input = vec![0, u64::MAX, 0x1234_5678_9abc_def0, 0, 0, 0xff];
    let total_bits = (input.len() * 64) as u32;
    let original =
        EwahBitmap::from_words(total_bits, &input).expect("test operation should succeed");

    let reparsed = parse_ewah_bytes(&original.to_bytes());
    assert_eq!(reparsed, original);
    let decoded = reparsed.to_words().expect("test operation should succeed");
    assert_eq!(decoded, input);
}
8945
/// Writes a SHA-1 bitmap index for a three-object pack (commit, tree, blob) with
/// one selected commit, parses it back, and checks the header fields, the
/// per-type bitmaps, the single commit entry and the absent name-hash cache.
#[test]
fn pack_bitmap_index_write_parse_roundtrip_sha1() {
    let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
    let selected = [(0u32, 0u32, vec![1u32, 2u32])];
    let bytes = write_bitmap(
        ObjectFormat::Sha1,
        pack_checksum_sha1(),
        &object_types,
        &selected,
        None,
    )
    .expect("test operation should succeed");
    assert_eq!(&bytes[..4], b"BITM");

    let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
        .expect("test operation should succeed");
    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.options, PackBitmapIndex::OPTION_FULL_DAG);
    assert_eq!(parsed.pack_checksum, pack_checksum_sha1());

    // Each object type owns exactly the position of its one object.
    let per_type = &parsed.type_bitmaps;
    let commits = per_type
        .commits
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(commits, vec![0]);
    let trees = per_type
        .trees
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(trees, vec![1]);
    let blobs = per_type
        .blobs
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(blobs, vec![2]);
    let tags = per_type
        .tags
        .to_positions()
        .expect("test operation should succeed");
    assert!(tags.is_empty());

    assert_eq!(parsed.entries.len(), 1);
    let entry = parsed
        .entry_for_index_position(0)
        .expect("test operation should succeed");
    assert_eq!(entry.xor_offset, 0);
    assert_eq!(entry.flags, 0);
    // The commit's own position comes back alongside the two listed ones.
    let reachable = entry
        .bitmap
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(reachable, vec![0, 1, 2]);
    assert_eq!(parsed.name_hash_cache, None);
}
9012
/// Same round trip as the SHA-1 test, but with SHA-256: the format and both
/// checksums must carry through, and the commit bitmap must decode correctly.
#[test]
fn pack_bitmap_index_write_parse_roundtrip_sha256() {
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
        .expect("test operation should succeed");
    let object_types = [ObjectType::Commit, ObjectType::Tree];
    let selected = [(0u32, 0u32, vec![1u32])];
    let bytes = write_bitmap(
        ObjectFormat::Sha256,
        pack_checksum.clone(),
        &object_types,
        &selected,
        None,
    )
    .expect("test operation should succeed");

    let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha256, 2)
        .expect("test operation should succeed");
    assert_eq!(parsed.format, ObjectFormat::Sha256);
    assert_eq!(parsed.pack_checksum, pack_checksum);
    assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
    let reachable = parsed.entries[0]
        .bitmap
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(reachable, vec![0, 1]);
}
9039
/// Supplying a name-hash cache sets the hash-cache option bit and the cache is
/// returned unchanged by the parser.
#[test]
fn pack_bitmap_index_write_includes_name_hash_cache() {
    let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
    let name_hashes = vec![0x1111_1111u32, 0x2222_2222, 0x3333_3333];
    let selected = [(0u32, 0u32, vec![1u32, 2u32])];
    let bytes = write_bitmap(
        ObjectFormat::Sha1,
        pack_checksum_sha1(),
        &object_types,
        &selected,
        Some(name_hashes.clone()),
    )
    .expect("test operation should succeed");

    let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
        .expect("test operation should succeed");
    let expected_options = PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE;
    assert_eq!(parsed.options, expected_options);
    assert_eq!(parsed.name_hash_cache, Some(name_hashes));
}
9060
/// Two commits added to one writer produce two entries, each with its own
/// reachability bitmap (its own position plus the positions listed for it).
#[test]
fn pack_bitmap_writer_supports_multiple_commits() {
    let object_types = [
        ObjectType::Commit,
        ObjectType::Commit,
        ObjectType::Tree,
        ObjectType::Blob,
    ];
    let mut writer =
        PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
            .expect("test operation should succeed");
    writer
        .add_commit(0, 0, &[2, 3])
        .expect("test operation should succeed");
    writer
        .add_commit(1, 1, &[2])
        .expect("test operation should succeed");
    let bytes = writer.write().expect("test operation should succeed");

    let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 4)
        .expect("test operation should succeed");
    assert_eq!(parsed.entries.len(), 2);
    let commits = parsed
        .type_bitmaps
        .commits
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(commits, vec![0, 1]);

    let first = parsed
        .entry_for_index_position(0)
        .expect("test operation should succeed");
    let first_reachable = first
        .bitmap
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(first_reachable, vec![0, 2, 3]);

    let second = parsed
        .entry_for_index_position(1)
        .expect("test operation should succeed");
    let second_reachable = second
        .bitmap
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(second_reachable, vec![1, 2]);
}
9111
/// A freshly built index carries an all-zero index checksum; writing it and
/// parsing the bytes back must yield a non-zero checksum.
#[test]
fn pack_bitmap_index_recomputes_checksum_on_write() {
    let object_types = [ObjectType::Commit, ObjectType::Blob];
    let mut index =
        PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
            .expect("test operation should succeed")
            .build()
            .expect("test operation should succeed");
    assert_eq!(index.index_checksum.as_bytes(), [0u8; 20]);

    // Replace whatever entries the writer produced with one hand-made entry.
    index.entries.clear();
    let replacement = PackBitmapEntry {
        object_position: 0,
        xor_offset: 0,
        flags: 0,
        bitmap: EwahBitmap::from_positions(2, &[0, 1]).expect("test operation should succeed"),
    };
    index.entries.push(replacement);

    let bytes = index.write().expect("test operation should succeed");
    let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 2)
        .expect("test operation should succeed");
    assert_ne!(parsed.index_checksum.as_bytes(), [0u8; 20]);
}
9135
/// `add_commit` must fail for each invalid selection against a two-object pack
/// (commit at position 0, blob at position 1).
#[test]
fn pack_bitmap_writer_rejects_non_commit_selection() {
    let object_types = [ObjectType::Commit, ObjectType::Blob];
    let mut writer =
        PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
            .expect("test operation should succeed");

    // Position 1 holds a blob, not a commit.
    let blob_selected = writer.add_commit(1, 1, &[]);
    assert!(blob_selected.is_err());
    // Position 5 is beyond the two known objects.
    let position_out_of_range = writer.add_commit(5, 5, &[]);
    assert!(position_out_of_range.is_err());
    // Valid first argument, but the second argument 5 is rejected
    // (presumably an out-of-range index position).
    let index_out_of_range = writer.add_commit(0, 5, &[]);
    assert!(index_out_of_range.is_err());
    // The listed position 9 is beyond the two known objects.
    let reachable_out_of_range = writer.add_commit(0, 0, &[9]);
    assert!(reachable_out_of_range.is_err());
}
9151
/// A SHA-1 writer must refuse a pack checksum produced with SHA-256.
#[test]
fn pack_bitmap_writer_rejects_checksum_format_mismatch() {
    let sha256_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
        .expect("test operation should succeed");
    let outcome =
        PackBitmapWriter::new(ObjectFormat::Sha1, sha256_checksum, &[ObjectType::Commit]);
    assert!(outcome.is_err());
}
9161
/// A name-hash cache with two entries must be rejected for a one-object pack.
#[test]
fn pack_bitmap_writer_rejects_bad_name_hash_cache_len() {
    let object_types = [ObjectType::Commit];
    let writer = PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
        .expect("test operation should succeed");
    let outcome = writer.with_name_hash_cache(vec![1, 2]);
    assert!(outcome.is_err());
}
9172
/// `write` must fail whenever the hash-cache option bit and the presence of a
/// name-hash cache disagree, in either direction.
#[test]
fn pack_bitmap_index_write_rejects_inconsistent_cache_flag() {
    let writer = PackBitmapWriter::new(
        ObjectFormat::Sha1,
        pack_checksum_sha1(),
        &[ObjectType::Commit],
    )
    .expect("test operation should succeed");
    let mut index = writer.build().expect("test operation should succeed");

    // Flag set, but no cache was attached.
    index.options |= PackBitmapIndex::OPTION_HASH_CACHE;
    let flag_without_cache = index.write();
    assert!(flag_without_cache.is_err());

    // Cache attached, but the flag is cleared.
    index.options = PackBitmapIndex::OPTION_FULL_DAG;
    index.name_hash_cache = Some(vec![0]);
    let cache_without_flag = index.write();
    assert!(cache_without_flag.is_err());
}
9191
/// End-to-end check against a pack produced by the real `git` binary.
///
/// Creates a one-commit repository, repacks it, reads the resulting `.idx` and
/// `.pack`, derives each object's type in pack-offset order, writes a bitmap in
/// which the commit reaches every object, and verifies that the parsed bitmap
/// reports the pack checksum and one entry covering all objects. The temporary
/// directory is removed on a best-effort basis at the end.
#[test]
fn write_bitmap_roundtrips_through_upstream_git_parser() {
    let root = unique_temp_dir("git-pack-bitmap-writer");
    fs::create_dir_all(&root).expect("test operation should succeed");

    run_git_success(&root, &["init", "-q", "-b", "main"]);
    let commit_args = [
        "-c",
        "user.name=Example User",
        "-c",
        "user.email=example@example.invalid",
        "commit",
        "--allow-empty",
        "-q",
        "-m",
        "one",
    ];
    run_git_success(&root, &commit_args);
    run_git_success(&root, &["repack", "-adb"]);

    let pack_dir = root.join(".git").join("objects").join("pack");
    let idx_bytes = fs::read(single_path_with_extension(&pack_dir, "idx"))
        .expect("test operation should succeed");
    let index =
        PackIndex::parse(&idx_bytes, ObjectFormat::Sha1).expect("test operation should succeed");
    let pack_bytes = fs::read(single_path_with_extension(&pack_dir, "pack"))
        .expect("test operation should succeed");
    let pack = PackFile::parse_sha1(&pack_bytes).expect("test operation should succeed");

    // Bitmap positions follow pack order, i.e. ascending pack offset.
    let mut pack_order: Vec<u64> = index.entries.iter().map(|entry| entry.offset).collect();
    pack_order.sort_unstable();
    let pack_position = |offset: u64| -> u32 {
        pack_order
            .iter()
            .position(|candidate| *candidate == offset)
            .expect("test operation should succeed") as u32
    };

    // Entries that cannot be matched in the parsed pack keep the Blob default.
    let mut object_types = vec![ObjectType::Blob; index.entries.len()];
    for entry in &index.entries {
        let slot = pack_position(entry.offset) as usize;
        let located = pack
            .entries
            .iter()
            .find(|candidate| candidate.entry.offset == entry.offset);
        if let Some(found) = located {
            object_types[slot] = found.object.object_type;
        }
    }

    let commit_position = object_types
        .iter()
        .position(|ty| *ty == ObjectType::Commit)
        .expect("test operation should succeed") as u32;
    // Position of that same commit within the index's own entry order.
    let commit_index_position = index
        .entries
        .iter()
        .position(|entry| pack_position(entry.offset) == commit_position)
        .expect("test operation should succeed") as u32;
    let reachable: Vec<u32> = (0..index.entries.len() as u32).collect();

    let bytes = write_bitmap(
        ObjectFormat::Sha1,
        index.pack_checksum.clone(),
        &object_types,
        &[(commit_position, commit_index_position, reachable)],
        None,
    )
    .expect("test operation should succeed");
    let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, index.entries.len())
        .expect("test operation should succeed");
    assert_eq!(parsed.pack_checksum, index.pack_checksum);
    assert_eq!(parsed.entries.len(), 1);
    let covered = parsed.entries[0]
        .bitmap
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(covered.len(), index.entries.len());

    let _ = fs::remove_dir_all(&root);
}
9286}