1#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::{Compress, Compression, FlushCompress, Status};
6use sley_core::{GitError, ObjectFormat, ObjectId, Result};
7use sley_formats::Bundle;
8use sley_object::{EncodedObject, ObjectType};
9use std::borrow::Borrow;
10use std::cell::RefCell;
11use std::collections::{HashMap, HashSet};
12use std::sync::Arc;
13
14#[derive(Debug, Clone, PartialEq, Eq)]
15pub struct PackEntry {
16 pub oid: ObjectId,
17 pub compressed_size: u64,
18 pub uncompressed_size: u64,
19 pub offset: u64,
20}
21
22pub const DEFAULT_PACK_WINDOW: usize = 10;
28
29pub const DEFAULT_PACK_DEPTH: usize = 50;
35
36const PACK_PARALLEL_COMPRESSION_MIN_OBJECTS: usize = 64;
40
41const PACK_PARALLEL_COMPRESSION_MAX_THREADS: usize = 4;
45
46#[derive(Debug, Clone)]
52pub struct PackWriteOptions {
53 pub window: usize,
56 pub depth: usize,
58 pub prefer_ofs_delta: bool,
62 pub thin_bases: HashMap<ObjectId, EncodedObject>,
67 pub reorder: bool,
73}
74
75impl Default for PackWriteOptions {
76 fn default() -> Self {
77 Self::new()
78 }
79}
80
81impl PackWriteOptions {
82 pub fn new() -> Self {
86 Self {
87 window: DEFAULT_PACK_WINDOW,
88 depth: DEFAULT_PACK_DEPTH,
89 prefer_ofs_delta: true,
90 thin_bases: HashMap::new(),
91 reorder: true,
92 }
93 }
94
95 pub fn with_window(mut self, window: usize) -> Self {
97 self.window = window;
98 self
99 }
100
101 pub fn with_depth(mut self, depth: usize) -> Self {
103 self.depth = depth;
104 self
105 }
106
107 pub fn with_prefer_ofs_delta(mut self, prefer_ofs_delta: bool) -> Self {
110 self.prefer_ofs_delta = prefer_ofs_delta;
111 self
112 }
113
114 pub fn with_thin_bases(mut self, thin_bases: HashMap<ObjectId, EncodedObject>) -> Self {
116 self.thin_bases = thin_bases;
117 self
118 }
119
120 pub fn with_reorder(mut self, reorder: bool) -> Self {
123 self.reorder = reorder;
124 self
125 }
126}
127
128#[derive(Debug, Clone, PartialEq, Eq)]
129pub struct RepackPolicy {
130 pub write_bitmaps: bool,
131 pub cruft_packs: bool,
132 pub geometric_factor: Option<u8>,
133}
134
135#[derive(Debug, Clone, PartialEq, Eq)]
136pub struct PackFile {
137 pub version: u32,
138 pub entries: Vec<PackObject>,
139 pub checksum: ObjectId,
140}
141
142#[derive(Debug, Clone, PartialEq, Eq)]
143pub struct PackObject {
144 pub entry: PackEntry,
145 pub object: EncodedObject,
146}
147
148#[derive(Debug, Clone, PartialEq, Eq)]
149pub struct PackWrite {
150 pub pack: Vec<u8>,
151 pub index: Vec<u8>,
152 pub checksum: ObjectId,
153 pub entries: Vec<PackIndexEntry>,
154}
155
156#[derive(Debug, Clone, Copy, PartialEq, Eq)]
157pub struct PackInput<'a> {
158 pub oid: &'a ObjectId,
159 pub object: &'a EncodedObject,
160}
161
162#[derive(Debug, Clone, PartialEq, Eq)]
163pub struct PackIndexBuild {
164 pub index: Vec<u8>,
165 pub pack_checksum: ObjectId,
166 pub entries: Vec<PackIndexEntry>,
167}
168
169#[derive(Debug, Clone, PartialEq, Eq)]
170pub struct PackIndex {
171 pub version: u32,
172 pub fanout: [u32; 256],
173 pub entries: Vec<PackIndexEntry>,
174 pub pack_checksum: ObjectId,
175 pub index_checksum: ObjectId,
176}
177
178#[derive(Debug, Clone, PartialEq, Eq)]
179pub struct PackIndexEntry {
180 pub oid: ObjectId,
181 pub crc32: u32,
182 pub offset: u64,
183}
184
185#[derive(Debug, Clone, PartialEq, Eq)]
186pub struct PackReverseIndex {
187 pub version: u32,
188 pub format: ObjectFormat,
189 pub positions: Vec<u32>,
190 pub pack_checksum: ObjectId,
191 pub index_checksum: ObjectId,
192}
193
194#[derive(Debug, Clone, PartialEq, Eq)]
195pub struct PackMtimes {
196 pub version: u32,
197 pub format: ObjectFormat,
198 pub mtimes: Vec<u32>,
199 pub pack_checksum: ObjectId,
200 pub index_checksum: ObjectId,
201}
202
203#[derive(Debug, Clone, PartialEq, Eq)]
204pub struct PackBitmapIndex {
205 pub version: u16,
206 pub format: ObjectFormat,
207 pub options: u16,
208 pub pack_checksum: ObjectId,
209 pub index_checksum: ObjectId,
210 pub type_bitmaps: PackBitmapTypeBitmaps,
211 pub entries: Vec<PackBitmapEntry>,
212 pub name_hash_cache: Option<Vec<u32>>,
213}
214
215#[derive(Debug, Clone, PartialEq, Eq)]
216pub struct PackBitmapTypeBitmaps {
217 pub commits: EwahBitmap,
218 pub trees: EwahBitmap,
219 pub blobs: EwahBitmap,
220 pub tags: EwahBitmap,
221}
222
223#[derive(Debug, Clone, PartialEq, Eq)]
224pub struct PackBitmapEntry {
225 pub object_position: u32,
230 pub xor_offset: u8,
231 pub flags: u8,
232 pub bitmap: EwahBitmap,
235}
236
237#[derive(Debug, Clone, PartialEq, Eq)]
238pub struct EwahBitmap {
239 pub bit_size: u32,
240 pub words: Vec<u64>,
241 pub rlw_position: u32,
242}
243
244#[derive(Debug, Clone, PartialEq, Eq)]
245pub struct MultiPackIndex {
246 pub version: u8,
247 pub format: ObjectFormat,
248 pub pack_count: u32,
249 pub pack_names: Vec<String>,
250 pub object_count: u32,
251 pub fanout: [u32; 256],
252 pub objects: Vec<MultiPackIndexEntry>,
253 pub reverse_index: Option<Vec<u32>>,
254 pub bitmapped_packs: Option<Vec<MultiPackBitmapPack>>,
255 pub chunks: Vec<MultiPackIndexChunk>,
256 pub checksum: ObjectId,
257}
258
259#[derive(Debug, Clone, PartialEq, Eq)]
260pub struct MultiPackIndexEntry {
261 pub oid: ObjectId,
262 pub pack_int_id: u32,
263 pub offset: u64,
264}
265
266#[derive(Debug, Clone, PartialEq, Eq)]
267pub struct MultiPackBitmapPack {
268 pub bitmap_pos: u32,
269 pub bitmap_nr: u32,
270}
271
272#[derive(Debug, Clone, PartialEq, Eq)]
273pub struct MultiPackIndexChunk {
274 pub id: [u8; 4],
275 pub offset: u64,
276 pub len: u64,
277}
278
279#[derive(Debug, Clone, Copy, PartialEq, Eq)]
280enum PackObjectKind {
281 Commit,
282 Tree,
283 Blob,
284 Tag,
285 OfsDelta,
286 RefDelta,
287}
288
289#[derive(Debug, Clone, PartialEq, Eq)]
290enum ParsedPackEntry {
291 Resolved(PackObject),
292 Delta {
293 base: DeltaBase,
294 compressed_size: u64,
295 delta_size: u64,
296 offset: u64,
297 delta: Vec<u8>,
298 },
299}
300
301#[derive(Debug, Clone, PartialEq, Eq)]
302enum DeltaBase {
303 Offset(u64),
304 Ref(ObjectId),
305}
306
307impl PackFile {
308 pub fn parse_sha1(bytes: &[u8]) -> Result<Self> {
309 Self::parse(bytes, ObjectFormat::Sha1)
310 }
311
312 pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
313 Self::parse_with_base(bytes, format, |_| Ok(None))
314 }
315
316 pub fn parse_bundle(bundle: &Bundle) -> Result<Self> {
317 Self::parse(&bundle.pack, bundle.format)
318 }
319
320 pub fn index_pack(bytes: &[u8], format: ObjectFormat) -> Result<PackWrite> {
321 let PackIndexBuild {
322 index,
323 pack_checksum,
324 entries,
325 } = PackIndex::write_v2_for_pack(bytes, format)?;
326 Ok(PackWrite {
327 pack: bytes.to_vec(),
328 index,
329 checksum: pack_checksum,
330 entries,
331 })
332 }
333
334 pub fn parse_thin<F>(bytes: &[u8], format: ObjectFormat, external_base: F) -> Result<Self>
335 where
336 F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
337 {
338 Self::parse_with_base(bytes, format, external_base)
339 }
340
341 fn parse_with_base<F>(bytes: &[u8], format: ObjectFormat, mut external_base: F) -> Result<Self>
342 where
343 F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
344 {
345 let trailer_len = format.raw_len();
346 if bytes.len() < 12 + trailer_len {
347 return Err(GitError::InvalidFormat("pack file too short".into()));
348 }
349 let trailer_offset = bytes.len() - trailer_len;
350 let checksum = sley_core::digest_bytes(format, &bytes[..trailer_offset])?;
351 let expected = ObjectId::from_raw(format, &bytes[trailer_offset..])?;
352 if checksum != expected {
353 return Err(GitError::InvalidFormat(format!(
354 "pack checksum mismatch: expected {expected}, got {checksum}"
355 )));
356 }
357
358 if &bytes[..4] != b"PACK" {
359 return Err(GitError::InvalidFormat("missing PACK signature".into()));
360 }
361 let version = u32_be(&bytes[4..8]);
362 if version != 2 && version != 3 {
363 return Err(GitError::Unsupported(format!("pack version {version}")));
364 }
365 let count = u32_be(&bytes[8..12]) as usize;
366 let mut offset = 12usize;
367 let mut entries = Vec::with_capacity(count);
368 for _ in 0..count {
369 let entry_offset = offset;
370 let header = parse_entry_header(bytes, &mut offset)?;
371 let base =
372 match header.kind {
373 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
374 parse_ofs_delta_base_offset(bytes, &mut offset, entry_offset as u64)?,
375 )),
376 PackObjectKind::RefDelta => {
377 let hash_len = format.raw_len();
378 if offset + hash_len > trailer_offset {
379 return Err(GitError::InvalidFormat(
380 "truncated ref-delta base object id".into(),
381 ));
382 }
383 let oid = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
384 offset += hash_len;
385 Some(DeltaBase::Ref(oid))
386 }
387 _ => None,
388 };
389 let mut body = Vec::new();
390 let consumed = inflate_into(
391 &bytes[offset..trailer_offset],
392 &mut body,
393 header.size.min(usize::MAX as u64) as usize,
394 )?;
395 if body.len() as u64 != header.size {
396 return Err(GitError::InvalidObject(format!(
397 "pack object declared {} bytes, decoded {}",
398 header.size,
399 body.len()
400 )));
401 }
402 if consumed == 0 {
403 return Err(GitError::InvalidFormat(
404 "empty compressed pack entry".into(),
405 ));
406 }
407 offset = offset
408 .checked_add(consumed)
409 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
410 if offset > trailer_offset {
411 return Err(GitError::InvalidFormat(
412 "pack entry extends past checksum".into(),
413 ));
414 }
415 if let Some(base) = base {
416 entries.push(ParsedPackEntry::Delta {
417 base,
418 compressed_size: consumed as u64,
419 delta_size: header.size,
420 offset: entry_offset as u64,
421 delta: body,
422 });
423 } else {
424 let object_type = match header.kind {
425 PackObjectKind::Commit => ObjectType::Commit,
426 PackObjectKind::Tree => ObjectType::Tree,
427 PackObjectKind::Blob => ObjectType::Blob,
428 PackObjectKind::Tag => ObjectType::Tag,
429 PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
430 };
431 let object = EncodedObject::new(object_type, body);
432 let oid = object.object_id(format)?;
433 entries.push(ParsedPackEntry::Resolved(PackObject {
434 entry: PackEntry {
435 oid,
436 compressed_size: consumed as u64,
437 uncompressed_size: header.size,
438 offset: entry_offset as u64,
439 },
440 object,
441 }));
442 }
443 }
444 if offset != trailer_offset {
445 return Err(GitError::InvalidFormat(format!(
446 "pack has {} trailing bytes before checksum",
447 trailer_offset - offset
448 )));
449 }
450 Ok(Self {
451 version,
452 entries: resolve_pack_entries(entries, format, &mut external_base)?,
453 checksum,
454 })
455 }
456
457 pub fn write_undeltified_sha1<T>(objects: &[T]) -> Result<PackWrite>
458 where
459 T: Borrow<EncodedObject>,
460 {
461 Self::write_undeltified(objects, ObjectFormat::Sha1)
462 }
463
464 pub fn write_undeltified<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
470 where
471 T: Borrow<EncodedObject>,
472 {
473 let options = PackWriteOptions::new().with_depth(0).with_reorder(false);
474 Self::write_packed_impl(objects, format, &options)
475 }
476
477 pub fn write_packed<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
486 where
487 T: Borrow<EncodedObject>,
488 {
489 Self::write_packed_with_options(objects, format, &PackWriteOptions::new())
490 }
491
492 pub fn write_packed_with_options<T>(
496 objects: &[T],
497 format: ObjectFormat,
498 options: &PackWriteOptions,
499 ) -> Result<PackWrite>
500 where
501 T: Borrow<EncodedObject>,
502 {
503 Self::write_packed_impl(objects, format, options)
504 }
505
506 pub fn write_packed_with_known_ids(
515 inputs: &[PackInput<'_>],
516 format: ObjectFormat,
517 ) -> Result<PackWrite> {
518 Self::write_packed_with_known_ids_and_options(inputs, format, &PackWriteOptions::new())
519 }
520
521 pub fn write_packed_with_known_ids_and_options(
524 inputs: &[PackInput<'_>],
525 format: ObjectFormat,
526 options: &PackWriteOptions,
527 ) -> Result<PackWrite> {
528 if inputs.len() > u32::MAX as usize {
529 return Err(GitError::InvalidFormat("too many pack objects".into()));
530 }
531 let mut objects = Vec::with_capacity(inputs.len());
532 let mut object_ids = Vec::with_capacity(inputs.len());
533 for input in inputs {
534 if input.oid.format() != format {
535 return Err(GitError::InvalidObjectId(format!(
536 "pack object id {} uses {}, pack uses {}",
537 input.oid,
538 input.oid.format().name(),
539 format.name()
540 )));
541 }
542 objects.push(input.object);
543 object_ids.push(*input.oid);
544 }
545 Self::write_packed_from_parts(objects, object_ids, format, options)
546 }
547
548 pub fn write_thin<T>(
557 objects: &[T],
558 format: ObjectFormat,
559 external_bases: HashMap<ObjectId, EncodedObject>,
560 ) -> Result<PackWrite>
561 where
562 T: Borrow<EncodedObject>,
563 {
564 let options = PackWriteOptions::new().with_thin_bases(external_bases);
565 Self::write_packed_impl(objects, format, &options)
566 }
567
568 fn write_packed_impl<T>(
569 objects: &[T],
570 format: ObjectFormat,
571 options: &PackWriteOptions,
572 ) -> Result<PackWrite>
573 where
574 T: Borrow<EncodedObject>,
575 {
576 if objects.len() > u32::MAX as usize {
577 return Err(GitError::InvalidFormat("too many pack objects".into()));
578 }
579 let objects: Vec<&EncodedObject> = objects.iter().map(Borrow::borrow).collect();
580
581 let mut object_ids: Vec<ObjectId> = Vec::with_capacity(objects.len());
584 for object in &objects {
585 object_ids.push(object.object_id(format)?);
586 }
587 Self::write_packed_from_parts(objects, object_ids, format, options)
588 }
589
590 fn write_packed_from_parts(
591 objects: Vec<&EncodedObject>,
592 object_ids: Vec<ObjectId>,
593 format: ObjectFormat,
594 options: &PackWriteOptions,
595 ) -> Result<PackWrite> {
596 let mut seen = HashSet::with_capacity(object_ids.len());
597 for oid in &object_ids {
598 if !seen.insert(oid) {
599 return Err(GitError::InvalidFormat(format!(
600 "pack contains duplicate object id {oid}"
601 )));
602 }
603 }
604
605 for oid in options.thin_bases.keys() {
607 if oid.format() != format {
608 return Err(GitError::InvalidObjectId(
609 "thin pack base object id format does not match pack format".into(),
610 ));
611 }
612 }
613
614 let (plan, order) = plan_pack_deltas(&objects, &object_ids, options)?;
620
621 let mut pack = Vec::new();
622 pack.extend_from_slice(b"PACK");
623 pack.extend_from_slice(&2u32.to_be_bytes());
624 pack.extend_from_slice(&(objects.len() as u32).to_be_bytes());
625
626 let mut index_entries = Vec::with_capacity(objects.len());
627 let mut written_offsets: Vec<Option<u64>> = vec![None; objects.len()];
630
631 let compressed_payloads = compress_planned_payloads(&objects, &plan, &order)?;
632
633 for (order_pos, &idx) in order.iter().enumerate() {
634 let offset = pack.len() as u64;
635 let mut entry_bytes = Vec::new();
636 match &plan[idx].base {
637 PlannedBase::None => {
638 write_entry_header(
639 &mut entry_bytes,
640 objects[idx].object_type,
641 objects[idx].body.len() as u64,
642 );
643 }
644 PlannedBase::InPack { base_idx, delta } => {
645 let base_offset = written_offsets[*base_idx].ok_or_else(|| {
646 GitError::InvalidFormat(
647 "in-pack delta base emitted after dependent object".into(),
648 )
649 })?;
650 if options.prefer_ofs_delta {
651 write_pack_entry_header_kind(&mut entry_bytes, 6, delta.len() as u64);
652 let relative = offset.checked_sub(base_offset).ok_or_else(|| {
653 GitError::InvalidFormat("ofs-delta base offset is after delta".into())
654 })?;
655 write_ofs_delta_offset(&mut entry_bytes, relative)?;
656 } else {
657 write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
658 entry_bytes.extend_from_slice(object_ids[*base_idx].as_bytes());
659 }
660 }
661 PlannedBase::External { base_oid, delta } => {
662 write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
663 entry_bytes.extend_from_slice(base_oid.as_bytes());
664 }
665 }
666 entry_bytes.extend_from_slice(&compressed_payloads[order_pos]);
667 let crc32 = crc32fast::hash(&entry_bytes);
668 pack.extend_from_slice(&entry_bytes);
669 written_offsets[idx] = Some(offset);
670 index_entries.push(PackIndexEntry {
671 oid: object_ids[idx].clone(),
672 crc32,
673 offset,
674 });
675 }
676
677 let checksum = sley_core::digest_bytes(format, &pack)?;
678 pack.extend_from_slice(checksum.as_bytes());
679 let index = PackIndex::write_v2(format, &index_entries, &checksum)?;
680 Ok(PackWrite {
681 pack,
682 index,
683 checksum,
684 entries: index_entries,
685 })
686 }
687}
688
689impl PackIndex {
690 pub fn write_v2_for_pack_sha1(pack_bytes: &[u8]) -> Result<PackIndexBuild> {
691 Self::write_v2_for_pack(pack_bytes, ObjectFormat::Sha1)
692 }
693
694 pub fn write_v2_for_pack(pack_bytes: &[u8], format: ObjectFormat) -> Result<PackIndexBuild> {
695 let trailer_len = format.raw_len();
696 if pack_bytes.len() < 12 + trailer_len {
697 return Err(GitError::InvalidFormat("pack file too short".into()));
698 }
699 let trailer_offset = pack_bytes.len() - trailer_len;
700 let pack_checksum = sley_core::digest_bytes(format, &pack_bytes[..trailer_offset])?;
701 let expected = ObjectId::from_raw(format, &pack_bytes[trailer_offset..])?;
702 if pack_checksum != expected {
703 return Err(GitError::InvalidFormat(format!(
704 "pack checksum mismatch: expected {expected}, got {pack_checksum}"
705 )));
706 }
707
708 if &pack_bytes[..4] != b"PACK" {
709 return Err(GitError::InvalidFormat("missing PACK signature".into()));
710 }
711 let version = u32_be(&pack_bytes[4..8]);
712 if version != 2 && version != 3 {
713 return Err(GitError::Unsupported(format!("pack version {version}")));
714 }
715 let count = u32_be(&pack_bytes[8..12]) as usize;
716 let mut offset = 12usize;
717 let mut parsed_entries = Vec::with_capacity(count);
718 let mut raw_entries = Vec::with_capacity(count);
719 for _ in 0..count {
720 let entry_offset = offset;
721 let header = parse_entry_header(pack_bytes, &mut offset)?;
722 let base = match header.kind {
723 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
724 pack_bytes,
725 &mut offset,
726 entry_offset as u64,
727 )?)),
728 PackObjectKind::RefDelta => {
729 let hash_len = format.raw_len();
730 if offset + hash_len > trailer_offset {
731 return Err(GitError::InvalidFormat(
732 "truncated ref-delta base object id".into(),
733 ));
734 }
735 let oid = ObjectId::from_raw(format, &pack_bytes[offset..offset + hash_len])?;
736 offset += hash_len;
737 Some(DeltaBase::Ref(oid))
738 }
739 _ => None,
740 };
741 let mut body = Vec::new();
742 let consumed = inflate_into(
743 &pack_bytes[offset..trailer_offset],
744 &mut body,
745 header.size.min(usize::MAX as u64) as usize,
746 )?;
747 if body.len() as u64 != header.size {
748 return Err(GitError::InvalidObject(format!(
749 "pack object declared {} bytes, decoded {}",
750 header.size,
751 body.len()
752 )));
753 }
754 if consumed == 0 {
755 return Err(GitError::InvalidFormat(
756 "empty compressed pack entry".into(),
757 ));
758 }
759 offset = offset
760 .checked_add(consumed)
761 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
762 if offset > trailer_offset {
763 return Err(GitError::InvalidFormat(
764 "pack entry extends past checksum".into(),
765 ));
766 }
767 raw_entries.push((
768 entry_offset as u64,
769 crc32fast::hash(&pack_bytes[entry_offset..offset]),
770 ));
771 if let Some(base) = base {
772 parsed_entries.push(ParsedPackEntry::Delta {
773 base,
774 compressed_size: consumed as u64,
775 delta_size: header.size,
776 offset: entry_offset as u64,
777 delta: body,
778 });
779 } else {
780 let object_type = match header.kind {
781 PackObjectKind::Commit => ObjectType::Commit,
782 PackObjectKind::Tree => ObjectType::Tree,
783 PackObjectKind::Blob => ObjectType::Blob,
784 PackObjectKind::Tag => ObjectType::Tag,
785 PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
786 };
787 let object = EncodedObject::new(object_type, body);
788 let oid = object.object_id(format)?;
789 parsed_entries.push(ParsedPackEntry::Resolved(PackObject {
790 entry: PackEntry {
791 oid,
792 compressed_size: consumed as u64,
793 uncompressed_size: header.size,
794 offset: entry_offset as u64,
795 },
796 object,
797 }));
798 }
799 }
800 if offset != trailer_offset {
801 return Err(GitError::InvalidFormat(format!(
802 "pack has {} trailing bytes before checksum",
803 trailer_offset - offset
804 )));
805 }
806
807 let resolved = resolve_pack_entries(parsed_entries, format, &mut |_| Ok(None))?;
808 let entries = resolved
809 .iter()
810 .zip(raw_entries)
811 .map(|(object, (offset, crc32))| PackIndexEntry {
812 oid: object.entry.oid,
813 crc32,
814 offset,
815 })
816 .collect::<Vec<_>>();
817 let index = PackIndex::write_v2(format, &entries, &pack_checksum)?;
818 Ok(PackIndexBuild {
819 index,
820 pack_checksum,
821 entries,
822 })
823 }
824
825 pub fn parse_v2_sha1(bytes: &[u8]) -> Result<Self> {
826 Self::parse(bytes, ObjectFormat::Sha1)
827 }
828
829 pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
830 let hash_len = format.raw_len();
831 if bytes.len() < 4 {
832 return Err(GitError::InvalidFormat("pack index too short".into()));
833 }
834 if bytes[..4] != [0xff, b't', b'O', b'c'] {
835 return Self::parse_v1(bytes, format);
836 }
837 if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
838 return Err(GitError::InvalidFormat("pack index too short".into()));
839 }
840 let version = u32_be(&bytes[4..8]);
841 if version != 2 {
842 return Err(GitError::Unsupported(format!(
843 "pack index version {version}"
844 )));
845 }
846 let index_checksum_offset = bytes.len() - hash_len;
847 let actual_index_checksum =
848 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
849 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
850 if actual_index_checksum != index_checksum {
851 return Err(GitError::InvalidFormat(format!(
852 "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
853 )));
854 }
855
856 let mut offset = 8usize;
857 let mut fanout = [0u32; 256];
858 let mut previous = 0u32;
859 for slot in &mut fanout {
860 *slot = u32_be(&bytes[offset..offset + 4]);
861 if *slot < previous {
862 return Err(GitError::InvalidFormat(
863 "pack index fanout is not monotonic".into(),
864 ));
865 }
866 previous = *slot;
867 offset += 4;
868 }
869 let count = fanout[255] as usize;
870 let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
871 offset = oid_table.end;
872 let crc_table = checked_range(offset, count, 4, bytes.len())?;
873 offset = crc_table.end;
874 let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
875 offset = small_offset_table.end;
876
877 let large_offset_count = (0..count)
878 .filter(|idx| {
879 let start = small_offset_table.start + idx * 4;
880 u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
881 })
882 .count();
883 let large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
884 offset = large_offset_table.end;
885
886 let expected_trailer_offset = bytes.len() - hash_len * 2;
887 if offset != expected_trailer_offset {
888 return Err(GitError::InvalidFormat(format!(
889 "pack index has {} unexpected bytes before trailer",
890 expected_trailer_offset.saturating_sub(offset)
891 )));
892 }
893 let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
894
895 let mut entries = Vec::with_capacity(count);
896 for idx in 0..count {
897 let oid_start = oid_table.start + idx * hash_len;
898 let crc_start = crc_table.start + idx * 4;
899 let offset_start = small_offset_table.start + idx * 4;
900 let oid_bytes = &bytes[oid_start..oid_start + hash_len];
901 if idx > 0 && oid_bytes <= &bytes[oid_start - hash_len..oid_start] {
905 return Err(GitError::InvalidFormat(
906 "pack index object ids are not strictly ascending".into(),
907 ));
908 }
909 let expected_min = if oid_bytes[0] == 0 {
910 0
911 } else {
912 fanout[usize::from(oid_bytes[0] - 1)]
913 };
914 if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
915 return Err(GitError::InvalidFormat(
916 "pack index object id is outside its fanout bucket".into(),
917 ));
918 }
919 let raw_offset = u32_be(&bytes[offset_start..offset_start + 4]);
920 let offset = if raw_offset & 0x8000_0000 == 0 {
921 u64::from(raw_offset)
922 } else {
923 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
924 let large_start = large_offset_table.start + large_idx * 8;
925 if large_idx >= large_offset_count {
926 return Err(GitError::InvalidFormat(
927 "pack index large offset points past table".into(),
928 ));
929 }
930 u64_be(&bytes[large_start..large_start + 8])
931 };
932 entries.push(PackIndexEntry {
933 oid: ObjectId::from_raw(format, oid_bytes)?,
934 crc32: u32_be(&bytes[crc_start..crc_start + 4]),
935 offset,
936 });
937 }
938 Ok(Self {
939 version,
940 fanout,
941 entries,
942 pack_checksum,
943 index_checksum,
944 })
945 }
946
947 fn parse_v1(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
948 let hash_len = format.raw_len();
949 if bytes.len() < 256 * 4 + 2 * hash_len {
950 return Err(GitError::InvalidFormat("pack index too short".into()));
951 }
952 let index_checksum_offset = bytes.len() - hash_len;
953 let actual_index_checksum =
954 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
955 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
956 if actual_index_checksum != index_checksum {
957 return Err(GitError::InvalidFormat(format!(
958 "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
959 )));
960 }
961
962 let mut offset = 0usize;
963 let mut fanout = [0u32; 256];
964 let mut previous = 0u32;
965 for slot in &mut fanout {
966 *slot = u32_be(&bytes[offset..offset + 4]);
967 if *slot < previous {
968 return Err(GitError::InvalidFormat(
969 "pack index fanout is not monotonic".into(),
970 ));
971 }
972 previous = *slot;
973 offset += 4;
974 }
975 let count = fanout[255] as usize;
976 let entry_len = hash_len
977 .checked_add(4)
978 .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
979 let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
980 offset = entry_table.end;
981 let expected_trailer_offset = bytes.len() - hash_len * 2;
982 if offset != expected_trailer_offset {
983 return Err(GitError::InvalidFormat(format!(
984 "pack index has {} unexpected bytes before trailer",
985 expected_trailer_offset.saturating_sub(offset)
986 )));
987 }
988 let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
989
990 let mut entries = Vec::with_capacity(count);
991 let mut previous_oid: Option<ObjectId> = None;
992 for idx in 0..count {
993 let start = entry_table.start + idx * entry_len;
994 let oid = ObjectId::from_raw(format, &bytes[start + 4..start + entry_len])?;
995 if let Some(previous) = &previous_oid
996 && previous.as_bytes() >= oid.as_bytes()
997 {
998 return Err(GitError::InvalidFormat(
999 "pack index object ids are not strictly sorted".into(),
1000 ));
1001 }
1002 previous_oid = Some(oid);
1003 entries.push(PackIndexEntry {
1004 oid,
1005 crc32: 0,
1006 offset: u64::from(u32_be(&bytes[start..start + 4])),
1007 });
1008 }
1009 Ok(Self {
1010 version: 1,
1011 fanout,
1012 entries,
1013 pack_checksum,
1014 index_checksum,
1015 })
1016 }
1017
1018 pub fn find(&self, oid: &ObjectId) -> Option<&PackIndexEntry> {
1019 self.entries
1020 .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
1021 .ok()
1022 .map(|idx| &self.entries[idx])
1023 }
1024
1025 pub fn write_v2_sha1(entries: &[PackIndexEntry], pack_checksum: &ObjectId) -> Result<Vec<u8>> {
1026 Self::write_v2(ObjectFormat::Sha1, entries, pack_checksum)
1027 }
1028
1029 pub fn write_v2(
1030 format: ObjectFormat,
1031 entries: &[PackIndexEntry],
1032 pack_checksum: &ObjectId,
1033 ) -> Result<Vec<u8>> {
1034 if pack_checksum.format() != format {
1035 return Err(GitError::InvalidObjectId(
1036 "pack checksum format does not match index format".into(),
1037 ));
1038 }
1039 let mut entries = entries.iter().collect::<Vec<_>>();
1040 entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
1041 for pair in entries.windows(2) {
1042 if pair[0].oid.as_bytes() == pair[1].oid.as_bytes() {
1043 return Err(GitError::InvalidFormat(format!(
1044 "pack index contains duplicate object id {}",
1045 pair[0].oid
1046 )));
1047 }
1048 }
1049 let mut fanout = [0u32; 256];
1050 for entry in &entries {
1051 if entry.oid.format() != format {
1052 return Err(GitError::InvalidObjectId(
1053 "pack index entry format does not match index format".into(),
1054 ));
1055 }
1056 let first = entry.oid.as_bytes()[0] as usize;
1057 fanout[first] = fanout[first]
1058 .checked_add(1)
1059 .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1060 }
1061 let mut running = 0u32;
1062 for slot in &mut fanout {
1063 running = running
1064 .checked_add(*slot)
1065 .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1066 *slot = running;
1067 }
1068
1069 let mut index = Vec::new();
1070 index.extend_from_slice(&[0xff, b't', b'O', b'c']);
1071 index.extend_from_slice(&2u32.to_be_bytes());
1072 for count in fanout {
1073 index.extend_from_slice(&count.to_be_bytes());
1074 }
1075 for entry in &entries {
1076 index.extend_from_slice(entry.oid.as_bytes());
1077 }
1078 for entry in &entries {
1079 index.extend_from_slice(&entry.crc32.to_be_bytes());
1080 }
1081
1082 let mut large_offsets = Vec::new();
1083 for entry in &entries {
1084 if entry.offset < 0x8000_0000 {
1085 index.extend_from_slice(&(entry.offset as u32).to_be_bytes());
1086 } else {
1087 if large_offsets.len() > 0x7fff_ffff {
1088 return Err(GitError::InvalidFormat(
1089 "too many large pack offsets".into(),
1090 ));
1091 }
1092 let large_idx = large_offsets.len() as u32;
1093 index.extend_from_slice(&(0x8000_0000 | large_idx).to_be_bytes());
1094 large_offsets.push(entry.offset);
1095 }
1096 }
1097 for offset in large_offsets {
1098 index.extend_from_slice(&offset.to_be_bytes());
1099 }
1100 index.extend_from_slice(pack_checksum.as_bytes());
1101 let index_checksum = sley_core::digest_bytes(format, &index)?;
1102 index.extend_from_slice(index_checksum.as_bytes());
1103 Ok(index)
1104 }
1105}
1106
1107pub fn pack_order_index_positions(entries: &[PackIndexEntry]) -> Vec<u32> {
1112 let mut oid_sorted: Vec<usize> = (0..entries.len()).collect();
1113 oid_sorted.sort_by(|&a, &b| entries[a].oid.as_bytes().cmp(entries[b].oid.as_bytes()));
1114 let mut index_position = vec![0u32; entries.len()];
1115 for (position, &entry) in oid_sorted.iter().enumerate() {
1116 index_position[entry] = position as u32;
1117 }
1118 let mut by_offset: Vec<usize> = (0..entries.len()).collect();
1119 by_offset.sort_by_key(|&entry| entries[entry].offset);
1120 by_offset
1121 .into_iter()
1122 .map(|entry| index_position[entry])
1123 .collect()
1124}
1125
1126impl PackReverseIndex {
1127 pub fn write(
1128 format: ObjectFormat,
1129 positions: &[u32],
1130 pack_checksum: &ObjectId,
1131 ) -> Result<Vec<u8>> {
1132 if pack_checksum.format() != format {
1133 return Err(GitError::InvalidObjectId(
1134 "pack checksum format does not match reverse index format".into(),
1135 ));
1136 }
1137 validate_position_permutation(positions)?;
1138
1139 let mut out = Vec::new();
1140 out.extend_from_slice(b"RIDX");
1141 out.extend_from_slice(&1u32.to_be_bytes());
1142 out.extend_from_slice(&hash_function_id(format).to_be_bytes());
1143 for position in positions {
1144 out.extend_from_slice(&position.to_be_bytes());
1145 }
1146 out.extend_from_slice(pack_checksum.as_bytes());
1147 let checksum = sley_core::digest_bytes(format, &out)?;
1148 out.extend_from_slice(checksum.as_bytes());
1149 Ok(out)
1150 }
1151
1152 pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
1153 let hash_len = format.raw_len();
1154 let table_len = object_count
1155 .checked_mul(4)
1156 .ok_or_else(|| GitError::InvalidFormat("reverse index table overflow".into()))?;
1157 let min_len = 12usize
1158 .checked_add(table_len)
1159 .and_then(|len| len.checked_add(hash_len * 2))
1160 .ok_or_else(|| GitError::InvalidFormat("reverse index length overflow".into()))?;
1161 if bytes.len() < min_len {
1162 return Err(GitError::InvalidFormat("reverse index too short".into()));
1163 }
1164 if bytes.len() != min_len {
1165 return Err(GitError::InvalidFormat(format!(
1166 "reverse index has {} trailing bytes",
1167 bytes.len() - min_len
1168 )));
1169 }
1170 if &bytes[..4] != b"RIDX" {
1171 return Err(GitError::InvalidFormat(
1172 "missing reverse index signature".into(),
1173 ));
1174 }
1175 let version = u32_be(&bytes[4..8]);
1176 if version != 1 {
1177 return Err(GitError::Unsupported(format!(
1178 "reverse index version {version}"
1179 )));
1180 }
1181 let hash_id = u32_be(&bytes[8..12]);
1182 if hash_id != hash_function_id(format) {
1183 return Err(GitError::InvalidFormat(format!(
1184 "reverse index hash id {hash_id} does not match {}",
1185 format.name()
1186 )));
1187 }
1188
1189 let index_checksum_offset = bytes.len() - hash_len;
1190 let actual_index_checksum =
1191 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1192 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1193 if actual_index_checksum != index_checksum {
1194 return Err(GitError::InvalidFormat(format!(
1195 "reverse index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1196 )));
1197 }
1198
1199 let pack_checksum_offset = index_checksum_offset - hash_len;
1200 let pack_checksum =
1201 ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
1202 let mut positions = Vec::with_capacity(object_count);
1203 let mut offset = 12usize;
1204 for _ in 0..object_count {
1205 let position = u32_be(&bytes[offset..offset + 4]);
1206 positions.push(position);
1207 offset += 4;
1208 }
1209 validate_position_permutation(&positions)?;
1210
1211 Ok(Self {
1212 version,
1213 format,
1214 positions,
1215 pack_checksum,
1216 index_checksum,
1217 })
1218 }
1219}
1220
1221impl PackMtimes {
1222 pub fn write(
1223 format: ObjectFormat,
1224 mtimes: &[u32],
1225 pack_checksum: &ObjectId,
1226 ) -> Result<Vec<u8>> {
1227 if pack_checksum.format() != format {
1228 return Err(GitError::InvalidObjectId(
1229 "pack checksum format does not match mtimes format".into(),
1230 ));
1231 }
1232
1233 let mut out = Vec::new();
1234 out.extend_from_slice(b"MTME");
1235 out.extend_from_slice(&1u32.to_be_bytes());
1236 out.extend_from_slice(&hash_function_id(format).to_be_bytes());
1237 for mtime in mtimes {
1238 out.extend_from_slice(&mtime.to_be_bytes());
1239 }
1240 out.extend_from_slice(pack_checksum.as_bytes());
1241 let checksum = sley_core::digest_bytes(format, &out)?;
1242 out.extend_from_slice(checksum.as_bytes());
1243 Ok(out)
1244 }
1245
1246 pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
1247 let hash_len = format.raw_len();
1248 let table_len = object_count
1249 .checked_mul(4)
1250 .ok_or_else(|| GitError::InvalidFormat("mtimes table overflow".into()))?;
1251 let expected_len = 12usize
1252 .checked_add(table_len)
1253 .and_then(|len| len.checked_add(hash_len * 2))
1254 .ok_or_else(|| GitError::InvalidFormat("mtimes length overflow".into()))?;
1255 if bytes.len() < expected_len {
1256 return Err(GitError::InvalidFormat("mtimes file too short".into()));
1257 }
1258 if bytes.len() != expected_len {
1259 return Err(GitError::InvalidFormat(format!(
1260 "mtimes file has {} trailing bytes",
1261 bytes.len() - expected_len
1262 )));
1263 }
1264 if &bytes[..4] != b"MTME" {
1265 return Err(GitError::InvalidFormat("missing mtimes signature".into()));
1266 }
1267 let version = u32_be(&bytes[4..8]);
1268 if version != 1 {
1269 return Err(GitError::Unsupported(format!("mtimes version {version}")));
1270 }
1271 let hash_id = u32_be(&bytes[8..12]);
1272 if hash_id != hash_function_id(format) {
1273 return Err(GitError::InvalidFormat(format!(
1274 "mtimes hash id {hash_id} does not match {}",
1275 format.name()
1276 )));
1277 }
1278
1279 let index_checksum_offset = bytes.len() - hash_len;
1280 let actual_index_checksum =
1281 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1282 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1283 if actual_index_checksum != index_checksum {
1284 return Err(GitError::InvalidFormat(format!(
1285 "mtimes checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1286 )));
1287 }
1288
1289 let pack_checksum_offset = index_checksum_offset - hash_len;
1290 let pack_checksum =
1291 ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
1292 let mut mtimes = Vec::with_capacity(object_count);
1293 let mut offset = 12usize;
1294 for _ in 0..object_count {
1295 mtimes.push(u32_be(&bytes[offset..offset + 4]));
1296 offset += 4;
1297 }
1298
1299 Ok(Self {
1300 version,
1301 format,
1302 mtimes,
1303 pack_checksum,
1304 index_checksum,
1305 })
1306 }
1307}
1308
1309impl PackBitmapIndex {
1310 pub const OPTION_FULL_DAG: u16 = 0x0001;
1311 pub const OPTION_HASH_CACHE: u16 = 0x0004;
1312
1313 pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
1314 let hash_len = format.raw_len();
1315 let min_len = 12usize
1316 .checked_add(hash_len * 2)
1317 .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
1318 if bytes.len() < min_len {
1319 return Err(GitError::InvalidFormat("bitmap index too short".into()));
1320 }
1321 if &bytes[..4] != b"BITM" {
1322 return Err(GitError::InvalidFormat(
1323 "missing bitmap index signature".into(),
1324 ));
1325 }
1326 let version = u16_be(&bytes[4..6]);
1327 if version != 1 {
1328 return Err(GitError::Unsupported(format!(
1329 "bitmap index version {version}"
1330 )));
1331 }
1332 let options = u16_be(&bytes[6..8]);
1333 let known_options = Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE;
1334 if options & !known_options != 0 {
1335 return Err(GitError::Unsupported(format!(
1336 "bitmap index options {:#06x}",
1337 options & !known_options
1338 )));
1339 }
1340 let entry_count = u32_be(&bytes[8..12]) as usize;
1341 let checksum_offset = bytes.len() - hash_len;
1342 let actual_index_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
1343 let index_checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
1344 if actual_index_checksum != index_checksum {
1345 return Err(GitError::InvalidFormat(format!(
1346 "bitmap index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1347 )));
1348 }
1349
1350 let pack_checksum_end = 12usize
1351 .checked_add(hash_len)
1352 .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
1353 let pack_checksum = ObjectId::from_raw(format, &bytes[12..pack_checksum_end])?;
1354 let mut offset = pack_checksum_end;
1355 let commits = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
1356 let trees = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
1357 let blobs = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
1358 let tags = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
1359
1360 let mut entries = Vec::with_capacity(entry_count);
1361 for idx in 0..entry_count {
1362 if checksum_offset.saturating_sub(offset) < 6 {
1363 return Err(GitError::InvalidFormat(
1364 "truncated bitmap index entry".into(),
1365 ));
1366 }
1367 let object_position = u32_be(&bytes[offset..offset + 4]);
1368 offset += 4;
1369 if object_position as usize >= object_count {
1370 return Err(GitError::InvalidFormat(
1371 "bitmap index entry points past object table".into(),
1372 ));
1373 }
1374 let xor_offset = bytes[offset];
1375 offset += 1;
1376 if xor_offset as usize > idx || xor_offset > 160 {
1377 return Err(GitError::InvalidFormat(
1378 "bitmap index entry has invalid XOR offset".into(),
1379 ));
1380 }
1381 let flags = bytes[offset];
1382 offset += 1;
1383 let bitmap = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
1384 entries.push(PackBitmapEntry {
1385 object_position,
1386 xor_offset,
1387 flags,
1388 bitmap,
1389 });
1390 }
1391
1392 let name_hash_cache = if options & Self::OPTION_HASH_CACHE != 0 {
1393 let cache_len = object_count
1394 .checked_mul(4)
1395 .ok_or_else(|| GitError::InvalidFormat("bitmap hash cache overflow".into()))?;
1396 if checksum_offset.saturating_sub(offset) < cache_len {
1397 return Err(GitError::InvalidFormat(
1398 "truncated bitmap hash cache".into(),
1399 ));
1400 }
1401 let mut cache = Vec::with_capacity(object_count);
1402 for _ in 0..object_count {
1403 cache.push(u32_be(&bytes[offset..offset + 4]));
1404 offset += 4;
1405 }
1406 Some(cache)
1407 } else {
1408 None
1409 };
1410
1411 if offset != checksum_offset {
1412 return Err(GitError::InvalidFormat(format!(
1413 "bitmap index has {} trailing bytes",
1414 checksum_offset - offset
1415 )));
1416 }
1417
1418 Ok(Self {
1419 version,
1420 format,
1421 options,
1422 pack_checksum,
1423 index_checksum,
1424 type_bitmaps: PackBitmapTypeBitmaps {
1425 commits,
1426 trees,
1427 blobs,
1428 tags,
1429 },
1430 entries,
1431 name_hash_cache,
1432 })
1433 }
1434
1435 pub fn entry_for_index_position(&self, position: u32) -> Option<&PackBitmapEntry> {
1438 self.entries
1439 .iter()
1440 .find(|entry| entry.object_position == position)
1441 }
1442}
1443
1444fn parse_bitmap_ewah(
1445 bytes: &[u8],
1446 offset: &mut usize,
1447 checksum_offset: usize,
1448 _object_count: usize,
1449) -> Result<EwahBitmap> {
1450 if checksum_offset.saturating_sub(*offset) < 12 {
1451 return Err(GitError::InvalidFormat("truncated EWAH bitmap".into()));
1452 }
1453 let bit_size = u32_be(&bytes[*offset..*offset + 4]);
1454 *offset += 4;
1455 let word_count = u32_be(&bytes[*offset..*offset + 4]) as usize;
1456 *offset += 4;
1457 let words_len = word_count
1458 .checked_mul(8)
1459 .ok_or_else(|| GitError::InvalidFormat("EWAH word table overflow".into()))?;
1460 if checksum_offset.saturating_sub(*offset) < words_len + 4 {
1461 return Err(GitError::InvalidFormat("truncated EWAH word table".into()));
1462 }
1463 let mut words = Vec::with_capacity(word_count);
1464 for _ in 0..word_count {
1465 words.push(u64_be(&bytes[*offset..*offset + 8]));
1466 *offset += 8;
1467 }
1468 let rlw_position = u32_be(&bytes[*offset..*offset + 4]);
1469 *offset += 4;
1470 validate_ewah_words(bit_size, &words, rlw_position)?;
1471 Ok(EwahBitmap {
1472 bit_size,
1473 words,
1474 rlw_position,
1475 })
1476}
1477
1478fn validate_ewah_words(bit_size: u32, words: &[u64], rlw_position: u32) -> Result<()> {
1479 if words.is_empty() {
1480 if rlw_position != 0 || bit_size != 0 {
1481 return Err(GitError::InvalidFormat(
1482 "EWAH bitmap has invalid empty RLW".into(),
1483 ));
1484 }
1485 return Ok(());
1486 }
1487 if rlw_position as usize >= words.len() {
1488 return Err(GitError::InvalidFormat(
1489 "EWAH RLW position points past word table".into(),
1490 ));
1491 }
1492 let mut word_idx = 0usize;
1493 let mut decoded_words = 0u64;
1494 while word_idx < words.len() {
1495 let rlw = words[word_idx];
1496 let run_words = (rlw >> 1) & 0xffff_ffff;
1497 let literal_words = (rlw >> 33) as usize;
1498 word_idx += 1;
1499 word_idx = word_idx
1500 .checked_add(literal_words)
1501 .ok_or_else(|| GitError::InvalidFormat("EWAH literal word overflow".into()))?;
1502 if word_idx > words.len() {
1503 return Err(GitError::InvalidFormat(
1504 "EWAH literal words extend past word table".into(),
1505 ));
1506 }
1507 decoded_words = decoded_words
1508 .checked_add(run_words)
1509 .and_then(|value| value.checked_add(literal_words as u64))
1510 .ok_or_else(|| GitError::InvalidFormat("EWAH decoded size overflow".into()))?;
1511 }
1512 let decoded_bits = decoded_words
1513 .checked_mul(64)
1514 .ok_or_else(|| GitError::InvalidFormat("EWAH decoded bit size overflow".into()))?;
1515 if decoded_bits < u64::from(bit_size) {
1516 return Err(GitError::InvalidFormat(
1517 "EWAH bitmap decodes fewer bits than declared".into(),
1518 ));
1519 }
1520 Ok(())
1521}
1522
1523impl MultiPackIndex {
1524 pub fn write(
1525 format: ObjectFormat,
1526 version: u8,
1527 pack_names: &[String],
1528 objects: &[MultiPackIndexEntry],
1529 ) -> Result<Vec<u8>> {
1530 Self::write_with_reverse_index(format, version, pack_names, objects, None)
1531 }
1532
1533 pub fn write_with_reverse_index(
1542 format: ObjectFormat,
1543 version: u8,
1544 pack_names: &[String],
1545 objects: &[MultiPackIndexEntry],
1546 preferred_pack: Option<u32>,
1547 ) -> Result<Vec<u8>> {
1548 if let Some(preferred) = preferred_pack
1549 && preferred as usize >= pack_names.len()
1550 {
1551 return Err(GitError::InvalidFormat(format!(
1552 "preferred pack {preferred} out of range for {} packs",
1553 pack_names.len()
1554 )));
1555 }
1556 if version != 1 && version != 2 {
1557 return Err(GitError::Unsupported(format!(
1558 "multi-pack-index version {version}"
1559 )));
1560 }
1561 if pack_names.len() > u32::MAX as usize {
1562 return Err(GitError::InvalidFormat(
1563 "too many multi-pack-index packs".into(),
1564 ));
1565 }
1566 if objects.len() > u32::MAX as usize {
1567 return Err(GitError::InvalidFormat(
1568 "too many multi-pack-index objects".into(),
1569 ));
1570 }
1571 validate_midx_pack_names(pack_names)?;
1572 if version == 1 && pack_names.windows(2).any(|pair| pair[0] > pair[1]) {
1573 return Err(GitError::InvalidFormat(
1574 "multi-pack-index v1 pack names must be sorted".into(),
1575 ));
1576 }
1577
1578 let mut objects = objects.iter().collect::<Vec<_>>();
1579 objects.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
1580 let mut previous_oid: Option<&ObjectId> = None;
1581 for object in &objects {
1582 if object.oid.format() != format {
1583 return Err(GitError::InvalidObjectId(
1584 "multi-pack-index object format does not match index format".into(),
1585 ));
1586 }
1587 if let Some(previous) = previous_oid
1588 && previous.as_bytes() == object.oid.as_bytes()
1589 {
1590 return Err(GitError::InvalidFormat(
1591 "multi-pack-index contains duplicate object ids".into(),
1592 ));
1593 }
1594 if object.pack_int_id as usize >= pack_names.len() {
1595 return Err(GitError::InvalidFormat(
1596 "multi-pack-index object points past pack table".into(),
1597 ));
1598 }
1599 previous_oid = Some(&object.oid);
1600 }
1601
1602 let mut large_offsets = Vec::new();
1603 let mut chunks = vec![
1604 (*b"PNAM", write_midx_pack_names(pack_names)),
1605 (*b"OIDF", write_midx_oid_fanout(&objects)?),
1606 (*b"OIDL", write_midx_oid_lookup(&objects)),
1607 (
1608 *b"OOFF",
1609 write_midx_object_offsets(&objects, &mut large_offsets)?,
1610 ),
1611 ];
1612 if !large_offsets.is_empty() {
1613 chunks.push((*b"LOFF", large_offsets));
1614 }
1615 if let Some(preferred) = preferred_pack {
1616 let mut pseudo: Vec<u32> = (0..objects.len() as u32).collect();
1619 pseudo.sort_by_key(|&midx_pos| {
1620 let object = objects[midx_pos as usize];
1621 (
1622 object.pack_int_id != preferred,
1623 object.pack_int_id,
1624 object.offset,
1625 )
1626 });
1627 let mut ridx = Vec::with_capacity(pseudo.len() * 4);
1628 for midx_pos in pseudo {
1629 ridx.extend_from_slice(&midx_pos.to_be_bytes());
1630 }
1631 chunks.push((*b"RIDX", ridx));
1632 }
1633 write_multi_pack_index_chunks(format, version, pack_names.len() as u32, &chunks)
1634 }
1635
1636 pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
1637 let hash_len = format.raw_len();
1638 if bytes.len() < 12 + 12 + hash_len {
1639 return Err(GitError::InvalidFormat(
1640 "multi-pack-index file too short".into(),
1641 ));
1642 }
1643 if &bytes[..4] != b"MIDX" {
1644 return Err(GitError::InvalidFormat(
1645 "missing multi-pack-index signature".into(),
1646 ));
1647 }
1648 let version = bytes[4];
1649 if version != 1 && version != 2 {
1650 return Err(GitError::Unsupported(format!(
1651 "multi-pack-index version {version}"
1652 )));
1653 }
1654 let hash_id = bytes[5];
1655 if u32::from(hash_id) != hash_function_id(format) {
1656 return Err(GitError::InvalidFormat(format!(
1657 "multi-pack-index hash id {hash_id} does not match {}",
1658 format.name()
1659 )));
1660 }
1661 let chunk_count = bytes[6] as usize;
1662 let base_midx_count = bytes[7];
1663 if base_midx_count != 0 {
1664 return Err(GitError::Unsupported(format!(
1665 "multi-pack-index base count {base_midx_count}"
1666 )));
1667 }
1668 let pack_count = u32_be(&bytes[8..12]);
1669 let lookup_len = (chunk_count + 1)
1670 .checked_mul(12)
1671 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
1672 let data_start = 12usize
1673 .checked_add(lookup_len)
1674 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
1675 let checksum_offset = bytes.len() - hash_len;
1676 if data_start > checksum_offset {
1677 return Err(GitError::InvalidFormat(
1678 "truncated multi-pack-index chunk lookup".into(),
1679 ));
1680 }
1681
1682 let actual_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
1683 let checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
1684 if actual_checksum != checksum {
1685 return Err(GitError::InvalidFormat(format!(
1686 "multi-pack-index checksum mismatch: expected {checksum}, got {actual_checksum}"
1687 )));
1688 }
1689
1690 let mut entries = Vec::with_capacity(chunk_count + 1);
1691 let mut offset = 12usize;
1692 for _ in 0..=chunk_count {
1693 let id = [
1694 bytes[offset],
1695 bytes[offset + 1],
1696 bytes[offset + 2],
1697 bytes[offset + 3],
1698 ];
1699 let chunk_offset = u64_be(&bytes[offset + 4..offset + 12]);
1700 entries.push((id, chunk_offset));
1701 offset += 12;
1702 }
1703 let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
1704 return Err(GitError::InvalidFormat(
1705 "multi-pack-index chunk lookup is empty".into(),
1706 ));
1707 };
1708 if terminator_id != [0, 0, 0, 0] {
1709 return Err(GitError::InvalidFormat(
1710 "multi-pack-index chunk lookup missing terminator".into(),
1711 ));
1712 }
1713 if terminator_offset != checksum_offset as u64 {
1714 return Err(GitError::InvalidFormat(
1715 "multi-pack-index terminator does not point at checksum".into(),
1716 ));
1717 }
1718
1719 let mut chunks = Vec::with_capacity(chunk_count);
1720 let mut previous_offset = data_start as u64;
1721 for pair in entries.windows(2) {
1722 let (id, chunk_offset) = pair[0];
1723 let (_next_id, next_offset) = pair[1];
1724 if id == [0, 0, 0, 0] {
1725 return Err(GitError::InvalidFormat(
1726 "multi-pack-index chunk id is zero before terminator".into(),
1727 ));
1728 }
1729 if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
1730 return Err(GitError::InvalidFormat(
1731 "multi-pack-index chunk offsets are not monotonic".into(),
1732 ));
1733 }
1734 if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
1735 return Err(GitError::InvalidFormat(
1736 "multi-pack-index chunk length is invalid".into(),
1737 ));
1738 }
1739 chunks.push(MultiPackIndexChunk {
1740 id,
1741 offset: chunk_offset,
1742 len: next_offset - chunk_offset,
1743 });
1744 previous_offset = chunk_offset;
1745 }
1746
1747 let pack_names = parse_midx_pack_names(bytes, &chunks, pack_count as usize, version)?;
1748 let (fanout, object_count) = parse_midx_oid_fanout(bytes, &chunks)?;
1749 let object_ids = parse_midx_object_ids(bytes, &chunks, format, object_count, &fanout)?;
1750 let objects = parse_midx_object_offsets(bytes, &chunks, object_ids, pack_count)?;
1751 let reverse_index = parse_midx_reverse_index(bytes, &chunks, object_count)?;
1752 let bitmapped_packs =
1753 parse_midx_bitmapped_packs(bytes, &chunks, pack_count as usize, object_count)?;
1754
1755 Ok(Self {
1756 version,
1757 format,
1758 pack_count,
1759 pack_names,
1760 object_count: object_count as u32,
1761 fanout,
1762 objects,
1763 reverse_index,
1764 bitmapped_packs,
1765 chunks,
1766 checksum,
1767 })
1768 }
1769
1770 pub fn find(&self, oid: &ObjectId) -> Option<&MultiPackIndexEntry> {
1771 self.objects
1772 .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
1773 .ok()
1774 .map(|idx| &self.objects[idx])
1775 }
1776}
1777
1778fn validate_midx_pack_names(pack_names: &[String]) -> Result<()> {
1779 for name in pack_names {
1780 if name.is_empty() {
1781 return Err(GitError::InvalidFormat(
1782 "multi-pack-index pack name is empty".into(),
1783 ));
1784 }
1785 if name
1786 .bytes()
1787 .any(|byte| byte == 0 || matches!(byte, b'/' | b'\\'))
1788 {
1789 return Err(GitError::InvalidFormat(
1790 "multi-pack-index pack name contains an invalid byte".into(),
1791 ));
1792 }
1793 }
1794 Ok(())
1795}
1796
1797fn write_midx_pack_names(pack_names: &[String]) -> Vec<u8> {
1798 let mut out = Vec::new();
1799 for name in pack_names {
1800 out.extend_from_slice(name.as_bytes());
1801 out.push(0);
1802 }
1803 while out.len() % 4 != 0 {
1804 out.push(0);
1805 }
1806 out
1807}
1808
1809fn write_midx_oid_fanout(objects: &[&MultiPackIndexEntry]) -> Result<Vec<u8>> {
1810 let mut counts = [0u32; 256];
1811 for object in objects {
1812 let first = object.oid.as_bytes()[0] as usize;
1813 counts[first] = counts[first]
1814 .checked_add(1)
1815 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
1816 }
1817 let mut running = 0u32;
1818 let mut out = Vec::with_capacity(256 * 4);
1819 for count in counts {
1820 running = running
1821 .checked_add(count)
1822 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
1823 out.extend_from_slice(&running.to_be_bytes());
1824 }
1825 Ok(out)
1826}
1827
1828fn write_midx_oid_lookup(objects: &[&MultiPackIndexEntry]) -> Vec<u8> {
1829 let mut out = Vec::new();
1830 for object in objects {
1831 out.extend_from_slice(object.oid.as_bytes());
1832 }
1833 out
1834}
1835
1836fn write_midx_object_offsets(
1837 objects: &[&MultiPackIndexEntry],
1838 large_offsets: &mut Vec<u8>,
1839) -> Result<Vec<u8>> {
1840 let mut out = Vec::new();
1841 for object in objects {
1842 out.extend_from_slice(&object.pack_int_id.to_be_bytes());
1843 if object.offset < 0x8000_0000 {
1844 out.extend_from_slice(&(object.offset as u32).to_be_bytes());
1845 } else {
1846 let large_idx = large_offsets.len() / 8;
1847 if large_idx > 0x7fff_ffff {
1848 return Err(GitError::InvalidFormat(
1849 "too many multi-pack-index large offsets".into(),
1850 ));
1851 }
1852 out.extend_from_slice(&(0x8000_0000 | large_idx as u32).to_be_bytes());
1853 large_offsets.extend_from_slice(&object.offset.to_be_bytes());
1854 }
1855 }
1856 Ok(out)
1857}
1858
1859fn write_multi_pack_index_chunks(
1860 format: ObjectFormat,
1861 version: u8,
1862 pack_count: u32,
1863 chunks: &[([u8; 4], Vec<u8>)],
1864) -> Result<Vec<u8>> {
1865 if chunks.len() > u8::MAX as usize {
1866 return Err(GitError::InvalidFormat(
1867 "too many multi-pack-index chunks".into(),
1868 ));
1869 }
1870 let lookup_len = (chunks.len() + 1)
1871 .checked_mul(12)
1872 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
1873 let mut out = Vec::new();
1874 out.extend_from_slice(b"MIDX");
1875 out.push(version);
1876 out.push(hash_function_id(format) as u8);
1877 out.push(chunks.len() as u8);
1878 out.push(0);
1879 out.extend_from_slice(&pack_count.to_be_bytes());
1880 let mut chunk_offset = (12usize)
1881 .checked_add(lookup_len)
1882 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?
1883 as u64;
1884 for (id, data) in chunks {
1885 out.extend_from_slice(id);
1886 out.extend_from_slice(&chunk_offset.to_be_bytes());
1887 chunk_offset = chunk_offset
1888 .checked_add(data.len() as u64)
1889 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index size overflow".into()))?;
1890 }
1891 out.extend_from_slice(&[0, 0, 0, 0]);
1892 out.extend_from_slice(&chunk_offset.to_be_bytes());
1893 for (_id, data) in chunks {
1894 out.extend_from_slice(data);
1895 }
1896 let checksum = sley_core::digest_bytes(format, &out)?;
1897 out.extend_from_slice(checksum.as_bytes());
1898 Ok(out)
1899}
1900
1901#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1902struct EntryHeader {
1903 kind: PackObjectKind,
1904 size: u64,
1905}
1906
1907pub trait PackDeltaCache {
1921 fn get(&self, offset: u64) -> Option<Arc<EncodedObject>>;
1923 fn insert(&self, offset: u64, object: Arc<EncodedObject>);
1925}
1926
1927struct NoopDeltaCache;
1930
1931impl PackDeltaCache for NoopDeltaCache {
1932 fn get(&self, _offset: u64) -> Option<Arc<EncodedObject>> {
1933 None
1934 }
1935 fn insert(&self, _offset: u64, _object: Arc<EncodedObject>) {}
1936}
1937
1938thread_local! {
1944 static INFLATE: RefCell<flate2::Decompress> = RefCell::new(flate2::Decompress::new(true));
1945}
1946
1947const MAX_INFLATE_EXPANSION: usize = 1032;
1958
1959const MAX_INFLATE_RESERVE: usize = 64 * 1024 * 1024;
1965
1966fn bounded_inflate_reserve(size_hint: usize, compressed_len: usize) -> usize {
1974 let input_ceiling = compressed_len.saturating_mul(MAX_INFLATE_EXPANSION);
1975 size_hint.min(input_ceiling).clamp(64, MAX_INFLATE_RESERVE)
1977}
1978
1979fn inflate_into(compressed: &[u8], out: &mut Vec<u8>, size_hint: usize) -> Result<usize> {
1988 INFLATE.with(|cell| {
1989 let mut decompress = cell.borrow_mut();
1990 decompress.reset(true);
1991 out.reserve(bounded_inflate_reserve(size_hint, compressed.len()));
1992 let mut input = compressed;
1993 let mut consumed_total = 0usize;
1994 loop {
1995 if out.len() == out.capacity() {
1998 out.reserve(out.len().max(64));
1999 }
2000 let before_in = decompress.total_in();
2001 let before_out = decompress.total_out();
2002 let status = decompress
2003 .decompress_vec(input, out, flate2::FlushDecompress::None)
2004 .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
2005 let consumed = (decompress.total_in() - before_in) as usize;
2006 let produced = decompress.total_out() - before_out;
2007 input = &input[consumed..];
2008 consumed_total += consumed;
2009 match status {
2010 flate2::Status::StreamEnd => return Ok(consumed_total),
2011 _ if consumed == 0 && produced == 0 => {
2012 return Err(GitError::InvalidObject("truncated zlib stream".into()));
2013 }
2014 _ => {}
2015 }
2016 }
2017 })
2018}
2019
2020fn inflate_prefix(compressed: &[u8], max_out: usize, out: &mut Vec<u8>) -> Result<()> {
2024 INFLATE.with(|cell| {
2025 let mut decompress = cell.borrow_mut();
2026 decompress.reset(true);
2027 out.reserve(max_out.max(16));
2028 let mut input = compressed;
2029 while out.len() < max_out {
2030 if out.len() == out.capacity() {
2031 out.reserve(out.len().max(16));
2032 }
2033 let before_in = decompress.total_in();
2034 let before_out = decompress.total_out();
2035 let status = decompress
2036 .decompress_vec(input, out, flate2::FlushDecompress::None)
2037 .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
2038 let consumed = (decompress.total_in() - before_in) as usize;
2039 let produced = decompress.total_out() - before_out;
2040 input = &input[consumed..];
2041 if status == flate2::Status::StreamEnd || (consumed == 0 && produced == 0) {
2042 break;
2043 }
2044 }
2045 Ok(())
2046 })
2047}
2048
2049pub fn read_object_at_arc<F>(
2057 pack_bytes: &[u8],
2058 offset: u64,
2059 format: ObjectFormat,
2060 resolve_ref_base: F,
2061) -> Result<Arc<EncodedObject>>
2062where
2063 F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
2064{
2065 read_object_at_with_cache_arc(
2066 pack_bytes,
2067 offset,
2068 format,
2069 resolve_ref_base,
2070 &NoopDeltaCache,
2071 )
2072}
2073
2074pub fn read_object_at_with_cache_arc<F, C>(
2083 pack_bytes: &[u8],
2084 offset: u64,
2085 format: ObjectFormat,
2086 mut resolve_ref_base: F,
2087 cache: &C,
2088) -> Result<Arc<EncodedObject>>
2089where
2090 F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
2091 C: PackDeltaCache + ?Sized,
2092{
2093 read_object_at_inner(pack_bytes, offset, format, &mut resolve_ref_base, cache)
2094}
2095
2096fn read_object_at_inner<F, C>(
2097 pack_bytes: &[u8],
2098 offset: u64,
2099 format: ObjectFormat,
2100 resolve_ref_base: &mut F,
2101 cache: &C,
2102) -> Result<Arc<EncodedObject>>
2103where
2104 F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
2105 C: PackDeltaCache + ?Sized,
2106{
2107 if let Some(object) = cache.get(offset) {
2110 return Ok(object);
2111 }
2112 let trailer_offset = pack_bytes
2113 .len()
2114 .checked_sub(format.raw_len())
2115 .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
2116 let mut cursor = usize::try_from(offset)
2117 .ok()
2118 .filter(|&value| value < trailer_offset)
2119 .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
2120 let header = parse_entry_header(pack_bytes, &mut cursor)?;
2121 let base = match header.kind {
2122 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
2123 pack_bytes,
2124 &mut cursor,
2125 offset,
2126 )?)),
2127 PackObjectKind::RefDelta => {
2128 let hash_len = format.raw_len();
2129 if cursor + hash_len > trailer_offset {
2130 return Err(GitError::InvalidFormat(
2131 "truncated ref-delta base object id".into(),
2132 ));
2133 }
2134 let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
2135 cursor += hash_len;
2136 Some(DeltaBase::Ref(oid))
2137 }
2138 _ => None,
2139 };
2140 let mut body = Vec::new();
2141 inflate_into(
2142 &pack_bytes[cursor..trailer_offset],
2143 &mut body,
2144 header.size.min(usize::MAX as u64) as usize,
2145 )?;
2146 if body.len() as u64 != header.size {
2147 return Err(GitError::InvalidObject(format!(
2148 "pack object declared {} bytes, decoded {}",
2149 header.size,
2150 body.len()
2151 )));
2152 }
2153 let object = match base {
2154 None => {
2155 let object_type = match header.kind {
2156 PackObjectKind::Commit => ObjectType::Commit,
2157 PackObjectKind::Tree => ObjectType::Tree,
2158 PackObjectKind::Blob => ObjectType::Blob,
2159 PackObjectKind::Tag => ObjectType::Tag,
2160 PackObjectKind::OfsDelta | PackObjectKind::RefDelta => {
2161 return Err(GitError::InvalidFormat(
2162 "delta pack entry decoded without a base".into(),
2163 ));
2164 }
2165 };
2166 Arc::new(EncodedObject::new(object_type, body))
2167 }
2168 Some(DeltaBase::Offset(base_offset)) => {
2169 let base =
2170 read_object_at_inner(pack_bytes, base_offset, format, resolve_ref_base, cache)?;
2171 let resolved = apply_pack_delta(&base.body, &body)?;
2172 Arc::new(EncodedObject::new(base.object_type, resolved))
2173 }
2174 Some(DeltaBase::Ref(base_oid)) => {
2175 let base = resolve_ref_base(&base_oid)?
2176 .ok_or_else(|| GitError::not_found(format!("ref-delta base object {base_oid}")))?;
2177 let resolved = apply_pack_delta(&base.body, &body)?;
2178 Arc::new(EncodedObject::new(base.object_type, resolved))
2179 }
2180 };
2181 cache.insert(offset, Arc::clone(&object));
2185 Ok(object)
2186}
2187
2188pub fn read_object_header_at<F>(
2198 pack_bytes: &[u8],
2199 offset: u64,
2200 format: ObjectFormat,
2201 mut resolve_ref_base_type: F,
2202) -> Result<(ObjectType, u64)>
2203where
2204 F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
2205{
2206 read_object_header_at_inner(
2207 pack_bytes,
2208 offset,
2209 format,
2210 &mut resolve_ref_base_type,
2211 &mut NoopHeaderTypeCache,
2212 )
2213}
2214
2215pub trait HeaderTypeCache {
2232 fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)>;
2234 fn put(&mut self, pack_offset: u64, header: (ObjectType, u64));
2236}
2237
2238struct NoopHeaderTypeCache;
2239
2240impl HeaderTypeCache for NoopHeaderTypeCache {
2241 fn get(&self, _pack_offset: u64) -> Option<(ObjectType, u64)> {
2242 None
2243 }
2244 fn put(&mut self, _pack_offset: u64, _header: (ObjectType, u64)) {}
2245}
2246
2247pub fn read_object_header_at_with_cache<F, C>(
2253 pack_bytes: &[u8],
2254 offset: u64,
2255 format: ObjectFormat,
2256 mut resolve_ref_base_type: F,
2257 type_cache: &mut C,
2258) -> Result<(ObjectType, u64)>
2259where
2260 F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
2261 C: HeaderTypeCache + ?Sized,
2262{
2263 if let Some(header) = type_cache.get(offset) {
2264 return Ok(header);
2265 }
2266 read_object_header_at_inner(
2267 pack_bytes,
2268 offset,
2269 format,
2270 &mut resolve_ref_base_type,
2271 type_cache,
2272 )
2273}
2274
2275fn read_object_header_at_inner<F, C>(
2276 pack_bytes: &[u8],
2277 offset: u64,
2278 format: ObjectFormat,
2279 resolve_ref_base_type: &mut F,
2280 type_cache: &mut C,
2281) -> Result<(ObjectType, u64)>
2282where
2283 F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
2284 C: HeaderTypeCache + ?Sized,
2285{
2286 let trailer_offset = pack_bytes
2287 .len()
2288 .checked_sub(format.raw_len())
2289 .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
2290 let mut cursor = usize::try_from(offset)
2291 .ok()
2292 .filter(|&value| value < trailer_offset)
2293 .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
2294 let header = parse_entry_header(pack_bytes, &mut cursor)?;
2295 let resolved = match header.kind {
2296 PackObjectKind::Commit => (ObjectType::Commit, header.size),
2297 PackObjectKind::Tree => (ObjectType::Tree, header.size),
2298 PackObjectKind::Blob => (ObjectType::Blob, header.size),
2299 PackObjectKind::Tag => (ObjectType::Tag, header.size),
2300 PackObjectKind::OfsDelta => {
2301 let base_offset = parse_ofs_delta_base_offset(pack_bytes, &mut cursor, offset)?;
2302 let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
2303 let base_type = match type_cache.get(base_offset) {
2306 Some((base_type, _)) => base_type,
2307 None => {
2308 let (base_type, _) = read_object_header_at_inner(
2309 pack_bytes,
2310 base_offset,
2311 format,
2312 resolve_ref_base_type,
2313 type_cache,
2314 )?;
2315 base_type
2316 }
2317 };
2318 (base_type, size)
2319 }
2320 PackObjectKind::RefDelta => {
2321 let hash_len = format.raw_len();
2322 if cursor + hash_len > trailer_offset {
2323 return Err(GitError::InvalidFormat(
2324 "truncated ref-delta base object id".into(),
2325 ));
2326 }
2327 let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
2328 cursor += hash_len;
2329 let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
2330 let base_type = resolve_ref_base_type(&oid)?
2331 .ok_or_else(|| GitError::not_found(format!("ref-delta base object {oid}")))?;
2332 (base_type, size)
2333 }
2334 };
2335 type_cache.put(offset, resolved);
2338 Ok(resolved)
2339}
2340
2341const DELTA_HEADER_PREFIX_LEN: usize = 32;
2345
2346fn delta_result_size_from_stream(compressed: &[u8]) -> Result<u64> {
2349 let mut prefix = Vec::new();
2350 inflate_prefix(compressed, DELTA_HEADER_PREFIX_LEN, &mut prefix)?;
2351 decoded_delta_result_size(&prefix)
2352}
2353
2354fn parse_entry_header(bytes: &[u8], offset: &mut usize) -> Result<EntryHeader> {
2355 let first = next_byte(bytes, offset)?;
2356 let mut size = u64::from(first & 0x0f);
2357 let kind = match (first >> 4) & 0x07 {
2358 1 => PackObjectKind::Commit,
2359 2 => PackObjectKind::Tree,
2360 3 => PackObjectKind::Blob,
2361 4 => PackObjectKind::Tag,
2362 6 => PackObjectKind::OfsDelta,
2363 7 => PackObjectKind::RefDelta,
2364 other => {
2365 return Err(GitError::InvalidFormat(format!(
2366 "invalid pack object type {other}"
2367 )));
2368 }
2369 };
2370 let mut shift = 4;
2371 let mut byte = first;
2372 while byte & 0x80 != 0 {
2373 byte = next_byte(bytes, offset)?;
2374 let part = u64::from(byte & 0x7f);
2375 size = size
2376 .checked_add(
2377 part.checked_shl(shift)
2378 .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?,
2379 )
2380 .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?;
2381 shift += 7;
2382 }
2383 Ok(EntryHeader { kind, size })
2384}
2385
2386fn parse_ofs_delta_base_offset(bytes: &[u8], offset: &mut usize, entry_offset: u64) -> Result<u64> {
2387 let mut byte = next_byte(bytes, offset)?;
2388 let mut relative = u64::from(byte & 0x7f);
2389 while byte & 0x80 != 0 {
2390 byte = next_byte(bytes, offset)?;
2391 relative = relative
2392 .checked_add(1)
2393 .and_then(|value| value.checked_shl(7))
2394 .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
2395 .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
2396 }
2397 entry_offset
2398 .checked_sub(relative)
2399 .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
2400}
2401
2402fn resolve_pack_entries<F>(
2403 parsed: Vec<ParsedPackEntry>,
2404 format: ObjectFormat,
2405 external_base: &mut F,
2406) -> Result<Vec<PackObject>>
2407where
2408 F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
2409{
2410 let mut offset_to_index = HashMap::with_capacity(parsed.len());
2411 for (idx, entry) in parsed.iter().enumerate() {
2412 offset_to_index.insert(parsed_entry_offset(entry), idx);
2413 }
2414
2415 let mut resolved = vec![None; parsed.len()];
2416 let mut oid_to_index = HashMap::new();
2417 let mut unresolved = 0usize;
2418 for (idx, entry) in parsed.iter().enumerate() {
2419 match entry {
2420 ParsedPackEntry::Resolved(object) => {
2421 oid_to_index.insert(object.entry.oid, idx);
2422 resolved[idx] = Some(object.clone());
2423 }
2424 ParsedPackEntry::Delta { .. } => unresolved += 1,
2425 }
2426 }
2427
2428 while unresolved != 0 {
2429 let mut progress = false;
2430 for idx in 0..parsed.len() {
2431 if resolved[idx].is_some() {
2432 continue;
2433 }
2434 let ParsedPackEntry::Delta {
2435 base,
2436 compressed_size,
2437 delta_size,
2438 offset,
2439 delta,
2440 } = &parsed[idx]
2441 else {
2442 continue;
2443 };
2444 let Some(base_object) = delta_base_object(
2445 base,
2446 &offset_to_index,
2447 &oid_to_index,
2448 &resolved,
2449 external_base,
2450 )?
2451 else {
2452 continue;
2453 };
2454 let body = apply_pack_delta(base_object.body(), delta)?;
2455 let object = EncodedObject::new(base_object.object_type(), body);
2456 let oid = object.object_id(format)?;
2457 let pack_object = PackObject {
2458 entry: PackEntry {
2459 oid,
2460 compressed_size: *compressed_size,
2461 uncompressed_size: object.body.len() as u64,
2462 offset: *offset,
2463 },
2464 object,
2465 };
2466 if pack_object.entry.uncompressed_size != decoded_delta_result_size(delta)? {
2467 return Err(GitError::InvalidObject(
2468 "resolved delta size does not match delta header".into(),
2469 ));
2470 }
2471 if *delta_size != delta.len() as u64 {
2472 return Err(GitError::InvalidObject(format!(
2473 "pack delta declared {delta_size} bytes, decoded {}",
2474 delta.len()
2475 )));
2476 }
2477 oid_to_index.insert(oid, idx);
2478 resolved[idx] = Some(pack_object);
2479 unresolved -= 1;
2480 progress = true;
2481 }
2482 if !progress {
2483 return Err(GitError::Unsupported("unresolved delta base".into()));
2484 }
2485 }
2486
2487 resolved
2488 .into_iter()
2489 .map(|entry| entry.ok_or_else(|| GitError::InvalidFormat("unresolved pack entry".into())))
2490 .collect()
2491}
2492
2493fn parsed_entry_offset(entry: &ParsedPackEntry) -> u64 {
2494 match entry {
2495 ParsedPackEntry::Resolved(object) => object.entry.offset,
2496 ParsedPackEntry::Delta { offset, .. } => *offset,
2497 }
2498}
2499
2500enum DeltaBaseObject<'a> {
2501 Borrowed(&'a EncodedObject),
2502 Owned(EncodedObject),
2503}
2504
2505impl DeltaBaseObject<'_> {
2506 fn object_type(&self) -> ObjectType {
2507 match self {
2508 Self::Borrowed(object) => object.object_type,
2509 Self::Owned(object) => object.object_type,
2510 }
2511 }
2512
2513 fn body(&self) -> &[u8] {
2514 match self {
2515 Self::Borrowed(object) => &object.body,
2516 Self::Owned(object) => &object.body,
2517 }
2518 }
2519}
2520
2521fn delta_base_object<'a, F>(
2522 base: &DeltaBase,
2523 offset_to_index: &HashMap<u64, usize>,
2524 oid_to_index: &HashMap<ObjectId, usize>,
2525 resolved: &'a [Option<PackObject>],
2526 external_base: &mut F,
2527) -> Result<Option<DeltaBaseObject<'a>>>
2528where
2529 F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
2530{
2531 match base {
2532 DeltaBase::Offset(offset) => {
2533 let Some(index) = offset_to_index.get(offset).copied() else {
2534 return Err(GitError::InvalidFormat(format!(
2535 "ofs-delta base offset {offset} not found"
2536 )));
2537 };
2538 Ok(resolved[index]
2539 .as_ref()
2540 .map(|object| DeltaBaseObject::Borrowed(&object.object)))
2541 }
2542 DeltaBase::Ref(oid) => {
2543 if let Some(index) = oid_to_index.get(oid).copied() {
2544 return Ok(resolved[index]
2545 .as_ref()
2546 .map(|object| DeltaBaseObject::Borrowed(&object.object)));
2547 }
2548 external_base(oid).map(|object| object.map(DeltaBaseObject::Owned))
2549 }
2550 }
2551}
2552
2553fn apply_pack_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
2554 let mut cursor = 0usize;
2555 let base_size = read_delta_varint(delta, &mut cursor)?;
2556 if base_size != base.len() as u64 {
2557 return Err(GitError::InvalidObject(format!(
2558 "delta base size mismatch: expected {base_size}, got {}",
2559 base.len()
2560 )));
2561 }
2562 let result_size = read_delta_varint(delta, &mut cursor)?;
2563 let result_size_hint = usize::try_from(result_size).unwrap_or(usize::MAX);
2572 let mut result = Vec::with_capacity(bounded_inflate_reserve(result_size_hint, delta.len()));
2573 while cursor < delta.len() {
2574 let command = delta[cursor];
2575 cursor += 1;
2576 if command & 0x80 != 0 {
2577 let copy_offset =
2578 read_delta_copy_value(delta, &mut cursor, command, &[0x01, 0x02, 0x04, 0x08])?;
2579 let mut copy_size =
2580 read_delta_copy_value(delta, &mut cursor, command, &[0x10, 0x20, 0x40])?;
2581 if copy_size == 0 {
2582 copy_size = 0x10000;
2583 }
2584 let start = usize::try_from(copy_offset)
2585 .map_err(|_| GitError::InvalidObject("delta copy offset overflows usize".into()))?;
2586 let len = usize::try_from(copy_size)
2587 .map_err(|_| GitError::InvalidObject("delta copy size overflows usize".into()))?;
2588 let end = start
2589 .checked_add(len)
2590 .ok_or_else(|| GitError::InvalidObject("delta copy range overflow".into()))?;
2591 let Some(slice) = base.get(start..end) else {
2592 return Err(GitError::InvalidObject(
2593 "delta copy range exceeds base object".into(),
2594 ));
2595 };
2596 result.extend_from_slice(slice);
2597 } else if command != 0 {
2598 let len = usize::from(command);
2599 let end = cursor
2600 .checked_add(len)
2601 .ok_or_else(|| GitError::InvalidObject("delta insert range overflow".into()))?;
2602 let Some(slice) = delta.get(cursor..end) else {
2603 return Err(GitError::InvalidObject(
2604 "delta insert range exceeds delta data".into(),
2605 ));
2606 };
2607 result.extend_from_slice(slice);
2608 cursor = end;
2609 } else {
2610 return Err(GitError::InvalidObject(
2611 "delta contains reserved zero command".into(),
2612 ));
2613 }
2614 }
2615 if result.len() as u64 != result_size {
2616 return Err(GitError::InvalidObject(format!(
2617 "delta result size mismatch: expected {result_size}, got {}",
2618 result.len()
2619 )));
2620 }
2621 Ok(result)
2622}
2623
2624fn decoded_delta_result_size(delta: &[u8]) -> Result<u64> {
2625 let mut cursor = 0usize;
2626 let _ = read_delta_varint(delta, &mut cursor)?;
2627 read_delta_varint(delta, &mut cursor)
2628}
2629
2630const DELTA_BLOCK_SIZE: usize = 16;
2633
2634const DELTA_INDEX_STRIDE: usize = DELTA_BLOCK_SIZE;
2639
2640const DELTA_BUCKET_BITS: usize = 12;
2643const DELTA_BUCKET_COUNT: usize = 1 << DELTA_BUCKET_BITS;
2644const DELTA_BUCKET_MASK: usize = DELTA_BUCKET_COUNT - 1;
2645
2646struct DeltaIndex<'a> {
2653 base: &'a [u8],
2654 blocks: Vec<DeltaBlock>,
2655 buckets: Vec<usize>,
2656}
2657
2658#[derive(Debug, Clone, Copy, PartialEq, Eq)]
2659struct DeltaBlock {
2660 hash: u32,
2661 offset: usize,
2662}
2663
2664impl<'a> DeltaIndex<'a> {
2665 fn new(base: &'a [u8]) -> Self {
2666 let mut buckets = vec![0usize; DELTA_BUCKET_COUNT + 1];
2667 let mut anchors = Vec::with_capacity(delta_anchor_count(base.len()));
2668 for_each_delta_anchor(base.len(), |offset| {
2669 let hash = block_hash(&base[offset..offset + DELTA_BLOCK_SIZE]);
2670 buckets[delta_bucket(hash) + 1] += 1;
2671 anchors.push(DeltaBlock { hash, offset });
2672 });
2673 for idx in 1..buckets.len() {
2674 buckets[idx] += buckets[idx - 1];
2675 }
2676
2677 let mut next_offsets = buckets[..DELTA_BUCKET_COUNT].to_vec();
2678 let mut blocks = vec![DeltaBlock { hash: 0, offset: 0 }; anchors.len()];
2679 for anchor in anchors {
2680 let bucket = delta_bucket(anchor.hash);
2681 let next = &mut next_offsets[bucket];
2682 blocks[*next] = anchor;
2683 *next += 1;
2684 }
2685
2686 Self {
2687 base,
2688 blocks,
2689 buckets,
2690 }
2691 }
2692
2693 fn candidate_blocks(&self, hash: u32) -> impl Iterator<Item = &DeltaBlock> {
2694 let bucket = delta_bucket(hash);
2695 let start = self.buckets[bucket];
2696 let end = self.buckets[bucket + 1];
2697 self.blocks[start..end]
2698 .iter()
2699 .filter(move |block| block.hash == hash)
2700 }
2701
2702 fn has_hash(&self, hash: u32) -> bool {
2703 self.candidate_blocks(hash).next().is_some()
2704 }
2705
2706 fn has_shared_anchor(&self, target: &[u8]) -> bool {
2707 if target.len() < DELTA_BLOCK_SIZE || self.blocks.is_empty() {
2708 return false;
2709 }
2710 let last = target.len() - DELTA_BLOCK_SIZE;
2711 for offset in (0..=last).step_by(DELTA_INDEX_STRIDE) {
2712 let hash = block_hash(&target[offset..offset + DELTA_BLOCK_SIZE]);
2713 if self.has_hash(hash) {
2714 return true;
2715 }
2716 }
2717 if !last.is_multiple_of(DELTA_INDEX_STRIDE) {
2718 let hash = block_hash(&target[last..last + DELTA_BLOCK_SIZE]);
2719 if self.has_hash(hash) {
2720 return true;
2721 }
2722 }
2723 false
2724 }
2725
2726 fn delta(&self, target: &[u8]) -> Option<Vec<u8>> {
2728 if !self.has_shared_anchor(target) {
2729 return None;
2730 }
2731 let base = self.base;
2732 let mut delta = Vec::new();
2733 write_delta_varint(&mut delta, base.len() as u64);
2734 write_delta_varint(&mut delta, target.len() as u64);
2735
2736 let mut pending_insert_start = 0usize;
2737 let mut pos = 0usize;
2738 while pos < target.len() {
2739 let mut best_len = 0usize;
2740 let mut best_offset = 0usize;
2741 if pos + DELTA_BLOCK_SIZE <= target.len() {
2742 let hash = block_hash(&target[pos..pos + DELTA_BLOCK_SIZE]);
2743 for candidate in self.candidate_blocks(hash).take(DELTA_MAX_CHAIN) {
2744 let candidate = candidate.offset;
2747 let max_len = (base.len() - candidate).min(target.len() - pos);
2748 let mut len = 0usize;
2749 while len < max_len && base[candidate + len] == target[pos + len] {
2750 len += 1;
2751 }
2752 if len > best_len {
2753 best_len = len;
2754 best_offset = candidate;
2755 }
2756 }
2757 }
2758
2759 if best_len >= DELTA_BLOCK_SIZE {
2760 if pending_insert_start < pos {
2761 write_delta_insert(&mut delta, &target[pending_insert_start..pos]);
2762 }
2763 write_delta_copy(&mut delta, best_offset as u64, best_len as u64);
2764 pos += best_len;
2765 pending_insert_start = pos;
2766 } else {
2767 pos += 1;
2768 }
2769 }
2770 if pending_insert_start < target.len() {
2771 write_delta_insert(&mut delta, &target[pending_insert_start..]);
2772 }
2773 Some(delta)
2774 }
2775}
2776
2777fn for_each_delta_anchor(mut len: usize, mut visit: impl FnMut(usize)) {
2778 if len < DELTA_BLOCK_SIZE {
2779 return;
2780 }
2781 len -= DELTA_BLOCK_SIZE;
2782 for offset in (0..=len).step_by(DELTA_INDEX_STRIDE) {
2783 visit(offset);
2784 }
2785 if !len.is_multiple_of(DELTA_INDEX_STRIDE) {
2786 visit(len);
2787 }
2788}
2789
2790fn delta_anchor_count(len: usize) -> usize {
2791 if len < DELTA_BLOCK_SIZE {
2792 return 0;
2793 }
2794 let last = len - DELTA_BLOCK_SIZE;
2795 (last / DELTA_INDEX_STRIDE) + 1 + usize::from(!last.is_multiple_of(DELTA_INDEX_STRIDE))
2796}
2797
2798fn delta_bucket(hash: u32) -> usize {
2799 (hash as usize) & DELTA_BUCKET_MASK
2800}
2801
2802const DELTA_MAX_CHAIN: usize = 64;
2805
2806fn block_hash(block: &[u8]) -> u32 {
2812 let mut hash = 0u32;
2813 for &byte in block {
2814 hash = hash.wrapping_mul(0x0100_0193) ^ u32::from(byte);
2815 }
2816 hash
2817}
2818
2819#[derive(Debug, Clone, PartialEq, Eq)]
2821enum PlannedBase {
2822 None,
2824 InPack { base_idx: usize, delta: Vec<u8> },
2828 External { base_oid: ObjectId, delta: Vec<u8> },
2830}
2831
2832#[derive(Debug, Clone, PartialEq, Eq)]
2833struct PlannedEntry {
2834 base: PlannedBase,
2835}
2836
2837fn compress_planned_payloads(
2838 objects: &[&EncodedObject],
2839 plan: &[PlannedEntry],
2840 order: &[usize],
2841) -> Result<Vec<Vec<u8>>> {
2842 if order.is_empty() {
2843 return Ok(Vec::new());
2844 }
2845
2846 let worker_count = std::thread::available_parallelism()
2847 .map(|threads| threads.get())
2848 .unwrap_or(1)
2849 .min(PACK_PARALLEL_COMPRESSION_MAX_THREADS)
2850 .min(order.len());
2851 if worker_count <= 1 || order.len() < PACK_PARALLEL_COMPRESSION_MIN_OBJECTS {
2852 let mut payloads = Vec::with_capacity(order.len());
2853 for &idx in order {
2854 payloads.push(compressed_payload(planned_payload(objects, plan, idx))?);
2855 }
2856 return Ok(payloads);
2857 }
2858
2859 let chunk_len = order.len().div_ceil(worker_count);
2860 let mut payloads: Vec<Vec<u8>> = std::iter::repeat_with(Vec::new).take(order.len()).collect();
2861 std::thread::scope(|scope| {
2862 let mut handles = Vec::new();
2863 for (chunk_idx, chunk) in order.chunks(chunk_len).enumerate() {
2864 let chunk_start = chunk_idx * chunk_len;
2865 handles.push(scope.spawn(move || -> Result<Vec<(usize, Vec<u8>)>> {
2866 let mut chunk_payloads = Vec::with_capacity(chunk.len());
2867 for (offset, &idx) in chunk.iter().enumerate() {
2868 chunk_payloads.push((
2869 chunk_start + offset,
2870 compressed_payload(planned_payload(objects, plan, idx))?,
2871 ));
2872 }
2873 Ok(chunk_payloads)
2874 }));
2875 }
2876
2877 let mut first_error = None;
2878 for handle in handles {
2879 match handle.join() {
2880 Ok(Ok(chunk_payloads)) => {
2881 if first_error.is_none() {
2882 for (pos, payload) in chunk_payloads {
2883 payloads[pos] = payload;
2884 }
2885 }
2886 }
2887 Ok(Err(err)) => {
2888 first_error.get_or_insert(err);
2889 }
2890 Err(_) => {
2891 first_error.get_or_insert_with(|| {
2892 GitError::InvalidObject("pack compression worker panicked".into())
2893 });
2894 }
2895 }
2896 }
2897
2898 match first_error {
2899 Some(err) => Err(err),
2900 None => Ok(()),
2901 }
2902 })?;
2903 Ok(payloads)
2904}
2905
2906fn planned_payload<'a>(
2907 objects: &'a [&'a EncodedObject],
2908 plan: &'a [PlannedEntry],
2909 idx: usize,
2910) -> &'a [u8] {
2911 match &plan[idx].base {
2912 PlannedBase::None => &objects[idx].body,
2913 PlannedBase::InPack { delta, .. } | PlannedBase::External { delta, .. } => delta,
2914 }
2915}
2916
2917fn compressed_payload(body: &[u8]) -> Result<Vec<u8>> {
2918 let mut out = Vec::new();
2919 write_compressed_payload(&mut out, body)?;
2920 Ok(out)
2921}
2922
2923const DELTA_MAX_EXTERNAL_BASES: usize = 64;
2926
2927struct DeltaWindowEntry<'a> {
2928 idx: usize,
2929 index: DeltaIndex<'a>,
2930}
2931
2932fn delta_type_rank(object_type: ObjectType) -> u8 {
2935 match object_type {
2936 ObjectType::Commit => 0,
2937 ObjectType::Tree => 1,
2938 ObjectType::Blob => 2,
2939 ObjectType::Tag => 3,
2940 }
2941}
2942
2943fn plan_pack_deltas(
2973 objects: &[&EncodedObject],
2974 object_ids: &[ObjectId],
2975 options: &PackWriteOptions,
2976) -> Result<(Vec<PlannedEntry>, Vec<usize>)> {
2977 let count = objects.len();
2978 let mut plan: Vec<PlannedEntry> = (0..count)
2979 .map(|_| PlannedEntry {
2980 base: PlannedBase::None,
2981 })
2982 .collect();
2983
2984 let mut order: Vec<usize> = (0..count).collect();
2988 if options.reorder && options.depth > 0 {
2989 order.sort_by(|&left, &right| {
2990 delta_type_rank(objects[left].object_type)
2991 .cmp(&delta_type_rank(objects[right].object_type))
2992 .then_with(|| objects[right].body.len().cmp(&objects[left].body.len()))
2993 .then_with(|| {
2994 object_ids[left]
2995 .as_bytes()
2996 .cmp(object_ids[right].as_bytes())
2997 })
2998 });
2999 }
3000
3001 if options.depth == 0 {
3002 return Ok((plan, order));
3003 }
3004
3005 let mut external_indexes: Vec<(ObjectId, ObjectType, DeltaIndex<'_>)> =
3008 Vec::with_capacity(options.thin_bases.len());
3009 for (oid, object) in &options.thin_bases {
3010 external_indexes.push((*oid, object.object_type, DeltaIndex::new(&object.body)));
3011 }
3012
3013 let mut depth = vec![0usize; count];
3016 let mut window: std::collections::VecDeque<DeltaWindowEntry<'_>> =
3018 std::collections::VecDeque::new();
3019
3020 for &idx in &order {
3021 let target = &objects[idx].body;
3022 let target_type = objects[idx].object_type;
3023
3024 let mut best_delta: Option<Vec<u8>> = None;
3025 let mut best_base = PlannedBase::None;
3026
3027 for base_entry in window.iter().rev() {
3029 let base_idx = base_entry.idx;
3030 if objects[base_idx].object_type != target_type {
3031 continue;
3032 }
3033 if depth[base_idx] + 1 > options.depth {
3036 continue;
3037 }
3038 let Some(delta) = base_entry.index.delta(target) else {
3039 continue;
3040 };
3041 if !delta_is_acceptable(&delta, target.len()) {
3042 continue;
3043 }
3044 if best_delta
3045 .as_ref()
3046 .is_none_or(|current| delta.len() < current.len())
3047 {
3048 best_delta = Some(delta);
3049 best_base = PlannedBase::InPack {
3050 base_idx,
3051 delta: Vec::new(),
3052 };
3053 }
3054 }
3055
3056 for (base_oid, base_type, base_index) in
3059 external_indexes.iter().take(DELTA_MAX_EXTERNAL_BASES)
3060 {
3061 if *base_type != target_type {
3062 continue;
3063 }
3064 let Some(delta) = base_index.delta(target) else {
3065 continue;
3066 };
3067 if !delta_is_acceptable(&delta, target.len()) {
3068 continue;
3069 }
3070 if best_delta
3071 .as_ref()
3072 .is_none_or(|current| delta.len() < current.len())
3073 {
3074 best_delta = Some(delta);
3075 best_base = PlannedBase::External {
3076 base_oid: *base_oid,
3077 delta: Vec::new(),
3078 };
3079 }
3080 }
3081
3082 if let Some(delta) = best_delta {
3083 match best_base {
3084 PlannedBase::InPack { base_idx, .. } => {
3085 depth[idx] = depth[base_idx] + 1;
3086 plan[idx].base = PlannedBase::InPack { base_idx, delta };
3087 }
3088 PlannedBase::External { base_oid, .. } => {
3089 depth[idx] = 1;
3090 plan[idx].base = PlannedBase::External { base_oid, delta };
3091 }
3092 PlannedBase::None => {}
3093 }
3094 }
3095
3096 window.push_back(DeltaWindowEntry {
3098 idx,
3099 index: DeltaIndex::new(&objects[idx].body),
3100 });
3101 while window.len() > options.window {
3102 window.pop_front();
3103 }
3104 }
3105
3106 Ok((plan, order))
3107}
3108
3109fn delta_is_acceptable(delta: &[u8], target_len: usize) -> bool {
3114 !delta.is_empty() && delta.len() < target_len
3115}
3116
3117fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
3118 loop {
3119 let mut byte = (value as u8) & 0x7f;
3120 value >>= 7;
3121 if value != 0 {
3122 byte |= 0x80;
3123 }
3124 out.push(byte);
3125 if value == 0 {
3126 break;
3127 }
3128 }
3129}
3130
3131fn write_delta_copy(out: &mut Vec<u8>, mut offset: u64, mut size: u64) {
3132 while size != 0 {
3133 let chunk = size.min(0x10000);
3134 let encoded_size = if chunk == 0x10000 { 0 } else { chunk };
3135 let mut command = 0x80u8;
3136 let mut payload = [0u8; 7];
3137 let mut payload_len = 0usize;
3138 for idx in 0..4 {
3139 let byte = ((offset >> (idx * 8)) & 0xff) as u8;
3140 if byte != 0 {
3141 command |= 1 << idx;
3142 payload[payload_len] = byte;
3143 payload_len += 1;
3144 }
3145 }
3146 for idx in 0..3 {
3147 let byte = ((encoded_size >> (idx * 8)) & 0xff) as u8;
3148 if byte != 0 {
3149 command |= 0x10 << idx;
3150 payload[payload_len] = byte;
3151 payload_len += 1;
3152 }
3153 }
3154 out.push(command);
3155 out.extend_from_slice(&payload[..payload_len]);
3156 offset += chunk;
3157 size -= chunk;
3158 }
3159}
3160
3161fn write_delta_insert(out: &mut Vec<u8>, mut bytes: &[u8]) {
3162 while !bytes.is_empty() {
3163 let chunk_len = bytes.len().min(0x7f);
3164 out.push(chunk_len as u8);
3165 out.extend_from_slice(&bytes[..chunk_len]);
3166 bytes = &bytes[chunk_len..];
3167 }
3168}
3169
3170fn read_delta_varint(delta: &[u8], cursor: &mut usize) -> Result<u64> {
3171 let mut value = 0u64;
3172 let mut shift = 0u32;
3173 loop {
3174 let Some(byte) = delta.get(*cursor).copied() else {
3175 return Err(GitError::InvalidObject("truncated delta size".into()));
3176 };
3177 *cursor += 1;
3178 value = value
3179 .checked_add(
3180 u64::from(byte & 0x7f)
3181 .checked_shl(shift)
3182 .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?,
3183 )
3184 .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
3185 if byte & 0x80 == 0 {
3186 return Ok(value);
3187 }
3188 shift = shift
3189 .checked_add(7)
3190 .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
3191 }
3192}
3193
3194fn read_delta_copy_value(
3195 delta: &[u8],
3196 cursor: &mut usize,
3197 command: u8,
3198 masks: &[u8],
3199) -> Result<u64> {
3200 let mut value = 0u64;
3201 for (shift, mask) in masks.iter().enumerate() {
3202 if command & mask != 0 {
3203 let Some(byte) = delta.get(*cursor).copied() else {
3204 return Err(GitError::InvalidObject(
3205 "truncated delta copy command".into(),
3206 ));
3207 };
3208 *cursor += 1;
3209 value |= u64::from(byte) << (shift * 8);
3210 }
3211 }
3212 Ok(value)
3213}
3214
3215thread_local! {
3216 static DEFLATE: RefCell<Compress> = RefCell::new(Compress::new(Compression::default(), true));
3217}
3218
3219fn write_compressed_payload(out: &mut Vec<u8>, body: &[u8]) -> Result<()> {
3220 DEFLATE.with(|cell| {
3221 let mut compressor = cell.borrow_mut();
3222 compressor.reset();
3223 out.reserve(zlib_compress_bound(body.len()));
3224 let status = compressor
3225 .compress_vec(body, out, FlushCompress::Finish)
3226 .map_err(|err| GitError::InvalidObject(format!("zlib compression failed: {err}")))?;
3227 if status != Status::StreamEnd || compressor.total_in() != body.len() as u64 {
3228 return Err(GitError::InvalidObject(
3229 "zlib compression did not finish pack entry".into(),
3230 ));
3231 }
3232 Ok(())
3233 })
3234}
3235
3236fn zlib_compress_bound(len: usize) -> usize {
3237 len.saturating_add(len >> 12)
3238 .saturating_add(len >> 14)
3239 .saturating_add(len >> 25)
3240 .saturating_add(13)
3241}
3242
3243fn write_entry_header(out: &mut Vec<u8>, object_type: ObjectType, size: u64) {
3244 let type_code = match object_type {
3245 ObjectType::Commit => 1,
3246 ObjectType::Tree => 2,
3247 ObjectType::Blob => 3,
3248 ObjectType::Tag => 4,
3249 };
3250 write_pack_entry_header_kind(out, type_code, size);
3251}
3252
3253fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, mut size: u64) {
3254 let mut byte = (type_code << 4) | ((size as u8) & 0x0f);
3255 size >>= 4;
3256 if size != 0 {
3257 byte |= 0x80;
3258 }
3259 out.push(byte);
3260 while size != 0 {
3261 let mut byte = (size as u8) & 0x7f;
3262 size >>= 7;
3263 if size != 0 {
3264 byte |= 0x80;
3265 }
3266 out.push(byte);
3267 }
3268}
3269
3270fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: u64) -> Result<()> {
3271 if relative == 0 {
3272 return Err(GitError::InvalidFormat(
3273 "ofs-delta relative offset cannot be zero".into(),
3274 ));
3275 }
3276 let mut value = relative;
3277 let mut bytes = vec![(value & 0x7f) as u8];
3278 value >>= 7;
3279 while value != 0 {
3280 value -= 1;
3281 bytes.push(((value & 0x7f) as u8) | 0x80);
3282 value >>= 7;
3283 }
3284 bytes.reverse();
3285 out.extend_from_slice(&bytes);
3286 Ok(())
3287}
3288
3289fn next_byte(bytes: &[u8], offset: &mut usize) -> Result<u8> {
3290 let Some(byte) = bytes.get(*offset).copied() else {
3291 return Err(GitError::InvalidFormat(
3292 "truncated pack entry header".into(),
3293 ));
3294 };
3295 *offset += 1;
3296 Ok(byte)
3297}
3298
3299fn u16_be(bytes: &[u8]) -> u16 {
3300 u16::from_be_bytes([bytes[0], bytes[1]])
3301}
3302
3303fn u32_be(bytes: &[u8]) -> u32 {
3304 u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])
3305}
3306
3307fn u64_be(bytes: &[u8]) -> u64 {
3308 u64::from_be_bytes([
3309 bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
3310 ])
3311}
3312
3313fn checked_range(
3314 start: usize,
3315 count: usize,
3316 width: usize,
3317 total: usize,
3318) -> Result<std::ops::Range<usize>> {
3319 let len = count
3320 .checked_mul(width)
3321 .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
3322 let end = start
3323 .checked_add(len)
3324 .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
3325 if end > total {
3326 return Err(GitError::InvalidFormat("truncated pack index table".into()));
3327 }
3328 Ok(start..end)
3329}
3330
3331fn validate_position_permutation(positions: &[u32]) -> Result<()> {
3332 let mut seen = vec![false; positions.len()];
3333 for position in positions {
3334 let idx = *position as usize;
3335 if idx >= positions.len() {
3336 return Err(GitError::InvalidFormat(
3337 "reverse index position points past object table".into(),
3338 ));
3339 }
3340 if seen[idx] {
3341 return Err(GitError::InvalidFormat(
3342 "reverse index position is duplicated".into(),
3343 ));
3344 }
3345 seen[idx] = true;
3346 }
3347 Ok(())
3348}
3349
3350fn parse_midx_pack_names(
3351 bytes: &[u8],
3352 chunks: &[MultiPackIndexChunk],
3353 pack_count: usize,
3354 version: u8,
3355) -> Result<Vec<String>> {
3356 let data = midx_chunk_data(bytes, chunks, *b"PNAM", true)?
3357 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing PNAM chunk".into()))?;
3358 let mut names = Vec::with_capacity(pack_count);
3359 let mut offset = 0usize;
3360 while names.len() < pack_count {
3361 let Some(relative_end) = data[offset..].iter().position(|byte| *byte == 0) else {
3362 return Err(GitError::InvalidFormat(
3363 "multi-pack-index PNAM entry is unterminated".into(),
3364 ));
3365 };
3366 let name_bytes = &data[offset..offset + relative_end];
3367 if name_bytes.is_empty() {
3368 return Err(GitError::InvalidFormat(
3369 "multi-pack-index PNAM entry is empty".into(),
3370 ));
3371 }
3372 let name = std::str::from_utf8(name_bytes)
3373 .map_err(|err| GitError::InvalidFormat(err.to_string()))?;
3374 if name.bytes().any(|byte| matches!(byte, b'/' | b'\\')) {
3375 return Err(GitError::InvalidFormat(
3376 "multi-pack-index PNAM entry contains a path separator".into(),
3377 ));
3378 }
3379 names.push(name.to_string());
3380 offset += relative_end + 1;
3381 }
3382 let padding = &data[offset..];
3383 if padding.len() > 3 || padding.iter().any(|byte| *byte != 0) {
3384 return Err(GitError::InvalidFormat(
3385 "multi-pack-index PNAM padding is invalid".into(),
3386 ));
3387 }
3388 if version == 1 && names.windows(2).any(|pair| pair[0] > pair[1]) {
3389 return Err(GitError::InvalidFormat(
3390 "multi-pack-index v1 PNAM entries are not sorted".into(),
3391 ));
3392 }
3393 Ok(names)
3394}
3395
3396fn parse_midx_oid_fanout(
3397 bytes: &[u8],
3398 chunks: &[MultiPackIndexChunk],
3399) -> Result<([u32; 256], usize)> {
3400 let data = midx_chunk_data(bytes, chunks, *b"OIDF", true)?
3401 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDF chunk".into()))?;
3402 if data.len() != 256 * 4 {
3403 return Err(GitError::InvalidFormat(
3404 "multi-pack-index OIDF chunk has invalid length".into(),
3405 ));
3406 }
3407 let mut fanout = [0u32; 256];
3408 let mut previous = 0u32;
3409 for (idx, slot) in fanout.iter_mut().enumerate() {
3410 let start = idx * 4;
3411 *slot = u32_be(&data[start..start + 4]);
3412 if *slot < previous {
3413 return Err(GitError::InvalidFormat(
3414 "multi-pack-index OIDF fanout is not monotonic".into(),
3415 ));
3416 }
3417 previous = *slot;
3418 }
3419 Ok((fanout, fanout[255] as usize))
3420}
3421
3422fn parse_midx_object_ids(
3423 bytes: &[u8],
3424 chunks: &[MultiPackIndexChunk],
3425 format: ObjectFormat,
3426 object_count: usize,
3427 fanout: &[u32; 256],
3428) -> Result<Vec<ObjectId>> {
3429 let data = midx_chunk_data(bytes, chunks, *b"OIDL", true)?
3430 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
3431 let expected_len = object_count
3432 .checked_mul(format.raw_len())
3433 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into()))?;
3434 if data.len() != expected_len {
3435 return Err(GitError::InvalidFormat(
3436 "multi-pack-index OIDL chunk has invalid length".into(),
3437 ));
3438 }
3439
3440 let mut ids = Vec::with_capacity(object_count);
3441 let mut counts = [0u32; 256];
3442 let mut previous_oid: Option<ObjectId> = None;
3443 for idx in 0..object_count {
3444 let start = idx * format.raw_len();
3445 let oid = ObjectId::from_raw(format, &data[start..start + format.raw_len()])?;
3446 if let Some(previous) = &previous_oid
3447 && previous.as_bytes() >= oid.as_bytes()
3448 {
3449 return Err(GitError::InvalidFormat(
3450 "multi-pack-index OIDL object ids are not strictly sorted".into(),
3451 ));
3452 }
3453 counts[oid.as_bytes()[0] as usize] = counts[oid.as_bytes()[0] as usize]
3454 .checked_add(1)
3455 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
3456 previous_oid = Some(oid);
3457 ids.push(oid);
3458 }
3459
3460 let mut running = 0u32;
3461 for (idx, count) in counts.iter().enumerate() {
3462 running = running
3463 .checked_add(*count)
3464 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
3465 if fanout[idx] != running {
3466 return Err(GitError::InvalidFormat(
3467 "multi-pack-index OIDF fanout does not match OIDL".into(),
3468 ));
3469 }
3470 }
3471 Ok(ids)
3472}
3473
3474fn parse_midx_object_offsets(
3475 bytes: &[u8],
3476 chunks: &[MultiPackIndexChunk],
3477 object_ids: Vec<ObjectId>,
3478 pack_count: u32,
3479) -> Result<Vec<MultiPackIndexEntry>> {
3480 let data = midx_chunk_data(bytes, chunks, *b"OOFF", true)?
3481 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
3482 let expected_len = object_ids
3483 .len()
3484 .checked_mul(8)
3485 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into()))?;
3486 if data.len() != expected_len {
3487 return Err(GitError::InvalidFormat(
3488 "multi-pack-index OOFF chunk has invalid length".into(),
3489 ));
3490 }
3491 let large_offsets = midx_chunk_data(bytes, chunks, *b"LOFF", false)?;
3492 if let Some(large_offsets) = large_offsets
3493 && large_offsets.len() % 8 != 0
3494 {
3495 return Err(GitError::InvalidFormat(
3496 "multi-pack-index LOFF chunk has invalid length".into(),
3497 ));
3498 }
3499
3500 let mut entries = Vec::with_capacity(object_ids.len());
3501 for (idx, oid) in object_ids.into_iter().enumerate() {
3502 let start = idx * 8;
3503 let pack_int_id = u32_be(&data[start..start + 4]);
3504 if pack_int_id >= pack_count {
3505 return Err(GitError::InvalidFormat(
3506 "multi-pack-index object points past pack table".into(),
3507 ));
3508 }
3509 let raw_offset = u32_be(&data[start + 4..start + 8]);
3510 let offset = if raw_offset & 0x8000_0000 == 0 {
3511 u64::from(raw_offset)
3512 } else {
3513 let Some(large_offsets) = large_offsets else {
3514 return Err(GitError::InvalidFormat(
3515 "multi-pack-index large offset missing LOFF chunk".into(),
3516 ));
3517 };
3518 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
3519 let large_start = large_idx.checked_mul(8).ok_or_else(|| {
3520 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
3521 })?;
3522 let large_end = large_start.checked_add(8).ok_or_else(|| {
3523 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
3524 })?;
3525 if large_end > large_offsets.len() {
3526 return Err(GitError::InvalidFormat(
3527 "multi-pack-index large offset points past LOFF chunk".into(),
3528 ));
3529 }
3530 u64_be(&large_offsets[large_start..large_end])
3531 };
3532 entries.push(MultiPackIndexEntry {
3533 oid,
3534 pack_int_id,
3535 offset,
3536 });
3537 }
3538 Ok(entries)
3539}
3540
3541fn parse_midx_reverse_index(
3542 bytes: &[u8],
3543 chunks: &[MultiPackIndexChunk],
3544 object_count: usize,
3545) -> Result<Option<Vec<u32>>> {
3546 let Some(data) = midx_chunk_data(bytes, chunks, *b"RIDX", false)? else {
3547 return Ok(None);
3548 };
3549 let expected_len = object_count
3550 .checked_mul(4)
3551 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index RIDX chunk overflow".into()))?;
3552 if data.len() != expected_len {
3553 return Err(GitError::InvalidFormat(
3554 "multi-pack-index RIDX chunk has invalid length".into(),
3555 ));
3556 }
3557 let mut positions = Vec::with_capacity(object_count);
3558 for idx in 0..object_count {
3559 let start = idx * 4;
3560 positions.push(u32_be(&data[start..start + 4]));
3561 }
3562 validate_position_permutation(&positions)?;
3563 Ok(Some(positions))
3564}
3565
3566fn parse_midx_bitmapped_packs(
3567 bytes: &[u8],
3568 chunks: &[MultiPackIndexChunk],
3569 pack_count: usize,
3570 object_count: usize,
3571) -> Result<Option<Vec<MultiPackBitmapPack>>> {
3572 let Some(data) = midx_chunk_data(bytes, chunks, *b"BTMP", false)? else {
3573 return Ok(None);
3574 };
3575 let expected_len = pack_count
3576 .checked_mul(8)
3577 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index BTMP chunk overflow".into()))?;
3578 if data.len() != expected_len {
3579 return Err(GitError::InvalidFormat(
3580 "multi-pack-index BTMP chunk has invalid length".into(),
3581 ));
3582 }
3583 let mut entries = Vec::with_capacity(pack_count);
3584 for idx in 0..pack_count {
3585 let start = idx * 8;
3586 let bitmap_pos = u32_be(&data[start..start + 4]);
3587 let bitmap_nr = u32_be(&data[start + 4..start + 8]);
3588 let bitmap_end = u64::from(bitmap_pos)
3589 .checked_add(u64::from(bitmap_nr))
3590 .ok_or_else(|| {
3591 GitError::InvalidFormat("multi-pack-index BTMP range overflow".into())
3592 })?;
3593 if bitmap_end > object_count as u64 {
3594 return Err(GitError::InvalidFormat(
3595 "multi-pack-index BTMP range points past object table".into(),
3596 ));
3597 }
3598 entries.push(MultiPackBitmapPack {
3599 bitmap_pos,
3600 bitmap_nr,
3601 });
3602 }
3603 Ok(Some(entries))
3604}
3605
3606fn midx_chunk_data<'a>(
3607 bytes: &'a [u8],
3608 chunks: &[MultiPackIndexChunk],
3609 id: [u8; 4],
3610 required: bool,
3611) -> Result<Option<&'a [u8]>> {
3612 let Some(chunk) = chunks.iter().find(|chunk| chunk.id == id) else {
3613 if required {
3614 return Err(GitError::InvalidFormat(format!(
3615 "multi-pack-index missing {} chunk",
3616 std::str::from_utf8(&id).unwrap_or("required")
3617 )));
3618 }
3619 return Ok(None);
3620 };
3621 let start = usize::try_from(chunk.offset)
3622 .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk offset overflow".into()))?;
3623 let len = usize::try_from(chunk.len)
3624 .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk length overflow".into()))?;
3625 let end = start
3626 .checked_add(len)
3627 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index chunk range overflow".into()))?;
3628 let Some(data) = bytes.get(start..end) else {
3629 return Err(GitError::InvalidFormat(
3630 "multi-pack-index chunk extends past file".into(),
3631 ));
3632 };
3633 Ok(Some(data))
3634}
3635
3636fn hash_function_id(format: ObjectFormat) -> u32 {
3637 match format {
3638 ObjectFormat::Sha1 => 1,
3639 ObjectFormat::Sha256 => 2,
3640 }
3641}
3642
3643const EWAH_MAX_RUNNING_LEN: u64 = 0xffff_ffff;
3646
3647const EWAH_MAX_LITERAL_LEN: u64 = 0x7fff_ffff;
3650
3651const EWAH_ALL_ONES: u64 = u64::MAX;
3653
3654impl EwahBitmap {
3655 pub fn from_words(bit_size: u32, words: &[u64]) -> Result<Self> {
3669 let required_words = bit_size.div_ceil(64) as usize;
3670 if required_words > words.len() {
3671 return Err(GitError::InvalidFormat(format!(
3672 "EWAH bit_size {bit_size} requires {required_words} words but only {} supplied",
3673 words.len()
3674 )));
3675 }
3676 let significant = &words[..required_words];
3679 let mut builder = EwahBuilder::new(bit_size);
3680 for &word in significant {
3681 if word == 0 {
3682 builder.add_empty_words(false, 1);
3683 } else if word == EWAH_ALL_ONES {
3684 builder.add_empty_words(true, 1);
3685 } else {
3686 builder.add_literal(word);
3687 }
3688 }
3689 builder.finish()
3690 }
3691
3692 pub fn from_positions(bit_size: u32, positions: &[u32]) -> Result<Self> {
3698 let word_count = bit_size.div_ceil(64) as usize;
3699 let mut words = vec![0u64; word_count];
3700 for &position in positions {
3701 if position >= bit_size {
3702 return Err(GitError::InvalidFormat(format!(
3703 "EWAH bit position {position} out of range for bit_size {bit_size}"
3704 )));
3705 }
3706 let word_index = (position / 64) as usize;
3707 let bit_index = position % 64;
3708 words[word_index] |= 1u64 << bit_index;
3709 }
3710 Self::from_words(bit_size, &words)
3711 }
3712
3713 pub fn empty() -> Self {
3716 Self {
3717 bit_size: 0,
3718 words: Vec::new(),
3719 rlw_position: 0,
3720 }
3721 }
3722
3723 pub fn to_words(&self) -> Result<Vec<u64>> {
3729 let mut out = Vec::new();
3730 let mut word_idx = 0usize;
3731 while word_idx < self.words.len() {
3732 let rlw = self.words[word_idx];
3733 let run_bit = rlw & 1;
3734 let run_words = (rlw >> 1) & EWAH_MAX_RUNNING_LEN;
3735 let literal_words = (rlw >> 33) as usize;
3736 word_idx += 1;
3737 let fill = if run_bit == 1 { EWAH_ALL_ONES } else { 0 };
3738 for _ in 0..run_words {
3739 out.push(fill);
3740 }
3741 let literal_end = word_idx
3742 .checked_add(literal_words)
3743 .filter(|end| *end <= self.words.len())
3744 .ok_or_else(|| {
3745 GitError::InvalidFormat("EWAH literal words extend past word table".into())
3746 })?;
3747 out.extend_from_slice(&self.words[word_idx..literal_end]);
3748 word_idx = literal_end;
3749 }
3750 let required_words = (self.bit_size as usize).div_ceil(64);
3751 if out.len() < required_words {
3752 out.resize(required_words, 0);
3753 }
3754 out.truncate(required_words);
3755 Ok(out)
3756 }
3757
3758 pub fn to_positions(&self) -> Result<Vec<u32>> {
3760 let words = self.to_words()?;
3761 let mut positions = Vec::new();
3762 for (word_index, word) in words.iter().enumerate() {
3763 let mut remaining = *word;
3764 while remaining != 0 {
3765 let bit = remaining.trailing_zeros();
3766 let position = (word_index as u64) * 64 + u64::from(bit);
3767 if position < u64::from(self.bit_size) {
3768 positions.push(position as u32);
3770 }
3771 remaining &= remaining - 1;
3772 }
3773 }
3774 Ok(positions)
3775 }
3776
3777 pub fn to_bytes(&self) -> Vec<u8> {
3781 let mut out = Vec::with_capacity(12 + self.words.len() * 8);
3782 self.append_bytes(&mut out);
3783 out
3784 }
3785
3786 fn append_bytes(&self, out: &mut Vec<u8>) {
3787 out.extend_from_slice(&self.bit_size.to_be_bytes());
3788 out.extend_from_slice(&(self.words.len() as u32).to_be_bytes());
3789 for word in &self.words {
3790 out.extend_from_slice(&word.to_be_bytes());
3791 }
3792 out.extend_from_slice(&self.rlw_position.to_be_bytes());
3793 }
3794}
3795
3796struct EwahBuilder {
3804 bit_size: u32,
3805 words: Vec<u64>,
3806 rlw_position: usize,
3807}
3808
3809impl EwahBuilder {
3810 fn new(bit_size: u32) -> Self {
3811 Self {
3813 bit_size,
3814 words: vec![0u64],
3815 rlw_position: 0,
3816 }
3817 }
3818
3819 fn rlw(&self) -> u64 {
3820 self.words[self.rlw_position]
3821 }
3822
3823 fn set_rlw(&mut self, value: u64) {
3824 self.words[self.rlw_position] = value;
3825 }
3826
3827 fn rlw_running_len(&self) -> u64 {
3828 (self.rlw() >> 1) & EWAH_MAX_RUNNING_LEN
3829 }
3830
3831 fn rlw_running_bit(&self) -> bool {
3832 self.rlw() & 1 == 1
3833 }
3834
3835 fn rlw_literal_len(&self) -> u64 {
3836 self.rlw() >> 33
3837 }
3838
3839 fn set_running_bit(&mut self, bit: bool) {
3840 let mut value = self.rlw();
3841 value &= !1;
3842 value |= u64::from(bit);
3843 self.set_rlw(value);
3844 }
3845
3846 fn set_running_len(&mut self, len: u64) {
3847 let mut value = self.rlw();
3848 value &= !(EWAH_MAX_RUNNING_LEN << 1);
3849 value |= (len & EWAH_MAX_RUNNING_LEN) << 1;
3850 self.set_rlw(value);
3851 }
3852
3853 fn set_literal_len(&mut self, len: u64) {
3854 let mut value = self.rlw();
3855 value &= (1u64 << 33) - 1;
3856 value |= (len & EWAH_MAX_LITERAL_LEN) << 33;
3857 self.set_rlw(value);
3858 }
3859
3860 fn push_rlw(&mut self) {
3862 self.rlw_position = self.words.len();
3863 self.words.push(0);
3864 }
3865
3866 fn add_empty_words(&mut self, value: bool, mut number: u64) {
3874 while number > 0 {
3875 let can_extend = self.rlw_literal_len() == 0
3879 && (self.rlw_running_len() == 0 || self.rlw_running_bit() == value)
3880 && self.rlw_running_len() < EWAH_MAX_RUNNING_LEN;
3881 if !can_extend {
3882 self.push_rlw();
3883 }
3884 if self.rlw_running_len() == 0 {
3885 self.set_running_bit(value);
3886 }
3887 let available = EWAH_MAX_RUNNING_LEN - self.rlw_running_len();
3888 let take = available.min(number);
3889 self.set_running_len(self.rlw_running_len() + take);
3890 number -= take;
3891 }
3892 }
3893
3894 fn add_literal(&mut self, word: u64) {
3897 if self.rlw_literal_len() >= EWAH_MAX_LITERAL_LEN {
3898 self.push_rlw();
3899 }
3900 let literal_len = self.rlw_literal_len();
3901 self.set_literal_len(literal_len + 1);
3902 self.words.push(word);
3903 }
3904
3905 fn finish(self) -> Result<EwahBitmap> {
3906 let rlw_position = u32::try_from(self.rlw_position)
3907 .map_err(|_| GitError::InvalidFormat("EWAH RLW position overflow".into()))?;
3908 if self.words.len() > u32::MAX as usize {
3909 return Err(GitError::InvalidFormat("EWAH word table overflow".into()));
3910 }
3911 Ok(EwahBitmap {
3912 bit_size: self.bit_size,
3913 words: self.words,
3914 rlw_position,
3915 })
3916 }
3917}
3918
3919#[derive(Debug, Clone)]
3932pub struct PackBitmapWriter {
3933 format: ObjectFormat,
3934 pack_checksum: ObjectId,
3935 object_count: u32,
3936 commit_positions: Vec<u32>,
3937 tree_positions: Vec<u32>,
3938 blob_positions: Vec<u32>,
3939 tag_positions: Vec<u32>,
3940 name_hash_cache: Option<Vec<u32>>,
3941 selected: Vec<SelectedCommit>,
3942}
3943
3944#[derive(Debug, Clone)]
3945struct SelectedCommit {
3946 commit_index_position: u32,
3950 flags: u8,
3951 reachable: Vec<u32>,
3952}
3953
3954impl PackBitmapWriter {
3955 pub const FLAG_NONE: u8 = 0;
3959
3960 pub fn new(
3967 format: ObjectFormat,
3968 pack_checksum: ObjectId,
3969 object_types: &[ObjectType],
3970 ) -> Result<Self> {
3971 if object_types.len() > u32::MAX as usize {
3972 return Err(GitError::InvalidFormat(
3973 "too many objects for a pack bitmap".into(),
3974 ));
3975 }
3976 if pack_checksum.format() != format {
3977 return Err(GitError::InvalidObjectId(
3978 "pack checksum format does not match bitmap format".into(),
3979 ));
3980 }
3981 let object_count = object_types.len() as u32;
3982 let mut commit_positions = Vec::new();
3983 let mut tree_positions = Vec::new();
3984 let mut blob_positions = Vec::new();
3985 let mut tag_positions = Vec::new();
3986 for (index, object_type) in object_types.iter().enumerate() {
3987 let position = index as u32;
3988 match object_type {
3989 ObjectType::Commit => commit_positions.push(position),
3990 ObjectType::Tree => tree_positions.push(position),
3991 ObjectType::Blob => blob_positions.push(position),
3992 ObjectType::Tag => tag_positions.push(position),
3993 }
3994 }
3995 Ok(Self {
3996 format,
3997 pack_checksum,
3998 object_count,
3999 commit_positions,
4000 tree_positions,
4001 blob_positions,
4002 tag_positions,
4003 name_hash_cache: None,
4004 selected: Vec::new(),
4005 })
4006 }
4007
4008 pub fn with_name_hash_cache(mut self, cache: Vec<u32>) -> Result<Self> {
4014 if cache.len() != self.object_count as usize {
4015 return Err(GitError::InvalidFormat(format!(
4016 "name hash cache has {} entries but pack has {} objects",
4017 cache.len(),
4018 self.object_count
4019 )));
4020 }
4021 self.name_hash_cache = Some(cache);
4022 Ok(self)
4023 }
4024
4025 pub fn add_commit(
4037 &mut self,
4038 commit_position: u32,
4039 commit_index_position: u32,
4040 reachable: &[u32],
4041 ) -> Result<()> {
4042 if commit_position >= self.object_count {
4043 return Err(GitError::InvalidFormat(format!(
4044 "commit position {commit_position} out of range for {} objects",
4045 self.object_count
4046 )));
4047 }
4048 if commit_index_position >= self.object_count {
4049 return Err(GitError::InvalidFormat(format!(
4050 "commit index position {commit_index_position} out of range for {} objects",
4051 self.object_count
4052 )));
4053 }
4054 if !self.commit_positions.contains(&commit_position) {
4055 return Err(GitError::InvalidFormat(format!(
4056 "bitmap commit position {commit_position} is not a commit object"
4057 )));
4058 }
4059 for &position in reachable {
4060 if position >= self.object_count {
4061 return Err(GitError::InvalidFormat(format!(
4062 "reachable position {position} out of range for {} objects",
4063 self.object_count
4064 )));
4065 }
4066 }
4067 let mut reachable = reachable.to_vec();
4068 reachable.push(commit_position);
4069 self.selected.push(SelectedCommit {
4070 commit_index_position,
4071 flags: Self::FLAG_NONE,
4072 reachable,
4073 });
4074 Ok(())
4075 }
4076
4077 pub fn build(&self) -> Result<PackBitmapIndex> {
4084 let commits = EwahBitmap::from_positions(self.object_count, &self.commit_positions)?;
4085 let trees = EwahBitmap::from_positions(self.object_count, &self.tree_positions)?;
4086 let blobs = EwahBitmap::from_positions(self.object_count, &self.blob_positions)?;
4087 let tags = EwahBitmap::from_positions(self.object_count, &self.tag_positions)?;
4088
4089 let mut entries = Vec::with_capacity(self.selected.len());
4090 for selected in &self.selected {
4091 let bitmap = EwahBitmap::from_positions(self.object_count, &selected.reachable)?;
4092 entries.push(PackBitmapEntry {
4093 object_position: selected.commit_index_position,
4094 xor_offset: 0,
4095 flags: selected.flags,
4096 bitmap,
4097 });
4098 }
4099
4100 let mut options = PackBitmapIndex::OPTION_FULL_DAG;
4101 if self.name_hash_cache.is_some() {
4102 options |= PackBitmapIndex::OPTION_HASH_CACHE;
4103 }
4104
4105 let placeholder_checksum = ObjectId::null(self.format);
4110 Ok(PackBitmapIndex {
4111 version: 1,
4112 format: self.format,
4113 options,
4114 pack_checksum: self.pack_checksum.clone(),
4115 index_checksum: placeholder_checksum,
4116 type_bitmaps: PackBitmapTypeBitmaps {
4117 commits,
4118 trees,
4119 blobs,
4120 tags,
4121 },
4122 entries,
4123 name_hash_cache: self.name_hash_cache.clone(),
4124 })
4125 }
4126
4127 pub fn write(&self) -> Result<Vec<u8>> {
4130 self.build()?.write()
4131 }
4132}
4133
4134impl PackBitmapIndex {
4135 pub fn write(&self) -> Result<Vec<u8>> {
4149 if self.version != 1 {
4150 return Err(GitError::Unsupported(format!(
4151 "bitmap index version {}",
4152 self.version
4153 )));
4154 }
4155 let known_options = Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE;
4156 if self.options & !known_options != 0 {
4157 return Err(GitError::Unsupported(format!(
4158 "bitmap index options {:#06x}",
4159 self.options & !known_options
4160 )));
4161 }
4162 if self.pack_checksum.format() != self.format {
4163 return Err(GitError::InvalidObjectId(
4164 "bitmap pack checksum format does not match index format".into(),
4165 ));
4166 }
4167 if self.entries.len() > u32::MAX as usize {
4168 return Err(GitError::InvalidFormat(
4169 "too many bitmap index entries".into(),
4170 ));
4171 }
4172 let want_cache = self.options & Self::OPTION_HASH_CACHE != 0;
4173 match (&self.name_hash_cache, want_cache) {
4174 (Some(_), false) => {
4175 return Err(GitError::InvalidFormat(
4176 "name hash cache present without OPTION_HASH_CACHE".into(),
4177 ));
4178 }
4179 (None, true) => {
4180 return Err(GitError::InvalidFormat(
4181 "OPTION_HASH_CACHE set without a name hash cache".into(),
4182 ));
4183 }
4184 _ => {}
4185 }
4186
4187 let mut out = Vec::new();
4188 out.extend_from_slice(b"BITM");
4189 out.extend_from_slice(&self.version.to_be_bytes());
4190 out.extend_from_slice(&self.options.to_be_bytes());
4191 out.extend_from_slice(&(self.entries.len() as u32).to_be_bytes());
4192 out.extend_from_slice(self.pack_checksum.as_bytes());
4193
4194 self.type_bitmaps.commits.append_bytes(&mut out);
4195 self.type_bitmaps.trees.append_bytes(&mut out);
4196 self.type_bitmaps.blobs.append_bytes(&mut out);
4197 self.type_bitmaps.tags.append_bytes(&mut out);
4198
4199 for (idx, entry) in self.entries.iter().enumerate() {
4200 if entry.xor_offset as usize > idx {
4201 return Err(GitError::InvalidFormat(
4202 "bitmap index entry has invalid XOR offset".into(),
4203 ));
4204 }
4205 out.extend_from_slice(&entry.object_position.to_be_bytes());
4206 out.push(entry.xor_offset);
4207 out.push(entry.flags);
4208 entry.bitmap.append_bytes(&mut out);
4209 }
4210
4211 if let Some(cache) = &self.name_hash_cache {
4212 for value in cache {
4213 out.extend_from_slice(&value.to_be_bytes());
4214 }
4215 }
4216
4217 let checksum = sley_core::digest_bytes(self.format, &out)?;
4218 out.extend_from_slice(checksum.as_bytes());
4219 Ok(out)
4220 }
4221}
4222
4223pub fn write_bitmap(
4232 format: ObjectFormat,
4233 pack_checksum: ObjectId,
4234 object_types: &[ObjectType],
4235 commits: &[(u32, u32, Vec<u32>)],
4236 name_hash_cache: Option<Vec<u32>>,
4237) -> Result<Vec<u8>> {
4238 let mut writer = PackBitmapWriter::new(format, pack_checksum, object_types)?;
4239 if let Some(cache) = name_hash_cache {
4240 writer = writer.with_name_hash_cache(cache)?;
4241 }
4242 for (commit_position, commit_index_position, reachable) in commits {
4243 writer.add_commit(*commit_position, *commit_index_position, reachable)?;
4244 }
4245 writer.write()
4246}
4247
4248#[cfg(test)]
4249mod tests {
4250 use super::*;
4251 use flate2::Compression;
4252 use flate2::read::ZlibDecoder;
4253 use flate2::write::ZlibEncoder;
4254 use std::fs;
4255 use std::io::Read;
4256 use std::io::Write;
4257 use std::path::{Path, PathBuf};
4258 use std::process::Command;
4259 use std::time::{SystemTime, UNIX_EPOCH};
4260
4261 fn delta_pack_options(prefer_ofs_delta: bool) -> PackWriteOptions {
4262 PackWriteOptions::new()
4263 .with_prefer_ofs_delta(prefer_ofs_delta)
4264 .with_reorder(false)
4265 }
4266
4267 #[test]
4268 fn parses_single_blob_pack() {
4269 let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
4270 let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
4271 assert_eq!(parsed.version, 2);
4272 assert_eq!(parsed.entries.len(), 1);
4273 let object = &parsed.entries[0].object;
4274 assert_eq!(object.object_type, ObjectType::Blob);
4275 assert_eq!(object.body, b"hello\n");
4276 assert_eq!(
4277 parsed.entries[0].entry.oid.to_hex(),
4278 "ce013625030ba8dba906f756967f9e9ca394464a"
4279 );
4280 }
4281
4282 #[test]
4283 fn parses_single_blob_pack_sha256() {
4284 let pack = single_object_pack(ObjectFormat::Sha256, ObjectType::Blob, b"hello\n");
4285 let parsed =
4286 PackFile::parse(&pack, ObjectFormat::Sha256).expect("test operation should succeed");
4287 assert_eq!(parsed.version, 2);
4288 assert_eq!(parsed.entries.len(), 1);
4289 let object = &parsed.entries[0].object;
4290 assert_eq!(object.object_type, ObjectType::Blob);
4291 assert_eq!(object.body, b"hello\n");
4292 assert_eq!(
4293 parsed.entries[0].entry.oid,
4294 object
4295 .object_id(ObjectFormat::Sha256)
4296 .expect("test operation should succeed")
4297 );
4298 }
4299
4300 #[test]
4301 fn parses_bundle_pack_payload_with_bundle_format() {
4302 let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
4303 let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"bundle\n")
4304 .expect("test operation should succeed");
4305 let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
4306 .into_bytes()
4307 .into_iter()
4308 .chain(pack)
4309 .collect::<Vec<_>>();
4310 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
4311 .expect("test operation should succeed");
4312
4313 let parsed = PackFile::parse_bundle(&bundle).expect("test operation should succeed");
4314 assert_eq!(parsed.entries.len(), 1);
4315 assert_eq!(parsed.entries[0].object.object_type, ObjectType::Blob);
4316 assert_eq!(parsed.entries[0].object.body, b"bundle\n");
4317 }
4318
4319 fn lying_size_blob_pack(format: ObjectFormat, declared_size: u64, real_body: &[u8]) -> Vec<u8> {
4325 let mut pack = Vec::new();
4326 pack.extend_from_slice(b"PACK");
4327 pack.extend_from_slice(&2u32.to_be_bytes());
4328 pack.extend_from_slice(&1u32.to_be_bytes());
4329 write_pack_entry_header_kind(&mut pack, 3, declared_size);
4331 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
4332 encoder
4333 .write_all(real_body)
4334 .expect("test operation should succeed");
4335 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
4336 let checksum =
4337 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
4338 pack.extend_from_slice(checksum.as_bytes());
4339 pack
4340 }
4341
4342 #[test]
4355 fn rejects_decompression_bomb_header_without_oom() {
4356 for &declared in &[u64::MAX, 100 * 1024 * 1024 * 1024, u64::from(u32::MAX) * 4] {
4357 let pack = lying_size_blob_pack(ObjectFormat::Sha1, declared, b"tiny\n");
4358 let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
4359 let result = handle.join();
4360 assert!(
4362 result.is_ok(),
4363 "parsing a bomb header (declared={declared}) panicked instead of erroring cleanly"
4364 );
4365 let parse_result = result.expect("parse thread should not panic on a bomb header");
4367 assert!(
4368 parse_result.is_err(),
4369 "bomb header (declared={declared}) should be rejected as invalid"
4370 );
4371 }
4372 }
4373
4374 fn lying_result_size_delta_pack(
4381 format: ObjectFormat,
4382 declared_result_size: u64,
4383 delta_kind: DeltaKind,
4384 ) -> Vec<u8> {
4385 let base = b"hello";
4386 let result = b"hello world"; let mut delta = Vec::new();
4390 write_delta_varint(&mut delta, base.len() as u64);
4391 write_delta_varint(&mut delta, declared_result_size);
4392 let suffix = &result[base.len()..];
4394 delta.push(0x90); delta.push(base.len() as u8);
4396 delta.push(suffix.len() as u8);
4397 delta.extend_from_slice(suffix);
4398
4399 let mut pack = Vec::new();
4400 pack.extend_from_slice(b"PACK");
4401 pack.extend_from_slice(&2u32.to_be_bytes());
4402 pack.extend_from_slice(&2u32.to_be_bytes());
4403
4404 let base_offset = pack.len();
4405 write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
4406 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
4407 encoder
4408 .write_all(base)
4409 .expect("test operation should succeed");
4410 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
4411
4412 let delta_offset = pack.len();
4413 write_pack_entry_header_kind(
4414 &mut pack,
4415 match delta_kind {
4416 DeltaKind::Offset => 6,
4417 DeltaKind::Ref => 7,
4418 },
4419 delta.len() as u64,
4420 );
4421 match delta_kind {
4422 DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
4423 DeltaKind::Ref => {
4424 let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
4425 .expect("test operation should succeed");
4426 pack.extend_from_slice(base_oid.as_bytes());
4427 }
4428 }
4429 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
4430 encoder
4431 .write_all(&delta)
4432 .expect("test operation should succeed");
4433 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
4434
4435 let checksum =
4436 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
4437 pack.extend_from_slice(checksum.as_bytes());
4438 pack
4439 }
4440
4441 #[test]
4451 fn rejects_delta_result_size_bomb_without_oom() {
4452 let bombs: &[u64] = &[u64::MAX, 1024 * 1024 * 1024 * 1024];
4453 for &declared in bombs {
4454 for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
4455 let pack = lying_result_size_delta_pack(ObjectFormat::Sha1, declared, delta_kind);
4456 let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
4457 let join_result = handle.join();
4458 assert!(
4459 join_result.is_ok(),
4460 "delta bomb (declared={declared}, kind={delta_kind:?}) panicked/aborted \
4461 instead of erroring cleanly"
4462 );
4463 let parse_result =
4464 join_result.expect("parse thread should not panic on a delta bomb");
4465 assert!(
4466 parse_result.is_err(),
4467 "delta bomb (declared={declared}, kind={delta_kind:?}) should be rejected \
4468 as invalid (result.len() != declared)"
4469 );
4470 }
4471 }
4472 }
4473
4474 #[test]
4478 fn applies_legitimate_delta_after_result_size_bound() {
4479 for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
4480 let base = b"hello";
4481 let result = b"hello world";
4482 let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, delta_kind);
4483 let parsed = PackFile::parse_sha1(&pack).expect("legitimate delta should resolve");
4484 assert_eq!(parsed.entries.len(), 2);
4485 assert_eq!(parsed.entries[0].object.body, base);
4486 assert_eq!(parsed.entries[1].object.body, result);
4487 }
4488 }
4489
4490 #[test]
4491 fn bounded_inflate_reserve_caps_attacker_declared_size() {
4492 assert_eq!(bounded_inflate_reserve(u64::MAX as usize, 10), 10 * 1032);
4494 assert_eq!(
4496 bounded_inflate_reserve(usize::MAX, usize::MAX),
4497 MAX_INFLATE_RESERVE
4498 );
4499 assert_eq!(bounded_inflate_reserve(1000, 500), 1000);
4503 assert_eq!(bounded_inflate_reserve(0, 0), 64);
4505 }
4506
4507 #[test]
4508 fn rejects_bundle_pack_payload_with_wrong_object_format() {
4509 let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
4510 let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha256, "blob", b"bundle\n")
4511 .expect("test operation should succeed");
4512 let bundle_bytes =
4513 format!("# v3 git bundle\n@object-format=sha256\n{oid} refs/heads/main\n\n")
4514 .into_bytes()
4515 .into_iter()
4516 .chain(pack)
4517 .collect::<Vec<_>>();
4518 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
4519 .expect("test operation should succeed");
4520
4521 assert!(PackFile::parse_bundle(&bundle).is_err());
4522 }
4523
4524 #[test]
4525 fn writes_pack_and_index_that_round_trip() {
4526 let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
4527 let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
4528 .expect("test operation should succeed");
4529 let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
4530 let index =
4531 PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
4532 let oid = object
4533 .object_id(ObjectFormat::Sha1)
4534 .expect("test operation should succeed");
4535 assert_eq!(pack.entries[0].object, object);
4536 assert_eq!(index.pack_checksum, pack.checksum);
4537 assert_eq!(
4538 index
4539 .find(&oid)
4540 .expect("test operation should succeed")
4541 .offset,
4542 12
4543 );
4544 }
4545
4546 #[test]
4547 fn writes_sha256_pack_and_index_that_round_trip() {
4548 let object = EncodedObject::new(ObjectType::Blob, b"hello sha256\n".to_vec());
4549 let written =
4550 PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
4551 .expect("test operation should succeed");
4552 let pack = PackFile::parse(&written.pack, ObjectFormat::Sha256)
4553 .expect("test operation should succeed");
4554 let index = PackIndex::parse(&written.index, ObjectFormat::Sha256)
4555 .expect("test operation should succeed");
4556 let oid = object
4557 .object_id(ObjectFormat::Sha256)
4558 .expect("test operation should succeed");
4559 assert_eq!(pack.entries[0].object, object);
4560 assert_eq!(index.pack_checksum, pack.checksum);
4561 assert_eq!(index.pack_checksum.format(), ObjectFormat::Sha256);
4562 assert_eq!(index.index_checksum.format(), ObjectFormat::Sha256);
4563 assert_eq!(
4564 index
4565 .find(&oid)
4566 .expect("test operation should succeed")
4567 .offset,
4568 12
4569 );
4570 }
4571
4572 #[test]
4573 fn indexes_existing_sha256_pack_bytes() {
4574 let object = EncodedObject::new(ObjectType::Blob, b"index raw sha256 pack\n".to_vec());
4575 let written =
4576 PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
4577 .expect("test operation should succeed");
4578
4579 let indexed = PackIndex::write_v2_for_pack(&written.pack, ObjectFormat::Sha256)
4580 .expect("test operation should succeed");
4581 let index = PackIndex::parse(&indexed.index, ObjectFormat::Sha256)
4582 .expect("test operation should succeed");
4583
4584 assert_eq!(indexed.pack_checksum, written.checksum);
4585 assert_eq!(indexed.entries, written.entries);
4586 assert_eq!(index.pack_checksum, written.checksum);
4587 assert_eq!(index.entries, written.entries);
4588 }
4589
4590 #[test]
4591 fn indexes_existing_delta_pack_bytes() {
4592 let (base, changed) = similar_blob_objects();
4593 let options = delta_pack_options(true);
4594 let written = PackFile::write_packed_with_options(
4595 &[base, changed.clone()],
4596 ObjectFormat::Sha1,
4597 &options,
4598 )
4599 .expect("test operation should succeed");
4600
4601 let indexed = PackIndex::write_v2_for_pack_sha1(&written.pack)
4602 .expect("test operation should succeed");
4603 let index =
4604 PackIndex::parse_v2_sha1(&indexed.index).expect("test operation should succeed");
4605 let changed_oid = changed
4606 .object_id(ObjectFormat::Sha1)
4607 .expect("test operation should succeed");
4608
4609 assert_eq!(indexed.pack_checksum, written.checksum);
4610 assert_eq!(indexed.entries, written.entries);
4611 assert_eq!(
4612 index
4613 .find(&changed_oid)
4614 .expect("test operation should succeed")
4615 .offset,
4616 written.entries[1].offset
4617 );
4618 assert_eq!(
4619 index
4620 .find(&changed_oid)
4621 .expect("test operation should succeed")
4622 .crc32,
4623 written.entries[1].crc32
4624 );
4625 }
4626
4627 #[test]
4628 fn writes_ref_delta_pack_and_index_that_round_trip() {
4629 let (base, changed) = similar_blob_objects();
4630 let options = delta_pack_options(false);
4631 let written = PackFile::write_packed_with_options(
4632 &[base.clone(), changed.clone()],
4633 ObjectFormat::Sha1,
4634 &options,
4635 )
4636 .expect("test operation should succeed");
4637 let mut second_offset = written.entries[1].offset as usize;
4638 let header = parse_entry_header(&written.pack, &mut second_offset)
4639 .expect("test operation should succeed");
4640 assert_eq!(header.kind, PackObjectKind::RefDelta);
4641
4642 let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
4643 let index =
4644 PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
4645 let oid = changed
4646 .object_id(ObjectFormat::Sha1)
4647 .expect("test operation should succeed");
4648 assert_eq!(pack.entries[0].object, base);
4649 assert_eq!(pack.entries[1].object, changed);
4650 assert_eq!(index.pack_checksum, pack.checksum);
4651 assert_eq!(
4652 index
4653 .find(&oid)
4654 .expect("test operation should succeed")
4655 .offset,
4656 written.entries[1].offset
4657 );
4658 }
4659
4660 #[test]
4661 fn read_object_at_matches_full_parse_for_ofs_delta_pack() {
4662 let (base, changed) = similar_blob_objects();
4663 let options = delta_pack_options(true);
4664 let written = PackFile::write_packed_with_options(
4665 &[base, changed.clone()],
4666 ObjectFormat::Sha1,
4667 &options,
4668 )
4669 .expect("test operation should succeed");
4670 let mut second = written.entries[1].offset as usize;
4672 assert_eq!(
4673 parse_entry_header(&written.pack, &mut second)
4674 .expect("test operation should succeed")
4675 .kind,
4676 PackObjectKind::OfsDelta
4677 );
4678 let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
4680 for po in &parsed.entries {
4681 let got =
4682 read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |_| {
4683 Ok(None)
4684 })
4685 .expect("test operation should succeed");
4686 assert_eq!(*got, po.object, "offset {}", po.entry.offset);
4687 }
4688 }
4689
4690 #[derive(Default)]
4693 struct MapHeaderTypeCache(HashMap<u64, (ObjectType, u64)>);
4694
4695 impl HeaderTypeCache for MapHeaderTypeCache {
4696 fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
4697 self.0.get(&pack_offset).copied()
4698 }
4699 fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
4700 self.0.insert(pack_offset, header);
4701 }
4702 }
4703
4704 #[test]
4705 fn read_object_header_at_cached_matches_uncached_cold_and_warm_for_ofs_delta() {
4706 let (base, changed) = similar_blob_objects();
4707 let options = delta_pack_options(true);
4708 let written =
4709 PackFile::write_packed_with_options(&[base, changed], ObjectFormat::Sha1, &options)
4710 .expect("test operation should succeed");
4711 let mut second = written.entries[1].offset as usize;
4713 assert_eq!(
4714 parse_entry_header(&written.pack, &mut second)
4715 .expect("test operation should succeed")
4716 .kind,
4717 PackObjectKind::OfsDelta
4718 );
4719
4720 let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
4721 let mut cache = MapHeaderTypeCache::default();
4722 for po in &parsed.entries {
4723 let uncached =
4724 read_object_header_at(&written.pack, po.entry.offset, ObjectFormat::Sha1, |_| {
4725 Ok(None)
4726 })
4727 .expect("test operation should succeed");
4728 assert_eq!(
4730 uncached,
4731 (po.object.object_type, po.object.body.len() as u64),
4732 "uncached header at offset {}",
4733 po.entry.offset
4734 );
4735 let cold = read_object_header_at_with_cache(
4737 &written.pack,
4738 po.entry.offset,
4739 ObjectFormat::Sha1,
4740 |_| Ok(None),
4741 &mut cache,
4742 )
4743 .expect("test operation should succeed");
4744 assert_eq!(cold, uncached, "cold cache at offset {}", po.entry.offset);
4745 }
4746 for po in &parsed.entries {
4749 let warm = read_object_header_at_with_cache(
4750 &written.pack,
4751 po.entry.offset,
4752 ObjectFormat::Sha1,
4753 |_| panic!("warm cache must not re-walk the chain"),
4754 &mut cache,
4755 )
4756 .expect("test operation should succeed");
4757 assert_eq!(
4758 warm,
4759 (po.object.object_type, po.object.body.len() as u64),
4760 "warm cache at offset {}",
4761 po.entry.offset
4762 );
4763 }
4764 }
4765
4766 #[test]
4767 fn read_object_at_matches_full_parse_for_ref_delta_pack() {
4768 let (base, changed) = similar_blob_objects();
4769 let options = delta_pack_options(false);
4770 let written = PackFile::write_packed_with_options(
4771 &[base, changed.clone()],
4772 ObjectFormat::Sha1,
4773 &options,
4774 )
4775 .expect("test operation should succeed");
4776 let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
4777 let by_oid: HashMap<ObjectId, Arc<EncodedObject>> = parsed
4778 .entries
4779 .iter()
4780 .map(|po| (po.entry.oid, Arc::new(po.object.clone())))
4781 .collect();
4782 for po in &parsed.entries {
4783 let got =
4784 read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |oid| {
4785 Ok(by_oid.get(oid).cloned())
4786 })
4787 .expect("test operation should succeed");
4788 assert_eq!(*got, po.object);
4789 }
4790 }
4791
4792 #[derive(Default)]
4796 struct CountingDeltaCache {
4797 map: std::cell::RefCell<HashMap<u64, Arc<EncodedObject>>>,
4798 hits: std::cell::Cell<usize>,
4799 inserts: std::cell::Cell<usize>,
4800 }
4801
4802 impl PackDeltaCache for CountingDeltaCache {
4803 fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
4804 let hit = self.map.borrow().get(&offset).cloned();
4805 if hit.is_some() {
4806 self.hits.set(self.hits.get() + 1);
4807 }
4808 hit
4809 }
4810 fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
4811 self.inserts.set(self.inserts.get() + 1);
4812 self.map.borrow_mut().insert(offset, object);
4813 }
4814 }
4815
4816 #[test]
4817 fn read_object_at_with_cache_matches_uncached_and_reuses_bases() {
4818 let mut objects = Vec::new();
4821 for idx in 0..8u32 {
4822 let mut body = vec![b'x'; 4096];
4823 body.extend_from_slice(format!("\nvariant {idx}\n").as_bytes());
4824 objects.push(EncodedObject::new(ObjectType::Blob, body));
4825 }
4826 let options = delta_pack_options(true);
4827 let written = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
4828 .expect("test operation should succeed");
4829 let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
4830
4831 let cache = CountingDeltaCache::default();
4832 for _ in 0..2 {
4835 for po in &parsed.entries {
4836 let got = read_object_at_with_cache_arc(
4837 &written.pack,
4838 po.entry.offset,
4839 ObjectFormat::Sha1,
4840 |_| Ok(None),
4841 &cache,
4842 )
4843 .expect("test operation should succeed");
4844 assert_eq!(*got, po.object, "offset {}", po.entry.offset);
4845 }
4846 }
4847 assert!(cache.hits.get() > 0, "cache never served a warm object");
4850 }
4851
4852 #[test]
4853 fn writes_ofs_delta_pack_and_index_that_round_trip() {
4854 let (base, changed) = similar_blob_objects();
4855 let options = delta_pack_options(true);
4856 let written = PackFile::write_packed_with_options(
4857 &[base.clone(), changed.clone()],
4858 ObjectFormat::Sha1,
4859 &options,
4860 )
4861 .expect("test operation should succeed");
4862 let mut second_offset = written.entries[1].offset as usize;
4863 let header = parse_entry_header(&written.pack, &mut second_offset)
4864 .expect("test operation should succeed");
4865 assert_eq!(header.kind, PackObjectKind::OfsDelta);
4866
4867 let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
4868 let index =
4869 PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
4870 let oid = changed
4871 .object_id(ObjectFormat::Sha1)
4872 .expect("test operation should succeed");
4873 assert_eq!(pack.entries[0].object, base);
4874 assert_eq!(pack.entries[1].object, changed);
4875 assert_eq!(index.pack_checksum, pack.checksum);
4876 assert_eq!(
4877 index
4878 .find(&oid)
4879 .expect("test operation should succeed")
4880 .offset,
4881 written.entries[1].offset
4882 );
4883 }
4884
4885 #[test]
4886 fn resolves_ofs_delta_pack_entry() {
4887 let base = b"hello";
4888 let result = b"hello world";
4889 let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, DeltaKind::Offset);
4890 let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
4891 assert_eq!(parsed.entries.len(), 2);
4892 assert_eq!(parsed.entries[0].object.body, base);
4893 assert_eq!(parsed.entries[1].object.body, result);
4894 assert_eq!(
4895 parsed.entries[1].entry.oid,
4896 sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
4897 .expect("test operation should succeed")
4898 );
4899 }
4900
4901 #[test]
4902 fn resolves_ref_delta_pack_entry() {
4903 let base = b"hello";
4904 let result = b"hello world";
4905 let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, DeltaKind::Ref);
4906 let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
4907 assert_eq!(parsed.entries.len(), 2);
4908 assert_eq!(parsed.entries[0].object.body, base);
4909 assert_eq!(parsed.entries[1].object.body, result);
4910 assert_eq!(
4911 parsed.entries[1].entry.oid,
4912 sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
4913 .expect("test operation should succeed")
4914 );
4915 }
4916
4917 #[test]
4918 fn resolves_thin_ref_delta_pack_entry_with_external_base() {
4919 let base = b"hello";
4920 let result = b"hello world";
4921 let pack = thin_ref_delta_pack(ObjectFormat::Sha1, base, result);
4922 assert!(PackFile::parse_sha1(&pack).is_err());
4923
4924 let base_oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", base)
4925 .expect("test operation should succeed");
4926 let parsed = PackFile::parse_thin(&pack, ObjectFormat::Sha1, |oid| {
4927 if oid == &base_oid {
4928 Ok(Some(EncodedObject::new(ObjectType::Blob, base.to_vec())))
4929 } else {
4930 Ok(None)
4931 }
4932 })
4933 .expect("test operation should succeed");
4934 assert_eq!(parsed.entries.len(), 1);
4935 assert_eq!(parsed.entries[0].object.body, result);
4936 assert_eq!(
4937 parsed.entries[0].entry.oid,
4938 sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
4939 .expect("test operation should succeed")
4940 );
4941 }
4942
4943 #[test]
4944 fn rejects_bad_pack_checksum() {
4945 let mut pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
4946 let last = pack.len() - 1;
4947 pack[last] ^= 1;
4948 assert!(PackFile::parse_sha1(&pack).is_err());
4949 }
4950
4951 #[test]
4952 fn raw_pack_index_rejects_bad_pack_checksum() {
4953 let mut pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
4954 let last = pack.len() - 1;
4955 pack[last] ^= 1;
4956 assert!(PackIndex::write_v2_for_pack_sha1(&pack).is_err());
4957 }
4958
4959 #[test]
4960 fn pack_index_writer_rejects_duplicate_object_ids() {
4961 let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"same\n")
4962 .expect("test operation should succeed");
4963 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
4964 .expect("test operation should succeed");
4965 let entries = vec![
4966 PackIndexEntry {
4967 oid,
4968 crc32: 1,
4969 offset: 12,
4970 },
4971 PackIndexEntry {
4972 oid,
4973 crc32: 2,
4974 offset: 24,
4975 },
4976 ];
4977 assert!(PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack_checksum).is_err());
4978 }
4979
4980 #[test]
4981 fn parses_single_entry_pack_index() {
4982 let oid = ObjectId::from_hex(
4983 ObjectFormat::Sha1,
4984 "ce013625030ba8dba906f756967f9e9ca394464a",
4985 )
4986 .expect("test operation should succeed");
4987 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
4988 .expect("test operation should succeed");
4989 let index = single_entry_index(
4990 ObjectFormat::Sha1,
4991 oid,
4992 0x1234_5678,
4993 12,
4994 pack_checksum.clone(),
4995 );
4996 let parsed = PackIndex::parse_v2_sha1(&index).expect("test operation should succeed");
4997 assert_eq!(parsed.version, 2);
4998 assert_eq!(parsed.pack_checksum, pack_checksum);
4999 assert_eq!(parsed.entries.len(), 1);
5000 assert_eq!(
5001 parsed
5002 .find(&oid)
5003 .expect("test operation should succeed")
5004 .offset,
5005 12
5006 );
5007 assert_eq!(
5008 parsed
5009 .find(&oid)
5010 .expect("test operation should succeed")
5011 .crc32,
5012 0x1234_5678
5013 );
5014 }
5015
5016 #[test]
5017 fn parses_single_entry_pack_index_v1() {
5018 let oid = ObjectId::from_hex(
5019 ObjectFormat::Sha1,
5020 "ce013625030ba8dba906f756967f9e9ca394464a",
5021 )
5022 .expect("test operation should succeed");
5023 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5024 .expect("test operation should succeed");
5025 let index =
5026 single_entry_index_v1(ObjectFormat::Sha1, oid, 0x1234_5678, pack_checksum.clone());
5027 let parsed =
5028 PackIndex::parse(&index, ObjectFormat::Sha1).expect("test operation should succeed");
5029 assert_eq!(parsed.version, 1);
5030 assert_eq!(parsed.pack_checksum, pack_checksum);
5031 assert_eq!(parsed.entries.len(), 1);
5032 assert_eq!(
5033 parsed
5034 .find(&oid)
5035 .expect("test operation should succeed")
5036 .offset,
5037 0x1234_5678
5038 );
5039 assert_eq!(
5040 parsed
5041 .find(&oid)
5042 .expect("test operation should succeed")
5043 .crc32,
5044 0
5045 );
5046 }
5047
5048 #[test]
5049 fn rejects_bad_pack_index_v1_checksum() {
5050 let oid = ObjectId::from_hex(
5051 ObjectFormat::Sha1,
5052 "ce013625030ba8dba906f756967f9e9ca394464a",
5053 )
5054 .expect("test operation should succeed");
5055 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5056 .expect("test operation should succeed");
5057 let mut index = single_entry_index_v1(ObjectFormat::Sha1, oid, 12, pack_checksum);
5058 let last = index.len() - 1;
5059 index[last] ^= 1;
5060 assert!(PackIndex::parse(&index, ObjectFormat::Sha1).is_err());
5061 }
5062
5063 #[test]
5064 fn parses_pack_reverse_index() {
5065 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5066 .expect("test operation should succeed");
5067 let reverse_index = PackReverseIndex::write(ObjectFormat::Sha1, &[2, 0, 1], &pack_checksum)
5068 .expect("test operation should succeed");
5069 let parsed = PackReverseIndex::parse(&reverse_index, ObjectFormat::Sha1, 3)
5070 .expect("test operation should succeed");
5071 assert_eq!(parsed.version, 1);
5072 assert_eq!(parsed.format, ObjectFormat::Sha1);
5073 assert_eq!(parsed.positions, vec![2, 0, 1]);
5074 assert_eq!(parsed.pack_checksum, pack_checksum);
5075 assert_eq!(
5076 PackReverseIndex::write(ObjectFormat::Sha1, &parsed.positions, &parsed.pack_checksum)
5077 .expect("test operation should succeed"),
5078 reverse_index
5079 );
5080 }
5081
5082 #[test]
5083 fn rejects_bad_pack_reverse_index_checksum() {
5084 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5085 .expect("test operation should succeed");
5086 let mut reverse_index = PackReverseIndex::write(ObjectFormat::Sha1, &[0], &pack_checksum)
5087 .expect("test operation should succeed");
5088 let last = reverse_index.len() - 1;
5089 reverse_index[last] ^= 1;
5090 assert!(PackReverseIndex::parse(&reverse_index, ObjectFormat::Sha1, 1).is_err());
5091 }
5092
5093 #[test]
5094 fn rejects_bad_pack_reverse_index_positions() {
5095 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5096 .expect("test operation should succeed");
5097 let duplicate = pack_reverse_index(ObjectFormat::Sha1, &[0, 0], pack_checksum.clone());
5098 assert!(PackReverseIndex::parse(&duplicate, ObjectFormat::Sha1, 2).is_err());
5099 let out_of_range = pack_reverse_index(ObjectFormat::Sha1, &[0, 2], pack_checksum);
5100 assert!(PackReverseIndex::parse(&out_of_range, ObjectFormat::Sha1, 2).is_err());
5101 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5102 .expect("test operation should succeed");
5103 assert!(PackReverseIndex::write(ObjectFormat::Sha1, &[0, 0], &pack_checksum).is_err());
5104 assert!(PackReverseIndex::write(ObjectFormat::Sha1, &[0, 2], &pack_checksum).is_err());
5105 }
5106
5107 #[test]
5108 fn parses_pack_mtimes() {
5109 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5110 .expect("test operation should succeed");
5111 let mtimes = PackMtimes::write(
5112 ObjectFormat::Sha1,
5113 &[1, 1_700_000_000, u32::MAX],
5114 &pack_checksum,
5115 )
5116 .expect("test operation should succeed");
5117 let parsed = PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 3)
5118 .expect("test operation should succeed");
5119 assert_eq!(parsed.version, 1);
5120 assert_eq!(parsed.format, ObjectFormat::Sha1);
5121 assert_eq!(parsed.mtimes, vec![1, 1_700_000_000, u32::MAX]);
5122 assert_eq!(parsed.pack_checksum, pack_checksum);
5123 assert_eq!(
5124 PackMtimes::write(ObjectFormat::Sha1, &parsed.mtimes, &parsed.pack_checksum)
5125 .expect("test operation should succeed"),
5126 mtimes
5127 );
5128 }
5129
5130 #[test]
5131 fn rejects_bad_pack_mtimes_checksum() {
5132 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5133 .expect("test operation should succeed");
5134 let mut mtimes = PackMtimes::write(ObjectFormat::Sha1, &[1], &pack_checksum)
5135 .expect("test operation should succeed");
5136 let last = mtimes.len() - 1;
5137 mtimes[last] ^= 1;
5138 assert!(PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 1).is_err());
5139 }
5140
5141 #[test]
5142 fn rejects_bad_pack_mtimes_shape() {
5143 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5144 .expect("test operation should succeed");
5145 let mtimes = pack_mtimes(ObjectFormat::Sha1, &[1, 2], pack_checksum.clone());
5146 assert!(PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 1).is_err());
5147
5148 let mut wrong_hash = pack_mtimes(ObjectFormat::Sha1, &[1], pack_checksum);
5149 wrong_hash[11] = 2;
5150 let checksum_offset = wrong_hash.len() - ObjectFormat::Sha1.raw_len();
5151 let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &wrong_hash[..checksum_offset])
5152 .expect("test operation should succeed");
5153 wrong_hash[checksum_offset..].copy_from_slice(checksum.as_bytes());
5154 assert!(PackMtimes::parse(&wrong_hash, ObjectFormat::Sha1, 1).is_err());
5155 }
5156
5157 #[test]
5158 fn parses_multi_pack_index_header_and_chunk_lookup() {
5159 let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
5160 .expect("test operation should succeed");
5161 let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
5162 .expect("test operation should succeed");
5163 let chunks = midx_chunks_with_pack_names(
5164 ObjectFormat::Sha1,
5165 b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
5166 &[(first.clone(), 0, 12), (second.clone(), 1, 0x1_0000_0000)],
5167 );
5168 let midx = multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks);
5169 let parsed = MultiPackIndex::parse(&midx, ObjectFormat::Sha1)
5170 .expect("test operation should succeed");
5171 assert_eq!(parsed.version, 2);
5172 assert_eq!(parsed.format, ObjectFormat::Sha1);
5173 assert_eq!(parsed.pack_count, 2);
5174 assert_eq!(parsed.pack_names, vec!["pack-a.idx", "pack-b.idx"]);
5175 assert_eq!(parsed.object_count, 2);
5176 assert_eq!(parsed.objects.len(), 2);
5177 assert_eq!(
5178 parsed
5179 .find(&first)
5180 .expect("test operation should succeed")
5181 .pack_int_id,
5182 0
5183 );
5184 assert_eq!(
5185 parsed
5186 .find(&first)
5187 .expect("test operation should succeed")
5188 .offset,
5189 12
5190 );
5191 assert_eq!(
5192 parsed
5193 .find(&second)
5194 .expect("test operation should succeed")
5195 .pack_int_id,
5196 1
5197 );
5198 assert_eq!(
5199 parsed
5200 .find(&second)
5201 .expect("test operation should succeed")
5202 .offset,
5203 0x1_0000_0000
5204 );
5205 assert_eq!(parsed.reverse_index, None);
5206 assert_eq!(parsed.bitmapped_packs, None);
5207 assert_eq!(parsed.chunks.len(), 5);
5208 assert_eq!(parsed.chunks[0].id, *b"PNAM");
5209 assert_eq!(parsed.chunks[0].offset, 84);
5210 assert_eq!(parsed.chunks[0].len, 24);
5211 assert_eq!(parsed.chunks[1].id, *b"OIDF");
5212 assert_eq!(parsed.chunks[1].offset, 108);
5213 assert_eq!(parsed.chunks[1].len, 1024);
5214 }
5215
5216 #[test]
5217 fn rejects_bad_multi_pack_index_checksum() {
5218 let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
5219 let mut midx = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
5220 let last = midx.len() - 1;
5221 midx[last] ^= 1;
5222 assert!(MultiPackIndex::parse(&midx, ObjectFormat::Sha1).is_err());
5223 }
5224
5225 #[test]
5226 fn rejects_bad_multi_pack_index_shape() {
5227 let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
5228 let mut wrong_hash = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
5229 wrong_hash[5] = 2;
5230 let checksum_offset = wrong_hash.len() - ObjectFormat::Sha1.raw_len();
5231 let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &wrong_hash[..checksum_offset])
5232 .expect("test operation should succeed");
5233 wrong_hash[checksum_offset..].copy_from_slice(checksum.as_bytes());
5234 assert!(MultiPackIndex::parse(&wrong_hash, ObjectFormat::Sha1).is_err());
5235
5236 let mut missing_terminator = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
5237 missing_terminator[12] = b'B';
5238 let checksum_offset = missing_terminator.len() - ObjectFormat::Sha1.raw_len();
5239 let checksum =
5240 sley_core::digest_bytes(ObjectFormat::Sha1, &missing_terminator[..checksum_offset])
5241 .expect("test operation should succeed");
5242 missing_terminator[checksum_offset..].copy_from_slice(checksum.as_bytes());
5243 assert!(MultiPackIndex::parse(&missing_terminator, ObjectFormat::Sha1).is_err());
5244
5245 let mut bad_offset = multi_pack_index(
5246 ObjectFormat::Sha1,
5247 2,
5248 0,
5249 &midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]),
5250 );
5251 bad_offset[16..24].copy_from_slice(&0u64.to_be_bytes());
5252 let checksum_offset = bad_offset.len() - ObjectFormat::Sha1.raw_len();
5253 let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &bad_offset[..checksum_offset])
5254 .expect("test operation should succeed");
5255 bad_offset[checksum_offset..].copy_from_slice(checksum.as_bytes());
5256 assert!(MultiPackIndex::parse(&bad_offset, ObjectFormat::Sha1).is_err());
5257 }
5258
5259 #[test]
5260 fn rejects_bad_multi_pack_index_pack_names() {
5261 let missing = multi_pack_index(ObjectFormat::Sha1, 2, 1, &[]);
5262 assert!(MultiPackIndex::parse(&missing, ObjectFormat::Sha1).is_err());
5263
5264 let too_few = multi_pack_index(
5265 ObjectFormat::Sha1,
5266 2,
5267 2,
5268 &midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0".to_vec(), &[]),
5269 );
5270 assert!(MultiPackIndex::parse(&too_few, ObjectFormat::Sha1).is_err());
5271
5272 let bad_padding = multi_pack_index(
5273 ObjectFormat::Sha1,
5274 2,
5275 1,
5276 &midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0xxxx".to_vec(), &[]),
5277 );
5278 assert!(MultiPackIndex::parse(&bad_padding, ObjectFormat::Sha1).is_err());
5279
5280 let unsorted_v1 = multi_pack_index(
5281 ObjectFormat::Sha1,
5282 1,
5283 2,
5284 &midx_chunks_with_pack_names(
5285 ObjectFormat::Sha1,
5286 b"pack-b.idx\0pack-a.idx\0".to_vec(),
5287 &[],
5288 ),
5289 );
5290 assert!(MultiPackIndex::parse(&unsorted_v1, ObjectFormat::Sha1).is_err());
5291
5292 let unsorted_v2 = multi_pack_index(
5293 ObjectFormat::Sha1,
5294 2,
5295 2,
5296 &midx_chunks_with_pack_names(
5297 ObjectFormat::Sha1,
5298 b"pack-b.idx\0pack-a.idx\0".to_vec(),
5299 &[],
5300 ),
5301 );
5302 let parsed = MultiPackIndex::parse(&unsorted_v2, ObjectFormat::Sha1)
5303 .expect("test operation should succeed");
5304 assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
5305 }
5306
5307 #[test]
5308 fn rejects_bad_multi_pack_index_object_tables() {
5309 let oid_a = ObjectId::from_hex(
5310 ObjectFormat::Sha1,
5311 "1111111111111111111111111111111111111111",
5312 )
5313 .expect("test operation should succeed");
5314 let oid_b = ObjectId::from_hex(
5315 ObjectFormat::Sha1,
5316 "2222222222222222222222222222222222222222",
5317 )
5318 .expect("test operation should succeed");
5319
5320 let missing_oidf = multi_pack_index(
5321 ObjectFormat::Sha1,
5322 2,
5323 1,
5324 &[(*b"PNAM", b"pack-a.idx\0\0".to_vec())],
5325 );
5326 assert!(MultiPackIndex::parse(&missing_oidf, ObjectFormat::Sha1).is_err());
5327
5328 let bad_fanout = vec![
5329 (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
5330 (*b"OIDF", vec![0; 256 * 4]),
5331 (*b"OIDL", oid_a.as_bytes().to_vec()),
5332 (*b"OOFF", midx_ooff_entries(&[(0, 12)], &mut Vec::new())),
5333 ];
5334 let bad_fanout = multi_pack_index(ObjectFormat::Sha1, 2, 1, &bad_fanout);
5335 assert!(MultiPackIndex::parse(&bad_fanout, ObjectFormat::Sha1).is_err());
5336
5337 let mut unsorted = Vec::new();
5338 unsorted.push((*b"PNAM", b"pack-a.idx\0\0".to_vec()));
5339 unsorted.push((*b"OIDF", midx_oid_fanout(&[oid_a.clone(), oid_b.clone()])));
5340 let mut oid_lookup = Vec::new();
5341 oid_lookup.extend_from_slice(oid_b.as_bytes());
5342 oid_lookup.extend_from_slice(oid_a.as_bytes());
5343 unsorted.push((*b"OIDL", oid_lookup));
5344 unsorted.push((
5345 *b"OOFF",
5346 midx_ooff_entries(&[(0, 12), (0, 24)], &mut Vec::new()),
5347 ));
5348 let unsorted = multi_pack_index(ObjectFormat::Sha1, 2, 1, &unsorted);
5349 assert!(MultiPackIndex::parse(&unsorted, ObjectFormat::Sha1).is_err());
5350
5351 let bad_pack = multi_pack_index(
5352 ObjectFormat::Sha1,
5353 2,
5354 1,
5355 &midx_chunks_with_pack_names(
5356 ObjectFormat::Sha1,
5357 b"pack-a.idx\0\0".to_vec(),
5358 &[(oid_a.clone(), 1, 12)],
5359 ),
5360 );
5361 assert!(MultiPackIndex::parse(&bad_pack, ObjectFormat::Sha1).is_err());
5362
5363 let mut large_offsets = Vec::new();
5364 let missing_loff = vec![
5365 (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
5366 (*b"OIDF", midx_oid_fanout(std::slice::from_ref(&oid_a))),
5367 (*b"OIDL", oid_a.as_bytes().to_vec()),
5368 (
5369 *b"OOFF",
5370 midx_ooff_entries(&[(0, 0x1_0000_0000)], &mut large_offsets),
5371 ),
5372 ];
5373 let missing_loff = multi_pack_index(ObjectFormat::Sha1, 2, 1, &missing_loff);
5374 assert!(MultiPackIndex::parse(&missing_loff, ObjectFormat::Sha1).is_err());
5375
5376 let mut bad_loff =
5377 midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0\0".to_vec(), &[]);
5378 bad_loff.push((*b"LOFF", vec![0]));
5379 let bad_loff = multi_pack_index(ObjectFormat::Sha1, 2, 1, &bad_loff);
5380 assert!(MultiPackIndex::parse(&bad_loff, ObjectFormat::Sha1).is_err());
5381 }
5382
5383 #[test]
5384 fn parses_multi_pack_index_bitmap_chunks() {
5385 let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
5386 .expect("test operation should succeed");
5387 let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
5388 .expect("test operation should succeed");
5389 let mut chunks = midx_chunks_with_pack_names(
5390 ObjectFormat::Sha1,
5391 b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
5392 &[(first, 0, 12), (second, 1, 24)],
5393 );
5394 chunks.push((*b"RIDX", midx_u32_table(&[1, 0])));
5395 chunks.push((*b"BTMP", midx_bitmap_packs(&[(0, 1), (1, 1)])));
5396 let midx = multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks);
5397
5398 let parsed = MultiPackIndex::parse(&midx, ObjectFormat::Sha1)
5399 .expect("test operation should succeed");
5400 assert_eq!(parsed.reverse_index, Some(vec![1, 0]));
5401 assert_eq!(
5402 parsed.bitmapped_packs,
5403 Some(vec![
5404 MultiPackBitmapPack {
5405 bitmap_pos: 0,
5406 bitmap_nr: 1,
5407 },
5408 MultiPackBitmapPack {
5409 bitmap_pos: 1,
5410 bitmap_nr: 1,
5411 },
5412 ])
5413 );
5414 }
5415
5416 #[test]
5417 fn writes_multi_pack_index_that_round_trips() {
5418 let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
5419 .expect("test operation should succeed");
5420 let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
5421 .expect("test operation should succeed");
5422 let bytes = MultiPackIndex::write(
5423 ObjectFormat::Sha1,
5424 2,
5425 &["pack-b.idx".into(), "pack-a.idx".into()],
5426 &[
5427 MultiPackIndexEntry {
5428 oid: second.clone(),
5429 pack_int_id: 0,
5430 offset: 0x1_0000_0000,
5431 },
5432 MultiPackIndexEntry {
5433 oid: first.clone(),
5434 pack_int_id: 1,
5435 offset: 12,
5436 },
5437 ],
5438 )
5439 .expect("test operation should succeed");
5440
5441 let parsed = MultiPackIndex::parse(&bytes, ObjectFormat::Sha1)
5442 .expect("test operation should succeed");
5443 assert_eq!(parsed.version, 2);
5444 assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
5445 assert_eq!(parsed.object_count, 2);
5446 assert_eq!(
5447 parsed
5448 .find(&first)
5449 .expect("test operation should succeed")
5450 .pack_int_id,
5451 1
5452 );
5453 assert_eq!(
5454 parsed
5455 .find(&first)
5456 .expect("test operation should succeed")
5457 .offset,
5458 12
5459 );
5460 assert_eq!(
5461 parsed
5462 .find(&second)
5463 .expect("test operation should succeed")
5464 .pack_int_id,
5465 0
5466 );
5467 assert_eq!(
5468 parsed
5469 .find(&second)
5470 .expect("test operation should succeed")
5471 .offset,
5472 0x1_0000_0000
5473 );
5474 assert!(parsed.chunks.iter().any(|chunk| chunk.id == *b"LOFF"));
5475 }
5476
5477 #[test]
5478 fn write_multi_pack_index_rejects_invalid_inputs() {
5479 let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"object\n")
5480 .expect("test operation should succeed");
5481 assert!(MultiPackIndex::write(ObjectFormat::Sha1, 3, &["pack-a.idx".into()], &[]).is_err());
5482 assert!(
5483 MultiPackIndex::write(
5484 ObjectFormat::Sha1,
5485 1,
5486 &["pack-b.idx".into(), "pack-a.idx".into()],
5487 &[],
5488 )
5489 .is_err()
5490 );
5491 assert!(MultiPackIndex::write(ObjectFormat::Sha1, 2, &["pack/a.idx".into()], &[]).is_err());
5492 assert!(
5493 MultiPackIndex::write(
5494 ObjectFormat::Sha1,
5495 2,
5496 &["pack-a.idx".into()],
5497 &[MultiPackIndexEntry {
5498 oid,
5499 pack_int_id: 1,
5500 offset: 12,
5501 }],
5502 )
5503 .is_err()
5504 );
5505 assert!(
5506 MultiPackIndex::write(
5507 ObjectFormat::Sha1,
5508 2,
5509 &["pack-a.idx".into()],
5510 &[
5511 MultiPackIndexEntry {
5512 oid,
5513 pack_int_id: 0,
5514 offset: 12,
5515 },
5516 MultiPackIndexEntry {
5517 oid,
5518 pack_int_id: 0,
5519 offset: 24,
5520 },
5521 ],
5522 )
5523 .is_err()
5524 );
5525 }
5526
5527 #[test]
5528 fn rejects_bad_multi_pack_index_bitmap_chunks() {
5529 let oid_a = ObjectId::from_hex(
5530 ObjectFormat::Sha1,
5531 "1111111111111111111111111111111111111111",
5532 )
5533 .expect("test operation should succeed");
5534 let oid_b = ObjectId::from_hex(
5535 ObjectFormat::Sha1,
5536 "2222222222222222222222222222222222222222",
5537 )
5538 .expect("test operation should succeed");
5539
5540 let mut duplicate_ridx = midx_chunks_with_pack_names(
5541 ObjectFormat::Sha1,
5542 b"pack-a.idx\0\0".to_vec(),
5543 &[(oid_a.clone(), 0, 12), (oid_b.clone(), 0, 24)],
5544 );
5545 duplicate_ridx.push((*b"RIDX", midx_u32_table(&[0, 0])));
5546 let duplicate_ridx = multi_pack_index(ObjectFormat::Sha1, 2, 1, &duplicate_ridx);
5547 assert!(MultiPackIndex::parse(&duplicate_ridx, ObjectFormat::Sha1).is_err());
5548
5549 let mut short_btmp = midx_chunks_with_pack_names(
5550 ObjectFormat::Sha1,
5551 b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
5552 &[(oid_a.clone(), 0, 12), (oid_b.clone(), 1, 24)],
5553 );
5554 short_btmp.push((*b"BTMP", midx_bitmap_packs(&[(0, 1)])));
5555 let short_btmp = multi_pack_index(ObjectFormat::Sha1, 2, 2, &short_btmp);
5556 assert!(MultiPackIndex::parse(&short_btmp, ObjectFormat::Sha1).is_err());
5557
5558 let mut out_of_range_btmp = midx_chunks_with_pack_names(
5559 ObjectFormat::Sha1,
5560 b"pack-a.idx\0\0".to_vec(),
5561 &[(oid_a, 0, 12), (oid_b, 0, 24)],
5562 );
5563 out_of_range_btmp.push((*b"BTMP", midx_bitmap_packs(&[(1, 2)])));
5564 let out_of_range_btmp = multi_pack_index(ObjectFormat::Sha1, 2, 1, &out_of_range_btmp);
5565 assert!(MultiPackIndex::parse(&out_of_range_btmp, ObjectFormat::Sha1).is_err());
5566 }
5567
5568 #[test]
5569 fn parses_pack_bitmap_index_with_hash_cache() {
5570 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5571 .expect("test operation should succeed");
5572 let bitmap = pack_bitmap_index(
5573 ObjectFormat::Sha1,
5574 3,
5575 PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE,
5576 &pack_checksum,
5577 &[(2, 0, 1, &[0b101])],
5578 Some(&[0x1111_1111, 0x2222_2222, 0x3333_3333]),
5579 );
5580
5581 let parsed = PackBitmapIndex::parse(&bitmap, ObjectFormat::Sha1, 3)
5582 .expect("test operation should succeed");
5583 assert_eq!(parsed.version, 1);
5584 assert_eq!(parsed.format, ObjectFormat::Sha1);
5585 assert_eq!(
5586 parsed.options,
5587 PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE
5588 );
5589 assert_eq!(parsed.pack_checksum, pack_checksum);
5590 assert_eq!(parsed.type_bitmaps.commits.bit_size, 3);
5591 assert_eq!(parsed.type_bitmaps.trees.bit_size, 3);
5592 assert_eq!(parsed.entries.len(), 1);
5593 let entry = parsed
5594 .entry_for_index_position(2)
5595 .expect("test operation should succeed");
5596 assert_eq!(entry.xor_offset, 0);
5597 assert_eq!(entry.flags, 1);
5598 assert_eq!(entry.bitmap.words, ewah_literal_words(&[0b101]));
5599 assert_eq!(
5600 parsed.name_hash_cache,
5601 Some(vec![0x1111_1111, 0x2222_2222, 0x3333_3333])
5602 );
5603 }
5604
5605 #[test]
5606 fn parses_pack_bitmap_index_sha256() {
5607 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
5608 .expect("test operation should succeed");
5609 let bitmap = pack_bitmap_index(
5610 ObjectFormat::Sha256,
5611 2,
5612 PackBitmapIndex::OPTION_FULL_DAG,
5613 &pack_checksum,
5614 &[(0, 0, 0, &[0b11])],
5615 None,
5616 );
5617
5618 let parsed = PackBitmapIndex::parse(&bitmap, ObjectFormat::Sha256, 2)
5619 .expect("test operation should succeed");
5620 assert_eq!(parsed.version, 1);
5621 assert_eq!(parsed.format, ObjectFormat::Sha256);
5622 assert_eq!(parsed.pack_checksum, pack_checksum);
5623 assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
5624 assert_eq!(parsed.entries[0].object_position, 0);
5625 assert_eq!(parsed.name_hash_cache, None);
5626 }
5627
5628 #[test]
5629 fn parses_upstream_git_written_pack_bitmap_index() {
5630 let root = unique_temp_dir("git-pack-bitmap-upstream");
5631 fs::create_dir_all(&root).expect("test operation should succeed");
5632 {
5633 run_git_success(&root, &["init", "-q", "-b", "main"]);
5634 run_git_success(
5635 &root,
5636 &[
5637 "-c",
5638 "user.name=Example User",
5639 "-c",
5640 "user.email=example@example.invalid",
5641 "commit",
5642 "--allow-empty",
5643 "-q",
5644 "-m",
5645 "one",
5646 ],
5647 );
5648 run_git_success(
5649 &root,
5650 &[
5651 "-c",
5652 "user.name=Example User",
5653 "-c",
5654 "user.email=example@example.invalid",
5655 "commit",
5656 "--allow-empty",
5657 "-q",
5658 "-m",
5659 "two",
5660 ],
5661 );
5662 run_git_success(&root, &["repack", "-adb"]);
5663 let pack_dir = root.join(".git").join("objects").join("pack");
5664 let idx_path = single_path_with_extension(&pack_dir, "idx");
5665 let bitmap_path = single_path_with_extension(&pack_dir, "bitmap");
5666 let index = PackIndex::parse(
5667 &fs::read(idx_path).expect("test operation should succeed"),
5668 ObjectFormat::Sha1,
5669 )
5670 .expect("test operation should succeed");
5671 let bitmap = PackBitmapIndex::parse(
5672 &fs::read(bitmap_path).expect("test operation should succeed"),
5673 ObjectFormat::Sha1,
5674 index.entries.len(),
5675 )
5676 .expect("test operation should succeed");
5677 assert_eq!(bitmap.pack_checksum, index.pack_checksum);
5678 assert!(!bitmap.entries.is_empty());
5679 };
5680 let _ = fs::remove_dir_all(&root);
5681 }
5682
5683 #[test]
5684 fn rejects_bad_pack_bitmap_index_header_and_checksum() {
5685 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5686 .expect("test operation should succeed");
5687 let bitmap = pack_bitmap_index(
5688 ObjectFormat::Sha1,
5689 1,
5690 PackBitmapIndex::OPTION_FULL_DAG,
5691 &pack_checksum,
5692 &[(0, 0, 0, &[1])],
5693 None,
5694 );
5695
5696 let mut bad_signature = bitmap.clone();
5697 bad_signature[0] = b'X';
5698 assert!(PackBitmapIndex::parse(&bad_signature, ObjectFormat::Sha1, 1).is_err());
5699
5700 let mut bad_version = bitmap.clone();
5701 bad_version[5] = 2;
5702 refresh_trailing_checksum(ObjectFormat::Sha1, &mut bad_version);
5703 assert!(PackBitmapIndex::parse(&bad_version, ObjectFormat::Sha1, 1).is_err());
5704
5705 let mut bad_option = bitmap.clone();
5706 bad_option[7] = 0x20;
5707 refresh_trailing_checksum(ObjectFormat::Sha1, &mut bad_option);
5708 assert!(PackBitmapIndex::parse(&bad_option, ObjectFormat::Sha1, 1).is_err());
5709
5710 let mut bad_checksum = bitmap;
5711 let last = bad_checksum.len() - 1;
5712 bad_checksum[last] ^= 1;
5713 assert!(PackBitmapIndex::parse(&bad_checksum, ObjectFormat::Sha1, 1).is_err());
5714 }
5715
5716 #[test]
5717 fn rejects_bad_pack_bitmap_index_ewah_and_entries() {
5718 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5719 .expect("test operation should succeed");
5720 let bitmap = pack_bitmap_index(
5721 ObjectFormat::Sha1,
5722 2,
5723 PackBitmapIndex::OPTION_FULL_DAG,
5724 &pack_checksum,
5725 &[(0, 0, 0, &[0b01]), (1, 1, 0, &[0b11])],
5726 None,
5727 );
5728
5729 let mut truncated = bitmap.clone();
5730 truncated.truncate(truncated.len() - ObjectFormat::Sha1.raw_len() - 1);
5731 refresh_trailing_checksum(ObjectFormat::Sha1, &mut truncated);
5732 assert!(PackBitmapIndex::parse(&truncated, ObjectFormat::Sha1, 2).is_err());
5733
5734 let mut out_of_range_position = pack_bitmap_index(
5735 ObjectFormat::Sha1,
5736 2,
5737 PackBitmapIndex::OPTION_FULL_DAG,
5738 &pack_checksum,
5739 &[(2, 0, 0, &[0b01])],
5740 None,
5741 );
5742 assert!(PackBitmapIndex::parse(&out_of_range_position, ObjectFormat::Sha1, 2).is_err());
5743 refresh_trailing_checksum(ObjectFormat::Sha1, &mut out_of_range_position);
5744 assert!(PackBitmapIndex::parse(&out_of_range_position, ObjectFormat::Sha1, 2).is_err());
5745
5746 let invalid_xor = pack_bitmap_index(
5747 ObjectFormat::Sha1,
5748 2,
5749 PackBitmapIndex::OPTION_FULL_DAG,
5750 &pack_checksum,
5751 &[(0, 1, 0, &[0b01])],
5752 None,
5753 );
5754 assert!(PackBitmapIndex::parse(&invalid_xor, ObjectFormat::Sha1, 2).is_err());
5755 }
5756
5757 #[test]
5758 fn parses_single_entry_pack_index_sha256() {
5759 let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha256, "blob", b"hello sha256\n")
5760 .expect("test operation should succeed");
5761 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
5762 .expect("test operation should succeed");
5763 let index = single_entry_index(
5764 ObjectFormat::Sha256,
5765 oid,
5766 0x1234_5678,
5767 12,
5768 pack_checksum.clone(),
5769 );
5770 let parsed =
5771 PackIndex::parse(&index, ObjectFormat::Sha256).expect("test operation should succeed");
5772 assert_eq!(parsed.version, 2);
5773 assert_eq!(parsed.pack_checksum, pack_checksum);
5774 assert_eq!(parsed.entries.len(), 1);
5775 assert_eq!(
5776 parsed
5777 .find(&oid)
5778 .expect("test operation should succeed")
5779 .offset,
5780 12
5781 );
5782 assert_eq!(
5783 parsed
5784 .find(&oid)
5785 .expect("test operation should succeed")
5786 .crc32,
5787 0x1234_5678
5788 );
5789 assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
5790 }
5791
5792 #[test]
5793 fn write_packed_deltifies_similar_blobs_and_round_trips_sha1() {
5794 write_packed_deltifies_similar_blobs_and_round_trips(ObjectFormat::Sha1);
5795 }
5796
5797 #[test]
5798 fn write_packed_deltifies_similar_blobs_and_round_trips_sha256() {
5799 write_packed_deltifies_similar_blobs_and_round_trips(ObjectFormat::Sha256);
5800 }
5801
5802 #[test]
5803 fn write_packed_rejects_duplicate_objects() {
5804 let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
5805 assert!(PackFile::write_packed(&[object.clone(), object], ObjectFormat::Sha1,).is_err());
5806 }
5807
5808 #[test]
5809 fn write_packed_with_known_ids_validates_ids_before_trusting_them() {
5810 let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
5811 let sha1 = object
5812 .object_id(ObjectFormat::Sha1)
5813 .expect("test operation should succeed");
5814 let sha256 = object
5815 .object_id(ObjectFormat::Sha256)
5816 .expect("test operation should succeed");
5817 let duplicate = [
5818 PackInput {
5819 oid: &sha1,
5820 object: &object,
5821 },
5822 PackInput {
5823 oid: &sha1,
5824 object: &object,
5825 },
5826 ];
5827 assert!(PackFile::write_packed_with_known_ids(&duplicate, ObjectFormat::Sha1).is_err());
5828
5829 let wrong_format = [PackInput {
5830 oid: &sha256,
5831 object: &object,
5832 }];
5833 assert!(PackFile::write_packed_with_known_ids(&wrong_format, ObjectFormat::Sha1).is_err());
5834 }
5835
5836 fn write_packed_deltifies_similar_blobs_and_round_trips(format: ObjectFormat) {
5837 let objects = similar_blob_family(8);
5838 let packed =
5839 PackFile::write_packed(&objects, format).expect("test operation should succeed");
5840 let undeltified =
5841 PackFile::write_undeltified(&objects, format).expect("test operation should succeed");
5842
5843 assert!(
5846 packed.pack.len() < undeltified.pack.len(),
5847 "expected delta pack ({}) smaller than undeltified pack ({})",
5848 packed.pack.len(),
5849 undeltified.pack.len()
5850 );
5851
5852 let kinds = pack_entry_kinds(&packed.pack, format);
5854 let delta_count = kinds
5855 .iter()
5856 .filter(|kind| matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta))
5857 .count();
5858 assert!(
5859 delta_count >= 1,
5860 "expected at least one delta entry, found kinds {kinds:?}"
5861 );
5862
5863 let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
5865 assert_eq!(parsed.entries.len(), objects.len());
5866 for object in &objects {
5867 let oid = object
5868 .object_id(format)
5869 .expect("test operation should succeed");
5870 let found = parsed
5871 .entries
5872 .iter()
5873 .find(|entry| entry.entry.oid == oid)
5874 .unwrap_or_else(|| panic!("object {oid} missing from parsed pack"));
5875 assert_eq!(&found.object, object, "object {oid} did not round-trip");
5876 }
5877
5878 let index = PackIndex::parse(&packed.index, format).expect("test operation should succeed");
5880 assert_eq!(index.pack_checksum, packed.checksum);
5881 for object in &objects {
5882 let oid = object
5883 .object_id(format)
5884 .expect("test operation should succeed");
5885 assert!(index.find(&oid).is_some(), "index missing {oid}");
5886 }
5887 }
5888
5889 #[test]
5890 fn write_packed_emits_ofs_delta_by_default() {
5891 let objects = similar_blob_family(6);
5892 let packed = PackFile::write_packed(&objects, ObjectFormat::Sha1)
5893 .expect("test operation should succeed");
5894 let kinds = pack_entry_kinds(&packed.pack, ObjectFormat::Sha1);
5895 assert!(
5896 kinds.contains(&PackObjectKind::OfsDelta),
5897 "expected an ofs-delta entry by default, found {kinds:?}"
5898 );
5899 assert!(
5900 !kinds.contains(&PackObjectKind::RefDelta),
5901 "default self-contained pack must not use ref-delta, found {kinds:?}"
5902 );
5903 assert!(PackFile::parse(&packed.pack, ObjectFormat::Sha1).is_ok());
5905 }
5906
5907 #[test]
5908 fn write_packed_can_emit_ref_delta() {
5909 let objects = similar_blob_family(6);
5910 let options = PackWriteOptions::new().with_prefer_ofs_delta(false);
5911 let packed = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
5912 .expect("test operation should succeed");
5913 let kinds = pack_entry_kinds(&packed.pack, ObjectFormat::Sha1);
5914 assert!(
5915 kinds.contains(&PackObjectKind::RefDelta),
5916 "expected a ref-delta entry, found {kinds:?}"
5917 );
5918 assert!(
5919 !kinds.contains(&PackObjectKind::OfsDelta),
5920 "ref-delta mode must not emit ofs-delta, found {kinds:?}"
5921 );
5922
5923 let parsed = PackFile::parse(&packed.pack, ObjectFormat::Sha1)
5926 .expect("test operation should succeed");
5927 assert_eq!(parsed.entries.len(), objects.len());
5928 }
5929
5930 #[test]
5931 fn write_packed_bounds_delta_chain_depth() {
5932 let objects = incremental_blob_chain(20);
5936 let format = ObjectFormat::Sha1;
5937
5938 for max_depth in [1usize, 2, 5] {
5939 let options = PackWriteOptions::new()
5940 .with_window(20)
5941 .with_depth(max_depth);
5942 let packed = PackFile::write_packed_with_options(&objects, format, &options)
5943 .expect("test operation should succeed");
5944
5945 let depths = pack_entry_depths(&packed.pack, format);
5946 let observed = depths.iter().copied().max().unwrap_or(0);
5947 assert!(
5948 observed <= max_depth,
5949 "max chain depth {observed} exceeded bound {max_depth}"
5950 );
5951
5952 let parsed =
5954 PackFile::parse(&packed.pack, format).expect("test operation should succeed");
5955 for object in &objects {
5956 let oid = object
5957 .object_id(format)
5958 .expect("test operation should succeed");
5959 let found = parsed
5960 .entries
5961 .iter()
5962 .find(|entry| entry.entry.oid == oid)
5963 .expect("test operation should succeed");
5964 assert_eq!(&found.object, object);
5965 }
5966 }
5967 }
5968
5969 #[test]
5970 fn write_packed_depth_zero_stores_everything_undeltified() {
5971 let objects = similar_blob_family(5);
5972 let options = PackWriteOptions::new().with_depth(0);
5973 let packed = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
5974 .expect("test operation should succeed");
5975 let kinds = pack_entry_kinds(&packed.pack, ObjectFormat::Sha1);
5976 assert!(
5977 kinds
5978 .iter()
5979 .all(|kind| !matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta)),
5980 "depth 0 must disable deltas, found {kinds:?}"
5981 );
5982 }
5983
5984 #[test]
5985 fn write_thin_uses_external_base_and_round_trips_sha1() {
5986 write_thin_uses_external_base_and_round_trips(ObjectFormat::Sha1);
5987 }
5988
5989 #[test]
5990 fn write_thin_uses_external_base_and_round_trips_sha256() {
5991 write_thin_uses_external_base_and_round_trips(ObjectFormat::Sha256);
5992 }
5993
5994 fn write_thin_uses_external_base_and_round_trips(format: ObjectFormat) {
5995 let base = blob_with_marker("EXTERNAL-BASE");
5998 let target = blob_with_marker("EXTERNAL-TARGET");
5999 let base_oid = base
6000 .object_id(format)
6001 .expect("test operation should succeed");
6002
6003 let mut external = HashMap::new();
6004 external.insert(base_oid, base.clone());
6005 let packed = PackFile::write_thin(std::slice::from_ref(&target), format, external)
6006 .expect("test operation should succeed");
6007
6008 let kinds = pack_entry_kinds(&packed.pack, format);
6010 assert_eq!(kinds, vec![PackObjectKind::RefDelta]);
6011
6012 let mut offset = 12usize;
6014 let header =
6015 parse_entry_header(&packed.pack, &mut offset).expect("test operation should succeed");
6016 assert_eq!(header.kind, PackObjectKind::RefDelta);
6017 let referenced =
6018 ObjectId::from_raw(format, &packed.pack[offset..offset + format.raw_len()])
6019 .expect("test operation should succeed");
6020 assert_eq!(referenced, base_oid);
6021
6022 assert!(PackFile::parse(&packed.pack, format).is_err());
6024
6025 let parsed = PackFile::parse_thin(&packed.pack, format, |oid| {
6027 if oid == &base_oid {
6028 Ok(Some(base.clone()))
6029 } else {
6030 Ok(None)
6031 }
6032 })
6033 .expect("test operation should succeed");
6034 assert_eq!(parsed.entries.len(), 1);
6035 assert_eq!(parsed.entries[0].object, target);
6036 }
6037
6038 #[test]
6039 fn write_packed_preserves_distinct_objects_with_no_similarity() {
6040 let objects = vec![
6043 EncodedObject::new(ObjectType::Blob, b"alpha distinct\n".to_vec()),
6044 EncodedObject::new(ObjectType::Tree, vec![0u8; 0]),
6045 EncodedObject::new(ObjectType::Commit, b"tree 0000\n".to_vec()),
6046 ];
6047 let format = ObjectFormat::Sha1;
6048 let packed =
6049 PackFile::write_packed(&objects, format).expect("test operation should succeed");
6050 let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
6051 assert_eq!(parsed.entries.len(), objects.len());
6052 for object in &objects {
6053 let oid = object
6054 .object_id(format)
6055 .expect("test operation should succeed");
6056 assert!(parsed.entries.iter().any(|entry| entry.entry.oid == oid));
6057 }
6058 }
6059
6060 fn similar_blob_family(count: usize) -> Vec<EncodedObject> {
6064 let mut common_head = Vec::new();
6065 for _ in 0..200 {
6066 common_head.extend_from_slice(b"shared header line for delta testing\n");
6067 }
6068 let mut common_tail = Vec::new();
6069 for _ in 0..200 {
6070 common_tail.extend_from_slice(b"shared trailer line for delta testing\n");
6071 }
6072 (0..count)
6073 .map(|idx| {
6074 let mut body = common_head.clone();
6075 body.extend_from_slice(format!("UNIQUE MIDDLE MARKER NUMBER {idx}\n").as_bytes());
6076 body.extend_from_slice(&common_tail);
6077 EncodedObject::new(ObjectType::Blob, body)
6078 })
6079 .collect()
6080 }
6081
6082 fn incremental_blob_chain(count: usize) -> Vec<EncodedObject> {
6085 let mut body = Vec::new();
6086 for _ in 0..100 {
6087 body.extend_from_slice(b"baseline content shared across the whole chain\n");
6088 }
6089 let mut objects = Vec::with_capacity(count);
6090 for idx in 0..count {
6091 body.extend_from_slice(format!("appended unique line {idx}\n").as_bytes());
6092 objects.push(EncodedObject::new(ObjectType::Blob, body.clone()));
6093 }
6094 objects
6095 }
6096
6097 fn blob_with_marker(marker: &str) -> EncodedObject {
6098 let mut body = Vec::new();
6099 for _ in 0..150 {
6100 body.extend_from_slice(b"common body shared between base and target\n");
6101 }
6102 body.extend_from_slice(marker.as_bytes());
6103 body.push(b'\n');
6104 for _ in 0..150 {
6105 body.extend_from_slice(b"more common body shared between objects\n");
6106 }
6107 EncodedObject::new(ObjectType::Blob, body)
6108 }
6109
6110 fn pack_entry_kinds(pack: &[u8], format: ObjectFormat) -> Vec<PackObjectKind> {
6112 pack_entry_descriptors(pack, format)
6113 .into_iter()
6114 .map(|descriptor| descriptor.kind)
6115 .collect()
6116 }
6117
6118 fn pack_entry_depths(pack: &[u8], format: ObjectFormat) -> Vec<usize> {
6122 let descriptors = pack_entry_descriptors(pack, format);
6123 let mut depth_by_offset: HashMap<u64, usize> = HashMap::new();
6124 let mut depths = Vec::with_capacity(descriptors.len());
6125 for descriptor in &descriptors {
6126 let depth = match &descriptor.base {
6127 EntryBase::None => 0,
6128 EntryBase::Offset(base_offset) => {
6129 depth_by_offset.get(base_offset).copied().unwrap_or(0) + 1
6130 }
6131 EntryBase::Ref => 1,
6135 };
6136 depth_by_offset.insert(descriptor.offset, depth);
6137 depths.push(depth);
6138 }
6139 depths
6140 }
6141
6142 struct EntryDescriptor {
6143 offset: u64,
6144 kind: PackObjectKind,
6145 base: EntryBase,
6146 }
6147
6148 enum EntryBase {
6149 None,
6150 Offset(u64),
6151 Ref,
6152 }
6153
6154 fn pack_entry_descriptors(pack: &[u8], format: ObjectFormat) -> Vec<EntryDescriptor> {
6155 let trailer_offset = pack.len() - format.raw_len();
6156 let count = u32_be(&pack[8..12]) as usize;
6157 let mut offset = 12usize;
6158 let mut descriptors = Vec::with_capacity(count);
6159 for _ in 0..count {
6160 let entry_offset = offset as u64;
6161 let header =
6162 parse_entry_header(pack, &mut offset).expect("test operation should succeed");
6163 let base = match header.kind {
6164 PackObjectKind::OfsDelta => {
6165 let base_offset = parse_ofs_delta_base_offset(pack, &mut offset, entry_offset)
6166 .expect("test operation should succeed");
6167 EntryBase::Offset(base_offset)
6168 }
6169 PackObjectKind::RefDelta => {
6170 offset += format.raw_len();
6171 EntryBase::Ref
6172 }
6173 _ => EntryBase::None,
6174 };
6175 let mut decoder = ZlibDecoder::new(&pack[offset..trailer_offset]);
6176 let mut body = Vec::new();
6177 decoder
6178 .read_to_end(&mut body)
6179 .expect("test operation should succeed");
6180 offset += decoder.total_in() as usize;
6181 descriptors.push(EntryDescriptor {
6182 offset: entry_offset,
6183 kind: header.kind,
6184 base,
6185 });
6186 }
6187 descriptors
6188 }
6189
6190 fn similar_blob_objects() -> (EncodedObject, EncodedObject) {
6191 let mut base = Vec::new();
6192 for _ in 0..300 {
6193 base.extend_from_slice(b"common payload\n");
6194 }
6195 base.extend_from_slice(b"base\n");
6196 let mut changed = Vec::new();
6197 for _ in 0..300 {
6198 changed.extend_from_slice(b"common payload\n");
6199 }
6200 changed.extend_from_slice(b"changed\n");
6201 (
6202 EncodedObject::new(ObjectType::Blob, base),
6203 EncodedObject::new(ObjectType::Blob, changed),
6204 )
6205 }
6206
6207 fn single_object_pack(format: ObjectFormat, object_type: ObjectType, body: &[u8]) -> Vec<u8> {
6208 let mut pack = Vec::new();
6209 pack.extend_from_slice(b"PACK");
6210 pack.extend_from_slice(&2u32.to_be_bytes());
6211 pack.extend_from_slice(&1u32.to_be_bytes());
6212 write_entry_header(&mut pack, object_type, body.len() as u64);
6213 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
6214 encoder
6215 .write_all(body)
6216 .expect("test operation should succeed");
6217 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
6218 let checksum =
6219 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
6220 pack.extend_from_slice(checksum.as_bytes());
6221 pack
6222 }
6223
6224 #[derive(Clone, Copy, Debug)]
6225 enum DeltaKind {
6226 Offset,
6227 Ref,
6228 }
6229
6230 fn two_object_delta_pack(
6231 format: ObjectFormat,
6232 base: &[u8],
6233 result: &[u8],
6234 delta_kind: DeltaKind,
6235 ) -> Vec<u8> {
6236 let mut pack = Vec::new();
6237 pack.extend_from_slice(b"PACK");
6238 pack.extend_from_slice(&2u32.to_be_bytes());
6239 pack.extend_from_slice(&2u32.to_be_bytes());
6240
6241 let base_offset = pack.len();
6242 write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
6243 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
6244 encoder
6245 .write_all(base)
6246 .expect("test operation should succeed");
6247 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
6248
6249 let delta = append_suffix_delta(base, result);
6250 let delta_offset = pack.len();
6251 write_pack_entry_header_kind(
6252 &mut pack,
6253 match delta_kind {
6254 DeltaKind::Offset => 6,
6255 DeltaKind::Ref => 7,
6256 },
6257 delta.len() as u64,
6258 );
6259 match delta_kind {
6260 DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
6261 DeltaKind::Ref => {
6262 let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
6263 .expect("test operation should succeed");
6264 pack.extend_from_slice(base_oid.as_bytes());
6265 }
6266 }
6267 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
6268 encoder
6269 .write_all(&delta)
6270 .expect("test operation should succeed");
6271 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
6272
6273 let checksum =
6274 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
6275 pack.extend_from_slice(checksum.as_bytes());
6276 pack
6277 }
6278
6279 fn thin_ref_delta_pack(format: ObjectFormat, base: &[u8], result: &[u8]) -> Vec<u8> {
6280 let mut pack = Vec::new();
6281 pack.extend_from_slice(b"PACK");
6282 pack.extend_from_slice(&2u32.to_be_bytes());
6283 pack.extend_from_slice(&1u32.to_be_bytes());
6284
6285 let delta = append_suffix_delta(base, result);
6286 write_pack_entry_header_kind(&mut pack, 7, delta.len() as u64);
6287 let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
6288 .expect("test operation should succeed");
6289 pack.extend_from_slice(base_oid.as_bytes());
6290 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
6291 encoder
6292 .write_all(&delta)
6293 .expect("test operation should succeed");
6294 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
6295
6296 let checksum =
6297 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
6298 pack.extend_from_slice(checksum.as_bytes());
6299 pack
6300 }
6301
6302 fn unique_temp_dir(name: &str) -> PathBuf {
6303 let nanos = SystemTime::now()
6304 .duration_since(UNIX_EPOCH)
6305 .expect("test operation should succeed")
6306 .as_nanos();
6307 std::env::temp_dir().join(format!("sley-{name}-{}-{nanos}", std::process::id()))
6308 }
6309
6310 fn run_git_success(cwd: &Path, args: &[&str]) {
6311 let output = Command::new("git")
6312 .current_dir(cwd)
6313 .args(args)
6314 .output()
6315 .unwrap_or_else(|err| panic!("failed to run git {args:?}: {err}"));
6316 assert!(
6317 output.status.success(),
6318 "git {args:?} failed with status {:?}\nstdout:\n{}\nstderr:\n{}",
6319 output.status.code(),
6320 String::from_utf8_lossy(&output.stdout),
6321 String::from_utf8_lossy(&output.stderr)
6322 );
6323 }
6324
6325 fn single_path_with_extension(dir: &Path, extension: &str) -> PathBuf {
6326 let mut paths = fs::read_dir(dir)
6327 .expect("test operation should succeed")
6328 .map(|entry| entry.expect("test operation should succeed").path())
6329 .filter(|path| path.extension().and_then(|ext| ext.to_str()) == Some(extension))
6330 .collect::<Vec<_>>();
6331 assert_eq!(paths.len(), 1, "expected one .{extension} file");
6332 paths.remove(0)
6333 }
6334
6335 fn pack_bitmap_index(
6336 format: ObjectFormat,
6337 object_count: u32,
6338 options: u16,
6339 pack_checksum: &ObjectId,
6340 entries: &[(u32, u8, u8, &[u64])],
6341 name_hash_cache: Option<&[u32]>,
6342 ) -> Vec<u8> {
6343 let mut out = Vec::new();
6344 out.extend_from_slice(b"BITM");
6345 out.extend_from_slice(&1u16.to_be_bytes());
6346 out.extend_from_slice(&options.to_be_bytes());
6347 out.extend_from_slice(&(entries.len() as u32).to_be_bytes());
6348 out.extend_from_slice(pack_checksum.as_bytes());
6349 write_test_ewah(&mut out, object_count, &[0b001]);
6350 write_test_ewah(&mut out, object_count, &[0b010]);
6351 write_test_ewah(&mut out, object_count, &[0b100]);
6352 write_test_ewah(&mut out, object_count, &[0]);
6353 for (position, xor_offset, flags, words) in entries {
6354 out.extend_from_slice(&position.to_be_bytes());
6355 out.push(*xor_offset);
6356 out.push(*flags);
6357 write_test_ewah(&mut out, object_count, words);
6358 }
6359 if let Some(cache) = name_hash_cache {
6360 for value in cache {
6361 out.extend_from_slice(&value.to_be_bytes());
6362 }
6363 }
6364 let checksum =
6365 sley_core::digest_bytes(format, &out).expect("test operation should succeed");
6366 out.extend_from_slice(checksum.as_bytes());
6367 out
6368 }
6369
6370 fn write_test_ewah(out: &mut Vec<u8>, bit_size: u32, literals: &[u64]) {
6371 out.extend_from_slice(&bit_size.to_be_bytes());
6372 let words = ewah_literal_words(literals);
6373 out.extend_from_slice(&(words.len() as u32).to_be_bytes());
6374 for word in words {
6375 out.extend_from_slice(&word.to_be_bytes());
6376 }
6377 out.extend_from_slice(&0u32.to_be_bytes());
6378 }
6379
6380 fn ewah_literal_words(literals: &[u64]) -> Vec<u64> {
6381 let rlw = (literals.len() as u64) << 33;
6382 let mut words = vec![rlw];
6383 words.extend_from_slice(literals);
6384 words
6385 }
6386
6387 fn refresh_trailing_checksum(format: ObjectFormat, bytes: &mut [u8]) {
6388 let checksum_offset = bytes.len() - format.raw_len();
6389 let checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])
6390 .expect("test operation should succeed");
6391 bytes[checksum_offset..].copy_from_slice(checksum.as_bytes());
6392 }
6393
6394 fn append_suffix_delta(base: &[u8], result: &[u8]) -> Vec<u8> {
6395 assert!(result.starts_with(base));
6396 let suffix = &result[base.len()..];
6397 assert!(base.len() < 0x10000);
6398 assert!(suffix.len() < 0x80);
6399 let mut delta = Vec::new();
6400 write_delta_varint(&mut delta, base.len() as u64);
6401 write_delta_varint(&mut delta, result.len() as u64);
6402 delta.push(0x90);
6403 delta.push(base.len() as u8);
6404 delta.push(suffix.len() as u8);
6405 delta.extend_from_slice(suffix);
6406 delta
6407 }
6408
6409 fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
6410 loop {
6411 let mut byte = (value as u8) & 0x7f;
6412 value >>= 7;
6413 if value != 0 {
6414 byte |= 0x80;
6415 }
6416 out.push(byte);
6417 if value == 0 {
6418 break;
6419 }
6420 }
6421 }
6422
6423 fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, mut size: u64) {
6424 let mut byte = (type_code << 4) | ((size as u8) & 0x0f);
6425 size >>= 4;
6426 if size != 0 {
6427 byte |= 0x80;
6428 }
6429 out.push(byte);
6430 while size != 0 {
6431 let mut byte = (size as u8) & 0x7f;
6432 size >>= 7;
6433 if size != 0 {
6434 byte |= 0x80;
6435 }
6436 out.push(byte);
6437 }
6438 }
6439
6440 fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: usize) {
6441 assert!(relative < 0x80);
6442 out.push(relative as u8);
6443 }
6444
6445 fn single_entry_index(
6446 format: ObjectFormat,
6447 oid: ObjectId,
6448 crc32: u32,
6449 offset: u32,
6450 pack_checksum: ObjectId,
6451 ) -> Vec<u8> {
6452 let mut index = Vec::new();
6453 index.extend_from_slice(&[0xff, b't', b'O', b'c']);
6454 index.extend_from_slice(&2u32.to_be_bytes());
6455 for idx in 0..256 {
6456 let count = if idx >= usize::from(oid.as_bytes()[0]) {
6457 1u32
6458 } else {
6459 0u32
6460 };
6461 index.extend_from_slice(&count.to_be_bytes());
6462 }
6463 index.extend_from_slice(oid.as_bytes());
6464 index.extend_from_slice(&crc32.to_be_bytes());
6465 index.extend_from_slice(&offset.to_be_bytes());
6466 index.extend_from_slice(pack_checksum.as_bytes());
6467 let checksum =
6468 sley_core::digest_bytes(format, &index).expect("test operation should succeed");
6469 index.extend_from_slice(checksum.as_bytes());
6470 index
6471 }
6472
6473 fn single_entry_index_v1(
6474 format: ObjectFormat,
6475 oid: ObjectId,
6476 offset: u32,
6477 pack_checksum: ObjectId,
6478 ) -> Vec<u8> {
6479 let mut index = Vec::new();
6480 for idx in 0..256 {
6481 let count = if idx >= usize::from(oid.as_bytes()[0]) {
6482 1u32
6483 } else {
6484 0u32
6485 };
6486 index.extend_from_slice(&count.to_be_bytes());
6487 }
6488 index.extend_from_slice(&offset.to_be_bytes());
6489 index.extend_from_slice(oid.as_bytes());
6490 index.extend_from_slice(pack_checksum.as_bytes());
6491 let checksum =
6492 sley_core::digest_bytes(format, &index).expect("test operation should succeed");
6493 index.extend_from_slice(checksum.as_bytes());
6494 index
6495 }
6496
6497 fn pack_reverse_index(
6498 format: ObjectFormat,
6499 positions: &[u32],
6500 pack_checksum: ObjectId,
6501 ) -> Vec<u8> {
6502 let mut reverse_index = Vec::new();
6503 reverse_index.extend_from_slice(b"RIDX");
6504 reverse_index.extend_from_slice(&1u32.to_be_bytes());
6505 reverse_index.extend_from_slice(&hash_function_id(format).to_be_bytes());
6506 for position in positions {
6507 reverse_index.extend_from_slice(&position.to_be_bytes());
6508 }
6509 reverse_index.extend_from_slice(pack_checksum.as_bytes());
6510 let checksum =
6511 sley_core::digest_bytes(format, &reverse_index).expect("test operation should succeed");
6512 reverse_index.extend_from_slice(checksum.as_bytes());
6513 reverse_index
6514 }
6515
6516 fn pack_mtimes(format: ObjectFormat, mtimes: &[u32], pack_checksum: ObjectId) -> Vec<u8> {
6517 let mut out = Vec::new();
6518 out.extend_from_slice(b"MTME");
6519 out.extend_from_slice(&1u32.to_be_bytes());
6520 out.extend_from_slice(&hash_function_id(format).to_be_bytes());
6521 for mtime in mtimes {
6522 out.extend_from_slice(&mtime.to_be_bytes());
6523 }
6524 out.extend_from_slice(pack_checksum.as_bytes());
6525 let checksum =
6526 sley_core::digest_bytes(format, &out).expect("test operation should succeed");
6527 out.extend_from_slice(checksum.as_bytes());
6528 out
6529 }
6530
6531 fn midx_chunks_with_pack_names(
6532 _format: ObjectFormat,
6533 pack_names: Vec<u8>,
6534 entries: &[(ObjectId, u32, u64)],
6535 ) -> Vec<([u8; 4], Vec<u8>)> {
6536 let mut entries = entries.to_vec();
6537 entries.sort_by(|left, right| left.0.as_bytes().cmp(right.0.as_bytes()));
6538 let object_ids: Vec<ObjectId> = entries.iter().map(|entry| entry.0).collect();
6539 let mut large_offsets = Vec::new();
6540 let mut chunks = vec![
6541 (*b"PNAM", pack_names),
6542 (*b"OIDF", midx_oid_fanout(&object_ids)),
6543 (*b"OIDL", midx_oid_lookup(&object_ids)),
6544 (
6545 *b"OOFF",
6546 midx_ooff_entries(
6547 &entries
6548 .iter()
6549 .map(|(_oid, pack_int_id, offset)| (*pack_int_id, *offset))
6550 .collect::<Vec<_>>(),
6551 &mut large_offsets,
6552 ),
6553 ),
6554 ];
6555 if !large_offsets.is_empty() {
6556 chunks.push((*b"LOFF", large_offsets));
6557 }
6558 chunks
6559 }
6560
6561 fn midx_oid_fanout(object_ids: &[ObjectId]) -> Vec<u8> {
6562 let mut counts = [0u32; 256];
6563 for oid in object_ids {
6564 counts[oid.as_bytes()[0] as usize] += 1;
6565 }
6566 let mut running = 0u32;
6567 let mut out = Vec::new();
6568 for count in counts {
6569 running += count;
6570 out.extend_from_slice(&running.to_be_bytes());
6571 }
6572 out
6573 }
6574
6575 fn midx_oid_lookup(object_ids: &[ObjectId]) -> Vec<u8> {
6576 let mut out = Vec::new();
6577 for oid in object_ids {
6578 out.extend_from_slice(oid.as_bytes());
6579 }
6580 out
6581 }
6582
6583 fn midx_ooff_entries(entries: &[(u32, u64)], large_offsets: &mut Vec<u8>) -> Vec<u8> {
6584 let mut out = Vec::new();
6585 for (pack_int_id, offset) in entries {
6586 out.extend_from_slice(&pack_int_id.to_be_bytes());
6587 if *offset < 0x8000_0000 {
6588 out.extend_from_slice(&(*offset as u32).to_be_bytes());
6589 } else {
6590 let large_idx = (large_offsets.len() / 8) as u32;
6591 out.extend_from_slice(&(0x8000_0000 | large_idx).to_be_bytes());
6592 large_offsets.extend_from_slice(&offset.to_be_bytes());
6593 }
6594 }
6595 out
6596 }
6597
6598 fn midx_u32_table(values: &[u32]) -> Vec<u8> {
6599 let mut out = Vec::new();
6600 for value in values {
6601 out.extend_from_slice(&value.to_be_bytes());
6602 }
6603 out
6604 }
6605
6606 fn midx_bitmap_packs(entries: &[(u32, u32)]) -> Vec<u8> {
6607 let mut out = Vec::new();
6608 for (bitmap_pos, bitmap_nr) in entries {
6609 out.extend_from_slice(&bitmap_pos.to_be_bytes());
6610 out.extend_from_slice(&bitmap_nr.to_be_bytes());
6611 }
6612 out
6613 }
6614
6615 fn multi_pack_index(
6616 format: ObjectFormat,
6617 version: u8,
6618 pack_count: u32,
6619 chunks: &[([u8; 4], Vec<u8>)],
6620 ) -> Vec<u8> {
6621 let lookup_len = (chunks.len() + 1) * 12;
6622 let mut out = Vec::new();
6623 out.extend_from_slice(b"MIDX");
6624 out.push(version);
6625 out.push(hash_function_id(format) as u8);
6626 out.push(chunks.len() as u8);
6627 out.push(0);
6628 out.extend_from_slice(&pack_count.to_be_bytes());
6629 let mut chunk_offset = (12 + lookup_len) as u64;
6630 for (id, data) in chunks {
6631 out.extend_from_slice(id);
6632 out.extend_from_slice(&chunk_offset.to_be_bytes());
6633 chunk_offset += data.len() as u64;
6634 }
6635 out.extend_from_slice(&[0, 0, 0, 0]);
6636 out.extend_from_slice(&chunk_offset.to_be_bytes());
6637 for (_id, data) in chunks {
6638 out.extend_from_slice(data);
6639 }
6640 let checksum =
6641 sley_core::digest_bytes(format, &out).expect("test operation should succeed");
6642 out.extend_from_slice(checksum.as_bytes());
6643 out
6644 }
6645
6646 fn pack_checksum_sha1() -> ObjectId {
6649 sley_core::digest_bytes(ObjectFormat::Sha1, b"pack").expect("test operation should succeed")
6650 }
6651
6652 fn parse_ewah_bytes(bytes: &[u8]) -> EwahBitmap {
6653 let mut offset = 0usize;
6656 let checksum_offset = bytes.len();
6657 parse_bitmap_ewah(bytes, &mut offset, checksum_offset, 0)
6658 .expect("test operation should succeed")
6659 }
6660
6661 #[test]
6662 fn ewah_encodes_single_literal_word_matching_helper() {
6663 let ewah = EwahBitmap::from_words(64, &[0b101]).expect("test operation should succeed");
6667 assert_eq!(ewah.words, ewah_literal_words(&[0b101]));
6668 assert_eq!(ewah.rlw_position, 0);
6669 assert_eq!(ewah.bit_size, 64);
6670 }
6671
6672 #[test]
6673 fn ewah_byte_layout_is_big_endian() {
6674 let ewah = EwahBitmap::from_words(64, &[0x0102_0304_0506_0708])
6675 .expect("test operation should succeed");
6676 let bytes = ewah.to_bytes();
6677 let mut expected = Vec::new();
6678 expected.extend_from_slice(&64u32.to_be_bytes()); expected.extend_from_slice(&2u32.to_be_bytes()); expected.extend_from_slice(&(1u64 << 33).to_be_bytes()); expected.extend_from_slice(&0x0102_0304_0506_0708u64.to_be_bytes());
6682 expected.extend_from_slice(&0u32.to_be_bytes()); assert_eq!(bytes, expected);
6684 }
6685
6686 #[test]
6687 fn ewah_empty_bitmap_serialises_like_git() {
6688 let ewah = EwahBitmap::empty();
6689 let bytes = ewah.to_bytes();
6690 assert_eq!(bytes, vec![0u8; 12]);
6692 let parsed = parse_ewah_bytes(&bytes);
6694 assert_eq!(parsed, ewah);
6695 assert!(
6696 parsed
6697 .to_positions()
6698 .expect("test operation should succeed")
6699 .is_empty()
6700 );
6701 }
6702
6703 #[test]
6704 fn ewah_compresses_clean_zero_run() {
6705 let ewah =
6708 EwahBitmap::from_words(256, &[0, 0, 0, 0b1]).expect("test operation should succeed");
6709 assert_eq!(ewah.words.len(), 2, "expected one RLW plus one literal");
6710 let rlw = ewah.words[0];
6711 assert_eq!(rlw & 1, 0, "run bit should be zero");
6712 assert_eq!((rlw >> 1) & 0xffff_ffff, 3, "run length should be 3");
6713 assert_eq!(rlw >> 33, 1, "literal length should be 1");
6714 assert_eq!(ewah.words[1], 0b1);
6715 }
6716
6717 #[test]
6718 fn ewah_compresses_clean_ones_run() {
6719 let ewah = EwahBitmap::from_words(192, &[u64::MAX, u64::MAX, u64::MAX])
6720 .expect("test operation should succeed");
6721 assert_eq!(ewah.words.len(), 1);
6723 let rlw = ewah.words[0];
6724 assert_eq!(rlw & 1, 1, "run bit should be one");
6725 assert_eq!((rlw >> 1) & 0xffff_ffff, 3, "run length should be 3");
6726 assert_eq!(rlw >> 33, 0, "no literals");
6727 }
6728
6729 #[test]
6730 fn ewah_run_then_literal_then_run_roundtrips() {
6731 let words = vec![0, 0, 0xdead_beef, u64::MAX, u64::MAX, 0, 0xabc];
6732 let bit_size = (words.len() * 64) as u32;
6733 let ewah = EwahBitmap::from_words(bit_size, &words).expect("test operation should succeed");
6734 assert_eq!(
6735 ewah.to_words().expect("test operation should succeed"),
6736 words
6737 );
6738 }
6739
6740 #[test]
6741 fn ewah_drops_trailing_clean_zero_words() {
6742 let words = vec![0b1, 0, 0, 0];
6745 let ewah = EwahBitmap::from_words(1, &words).expect("test operation should succeed");
6746 assert_eq!(ewah.bit_size, 1);
6748 assert_eq!(
6749 ewah.to_words().expect("test operation should succeed"),
6750 vec![0b1]
6751 );
6752 }
6753
6754 #[test]
6755 fn ewah_from_positions_roundtrips_via_positions() {
6756 let positions = [0u32, 1, 63, 64, 65, 200, 511];
6757 let ewah =
6758 EwahBitmap::from_positions(512, &positions).expect("test operation should succeed");
6759 let mut decoded = ewah.to_positions().expect("test operation should succeed");
6760 decoded.sort_unstable();
6761 assert_eq!(decoded, positions);
6762 }
6763
6764 #[test]
6765 fn ewah_from_positions_dedupes_and_orders() {
6766 let ewah = EwahBitmap::from_positions(128, &[100, 5, 100, 5, 5])
6767 .expect("test operation should succeed");
6768 assert_eq!(
6769 ewah.to_positions().expect("test operation should succeed"),
6770 vec![5, 100]
6771 );
6772 }
6773
6774 #[test]
6775 fn ewah_huge_zero_run_spans_multiple_rlws() {
6776 let mut builder = EwahBuilder::new(0);
6781 builder.add_empty_words(false, 0xffff_ffff);
6782 builder.add_empty_words(false, 5);
6783 let ewah = builder.finish().expect("test operation should succeed");
6784 assert_eq!(ewah.words.len(), 2, "run split across two RLWs");
6785 assert_eq!((ewah.words[0] >> 1) & 0xffff_ffff, 0xffff_ffff);
6786 assert_eq!(ewah.words[1] & 1, 0);
6787 assert_eq!((ewah.words[1] >> 1) & 0xffff_ffff, 5);
6788 assert_eq!(ewah.rlw_position, 1);
6789 }
6790
6791 #[test]
6792 fn ewah_from_words_rejects_oversized_bit_size() {
6793 assert!(EwahBitmap::from_words(65, &[0]).is_err());
6795 }
6796
6797 #[test]
6798 fn ewah_from_positions_rejects_out_of_range() {
6799 assert!(EwahBitmap::from_positions(64, &[64]).is_err());
6800 }
6801
6802 #[test]
6803 fn ewah_serialised_bytes_reparse_to_equal_bitmap() {
6804 let words = vec![0, u64::MAX, 0x1234_5678_9abc_def0, 0, 0, 0xff];
6807 let bit_size = (words.len() * 64) as u32;
6808 let ewah = EwahBitmap::from_words(bit_size, &words).expect("test operation should succeed");
6809 let bytes = ewah.to_bytes();
6810 let parsed = parse_ewah_bytes(&bytes);
6811 assert_eq!(parsed, ewah);
6812 assert_eq!(
6813 parsed.to_words().expect("test operation should succeed"),
6814 words
6815 );
6816 }
6817
6818 #[test]
6819 fn pack_bitmap_index_write_parse_roundtrip_sha1() {
6820 let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
6822 let bytes = write_bitmap(
6823 ObjectFormat::Sha1,
6824 pack_checksum_sha1(),
6825 &object_types,
6826 &[(0u32, 0u32, vec![1u32, 2u32])],
6827 None,
6828 )
6829 .expect("test operation should succeed");
6830 assert_eq!(&bytes[..4], b"BITM");
6831
6832 let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
6833 .expect("test operation should succeed");
6834 assert_eq!(parsed.version, 1);
6835 assert_eq!(parsed.options, PackBitmapIndex::OPTION_FULL_DAG);
6836 assert_eq!(parsed.pack_checksum, pack_checksum_sha1());
6837 assert_eq!(
6838 parsed
6839 .type_bitmaps
6840 .commits
6841 .to_positions()
6842 .expect("test operation should succeed"),
6843 vec![0]
6844 );
6845 assert_eq!(
6846 parsed
6847 .type_bitmaps
6848 .trees
6849 .to_positions()
6850 .expect("test operation should succeed"),
6851 vec![1]
6852 );
6853 assert_eq!(
6854 parsed
6855 .type_bitmaps
6856 .blobs
6857 .to_positions()
6858 .expect("test operation should succeed"),
6859 vec![2]
6860 );
6861 assert!(
6862 parsed
6863 .type_bitmaps
6864 .tags
6865 .to_positions()
6866 .expect("test operation should succeed")
6867 .is_empty()
6868 );
6869 assert_eq!(parsed.entries.len(), 1);
6870 let entry = parsed
6871 .entry_for_index_position(0)
6872 .expect("test operation should succeed");
6873 assert_eq!(entry.xor_offset, 0);
6874 assert_eq!(entry.flags, 0);
6875 assert_eq!(
6876 entry
6877 .bitmap
6878 .to_positions()
6879 .expect("test operation should succeed"),
6880 vec![0, 1, 2]
6881 );
6882 assert_eq!(parsed.name_hash_cache, None);
6883 }
6884
6885 #[test]
6886 fn pack_bitmap_index_write_parse_roundtrip_sha256() {
6887 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
6888 .expect("test operation should succeed");
6889 let object_types = [ObjectType::Commit, ObjectType::Tree];
6890 let bytes = write_bitmap(
6891 ObjectFormat::Sha256,
6892 pack_checksum.clone(),
6893 &object_types,
6894 &[(0u32, 0u32, vec![1u32])],
6895 None,
6896 )
6897 .expect("test operation should succeed");
6898 let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha256, 2)
6899 .expect("test operation should succeed");
6900 assert_eq!(parsed.format, ObjectFormat::Sha256);
6901 assert_eq!(parsed.pack_checksum, pack_checksum);
6902 assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
6903 assert_eq!(
6904 parsed.entries[0]
6905 .bitmap
6906 .to_positions()
6907 .expect("test operation should succeed"),
6908 vec![0, 1]
6909 );
6910 }
6911
6912 #[test]
6913 fn pack_bitmap_index_write_includes_name_hash_cache() {
6914 let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
6915 let cache = vec![0x1111_1111u32, 0x2222_2222, 0x3333_3333];
6916 let bytes = write_bitmap(
6917 ObjectFormat::Sha1,
6918 pack_checksum_sha1(),
6919 &object_types,
6920 &[(0u32, 0u32, vec![1u32, 2u32])],
6921 Some(cache.clone()),
6922 )
6923 .expect("test operation should succeed");
6924 let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
6925 .expect("test operation should succeed");
6926 assert_eq!(
6927 parsed.options,
6928 PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE
6929 );
6930 assert_eq!(parsed.name_hash_cache, Some(cache));
6931 }
6932
6933 #[test]
6934 fn pack_bitmap_writer_supports_multiple_commits() {
6935 let object_types = [
6936 ObjectType::Commit,
6937 ObjectType::Commit,
6938 ObjectType::Tree,
6939 ObjectType::Blob,
6940 ];
6941 let mut writer =
6942 PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
6943 .expect("test operation should succeed");
6944 writer
6945 .add_commit(0, 0, &[2, 3])
6946 .expect("test operation should succeed");
6947 writer
6948 .add_commit(1, 1, &[2])
6949 .expect("test operation should succeed");
6950 let bytes = writer.write().expect("test operation should succeed");
6951 let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 4)
6952 .expect("test operation should succeed");
6953 assert_eq!(parsed.entries.len(), 2);
6954 assert_eq!(
6955 parsed
6956 .type_bitmaps
6957 .commits
6958 .to_positions()
6959 .expect("test operation should succeed"),
6960 vec![0, 1]
6961 );
6962 let first = parsed
6963 .entry_for_index_position(0)
6964 .expect("test operation should succeed");
6965 assert_eq!(
6966 first
6967 .bitmap
6968 .to_positions()
6969 .expect("test operation should succeed"),
6970 vec![0, 2, 3]
6971 );
6972 let second = parsed
6973 .entry_for_index_position(1)
6974 .expect("test operation should succeed");
6975 assert_eq!(
6976 second
6977 .bitmap
6978 .to_positions()
6979 .expect("test operation should succeed"),
6980 vec![1, 2]
6981 );
6982 }
6983
6984 #[test]
6985 fn pack_bitmap_index_recomputes_checksum_on_write() {
6986 let object_types = [ObjectType::Commit, ObjectType::Blob];
6989 let writer = PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
6990 .expect("test operation should succeed");
6991 let mut index = writer.build().expect("test operation should succeed");
6992 assert_eq!(index.index_checksum.as_bytes(), [0u8; 20]);
6994 index.entries.clear(); index.entries.push(PackBitmapEntry {
6996 object_position: 0,
6997 xor_offset: 0,
6998 flags: 0,
6999 bitmap: EwahBitmap::from_positions(2, &[0, 1]).expect("test operation should succeed"),
7000 });
7001 let bytes = index.write().expect("test operation should succeed");
7002 let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 2)
7004 .expect("test operation should succeed");
7005 assert_ne!(parsed.index_checksum.as_bytes(), [0u8; 20]);
7006 }
7007
7008 #[test]
7009 fn pack_bitmap_writer_rejects_non_commit_selection() {
7010 let object_types = [ObjectType::Commit, ObjectType::Blob];
7011 let mut writer =
7012 PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
7013 .expect("test operation should succeed");
7014 assert!(writer.add_commit(1, 1, &[]).is_err());
7016 assert!(writer.add_commit(5, 5, &[]).is_err());
7018 assert!(writer.add_commit(0, 5, &[]).is_err());
7020 assert!(writer.add_commit(0, 0, &[9]).is_err());
7022 }
7023
7024 #[test]
7025 fn pack_bitmap_writer_rejects_checksum_format_mismatch() {
7026 let sha256_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
7027 .expect("test operation should succeed");
7028 assert!(
7029 PackBitmapWriter::new(ObjectFormat::Sha1, sha256_checksum, &[ObjectType::Commit])
7030 .is_err()
7031 );
7032 }
7033
7034 #[test]
7035 fn pack_bitmap_writer_rejects_bad_name_hash_cache_len() {
7036 let writer = PackBitmapWriter::new(
7037 ObjectFormat::Sha1,
7038 pack_checksum_sha1(),
7039 &[ObjectType::Commit],
7040 )
7041 .expect("test operation should succeed");
7042 assert!(writer.with_name_hash_cache(vec![1, 2]).is_err());
7043 }
7044
7045 #[test]
7046 fn pack_bitmap_index_write_rejects_inconsistent_cache_flag() {
7047 let mut index = PackBitmapWriter::new(
7048 ObjectFormat::Sha1,
7049 pack_checksum_sha1(),
7050 &[ObjectType::Commit],
7051 )
7052 .expect("test operation should succeed")
7053 .build()
7054 .expect("test operation should succeed");
7055 index.options |= PackBitmapIndex::OPTION_HASH_CACHE;
7057 assert!(index.write().is_err());
7058 index.options = PackBitmapIndex::OPTION_FULL_DAG;
7060 index.name_hash_cache = Some(vec![0]);
7061 assert!(index.write().is_err());
7062 }
7063
7064 #[test]
7065 fn write_bitmap_roundtrips_through_upstream_git_parser() {
7066 let root = unique_temp_dir("git-pack-bitmap-writer");
7070 fs::create_dir_all(&root).expect("test operation should succeed");
7071 {
7072 run_git_success(&root, &["init", "-q", "-b", "main"]);
7073 run_git_success(
7074 &root,
7075 &[
7076 "-c",
7077 "user.name=Example User",
7078 "-c",
7079 "user.email=example@example.invalid",
7080 "commit",
7081 "--allow-empty",
7082 "-q",
7083 "-m",
7084 "one",
7085 ],
7086 );
7087 run_git_success(&root, &["repack", "-adb"]);
7088 let pack_dir = root.join(".git").join("objects").join("pack");
7089 let idx_path = single_path_with_extension(&pack_dir, "idx");
7090 let index = PackIndex::parse(
7091 &fs::read(idx_path).expect("test operation should succeed"),
7092 ObjectFormat::Sha1,
7093 )
7094 .expect("test operation should succeed");
7095 let pack_path = single_path_with_extension(&pack_dir, "pack");
7097 let pack =
7098 PackFile::parse_sha1(&fs::read(pack_path).expect("test operation should succeed"))
7099 .expect("test operation should succeed");
7100 let mut offsets: Vec<u64> = index.entries.iter().map(|entry| entry.offset).collect();
7103 offsets.sort_unstable();
7104 let position_of = |offset: u64| -> u32 {
7105 offsets
7106 .iter()
7107 .position(|value| *value == offset)
7108 .expect("test operation should succeed") as u32
7109 };
7110 let mut object_types = vec![ObjectType::Blob; index.entries.len()];
7111 for entry in &index.entries {
7112 let position = position_of(entry.offset) as usize;
7113 if let Some(parsed) = pack
7115 .entries
7116 .iter()
7117 .find(|po| po.entry.offset == entry.offset)
7118 {
7119 object_types[position] = parsed.object.object_type;
7120 }
7121 }
7122 let commit_position = object_types
7124 .iter()
7125 .position(|ty| *ty == ObjectType::Commit)
7126 .expect("test operation should succeed") as u32;
7127 let commit_index_position = index
7129 .entries
7130 .iter()
7131 .position(|entry| position_of(entry.offset) == commit_position)
7132 .expect("test operation should succeed")
7133 as u32;
7134 let reachable: Vec<u32> = (0..index.entries.len() as u32).collect();
7135 let bytes = write_bitmap(
7136 ObjectFormat::Sha1,
7137 index.pack_checksum.clone(),
7138 &object_types,
7139 &[(commit_position, commit_index_position, reachable)],
7140 None,
7141 )
7142 .expect("test operation should succeed");
7143 let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, index.entries.len())
7144 .expect("test operation should succeed");
7145 assert_eq!(parsed.pack_checksum, index.pack_checksum);
7146 assert_eq!(parsed.entries.len(), 1);
7147 assert_eq!(
7148 parsed.entries[0]
7149 .bitmap
7150 .to_positions()
7151 .expect("test operation should succeed")
7152 .len(),
7153 index.entries.len()
7154 );
7155 };
7156 let _ = fs::remove_dir_all(&root);
7157 }
7158}