1use std::borrow::Cow;
13use std::collections::{HashMap, HashSet};
14use std::io::{self, Read};
15
16use flate2::read::ZlibDecoder;
17use flate2::{Decompress, FlushDecompress, Status};
18use sha1::{Digest, Sha1};
19use sha2::{Digest as Sha256Digest, Sha256};
20
21use crate::error::{Error, Result};
22use crate::gitmodules;
23use crate::index::MODE_GITLINK;
24use crate::objects::{parse_commit, parse_tag, parse_tree, HashAlgo, Object, ObjectId, ObjectKind};
25use crate::odb::Odb;
26
/// Incremental digest state for one of the two supported hash algorithms.
///
/// Built from a [`HashAlgo`] by `PackHasher::new`; used both for the running
/// checksum over pack bytes and for hashing individual objects.
#[derive(Clone)]
enum PackHasher {
    /// SHA-1 state; finalizes to a 20-byte digest.
    Sha1(Sha1),
    /// SHA-256 state; finalizes to a 32-byte digest.
    Sha256(Sha256),
}
33
34impl PackHasher {
35 fn new(algo: HashAlgo) -> Self {
36 match algo {
37 HashAlgo::Sha1 => Self::Sha1(Sha1::new()),
38 HashAlgo::Sha256 => Self::Sha256(Sha256::new()),
39 }
40 }
41
42 fn update(&mut self, data: &[u8]) {
43 match self {
44 Self::Sha1(h) => Digest::update(h, data),
45 Self::Sha256(h) => Sha256Digest::update(h, data),
46 }
47 }
48
49 fn finalize(self) -> Vec<u8> {
50 match self {
51 Self::Sha1(h) => h.finalize().to_vec(),
52 Self::Sha256(h) => h.finalize().to_vec(),
53 }
54 }
55
56 fn len(&self) -> usize {
57 match self {
58 Self::Sha1(_) => 20,
59 Self::Sha256(_) => 32,
60 }
61 }
62}
63
64fn hash_object_with(algo: HashAlgo, kind: ObjectKind, data: &[u8]) -> ObjectId {
67 let header = format!("{kind} {}\0", data.len());
68 let mut h = PackHasher::new(algo);
69 h.update(header.as_bytes());
70 h.update(data);
71 ObjectId::from_bytes(&h.finalize()).expect("digest is a valid OID width")
72}
73
/// Options controlling [`unpack_objects`].
#[derive(Debug, Default)]
pub struct UnpackOptions {
    /// When set, objects are only hashed, never written to the object
    /// database, and ref-delta bases are not looked up in the database.
    pub dry_run: bool,
    /// NOTE(review): not read by the code visible in this part of the file;
    /// presumably suppresses progress output — confirm against callers.
    pub quiet: bool,
    /// When set, the unpacked objects are additionally checked by
    /// `gitmodules::verify_packed_dot_special` and every tree/commit/tag
    /// reference must resolve (see the remaining fields for exemptions).
    pub strict: bool,
    /// Object IDs that the strict reference check accepts even when they are
    /// neither in the pack nor in the object database.
    pub allowed_missing: HashSet<ObjectId>,
    /// When set, the strict reference check accepts any unresolved reference.
    pub allow_promisor_missing_references: bool,
    /// Optional upper bound on the number of pack bytes consumed; exceeding it
    /// fails the unpack with a `CorruptObject` error.
    pub max_input_bytes: Option<u64>,
    /// Commits whose parents are not checked by the strict reference check
    /// (their tree still is).
    pub shallow_boundaries: HashSet<ObjectId>,
}
99
/// A delta entry read from the pack whose base has not been applied yet.
///
/// Exactly one of `base_oid` / `base_offset` is populated by the readers in
/// this file, depending on the delta encoding.
struct PendingDelta {
    /// Offset of this entry's header within the pack.
    offset: usize,
    /// Base object ID, set for ref-delta entries (type code 7).
    base_oid: Option<ObjectId>,
    /// Pack offset of the base entry, set for ofs-delta entries (type code 6).
    base_offset: Option<usize>,
    /// Inflated delta instruction stream, ready for [`apply_delta`].
    delta_data: Vec<u8>,
}
112
/// Reads a pack stream from `reader` and unpacks every object into `odb`.
///
/// The stream is processed in three phases:
/// 1. Header and entries: non-delta objects are written (or only hashed when
///    `opts.dry_run` is set) as they are read; delta entries are inflated and
///    queued.
/// 2. The running digest of all consumed bytes is compared with the trailer.
/// 3. Queued deltas are resolved in repeated passes until none remain; a pass
///    that resolves nothing is an error.
///
/// With `opts.strict`, the resulting objects are additionally verified (see
/// [`UnpackOptions`]).
///
/// Returns the number of objects unpacked (non-delta objects plus resolved
/// deltas).
///
/// # Errors
/// `Error::CorruptObject` for a bad signature, unsupported version, unknown
/// entry type, checksum mismatch or unresolvable deltas; other errors are
/// propagated from the reader, the object database and the parsers.
///
/// NOTE(review): non-delta objects are written during phase 1, i.e. before the
/// trailer checksum is verified, so a failing pack may leave objects behind.
pub fn unpack_objects(reader: &mut dyn Read, odb: &Odb, opts: &UnpackOptions) -> Result<usize> {
    // Blobs larger than this are not kept in memory after being written; they
    // are re-read from the object database when needed as a delta base.
    const MAX_RETAIN_BYTES: usize = 1024 * 1024;

    let algo = odb.hash_algo();
    let mut rd = StreamingPackReader::new(reader, opts.max_input_bytes, algo);

    // --- Header: "PACK", version, object count ---
    let sig = rd.read_exact_n(4)?;
    if sig != b"PACK" {
        return Err(Error::CorruptObject(
            "not a pack stream: invalid signature".to_owned(),
        ));
    }
    let version = rd.read_u32_be()?;
    if version != 2 && version != 3 {
        return Err(Error::CorruptObject(format!(
            "unsupported pack version {version}"
        )));
    }
    let nr_objects = rd.read_u32_be()? as usize;

    // Resolved objects, indexed by pack offset (for ofs-delta bases) ...
    let mut by_offset: HashMap<usize, PackedObjectEntry> = HashMap::new();
    // ... and by object ID (for ref-delta bases and strict verification).
    let mut by_oid: HashMap<ObjectId, PackedObjectEntry> = HashMap::new();

    let mut pending: Vec<PendingDelta> = Vec::new();
    let mut count = 0usize;

    // --- Phase 1: read every entry ---
    for _ in 0..nr_objects {
        let obj_offset = rd.stream_pos();
        let (type_code, size) = rd.read_type_size()?;

        match type_code {
            // Commit, tree, blob, tag: stored whole.
            1..=4 => {
                let kind = type_code_to_kind(type_code)?;
                let data = rd.decompress(size)?;
                let oid = write_or_hash(kind, &data, odb, opts.dry_run)?;
                let entry = packed_entry_after_write(kind, data, oid, odb, opts, MAX_RETAIN_BYTES);
                by_offset.insert(obj_offset, entry.clone());
                by_oid.insert(oid, entry);
                count += 1;
            }
            // ofs-delta: base is identified by a backwards distance from this entry.
            6 => {
                let neg = rd.read_ofs_neg_offset()?;
                let base_offset = obj_offset.checked_sub(neg).ok_or_else(|| {
                    Error::CorruptObject("ofs-delta base offset underflow".to_owned())
                })?;
                let delta_data = rd.decompress(size)?;
                pending.push(PendingDelta {
                    offset: obj_offset,
                    base_oid: None,
                    base_offset: Some(base_offset),
                    delta_data,
                });
            }
            // ref-delta: base is identified by a full object ID.
            7 => {
                let base_bytes = rd.read_exact_n(algo.len())?;
                let base_oid = ObjectId::from_bytes(&base_bytes)?;
                let delta_data = rd.decompress(size)?;
                pending.push(PendingDelta {
                    offset: obj_offset,
                    base_oid: Some(base_oid),
                    base_offset: None,
                    delta_data,
                });
            }
            other => {
                return Err(Error::CorruptObject(format!(
                    "unknown packed-object type {other}"
                )))
            }
        }
    }

    // --- Phase 2: trailer checksum ---
    // The digest must be taken before the trailer itself is read.
    let digest = rd.finalize_hasher();
    let trailing = rd.read_trailer()?;
    if digest != trailing {
        return Err(Error::CorruptObject(
            "pack trailing checksum mismatch".to_owned(),
        ));
    }

    // --- Phase 3: resolve deltas ---
    let mut remaining = pending;
    // Each pass applies every delta whose base is already available; deltas
    // whose base is itself an unresolved delta wait for a later pass.
    loop {
        if remaining.is_empty() {
            break;
        }
        let before = remaining.len();
        let mut still_pending: Vec<PendingDelta> = Vec::new();

        for delta in remaining {
            // `None` means "base not available yet"; `Some(Err(_))` means the
            // base exists but its bytes could not be loaded.
            let base_res: Option<Result<(ObjectKind, Cow<'_, [u8]>)>> =
                if let Some(base_off) = delta.base_offset {
                    by_offset
                        .get(&base_off)
                        .map(|e| entry_object_bytes(e, odb).map(|d| (e.kind(), d)))
                } else if let Some(ref base_id) = delta.base_oid {
                    if let Some(e) = by_oid.get(base_id) {
                        Some(entry_object_bytes(e, odb).map(|d| (e.kind(), d)))
                    } else if !opts.dry_run {
                        // Base outside the pack: fall back to the object
                        // database; a failed read is treated as "not available".
                        odb.read(base_id)
                            .ok()
                            .map(|obj| Ok((obj.kind, Cow::Owned(obj.data))))
                    } else {
                        None
                    }
                } else {
                    None
                };

            match base_res {
                Some(Ok((base_kind, base_data))) => {
                    // The resolved object has the same kind as its base.
                    let result = apply_delta(base_data.as_ref(), &delta.delta_data)?;
                    let oid = write_or_hash(base_kind, &result, odb, opts.dry_run)?;
                    let new_entry = packed_entry_after_write(
                        base_kind,
                        result,
                        oid,
                        odb,
                        opts,
                        MAX_RETAIN_BYTES,
                    );
                    by_offset.insert(delta.offset, new_entry.clone());
                    by_oid.insert(oid, new_entry);
                    count += 1;
                }
                Some(Err(e)) => return Err(e),
                None => still_pending.push(delta),
            }
        }

        remaining = still_pending;
        // No progress in a full pass: the remaining bases can never appear.
        if remaining.len() == before {
            return Err(Error::CorruptObject(format!(
                "{} delta(s) could not be resolved",
                remaining.len()
            )));
        }
    }

    // --- Optional strict verification ---
    if opts.strict {
        // Materialize every object's bytes (re-reading spilled blobs) for the
        // gitmodules check.
        let mut dot_fsck_map: HashMap<ObjectId, (ObjectKind, Vec<u8>)> =
            HashMap::with_capacity(by_oid.len());
        for (oid, entry) in &by_oid {
            let kind = entry.kind();
            let data = match entry {
                PackedObjectEntry::InMemory { data, .. } => data.clone(),
                PackedObjectEntry::BlobOnDisk { oid: blob_oid } => odb.read(blob_oid)?.data,
            };
            dot_fsck_map.insert(*oid, (kind, data));
        }
        gitmodules::verify_packed_dot_special(&dot_fsck_map)?;
        strict_verify_packed_references_map(
            Some(odb),
            &by_oid,
            &opts.allowed_missing,
            opts.allow_promisor_missing_references,
            &opts.shallow_boundaries,
        )?;
    }

    Ok(count)
}
299
/// How an unpacked object is kept around for later delta resolution and
/// verification.
#[derive(Debug, Clone)]
enum PackedObjectEntry {
    /// Object bytes are retained in memory together with their kind.
    InMemory { kind: ObjectKind, data: Vec<u8> },
    /// A blob whose bytes were not retained; they are re-read from the object
    /// database by ID when needed.
    BlobOnDisk { oid: ObjectId },
}
306
307impl PackedObjectEntry {
308 fn kind(&self) -> ObjectKind {
309 match self {
310 PackedObjectEntry::InMemory { kind, .. } => *kind,
311 PackedObjectEntry::BlobOnDisk { .. } => ObjectKind::Blob,
312 }
313 }
314}
315
316fn packed_entry_after_write(
317 kind: ObjectKind,
318 data: Vec<u8>,
319 oid: ObjectId,
320 _odb: &Odb,
321 opts: &UnpackOptions,
322 max_retain: usize,
323) -> PackedObjectEntry {
324 if !opts.dry_run && kind == ObjectKind::Blob && data.len() > max_retain {
325 PackedObjectEntry::BlobOnDisk { oid }
326 } else {
327 PackedObjectEntry::InMemory { kind, data }
328 }
329}
330
331fn entry_object_bytes<'a>(entry: &'a PackedObjectEntry, odb: &Odb) -> Result<Cow<'a, [u8]>> {
332 match entry {
333 PackedObjectEntry::InMemory { data, .. } => Ok(Cow::Borrowed(data.as_slice())),
334 PackedObjectEntry::BlobOnDisk { oid } => Ok(Cow::Owned(odb.read(oid)?.data)),
335 }
336}
337
338fn strict_verify_packed_references_map(
339 odb: Option<&Odb>,
340 pack: &HashMap<ObjectId, PackedObjectEntry>,
341 allowed_missing: &HashSet<ObjectId>,
342 allow_promisor_missing_references: bool,
343 shallow_boundaries: &HashSet<ObjectId>,
344) -> Result<()> {
345 for (oid, entry) in pack {
346 match entry {
347 PackedObjectEntry::BlobOnDisk { .. } => {}
348 PackedObjectEntry::InMemory { kind, data } => match kind {
349 ObjectKind::Tree => {
350 for e in parse_tree(data)? {
351 if e.mode == MODE_GITLINK {
356 continue;
357 }
358 if !strict_ref_resolves_map(
359 &e.oid,
360 pack,
361 odb,
362 allowed_missing,
363 allow_promisor_missing_references,
364 ) {
365 return Err(Error::CorruptObject(format!(
366 "strict: missing object {} referenced by tree",
367 e.oid.to_hex()
368 )));
369 }
370 }
371 }
372 ObjectKind::Commit => {
373 let c = parse_commit(data)?;
374 if !strict_ref_resolves_map(
375 &c.tree,
376 pack,
377 odb,
378 allowed_missing,
379 allow_promisor_missing_references,
380 ) {
381 return Err(Error::CorruptObject(format!(
382 "strict: missing tree {} referenced by commit",
383 c.tree.to_hex()
384 )));
385 }
386 if shallow_boundaries.contains(oid) {
390 continue;
391 }
392 for p in &c.parents {
393 if !strict_ref_resolves_map(
394 p,
395 pack,
396 odb,
397 allowed_missing,
398 allow_promisor_missing_references,
399 ) {
400 return Err(Error::CorruptObject(format!(
401 "strict: missing parent {} referenced by commit",
402 p.to_hex()
403 )));
404 }
405 }
406 }
407 ObjectKind::Tag => {
408 let t = parse_tag(data)?;
409 if !strict_ref_resolves_map(
410 &t.object,
411 pack,
412 odb,
413 allowed_missing,
414 allow_promisor_missing_references,
415 ) {
416 return Err(Error::CorruptObject(format!(
417 "strict: missing object {} referenced by tag",
418 t.object.to_hex()
419 )));
420 }
421 }
422 ObjectKind::Blob => {}
423 },
424 }
425 }
426 Ok(())
427}
428
429fn strict_ref_resolves_map(
430 oid: &ObjectId,
431 pack: &HashMap<ObjectId, PackedObjectEntry>,
432 odb: Option<&Odb>,
433 allowed_missing: &HashSet<ObjectId>,
434 allow_promisor_missing_references: bool,
435) -> bool {
436 pack.contains_key(oid)
437 || allowed_missing.contains(oid)
438 || odb.is_some_and(|o| o.exists(oid))
439 || allow_promisor_missing_references
440}
441
442fn strict_ref_resolves(
443 oid: &ObjectId,
444 pack: &std::collections::HashMap<ObjectId, (ObjectKind, Vec<u8>)>,
445 odb: Option<&Odb>,
446) -> bool {
447 pack.contains_key(oid) || odb.is_some_and(|o| o.exists(oid))
448}
449
450pub fn strict_verify_packed_references(
456 odb: Option<&Odb>,
457 pack: &HashMap<ObjectId, (ObjectKind, Vec<u8>)>,
458) -> Result<()> {
459 for (kind, data) in pack.values() {
460 match kind {
461 ObjectKind::Tree => {
462 for e in parse_tree(data)? {
463 if e.mode == MODE_GITLINK {
468 continue;
469 }
470 if !strict_ref_resolves(&e.oid, pack, odb) {
471 return Err(Error::CorruptObject(format!(
472 "strict: missing object {} referenced by tree",
473 e.oid.to_hex()
474 )));
475 }
476 }
477 }
478 ObjectKind::Commit => {
479 let c = parse_commit(data)?;
480 if !strict_ref_resolves(&c.tree, pack, odb) {
481 return Err(Error::CorruptObject(format!(
482 "strict: missing tree {} referenced by commit",
483 c.tree.to_hex()
484 )));
485 }
486 for p in &c.parents {
487 if !strict_ref_resolves(p, pack, odb) {
488 return Err(Error::CorruptObject(format!(
489 "strict: missing parent {} referenced by commit",
490 p.to_hex()
491 )));
492 }
493 }
494 }
495 ObjectKind::Tag => {
496 let t = parse_tag(data)?;
497 if !strict_ref_resolves(&t.object, pack, odb) {
498 return Err(Error::CorruptObject(format!(
499 "strict: missing object {} referenced by tag",
500 t.object.to_hex()
501 )));
502 }
503 }
504 ObjectKind::Blob => {}
505 }
506 }
507 Ok(())
508}
509
510pub fn pack_is_thin(data: &[u8], algo: HashAlgo) -> bool {
517 pack_is_thin_inner(data, algo).unwrap_or(false)
518}
519
520fn pack_is_thin_inner(data: &[u8], algo: HashAlgo) -> Result<bool> {
521 let mut rd = PackReader::new(data.to_vec());
522 if rd.read_exact(4)? != b"PACK" {
523 return Ok(false);
524 }
525 let _version = rd.read_u32_be()?;
526 let nr_objects = rd.read_u32_be()? as usize;
527
528 let mut in_pack: HashSet<ObjectId> = HashSet::new();
529 let mut ref_delta_bases: Vec<ObjectId> = Vec::new();
530 for _ in 0..nr_objects {
531 let obj_offset = rd.pos;
532 let (type_code, size) = rd.read_type_size()?;
533 match type_code {
534 1..=4 => {
535 let kind = type_code_to_kind(type_code)?;
536 let obj_data = rd.decompress(size)?;
537 in_pack.insert(hash_object_with(algo, kind, &obj_data));
538 }
539 6 => {
540 let _neg = rd.read_ofs_neg_offset()?;
542 let _ = obj_offset;
543 let _ = rd.decompress(size)?;
544 }
545 7 => {
546 let base_bytes = rd.read_exact(algo.len())?;
547 ref_delta_bases.push(ObjectId::from_bytes(base_bytes)?);
548 let _ = rd.decompress(size)?;
549 }
550 _ => return Ok(false),
551 }
552 }
553 Ok(ref_delta_bases.iter().any(|b| !in_pack.contains(b)))
555}
556
557pub fn pack_bytes_to_object_map(data: &[u8], odb: &Odb) -> Result<HashMap<ObjectId, Object>> {
564 let rd = PackReader::new(data.to_vec());
565 build_pack_object_map(rd, odb)
566}
567
/// Parses the whole pack held by `rd` into a map from object ID to object.
///
/// Mirrors the phases of `unpack_objects` (entries, trailer checksum, delta
/// resolution) but works on an in-memory buffer and only hashes objects via
/// `odb.hash`; the object database is read solely to find ref-delta bases
/// that the pack does not contain.
///
/// # Errors
/// `Error::CorruptObject` for a bad signature, unsupported version, unknown
/// entry type, checksum mismatch or unresolvable deltas; other errors are
/// propagated from decompression and delta application.
fn build_pack_object_map(mut rd: PackReader, odb: &Odb) -> Result<HashMap<ObjectId, Object>> {
    let algo = odb.hash_algo();
    // --- Header: "PACK", version, object count ---
    let sig = rd.read_exact(4)?;
    if sig != b"PACK" {
        return Err(Error::CorruptObject(
            "not a pack stream: invalid signature".to_owned(),
        ));
    }
    let version = rd.read_u32_be()?;
    if version != 2 && version != 3 {
        return Err(Error::CorruptObject(format!(
            "unsupported pack version {version}"
        )));
    }
    let nr_objects = rd.read_u32_be()? as usize;

    // Resolved objects by pack offset (ofs-delta bases) and by ID (ref-delta
    // bases and the final result); each object's bytes are stored in both.
    let mut by_offset: HashMap<usize, (ObjectKind, Vec<u8>)> = HashMap::new();
    let mut by_oid: HashMap<ObjectId, (ObjectKind, Vec<u8>)> = HashMap::new();
    let mut pending: Vec<PendingDelta> = Vec::new();

    /// Looks a ref-delta base up in the pack first, then in the object
    /// database; a failed database read counts as "not found".
    fn base_from_pack_or_odb(
        by_oid: &HashMap<ObjectId, (ObjectKind, Vec<u8>)>,
        odb: &Odb,
        id: &ObjectId,
    ) -> Option<(ObjectKind, Vec<u8>)> {
        if let Some(e) = by_oid.get(id) {
            return Some(e.clone());
        }
        odb.read(id).ok().map(|o| (o.kind, o.data))
    }

    // --- Phase 1: read every entry ---
    for _ in 0..nr_objects {
        let obj_offset = rd.pos;
        let (type_code, size) = rd.read_type_size()?;

        match type_code {
            // Commit, tree, blob, tag: stored whole.
            1..=4 => {
                let kind = type_code_to_kind(type_code)?;
                let data = rd.decompress(size)?;
                let oid = odb.hash(kind, &data);
                by_offset.insert(obj_offset, (kind, data.clone()));
                by_oid.insert(oid, (kind, data));
            }
            // ofs-delta: base is identified by a backwards distance from this entry.
            6 => {
                let neg = rd.read_ofs_neg_offset()?;
                let base_offset = obj_offset.checked_sub(neg).ok_or_else(|| {
                    Error::CorruptObject("ofs-delta base offset underflow".to_owned())
                })?;
                let delta_data = rd.decompress(size)?;
                pending.push(PendingDelta {
                    offset: obj_offset,
                    base_oid: None,
                    base_offset: Some(base_offset),
                    delta_data,
                });
            }
            // ref-delta: base is identified by a full object ID.
            7 => {
                let base_bytes = rd.read_exact(algo.len())?;
                let base_oid = ObjectId::from_bytes(base_bytes)?;
                let delta_data = rd.decompress(size)?;
                pending.push(PendingDelta {
                    offset: obj_offset,
                    base_oid: Some(base_oid),
                    base_offset: None,
                    delta_data,
                });
            }
            other => {
                return Err(Error::CorruptObject(format!(
                    "unknown packed-object type {other}"
                )))
            }
        }
    }

    // --- Phase 2: trailer checksum over everything consumed so far ---
    let consumed = rd.pos;
    {
        let mut hasher = PackHasher::new(algo);
        hasher.update(&rd.data[..consumed]);
        let digest = hasher.finalize();
        let trailing = rd.read_exact(algo.len())?;
        if digest.as_slice() != trailing {
            return Err(Error::CorruptObject(
                "pack trailing checksum mismatch".to_owned(),
            ));
        }
    }

    // --- Phase 3: resolve deltas in repeated passes ---
    let mut remaining = pending;
    loop {
        if remaining.is_empty() {
            break;
        }
        let before = remaining.len();
        let mut still_pending: Vec<PendingDelta> = Vec::new();

        for delta in remaining {
            // `None` means the base is not available (yet).
            let base = if let Some(base_off) = delta.base_offset {
                by_offset.get(&base_off).cloned()
            } else if let Some(ref base_id) = delta.base_oid {
                base_from_pack_or_odb(&by_oid, odb, base_id)
            } else {
                None
            };

            if let Some((base_kind, base_data)) = base {
                // The resolved object has the same kind as its base.
                let result = apply_delta(&base_data, &delta.delta_data)?;
                let oid = odb.hash(base_kind, &result);
                by_offset.insert(delta.offset, (base_kind, result.clone()));
                by_oid.insert(oid, (base_kind, result));
            } else {
                still_pending.push(delta);
            }
        }

        remaining = still_pending;
        // No progress in a full pass: the remaining bases can never appear.
        if remaining.len() == before {
            return Err(Error::CorruptObject(format!(
                "{} delta(s) could not be resolved",
                remaining.len()
            )));
        }
    }

    Ok(by_oid
        .into_iter()
        .map(|(oid, (kind, data))| (oid, Object::new(kind, data)))
        .collect())
}
697
698fn write_or_hash(kind: ObjectKind, data: &[u8], odb: &Odb, dry_run: bool) -> Result<ObjectId> {
701 if dry_run {
702 Ok(odb.hash(kind, data))
703 } else {
704 odb.write_local(kind, data)
707 }
708}
709
710fn type_code_to_kind(code: u8) -> Result<ObjectKind> {
712 match code {
713 1 => Ok(ObjectKind::Commit),
714 2 => Ok(ObjectKind::Tree),
715 3 => Ok(ObjectKind::Blob),
716 4 => Ok(ObjectKind::Tag),
717 _ => Err(Error::CorruptObject(format!(
718 "type code {code} is not a regular object type"
719 ))),
720 }
721}
722
/// Cursor over a pack that is held entirely in memory.
struct PackReader {
    /// The complete pack bytes.
    data: Vec<u8>,
    /// Index into `data` of the next unread byte.
    pos: usize,
}
728
729impl PackReader {
730 fn new(data: Vec<u8>) -> Self {
731 Self { data, pos: 0 }
732 }
733
734 fn read_exact(&mut self, n: usize) -> Result<&[u8]> {
737 if self.pos + n > self.data.len() {
738 return Err(Error::CorruptObject(format!(
739 "pack stream truncated: need {n} bytes at offset {}",
740 self.pos
741 )));
742 }
743 let slice = &self.data[self.pos..self.pos + n];
744 self.pos += n;
745 Ok(slice)
746 }
747
748 fn read_byte(&mut self) -> Result<u8> {
750 if self.pos >= self.data.len() {
751 return Err(Error::CorruptObject(
752 "unexpected end of pack stream".to_owned(),
753 ));
754 }
755 let b = self.data[self.pos];
756 self.pos += 1;
757 Ok(b)
758 }
759
760 fn read_u32_be(&mut self) -> Result<u32> {
762 let bytes = self.read_exact(4)?;
763 Ok(u32::from_be_bytes(bytes.try_into().map_err(|_| {
764 Error::CorruptObject("u32 read failed".to_owned())
765 })?))
766 }
767
768 fn read_type_size(&mut self) -> Result<(u8, usize)> {
773 let c = self.read_byte()?;
774 let type_code = (c >> 4) & 0x7;
775 let mut size = (c & 0x0f) as usize;
776 let mut shift = 4u32;
777 let mut cur = c;
778 while cur & 0x80 != 0 {
779 cur = self.read_byte()?;
780 size |= ((cur & 0x7f) as usize) << shift;
781 shift += 7;
782 }
783 Ok((type_code, size))
784 }
785
786 fn read_ofs_neg_offset(&mut self) -> Result<usize> {
791 let mut c = self.read_byte()?;
792 let mut value = (c & 0x7f) as usize;
793 while c & 0x80 != 0 {
794 c = self.read_byte()?;
795 value = (value + 1) << 7 | (c & 0x7f) as usize;
796 }
797 Ok(value)
798 }
799
800 fn decompress(&mut self, expected_size: usize) -> Result<Vec<u8>> {
805 let slice = &self.data[self.pos..];
806 let mut decoder = ZlibDecoder::new(slice);
807 let mut out = Vec::with_capacity(expected_size);
808 decoder
809 .read_to_end(&mut out)
810 .map_err(|e| Error::Zlib(e.to_string()))?;
811 if out.len() != expected_size {
812 return Err(Error::CorruptObject(format!(
813 "decompressed {} bytes but expected {}",
814 out.len(),
815 expected_size
816 )));
817 }
818 self.pos += decoder.total_in() as usize;
819 Ok(out)
820 }
821}
822
823fn io_to_corrupt_eof(e: io::Error, stream_pos: usize, context: &str) -> Error {
824 if e.kind() == io::ErrorKind::UnexpectedEof {
825 Error::CorruptObject(format!(
826 "pack stream truncated ({context}) at offset {stream_pos}"
827 ))
828 } else {
829 Error::Io(e)
830 }
831}
832
/// Incremental reader for a pack arriving over a `Read` stream.
///
/// Tracks how many bytes have been consumed and hashes them as it goes, so
/// the trailer checksum can be verified without buffering the whole pack.
struct StreamingPackReader<'a> {
    /// Underlying byte source.
    inner: &'a mut dyn Read,
    /// Running digest of every byte consumed so far (the trailer is not fed in).
    pack_hasher: PackHasher,
    /// Number of pack bytes consumed so far.
    stream_pos: usize,
    /// Optional cap on `stream_pos`; exceeding it is an error.
    max_input_bytes: Option<u64>,
    /// Bytes already read from `inner` but not yet consumed (read-ahead left
    /// over from decompression); always drained before `inner` is read again.
    pending: Vec<u8>,
}
847
848impl<'a> StreamingPackReader<'a> {
849 fn new(inner: &'a mut dyn Read, max_input_bytes: Option<u64>, algo: HashAlgo) -> Self {
850 Self {
851 inner,
852 pack_hasher: PackHasher::new(algo),
853 stream_pos: 0,
854 max_input_bytes,
855 pending: Vec::new(),
856 }
857 }
858
859 fn stream_pos(&self) -> usize {
860 self.stream_pos
861 }
862
863 fn enforce_max_input(&self) -> Result<()> {
864 if let Some(limit) = self.max_input_bytes {
865 let pos = u64::try_from(self.stream_pos)
866 .map_err(|_| Error::CorruptObject("pack stream position overflow".to_owned()))?;
867 if pos > limit {
868 return Err(Error::CorruptObject(
869 "pack exceeds maximum allowed size".to_owned(),
870 ));
871 }
872 }
873 Ok(())
874 }
875
876 fn read_from_source(&mut self, buf: &mut [u8]) -> Result<usize> {
878 let n = if !self.pending.is_empty() {
879 let take = buf.len().min(self.pending.len());
880 buf[..take].copy_from_slice(&self.pending[..take]);
881 self.pending.drain(..take);
882 take
883 } else {
884 self.inner.read(buf).map_err(Error::Io)?
885 };
886 if n > 0 {
887 self.pack_hasher.update(&buf[..n]);
888 self.stream_pos += n;
889 self.enforce_max_input()?;
890 }
891 Ok(n)
892 }
893
894 fn read_byte(&mut self) -> Result<u8> {
895 let mut b = [0u8; 1];
896 let n = self.read_from_source(&mut b)?;
897 if n == 0 {
898 return Err(Error::CorruptObject(format!(
899 "pack stream truncated (read byte) at offset {}",
900 self.stream_pos
901 )));
902 }
903 Ok(b[0])
904 }
905
906 fn read_exact_n(&mut self, n: usize) -> Result<Vec<u8>> {
907 let mut v = vec![0u8; n];
908 let mut got = 0usize;
909 while got < n {
910 let m = self.read_from_source(&mut v[got..n])?;
911 if m == 0 {
912 return Err(Error::CorruptObject(format!(
913 "pack stream truncated (read exact) at offset {}",
914 self.stream_pos
915 )));
916 }
917 got += m;
918 }
919 Ok(v)
920 }
921
922 fn read_u32_be(&mut self) -> Result<u32> {
923 let mut b = [0u8; 4];
924 let mut got = 0usize;
925 while got < 4 {
926 let m = self.read_from_source(&mut b[got..4])?;
927 if m == 0 {
928 return Err(Error::CorruptObject(format!(
929 "pack stream truncated (read u32) at offset {}",
930 self.stream_pos
931 )));
932 }
933 got += m;
934 }
935 Ok(u32::from_be_bytes(b))
936 }
937
938 fn read_type_size(&mut self) -> Result<(u8, usize)> {
939 let c = self.read_byte()?;
940 let type_code = (c >> 4) & 0x7;
941 let mut size = (c & 0x0f) as usize;
942 let mut shift = 4u32;
943 let mut cur = c;
944 while cur & 0x80 != 0 {
945 cur = self.read_byte()?;
946 size |= ((cur & 0x7f) as usize) << shift;
947 shift += 7;
948 }
949 Ok((type_code, size))
950 }
951
952 fn read_ofs_neg_offset(&mut self) -> Result<usize> {
953 let mut c = self.read_byte()?;
954 let mut value = (c & 0x7f) as usize;
955 while c & 0x80 != 0 {
956 c = self.read_byte()?;
957 value = (value + 1) << 7 | (c & 0x7f) as usize;
958 }
959 Ok(value)
960 }
961
962 fn decompress(&mut self, expected_size: usize) -> Result<Vec<u8>> {
972 if expected_size == 0 {
976 const CHUNK: usize = 64 * 1024;
977 let mut scratch = [0u8; CHUNK];
978 loop {
979 let mut cursor = std::io::Cursor::new(self.pending.as_slice());
980 let mut z = ZlibDecoder::new(&mut cursor);
981 let mut sink = [0u8; 1];
982 match z.read(&mut sink) {
983 Ok(0) => {
984 let consumed = z.total_in() as usize;
985 if consumed > self.pending.len() {
986 return Err(Error::CorruptObject(
987 "zlib total_in exceeds pending buffer".to_owned(),
988 ));
989 }
990 if consumed == 0 {
991 let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
992 if n == 0 {
993 return Err(Error::CorruptObject(format!(
994 "pack stream truncated (zlib) at offset {}",
995 self.stream_pos
996 )));
997 }
998 self.pending.extend_from_slice(&scratch[..n]);
999 continue;
1000 }
1001 self.pack_hasher.update(&self.pending[..consumed]);
1002 self.stream_pos += consumed;
1003 self.pending.drain(..consumed);
1004 self.enforce_max_input()?;
1005 return Ok(Vec::new());
1006 }
1007 Ok(_) => {
1008 return Err(Error::CorruptObject(
1009 "0-byte packed object inflated to non-empty output".to_owned(),
1010 ));
1011 }
1012 Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => {
1013 let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
1014 if n == 0 {
1015 return Err(Error::CorruptObject(format!(
1016 "pack stream truncated (zlib) at offset {}",
1017 self.stream_pos
1018 )));
1019 }
1020 self.pending.extend_from_slice(&scratch[..n]);
1021 }
1022 Err(e) => return Err(Error::Zlib(e.to_string())),
1023 }
1024 }
1025 }
1026
1027 const CHUNK: usize = 64 * 1024;
1028 let mut scratch = [0u8; CHUNK];
1029
1030 let mut out = vec![0u8; expected_size];
1031 let mut z = Decompress::new(true);
1032 let mut out_pos = 0usize;
1033 let mut eof = false;
1034 loop {
1035 if self.pending.is_empty() && !eof {
1036 let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
1037 if n == 0 {
1038 eof = true;
1039 } else {
1040 self.pending.extend_from_slice(&scratch[..n]);
1041 }
1042 }
1043
1044 let flush = if eof && self.pending.is_empty() {
1045 FlushDecompress::Finish
1046 } else {
1047 FlushDecompress::None
1048 };
1049
1050 let before_in = z.total_in();
1051 let before_out = z.total_out();
1052 let status = z
1053 .decompress(self.pending.as_slice(), &mut out[out_pos..], flush)
1054 .map_err(|e| Error::Zlib(e.to_string()))?;
1055 let consumed = (z.total_in() - before_in) as usize;
1056 if consumed > self.pending.len() {
1057 return Err(Error::CorruptObject(
1058 "zlib consumed more than pending buffer".to_owned(),
1059 ));
1060 }
1061 self.pack_hasher.update(&self.pending[..consumed]);
1062 self.stream_pos += consumed;
1063 self.pending.drain(..consumed);
1064 self.enforce_max_input()?;
1065 out_pos += (z.total_out() - before_out) as usize;
1066
1067 match status {
1068 Status::StreamEnd => {
1069 if out_pos != expected_size {
1070 return Err(Error::CorruptObject(format!(
1071 "decompressed size mismatch: got {out_pos}, want {expected_size}"
1072 )));
1073 }
1074 return Ok(out);
1075 }
1076 Status::Ok | Status::BufError => {
1077 if consumed == 0 && !eof {
1078 let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
1079 if n == 0 {
1080 eof = true;
1081 } else {
1082 self.pending.extend_from_slice(&scratch[..n]);
1083 }
1084 } else if eof && self.pending.is_empty() && out_pos != expected_size {
1085 return Err(Error::CorruptObject(format!(
1086 "pack stream truncated (zlib) at offset {}",
1087 self.stream_pos
1088 )));
1089 }
1090 }
1091 }
1092 }
1093 }
1094
1095 fn finalize_hasher(&self) -> Vec<u8> {
1097 self.pack_hasher.clone().finalize()
1098 }
1099
1100 fn read_trailer(&mut self) -> Result<Vec<u8>> {
1102 let hash_len = self.pack_hasher.len();
1103 let mut b = vec![0u8; hash_len];
1104 if self.pending.len() >= hash_len {
1105 b.copy_from_slice(&self.pending[..hash_len]);
1106 self.pending.drain(..hash_len);
1107 self.stream_pos += hash_len;
1108 self.enforce_max_input()?;
1109 return Ok(b);
1110 }
1111 let tail = self.pending.len();
1112 if tail > 0 {
1113 b[..tail].copy_from_slice(&self.pending[..]);
1114 self.pending.clear();
1115 }
1116 self.inner
1117 .read_exact(&mut b[tail..])
1118 .map_err(|e| io_to_corrupt_eof(e, self.stream_pos, "trailer"))?;
1119 self.stream_pos += hash_len;
1120 self.enforce_max_input()?;
1121 Ok(b)
1122 }
1123}
1124
1125pub fn apply_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
1139 let mut pos = 0usize;
1140
1141 let src_size = read_delta_varint(delta, &mut pos)?;
1142 if src_size != base.len() {
1143 return Err(Error::CorruptObject(format!(
1144 "delta source size {src_size} != base size {}",
1145 base.len()
1146 )));
1147 }
1148 let dest_size = read_delta_varint(delta, &mut pos)?;
1149 let mut result = Vec::with_capacity(dest_size);
1150
1151 while pos < delta.len() {
1152 let cmd = delta[pos];
1153 pos += 1;
1154 if cmd == 0 {
1155 return Err(Error::CorruptObject(
1156 "reserved opcode 0 in delta stream".to_owned(),
1157 ));
1158 }
1159 if cmd & 0x80 != 0 {
1160 let mut offset = 0usize;
1163 let mut size = 0usize;
1164
1165 macro_rules! maybe_read_byte {
1166 ($flag:expr, $shift:expr, $target:expr) => {
1167 if cmd & $flag != 0 {
1168 let b = *delta.get(pos).ok_or_else(|| {
1169 Error::CorruptObject("truncated delta COPY operand".to_owned())
1170 })?;
1171 pos += 1;
1172 $target |= (b as usize) << $shift;
1173 }
1174 };
1175 }
1176
1177 maybe_read_byte!(0x01, 0, offset);
1178 maybe_read_byte!(0x02, 8, offset);
1179 maybe_read_byte!(0x04, 16, offset);
1180 maybe_read_byte!(0x08, 24, offset);
1181 maybe_read_byte!(0x10, 0, size);
1182 maybe_read_byte!(0x20, 8, size);
1183 maybe_read_byte!(0x40, 16, size);
1184
1185 if size == 0 {
1186 size = 0x10000;
1187 }
1188
1189 let end = offset.checked_add(size).ok_or_else(|| {
1190 Error::CorruptObject("delta COPY range overflows usize".to_owned())
1191 })?;
1192 let chunk = base.get(offset..end).ok_or_else(|| {
1193 Error::CorruptObject(format!(
1194 "delta COPY [{offset},{end}) out of range (base is {} bytes)",
1195 base.len()
1196 ))
1197 })?;
1198 result.extend_from_slice(chunk);
1199 } else {
1200 let n = cmd as usize;
1202 let chunk = delta
1203 .get(pos..pos + n)
1204 .ok_or_else(|| Error::CorruptObject("truncated delta INSERT data".to_owned()))?;
1205 result.extend_from_slice(chunk);
1206 pos += n;
1207 }
1208 }
1209
1210 if result.len() != dest_size {
1211 return Err(Error::CorruptObject(format!(
1212 "delta produced {} bytes but expected {dest_size}",
1213 result.len()
1214 )));
1215 }
1216
1217 Ok(result)
1218}
1219
1220fn read_delta_varint(data: &[u8], pos: &mut usize) -> Result<usize> {
1224 let mut value = 0usize;
1225 let mut shift = 0u32;
1226 loop {
1227 let b = *data
1228 .get(*pos)
1229 .ok_or_else(|| Error::CorruptObject("truncated delta varint".to_owned()))?;
1230 *pos += 1;
1231 value |= ((b & 0x7f) as usize) << shift;
1232 shift += 7;
1233 if b & 0x80 == 0 {
1234 break;
1235 }
1236 }
1237 Ok(value)
1238}
1239
1240#[cfg(test)]
1241mod tests {
1242 use super::*;
1243
1244 fn make_pack(objects: &[(ObjectKind, &[u8])]) -> Vec<u8> {
1247 use flate2::write::ZlibEncoder;
1248 use std::io::Write;
1249
1250 let mut entries: Vec<Vec<u8>> = Vec::new();
1251 for (kind, data) in objects {
1252 let type_code: u8 = match kind {
1253 ObjectKind::Commit => 1,
1254 ObjectKind::Tree => 2,
1255 ObjectKind::Blob => 3,
1256 ObjectKind::Tag => 4,
1257 };
1258 let mut header = Vec::new();
1260 let mut size = data.len();
1261 let first = ((type_code & 0x7) << 4) | (size & 0x0f) as u8;
1262 size >>= 4;
1263 if size > 0 {
1264 header.push(first | 0x80);
1265 while size > 0 {
1266 let b = (size & 0x7f) as u8;
1267 size >>= 7;
1268 header.push(if size > 0 { b | 0x80 } else { b });
1269 }
1270 } else {
1271 header.push(first);
1272 }
1273 let mut enc = ZlibEncoder::new(Vec::new(), flate2::Compression::default());
1275 enc.write_all(data).unwrap();
1276 let compressed = enc.finish().unwrap();
1277 let mut entry = header;
1278 entry.extend_from_slice(&compressed);
1279 entries.push(entry);
1280 }
1281
1282 let mut pack = Vec::new();
1284 pack.extend_from_slice(b"PACK");
1285 pack.extend_from_slice(&2u32.to_be_bytes());
1286 pack.extend_from_slice(&(objects.len() as u32).to_be_bytes());
1287 for entry in &entries {
1288 pack.extend_from_slice(entry);
1289 }
1290 let mut hasher = Sha1::new();
1291 hasher.update(&pack);
1292 let digest = hasher.finalize();
1293 pack.extend_from_slice(digest.as_slice());
1294 pack
1295 }
1296
1297 #[test]
1298 fn test_apply_delta_simple() {
1299 let base = b"hello";
1301 let mut delta = Vec::new();
1302 delta.push(5u8);
1304 delta.push(11u8);
1306 delta.push(0x80 | 0x01 | 0x10); delta.push(0u8); delta.push(5u8); delta.push(6u8);
1313 delta.extend_from_slice(b" world");
1314
1315 let result = apply_delta(base, &delta).unwrap();
1316 assert_eq!(result, b"hello world");
1317 }
1318
1319 #[test]
1320 fn test_apply_delta_insert_only() {
1321 let base = b"";
1322 let mut delta = Vec::new();
1323 delta.push(0u8); delta.push(5u8); delta.push(5u8); delta.extend_from_slice(b"hello");
1327
1328 let result = apply_delta(base, &delta).unwrap();
1329 assert_eq!(result, b"hello");
1330 }
1331
1332 #[test]
1333 fn test_apply_delta_copy_only() {
1334 let base = b"abcdef";
1335 let mut delta = Vec::new();
1336 delta.push(6u8); delta.push(3u8); delta.push(0x91u8);
1341 delta.push(2u8); delta.push(3u8); let result = apply_delta(base, &delta).unwrap();
1345 assert_eq!(result, b"cde");
1346 }
1347
/// A copy instruction that carries no size bytes must copy 65536 bytes.
#[test]
fn test_apply_delta_size_zero_means_65536() {
    const LEN: usize = 65536;
    let base = vec![0xABu8; LEN];

    // LEN split into three 7-bit groups, lowest first; the high bit is set on
    // every byte except the last.
    let size_varint = [
        0x80 | (LEN & 0x7f) as u8,
        0x80 | ((LEN >> 7) & 0x7f) as u8,
        ((LEN >> 14) & 0x7f) as u8,
    ];

    let mut delta: Vec<u8> = Vec::with_capacity(2 * size_varint.len() + 1);
    // Source size, then target size: both are LEN.
    delta.extend_from_slice(&size_varint);
    delta.extend_from_slice(&size_varint);
    // Bare copy opcode: no offset bytes and no size bytes follow.
    delta.push(0x80u8);

    let result = apply_delta(&base, &delta).unwrap();
    assert_eq!(result.len(), LEN);
    assert!(result.iter().all(|&byte| byte == 0xAB));
}
1369
/// Unpacking a pack holding two blobs must report two objects and leave both
/// readable from the object database with their original contents.
#[test]
fn test_unpack_objects_blobs() {
    use tempfile::TempDir;

    let scratch = TempDir::new().unwrap();
    let objects_dir = scratch.path().join("objects");
    std::fs::create_dir_all(&objects_dir).unwrap();
    let odb = Odb::new(&objects_dir);

    let pack = make_pack(&[
        (ObjectKind::Blob, b"hello\n"),
        (ObjectKind::Blob, b"world\n"),
    ]);

    let unpacked = unpack_objects(&mut pack.as_slice(), &odb, &UnpackOptions::default()).unwrap();
    assert_eq!(unpacked, 2);

    // Each blob must be retrievable under the OID derived from its content.
    for content in [b"hello\n", b"world\n"] {
        let oid = Odb::hash_object_data(ObjectKind::Blob, content);
        let stored = odb.read(&oid).unwrap();
        assert_eq!(stored.data, content);
    }
}
1395
/// Unpacking a pack whose only object is the empty tree must leave a loose
/// object file for it on disk.
#[test]
fn test_unpack_objects_empty_tree() {
    use tempfile::TempDir;

    let scratch = TempDir::new().unwrap();
    let objects_dir = scratch.path().join("objects");
    std::fs::create_dir_all(&objects_dir).unwrap();
    let odb = Odb::new(&objects_dir);

    let pack = make_pack(&[(ObjectKind::Tree, b"")]);
    let opts = UnpackOptions::default();
    let unpacked = unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap();
    assert_eq!(unpacked, 1);

    let empty_tree = Odb::hash_object_data(ObjectKind::Tree, b"");
    assert!(odb.exists(&empty_tree));

    // Check the file itself, not just the ODB's view of it.
    let mut loose_path = objects_dir.join(empty_tree.loose_prefix());
    loose_path.push(empty_tree.loose_suffix());
    assert!(
        loose_path.is_file(),
        "empty tree must be materialized as a loose object during unpack"
    );
}
1420
/// Strict reference verification must accept a tree whose only entry is a
/// gitlink naming an OID absent from the pack, and must reject with
/// `Error::CorruptObject` a tree whose regular-file entry names an absent OID.
#[test]
fn test_strict_skips_gitlink_tree_entries() {
    use crate::index::{MODE_GITLINK, MODE_REGULAR};
    use crate::objects::{serialize_tree, TreeEntry};

    // Case 1: gitlink entry; the referenced OID is not in the pack.
    let gitlink_entry = TreeEntry {
        mode: MODE_GITLINK,
        name: b"sub".to_vec(),
        oid: ObjectId::from_hex(&"7f".repeat(20)).unwrap(),
    };
    let gitlink_tree = serialize_tree(&[gitlink_entry]);
    let gitlink_tree_oid = Odb::hash_object_data(ObjectKind::Tree, &gitlink_tree);
    let accepted = HashMap::from([(gitlink_tree_oid, (ObjectKind::Tree, gitlink_tree))]);
    assert!(strict_verify_packed_references(None, &accepted).is_ok());

    // Case 2: regular-file entry; the referenced OID is not in the pack.
    let file_entry = TreeEntry {
        mode: MODE_REGULAR,
        name: b"file".to_vec(),
        oid: ObjectId::from_hex(&"ab".repeat(20)).unwrap(),
    };
    let file_tree = serialize_tree(&[file_entry]);
    let file_tree_oid = Odb::hash_object_data(ObjectKind::Tree, &file_tree);
    let rejected = HashMap::from([(file_tree_oid, (ObjectKind::Tree, file_tree))]);
    let outcome = strict_verify_packed_references(None, &rejected);
    assert!(matches!(outcome, Err(Error::CorruptObject(_))));
}
1459
/// Test reader over a byte slice that returns at most `max_len` bytes per
/// `read` call, so callers see the input in several short reads.
struct ChunkedReader<'a> {
    /// Full input to serve.
    data: &'a [u8],
    /// Number of bytes already handed out.
    pos: usize,
    /// Upper bound on the bytes returned by a single `read`.
    max_len: usize,
}

impl io::Read for ChunkedReader<'_> {
    /// Copies the next chunk into `buf` and returns its length; returns
    /// `Ok(0)` once the input is exhausted.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        // Unread tail of the input; empty once `pos` reaches (or passes) the end.
        let remaining = self.data.get(self.pos..).unwrap_or(&[]);
        let chunk_len = remaining.len().min(self.max_len).min(buf.len());
        buf[..chunk_len].copy_from_slice(&remaining[..chunk_len]);
        self.pos += chunk_len;
        Ok(chunk_len)
    }
}
1480
/// Unpacking the same pack from a contiguous slice and from a reader that
/// yields at most 8 bytes per call must store the same object both times.
#[test]
fn test_unpack_objects_chunked_read_matches_full_buffer() {
    use tempfile::TempDir;

    let pack = make_pack(&[(ObjectKind::Blob, b"chunked-stream")]);
    let opts = UnpackOptions::default();
    let expected = Odb::hash_object_data(ObjectKind::Blob, b"chunked-stream");

    // Pass 1: the whole pack is available to the reader at once.
    let whole_tmp = TempDir::new().unwrap();
    let whole_dir = whole_tmp.path().join("objects");
    std::fs::create_dir_all(&whole_dir).unwrap();
    let whole_odb = Odb::new(&whole_dir);
    let whole_count = unpack_objects(&mut pack.as_slice(), &whole_odb, &opts).unwrap();
    assert_eq!(whole_count, 1);
    assert!(whole_odb.exists(&expected));

    // Pass 2: identical bytes, delivered in reads of at most 8 bytes, into a
    // fresh object database.
    let chunked_tmp = TempDir::new().unwrap();
    let chunked_dir = chunked_tmp.path().join("objects");
    std::fs::create_dir_all(&chunked_dir).unwrap();
    let chunked_odb = Odb::new(&chunked_dir);
    let mut trickle = ChunkedReader {
        data: pack.as_slice(),
        pos: 0,
        max_len: 8,
    };
    let chunked_count = unpack_objects(&mut trickle, &chunked_odb, &opts).unwrap();
    assert_eq!(chunked_count, 1);
    assert!(chunked_odb.exists(&expected));
}
1510
/// With `dry_run` set, unpacking must still report the object count but must
/// not store the object in the database.
#[test]
fn test_unpack_objects_dry_run_writes_nothing() {
    use tempfile::TempDir;

    let scratch = TempDir::new().unwrap();
    let objects_dir = scratch.path().join("objects");
    std::fs::create_dir_all(&objects_dir).unwrap();
    let odb = Odb::new(&objects_dir);

    let pack = make_pack(&[(ObjectKind::Blob, b"test content")]);

    // Only the two flags that differ from `Default` are spelled out.
    let opts = UnpackOptions {
        dry_run: true,
        quiet: true,
        ..Default::default()
    };
    let reported = unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap();
    assert_eq!(reported, 1);

    let blob_oid = Odb::hash_object_data(ObjectKind::Blob, b"test content");
    assert!(!odb.exists(&blob_oid));
}
1537
/// Input that does not start with the `PACK` magic must be rejected with an
/// error whose message mentions "invalid signature".
#[test]
fn test_unpack_objects_bad_signature() {
    use tempfile::TempDir;

    let scratch = TempDir::new().unwrap();
    let objects_dir = scratch.path().join("objects");
    std::fs::create_dir_all(&objects_dir).unwrap();
    let odb = Odb::new(&objects_dir);

    // Wrong magic, then version 2, object count 0, and a 20-byte zero trailer.
    let mut bogus: Vec<u8> = Vec::new();
    bogus.extend_from_slice(b"NOPE");
    bogus.extend_from_slice(&2u32.to_be_bytes());
    bogus.extend_from_slice(&0u32.to_be_bytes());
    bogus.extend_from_slice(&[0u8; 20]);

    let failure =
        unpack_objects(&mut bogus.as_slice(), &odb, &UnpackOptions::default()).unwrap_err();
    let message = failure.to_string();
    assert!(message.contains("invalid signature"));
}
1552
/// Corrupting the final byte of a pack must make unpacking fail with an
/// error whose message mentions "checksum".
#[test]
fn test_unpack_objects_checksum_mismatch() {
    use tempfile::TempDir;

    let scratch = TempDir::new().unwrap();
    let objects_dir = scratch.path().join("objects");
    std::fs::create_dir_all(&objects_dir).unwrap();
    let odb = Odb::new(&objects_dir);

    let mut pack = make_pack(&[(ObjectKind::Blob, b"data")]);
    // Invert every bit of the last byte.
    *pack
        .last_mut()
        .expect("a generated pack always has a trailer") ^= 0xFF;

    let failure =
        unpack_objects(&mut pack.as_slice(), &odb, &UnpackOptions::default()).unwrap_err();
    let message = failure.to_string();
    assert!(message.contains("checksum"));
}
1570
/// A delta whose declared source size (3) differs from the actual base
/// length (2) must fail with an error mentioning "source size".
#[test]
fn test_apply_delta_source_size_mismatch() {
    let base = b"hi";
    // Declared source size 3, target size 2, then an insert of the 2 bytes "hi".
    let delta = [3u8, 2u8, 2u8, b'h', b'i'];

    let failure = apply_delta(base, &delta).unwrap_err();
    let message = failure.to_string();
    assert!(message.contains("source size"));
}
1578}