1use std::borrow::Cow;
13use std::collections::{HashMap, HashSet};
14use std::io::{self, Read};
15
16use flate2::read::ZlibDecoder;
17use flate2::{Decompress, FlushDecompress, Status};
18use sha1::{Digest, Sha1};
19
20use crate::error::{Error, Result};
21use crate::gitmodules;
22use crate::index::MODE_GITLINK;
23use crate::objects::{parse_commit, parse_tag, parse_tree, Object, ObjectId, ObjectKind};
24use crate::odb::Odb;
25
/// Options controlling how [`unpack_objects`] processes a pack stream.
#[derive(Debug, Default)]
pub struct UnpackOptions {
    /// When set, object IDs are computed but nothing is written to the object database.
    pub dry_run: bool,
    /// NOTE(review): not consulted anywhere in this file; presumably suppresses
    /// progress output in callers — confirm.
    pub quiet: bool,
    /// When set, the unpacked objects go through the `.gitmodules` check and every
    /// reference made by a tree, commit or tag must resolve.
    pub strict: bool,
    /// Object IDs the strict reference check accepts even when they are absent from
    /// both the pack and the object database.
    pub allowed_missing: HashSet<ObjectId>,
    /// When set, the strict reference check treats every reference as resolvable.
    pub allow_promisor_missing_references: bool,
    /// Upper bound on the number of bytes consumed from the input; exceeding it fails
    /// with a `CorruptObject` error. `None` disables the limit.
    pub max_input_bytes: Option<u64>,
    /// Commits whose parents are not verified in strict mode (their tree still is).
    pub shallow_boundaries: HashSet<ObjectId>,
}
51
/// A delta entry read from a pack whose base has not been applied yet.
struct PendingDelta {
    /// Offset of this entry's header within the pack.
    offset: usize,
    /// Base object ID; `Some` for ref-delta entries (type code 7).
    base_oid: Option<ObjectId>,
    /// Absolute pack offset of the base entry; `Some` for ofs-delta entries (type code 6).
    base_offset: Option<usize>,
    /// Inflated delta instructions to apply to the base.
    delta_data: Vec<u8>,
}
64
65pub fn unpack_objects(reader: &mut dyn Read, odb: &Odb, opts: &UnpackOptions) -> Result<usize> {
80 const MAX_RETAIN_BYTES: usize = 1024 * 1024;
84
85 let mut rd = StreamingPackReader::new(reader, opts.max_input_bytes);
86
87 let sig = rd.read_exact_n(4)?;
89 if sig != b"PACK" {
90 return Err(Error::CorruptObject(
91 "not a pack stream: invalid signature".to_owned(),
92 ));
93 }
94 let version = rd.read_u32_be()?;
95 if version != 2 && version != 3 {
96 return Err(Error::CorruptObject(format!(
97 "unsupported pack version {version}"
98 )));
99 }
100 let nr_objects = rd.read_u32_be()? as usize;
101
102 let mut by_offset: HashMap<usize, PackedObjectEntry> = HashMap::new();
104 let mut by_oid: HashMap<ObjectId, PackedObjectEntry> = HashMap::new();
106
107 let mut pending: Vec<PendingDelta> = Vec::new();
108 let mut count = 0usize;
109
110 for _ in 0..nr_objects {
111 let obj_offset = rd.stream_pos();
112 let (type_code, size) = rd.read_type_size()?;
113
114 match type_code {
115 1..=4 => {
116 let kind = type_code_to_kind(type_code)?;
117 let data = rd.decompress(size)?;
118 let oid = write_or_hash(kind, &data, odb, opts.dry_run)?;
119 let entry = packed_entry_after_write(kind, data, oid, odb, opts, MAX_RETAIN_BYTES);
120 by_offset.insert(obj_offset, entry.clone());
121 by_oid.insert(oid, entry);
122 count += 1;
123 }
124 6 => {
125 let neg = rd.read_ofs_neg_offset()?;
127 let base_offset = obj_offset.checked_sub(neg).ok_or_else(|| {
128 Error::CorruptObject("ofs-delta base offset underflow".to_owned())
129 })?;
130 let delta_data = rd.decompress(size)?;
131 pending.push(PendingDelta {
132 offset: obj_offset,
133 base_oid: None,
134 base_offset: Some(base_offset),
135 delta_data,
136 });
137 }
138 7 => {
139 let base_bytes = rd.read_exact_n(20)?;
141 let base_oid = ObjectId::from_bytes(&base_bytes)?;
142 let delta_data = rd.decompress(size)?;
143 pending.push(PendingDelta {
144 offset: obj_offset,
145 base_oid: Some(base_oid),
146 base_offset: None,
147 delta_data,
148 });
149 }
150 other => {
151 return Err(Error::CorruptObject(format!(
152 "unknown packed-object type {other}"
153 )))
154 }
155 }
156 }
157
158 let digest = rd.finalize_hasher();
160 let trailing = rd.read_trailer_20()?;
161 if digest.as_slice() != trailing {
162 return Err(Error::CorruptObject(
163 "pack trailing checksum mismatch".to_owned(),
164 ));
165 }
166
167 let mut remaining = pending;
170 loop {
171 if remaining.is_empty() {
172 break;
173 }
174 let before = remaining.len();
175 let mut still_pending: Vec<PendingDelta> = Vec::new();
176
177 for delta in remaining {
178 let base_res: Option<Result<(ObjectKind, Cow<'_, [u8]>)>> =
179 if let Some(base_off) = delta.base_offset {
180 by_offset
181 .get(&base_off)
182 .map(|e| entry_object_bytes(e, odb).map(|d| (e.kind(), d)))
183 } else if let Some(ref base_id) = delta.base_oid {
184 if let Some(e) = by_oid.get(base_id) {
185 Some(entry_object_bytes(e, odb).map(|d| (e.kind(), d)))
186 } else if !opts.dry_run {
187 odb.read(base_id)
188 .ok()
189 .map(|obj| Ok((obj.kind, Cow::Owned(obj.data))))
190 } else {
191 None
192 }
193 } else {
194 None
195 };
196
197 match base_res {
198 Some(Ok((base_kind, base_data))) => {
199 let result = apply_delta(base_data.as_ref(), &delta.delta_data)?;
200 let oid = write_or_hash(base_kind, &result, odb, opts.dry_run)?;
201 let new_entry = packed_entry_after_write(
202 base_kind,
203 result,
204 oid,
205 odb,
206 opts,
207 MAX_RETAIN_BYTES,
208 );
209 by_offset.insert(delta.offset, new_entry.clone());
210 by_oid.insert(oid, new_entry);
211 count += 1;
212 }
213 Some(Err(e)) => return Err(e),
214 None => still_pending.push(delta),
215 }
216 }
217
218 remaining = still_pending;
219 if remaining.len() == before {
220 return Err(Error::CorruptObject(format!(
221 "{} delta(s) could not be resolved",
222 remaining.len()
223 )));
224 }
225 }
226
227 if opts.strict {
228 let mut dot_fsck_map: HashMap<ObjectId, (ObjectKind, Vec<u8>)> =
229 HashMap::with_capacity(by_oid.len());
230 for (oid, entry) in &by_oid {
231 let kind = entry.kind();
232 let data = match entry {
233 PackedObjectEntry::InMemory { data, .. } => data.clone(),
234 PackedObjectEntry::BlobOnDisk { oid: blob_oid } => odb.read(blob_oid)?.data,
235 };
236 dot_fsck_map.insert(*oid, (kind, data));
237 }
238 gitmodules::verify_packed_dot_special(&dot_fsck_map)?;
239 strict_verify_packed_references_map(
240 Some(odb),
241 &by_oid,
242 &opts.allowed_missing,
243 opts.allow_promisor_missing_references,
244 &opts.shallow_boundaries,
245 )?;
246 }
247
248 Ok(count)
249}
250
251#[derive(Debug, Clone)]
253enum PackedObjectEntry {
254 InMemory { kind: ObjectKind, data: Vec<u8> },
255 BlobOnDisk { oid: ObjectId },
256}
257
258impl PackedObjectEntry {
259 fn kind(&self) -> ObjectKind {
260 match self {
261 PackedObjectEntry::InMemory { kind, .. } => *kind,
262 PackedObjectEntry::BlobOnDisk { .. } => ObjectKind::Blob,
263 }
264 }
265}
266
267fn packed_entry_after_write(
268 kind: ObjectKind,
269 data: Vec<u8>,
270 oid: ObjectId,
271 _odb: &Odb,
272 opts: &UnpackOptions,
273 max_retain: usize,
274) -> PackedObjectEntry {
275 if !opts.dry_run && kind == ObjectKind::Blob && data.len() > max_retain {
276 PackedObjectEntry::BlobOnDisk { oid }
277 } else {
278 PackedObjectEntry::InMemory { kind, data }
279 }
280}
281
282fn entry_object_bytes<'a>(entry: &'a PackedObjectEntry, odb: &Odb) -> Result<Cow<'a, [u8]>> {
283 match entry {
284 PackedObjectEntry::InMemory { data, .. } => Ok(Cow::Borrowed(data.as_slice())),
285 PackedObjectEntry::BlobOnDisk { oid } => Ok(Cow::Owned(odb.read(oid)?.data)),
286 }
287}
288
289fn strict_verify_packed_references_map(
290 odb: Option<&Odb>,
291 pack: &HashMap<ObjectId, PackedObjectEntry>,
292 allowed_missing: &HashSet<ObjectId>,
293 allow_promisor_missing_references: bool,
294 shallow_boundaries: &HashSet<ObjectId>,
295) -> Result<()> {
296 for (oid, entry) in pack {
297 match entry {
298 PackedObjectEntry::BlobOnDisk { .. } => {}
299 PackedObjectEntry::InMemory { kind, data } => match kind {
300 ObjectKind::Tree => {
301 for e in parse_tree(data)? {
302 if e.mode == MODE_GITLINK {
307 continue;
308 }
309 if !strict_ref_resolves_map(
310 &e.oid,
311 pack,
312 odb,
313 allowed_missing,
314 allow_promisor_missing_references,
315 ) {
316 return Err(Error::CorruptObject(format!(
317 "strict: missing object {} referenced by tree",
318 e.oid.to_hex()
319 )));
320 }
321 }
322 }
323 ObjectKind::Commit => {
324 let c = parse_commit(data)?;
325 if !strict_ref_resolves_map(
326 &c.tree,
327 pack,
328 odb,
329 allowed_missing,
330 allow_promisor_missing_references,
331 ) {
332 return Err(Error::CorruptObject(format!(
333 "strict: missing tree {} referenced by commit",
334 c.tree.to_hex()
335 )));
336 }
337 if shallow_boundaries.contains(oid) {
341 continue;
342 }
343 for p in &c.parents {
344 if !strict_ref_resolves_map(
345 p,
346 pack,
347 odb,
348 allowed_missing,
349 allow_promisor_missing_references,
350 ) {
351 return Err(Error::CorruptObject(format!(
352 "strict: missing parent {} referenced by commit",
353 p.to_hex()
354 )));
355 }
356 }
357 }
358 ObjectKind::Tag => {
359 let t = parse_tag(data)?;
360 if !strict_ref_resolves_map(
361 &t.object,
362 pack,
363 odb,
364 allowed_missing,
365 allow_promisor_missing_references,
366 ) {
367 return Err(Error::CorruptObject(format!(
368 "strict: missing object {} referenced by tag",
369 t.object.to_hex()
370 )));
371 }
372 }
373 ObjectKind::Blob => {}
374 },
375 }
376 }
377 Ok(())
378}
379
380fn strict_ref_resolves_map(
381 oid: &ObjectId,
382 pack: &HashMap<ObjectId, PackedObjectEntry>,
383 odb: Option<&Odb>,
384 allowed_missing: &HashSet<ObjectId>,
385 allow_promisor_missing_references: bool,
386) -> bool {
387 pack.contains_key(oid)
388 || allowed_missing.contains(oid)
389 || odb.is_some_and(|o| o.exists(oid))
390 || allow_promisor_missing_references
391}
392
393fn strict_ref_resolves(
394 oid: &ObjectId,
395 pack: &std::collections::HashMap<ObjectId, (ObjectKind, Vec<u8>)>,
396 odb: Option<&Odb>,
397) -> bool {
398 pack.contains_key(oid) || odb.is_some_and(|o| o.exists(oid))
399}
400
401pub fn strict_verify_packed_references(
407 odb: Option<&Odb>,
408 pack: &HashMap<ObjectId, (ObjectKind, Vec<u8>)>,
409) -> Result<()> {
410 for (kind, data) in pack.values() {
411 match kind {
412 ObjectKind::Tree => {
413 for e in parse_tree(data)? {
414 if e.mode == MODE_GITLINK {
419 continue;
420 }
421 if !strict_ref_resolves(&e.oid, pack, odb) {
422 return Err(Error::CorruptObject(format!(
423 "strict: missing object {} referenced by tree",
424 e.oid.to_hex()
425 )));
426 }
427 }
428 }
429 ObjectKind::Commit => {
430 let c = parse_commit(data)?;
431 if !strict_ref_resolves(&c.tree, pack, odb) {
432 return Err(Error::CorruptObject(format!(
433 "strict: missing tree {} referenced by commit",
434 c.tree.to_hex()
435 )));
436 }
437 for p in &c.parents {
438 if !strict_ref_resolves(p, pack, odb) {
439 return Err(Error::CorruptObject(format!(
440 "strict: missing parent {} referenced by commit",
441 p.to_hex()
442 )));
443 }
444 }
445 }
446 ObjectKind::Tag => {
447 let t = parse_tag(data)?;
448 if !strict_ref_resolves(&t.object, pack, odb) {
449 return Err(Error::CorruptObject(format!(
450 "strict: missing object {} referenced by tag",
451 t.object.to_hex()
452 )));
453 }
454 }
455 ObjectKind::Blob => {}
456 }
457 }
458 Ok(())
459}
460
461pub fn pack_is_thin(data: &[u8]) -> bool {
468 pack_is_thin_inner(data).unwrap_or(false)
469}
470
471fn pack_is_thin_inner(data: &[u8]) -> Result<bool> {
472 let mut rd = PackReader::new(data.to_vec());
473 if rd.read_exact(4)? != b"PACK" {
474 return Ok(false);
475 }
476 let _version = rd.read_u32_be()?;
477 let nr_objects = rd.read_u32_be()? as usize;
478
479 let mut in_pack: HashSet<ObjectId> = HashSet::new();
480 let mut ref_delta_bases: Vec<ObjectId> = Vec::new();
481 for _ in 0..nr_objects {
482 let obj_offset = rd.pos;
483 let (type_code, size) = rd.read_type_size()?;
484 match type_code {
485 1..=4 => {
486 let kind = type_code_to_kind(type_code)?;
487 let obj_data = rd.decompress(size)?;
488 in_pack.insert(Odb::hash_object_data(kind, &obj_data));
489 }
490 6 => {
491 let _neg = rd.read_ofs_neg_offset()?;
493 let _ = obj_offset;
494 let _ = rd.decompress(size)?;
495 }
496 7 => {
497 let base_bytes = rd.read_exact(20)?;
498 ref_delta_bases.push(ObjectId::from_bytes(base_bytes)?);
499 let _ = rd.decompress(size)?;
500 }
501 _ => return Ok(false),
502 }
503 }
504 Ok(ref_delta_bases.iter().any(|b| !in_pack.contains(b)))
506}
507
508pub fn pack_bytes_to_object_map(data: &[u8], odb: &Odb) -> Result<HashMap<ObjectId, Object>> {
515 let rd = PackReader::new(data.to_vec());
516 build_pack_object_map(rd, odb)
517}
518
519fn build_pack_object_map(mut rd: PackReader, odb: &Odb) -> Result<HashMap<ObjectId, Object>> {
520 let sig = rd.read_exact(4)?;
521 if sig != b"PACK" {
522 return Err(Error::CorruptObject(
523 "not a pack stream: invalid signature".to_owned(),
524 ));
525 }
526 let version = rd.read_u32_be()?;
527 if version != 2 && version != 3 {
528 return Err(Error::CorruptObject(format!(
529 "unsupported pack version {version}"
530 )));
531 }
532 let nr_objects = rd.read_u32_be()? as usize;
533
534 let mut by_offset: HashMap<usize, (ObjectKind, Vec<u8>)> = HashMap::new();
535 let mut by_oid: HashMap<ObjectId, (ObjectKind, Vec<u8>)> = HashMap::new();
536 let mut pending: Vec<PendingDelta> = Vec::new();
537
538 fn base_from_pack_or_odb(
539 by_oid: &HashMap<ObjectId, (ObjectKind, Vec<u8>)>,
540 odb: &Odb,
541 id: &ObjectId,
542 ) -> Option<(ObjectKind, Vec<u8>)> {
543 if let Some(e) = by_oid.get(id) {
544 return Some(e.clone());
545 }
546 odb.read(id).ok().map(|o| (o.kind, o.data))
547 }
548
549 for _ in 0..nr_objects {
550 let obj_offset = rd.pos;
551 let (type_code, size) = rd.read_type_size()?;
552
553 match type_code {
554 1..=4 => {
555 let kind = type_code_to_kind(type_code)?;
556 let data = rd.decompress(size)?;
557 let oid = Odb::hash_object_data(kind, &data);
558 by_offset.insert(obj_offset, (kind, data.clone()));
559 by_oid.insert(oid, (kind, data));
560 }
561 6 => {
562 let neg = rd.read_ofs_neg_offset()?;
563 let base_offset = obj_offset.checked_sub(neg).ok_or_else(|| {
564 Error::CorruptObject("ofs-delta base offset underflow".to_owned())
565 })?;
566 let delta_data = rd.decompress(size)?;
567 pending.push(PendingDelta {
568 offset: obj_offset,
569 base_oid: None,
570 base_offset: Some(base_offset),
571 delta_data,
572 });
573 }
574 7 => {
575 let base_bytes = rd.read_exact(20)?;
576 let base_oid = ObjectId::from_bytes(base_bytes)?;
577 let delta_data = rd.decompress(size)?;
578 pending.push(PendingDelta {
579 offset: obj_offset,
580 base_oid: Some(base_oid),
581 base_offset: None,
582 delta_data,
583 });
584 }
585 other => {
586 return Err(Error::CorruptObject(format!(
587 "unknown packed-object type {other}"
588 )))
589 }
590 }
591 }
592
593 let consumed = rd.pos;
594 {
595 let mut hasher = Sha1::new();
596 hasher.update(&rd.data[..consumed]);
597 let digest = hasher.finalize();
598 let trailing = rd.read_exact(20)?;
599 if digest.as_slice() != trailing {
600 return Err(Error::CorruptObject(
601 "pack trailing checksum mismatch".to_owned(),
602 ));
603 }
604 }
605
606 let mut remaining = pending;
607 loop {
608 if remaining.is_empty() {
609 break;
610 }
611 let before = remaining.len();
612 let mut still_pending: Vec<PendingDelta> = Vec::new();
613
614 for delta in remaining {
615 let base = if let Some(base_off) = delta.base_offset {
616 by_offset.get(&base_off).cloned()
617 } else if let Some(ref base_id) = delta.base_oid {
618 base_from_pack_or_odb(&by_oid, odb, base_id)
619 } else {
620 None
621 };
622
623 if let Some((base_kind, base_data)) = base {
624 let result = apply_delta(&base_data, &delta.delta_data)?;
625 let oid = Odb::hash_object_data(base_kind, &result);
626 by_offset.insert(delta.offset, (base_kind, result.clone()));
627 by_oid.insert(oid, (base_kind, result));
628 } else {
629 still_pending.push(delta);
630 }
631 }
632
633 remaining = still_pending;
634 if remaining.len() == before {
635 return Err(Error::CorruptObject(format!(
636 "{} delta(s) could not be resolved",
637 remaining.len()
638 )));
639 }
640 }
641
642 Ok(by_oid
643 .into_iter()
644 .map(|(oid, (kind, data))| (oid, Object::new(kind, data)))
645 .collect())
646}
647
648fn write_or_hash(kind: ObjectKind, data: &[u8], odb: &Odb, dry_run: bool) -> Result<ObjectId> {
651 if dry_run {
652 Ok(Odb::hash_object_data(kind, data))
653 } else {
654 odb.write_local(kind, data)
657 }
658}
659
660fn type_code_to_kind(code: u8) -> Result<ObjectKind> {
662 match code {
663 1 => Ok(ObjectKind::Commit),
664 2 => Ok(ObjectKind::Tree),
665 3 => Ok(ObjectKind::Blob),
666 4 => Ok(ObjectKind::Tag),
667 _ => Err(Error::CorruptObject(format!(
668 "type code {code} is not a regular object type"
669 ))),
670 }
671}
672
673struct PackReader {
675 data: Vec<u8>,
676 pos: usize,
677}
678
679impl PackReader {
680 fn new(data: Vec<u8>) -> Self {
681 Self { data, pos: 0 }
682 }
683
684 fn read_exact(&mut self, n: usize) -> Result<&[u8]> {
687 if self.pos + n > self.data.len() {
688 return Err(Error::CorruptObject(format!(
689 "pack stream truncated: need {n} bytes at offset {}",
690 self.pos
691 )));
692 }
693 let slice = &self.data[self.pos..self.pos + n];
694 self.pos += n;
695 Ok(slice)
696 }
697
698 fn read_byte(&mut self) -> Result<u8> {
700 if self.pos >= self.data.len() {
701 return Err(Error::CorruptObject(
702 "unexpected end of pack stream".to_owned(),
703 ));
704 }
705 let b = self.data[self.pos];
706 self.pos += 1;
707 Ok(b)
708 }
709
710 fn read_u32_be(&mut self) -> Result<u32> {
712 let bytes = self.read_exact(4)?;
713 Ok(u32::from_be_bytes(bytes.try_into().map_err(|_| {
714 Error::CorruptObject("u32 read failed".to_owned())
715 })?))
716 }
717
718 fn read_type_size(&mut self) -> Result<(u8, usize)> {
723 let c = self.read_byte()?;
724 let type_code = (c >> 4) & 0x7;
725 let mut size = (c & 0x0f) as usize;
726 let mut shift = 4u32;
727 let mut cur = c;
728 while cur & 0x80 != 0 {
729 cur = self.read_byte()?;
730 size |= ((cur & 0x7f) as usize) << shift;
731 shift += 7;
732 }
733 Ok((type_code, size))
734 }
735
736 fn read_ofs_neg_offset(&mut self) -> Result<usize> {
741 let mut c = self.read_byte()?;
742 let mut value = (c & 0x7f) as usize;
743 while c & 0x80 != 0 {
744 c = self.read_byte()?;
745 value = (value + 1) << 7 | (c & 0x7f) as usize;
746 }
747 Ok(value)
748 }
749
750 fn decompress(&mut self, expected_size: usize) -> Result<Vec<u8>> {
755 let slice = &self.data[self.pos..];
756 let mut decoder = ZlibDecoder::new(slice);
757 let mut out = Vec::with_capacity(expected_size);
758 decoder
759 .read_to_end(&mut out)
760 .map_err(|e| Error::Zlib(e.to_string()))?;
761 if out.len() != expected_size {
762 return Err(Error::CorruptObject(format!(
763 "decompressed {} bytes but expected {}",
764 out.len(),
765 expected_size
766 )));
767 }
768 self.pos += decoder.total_in() as usize;
769 Ok(out)
770 }
771}
772
773fn io_to_corrupt_eof(e: io::Error, stream_pos: usize, context: &str) -> Error {
774 if e.kind() == io::ErrorKind::UnexpectedEof {
775 Error::CorruptObject(format!(
776 "pack stream truncated ({context}) at offset {stream_pos}"
777 ))
778 } else {
779 Error::Io(e)
780 }
781}
782
/// Incremental pack reader over an arbitrary byte source.
///
/// It tracks the stream position, feeds a running SHA-1 with the bytes it consumes and
/// enforces an optional input-size limit.
struct StreamingPackReader<'a> {
    /// Underlying byte source.
    inner: &'a mut dyn Read,
    /// SHA-1 over every byte consumed so far; the trailer is not fed into it.
    pack_hasher: Sha1,
    /// Number of bytes consumed from the stream.
    stream_pos: usize,
    /// Optional cap on `stream_pos`.
    max_input_bytes: Option<u64>,
    /// Bytes already read from `inner` but not yet consumed (and not yet hashed).
    pending: Vec<u8>,
}
797
798impl<'a> StreamingPackReader<'a> {
799 fn new(inner: &'a mut dyn Read, max_input_bytes: Option<u64>) -> Self {
800 Self {
801 inner,
802 pack_hasher: Sha1::new(),
803 stream_pos: 0,
804 max_input_bytes,
805 pending: Vec::new(),
806 }
807 }
808
809 fn stream_pos(&self) -> usize {
810 self.stream_pos
811 }
812
813 fn enforce_max_input(&self) -> Result<()> {
814 if let Some(limit) = self.max_input_bytes {
815 let pos = u64::try_from(self.stream_pos)
816 .map_err(|_| Error::CorruptObject("pack stream position overflow".to_owned()))?;
817 if pos > limit {
818 return Err(Error::CorruptObject(
819 "pack exceeds maximum allowed size".to_owned(),
820 ));
821 }
822 }
823 Ok(())
824 }
825
826 fn read_from_source(&mut self, buf: &mut [u8]) -> Result<usize> {
828 let n = if !self.pending.is_empty() {
829 let take = buf.len().min(self.pending.len());
830 buf[..take].copy_from_slice(&self.pending[..take]);
831 self.pending.drain(..take);
832 take
833 } else {
834 self.inner.read(buf).map_err(Error::Io)?
835 };
836 if n > 0 {
837 self.pack_hasher.update(&buf[..n]);
838 self.stream_pos += n;
839 self.enforce_max_input()?;
840 }
841 Ok(n)
842 }
843
844 fn read_byte(&mut self) -> Result<u8> {
845 let mut b = [0u8; 1];
846 let n = self.read_from_source(&mut b)?;
847 if n == 0 {
848 return Err(Error::CorruptObject(format!(
849 "pack stream truncated (read byte) at offset {}",
850 self.stream_pos
851 )));
852 }
853 Ok(b[0])
854 }
855
856 fn read_exact_n(&mut self, n: usize) -> Result<Vec<u8>> {
857 let mut v = vec![0u8; n];
858 let mut got = 0usize;
859 while got < n {
860 let m = self.read_from_source(&mut v[got..n])?;
861 if m == 0 {
862 return Err(Error::CorruptObject(format!(
863 "pack stream truncated (read exact) at offset {}",
864 self.stream_pos
865 )));
866 }
867 got += m;
868 }
869 Ok(v)
870 }
871
872 fn read_u32_be(&mut self) -> Result<u32> {
873 let mut b = [0u8; 4];
874 let mut got = 0usize;
875 while got < 4 {
876 let m = self.read_from_source(&mut b[got..4])?;
877 if m == 0 {
878 return Err(Error::CorruptObject(format!(
879 "pack stream truncated (read u32) at offset {}",
880 self.stream_pos
881 )));
882 }
883 got += m;
884 }
885 Ok(u32::from_be_bytes(b))
886 }
887
888 fn read_type_size(&mut self) -> Result<(u8, usize)> {
889 let c = self.read_byte()?;
890 let type_code = (c >> 4) & 0x7;
891 let mut size = (c & 0x0f) as usize;
892 let mut shift = 4u32;
893 let mut cur = c;
894 while cur & 0x80 != 0 {
895 cur = self.read_byte()?;
896 size |= ((cur & 0x7f) as usize) << shift;
897 shift += 7;
898 }
899 Ok((type_code, size))
900 }
901
902 fn read_ofs_neg_offset(&mut self) -> Result<usize> {
903 let mut c = self.read_byte()?;
904 let mut value = (c & 0x7f) as usize;
905 while c & 0x80 != 0 {
906 c = self.read_byte()?;
907 value = (value + 1) << 7 | (c & 0x7f) as usize;
908 }
909 Ok(value)
910 }
911
912 fn decompress(&mut self, expected_size: usize) -> Result<Vec<u8>> {
922 if expected_size == 0 {
926 const CHUNK: usize = 64 * 1024;
927 let mut scratch = [0u8; CHUNK];
928 loop {
929 let mut cursor = std::io::Cursor::new(self.pending.as_slice());
930 let mut z = ZlibDecoder::new(&mut cursor);
931 let mut sink = [0u8; 1];
932 match z.read(&mut sink) {
933 Ok(0) => {
934 let consumed = z.total_in() as usize;
935 if consumed > self.pending.len() {
936 return Err(Error::CorruptObject(
937 "zlib total_in exceeds pending buffer".to_owned(),
938 ));
939 }
940 if consumed == 0 {
941 let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
942 if n == 0 {
943 return Err(Error::CorruptObject(format!(
944 "pack stream truncated (zlib) at offset {}",
945 self.stream_pos
946 )));
947 }
948 self.pending.extend_from_slice(&scratch[..n]);
949 continue;
950 }
951 self.pack_hasher.update(&self.pending[..consumed]);
952 self.stream_pos += consumed;
953 self.pending.drain(..consumed);
954 self.enforce_max_input()?;
955 return Ok(Vec::new());
956 }
957 Ok(_) => {
958 return Err(Error::CorruptObject(
959 "0-byte packed object inflated to non-empty output".to_owned(),
960 ));
961 }
962 Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => {
963 let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
964 if n == 0 {
965 return Err(Error::CorruptObject(format!(
966 "pack stream truncated (zlib) at offset {}",
967 self.stream_pos
968 )));
969 }
970 self.pending.extend_from_slice(&scratch[..n]);
971 }
972 Err(e) => return Err(Error::Zlib(e.to_string())),
973 }
974 }
975 }
976
977 const CHUNK: usize = 64 * 1024;
978 let mut scratch = [0u8; CHUNK];
979
980 let mut out = vec![0u8; expected_size];
981 let mut z = Decompress::new(true);
982 let mut out_pos = 0usize;
983 let mut eof = false;
984 loop {
985 if self.pending.is_empty() && !eof {
986 let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
987 if n == 0 {
988 eof = true;
989 } else {
990 self.pending.extend_from_slice(&scratch[..n]);
991 }
992 }
993
994 let flush = if eof && self.pending.is_empty() {
995 FlushDecompress::Finish
996 } else {
997 FlushDecompress::None
998 };
999
1000 let before_in = z.total_in();
1001 let before_out = z.total_out();
1002 let status = z
1003 .decompress(self.pending.as_slice(), &mut out[out_pos..], flush)
1004 .map_err(|e| Error::Zlib(e.to_string()))?;
1005 let consumed = (z.total_in() - before_in) as usize;
1006 if consumed > self.pending.len() {
1007 return Err(Error::CorruptObject(
1008 "zlib consumed more than pending buffer".to_owned(),
1009 ));
1010 }
1011 self.pack_hasher.update(&self.pending[..consumed]);
1012 self.stream_pos += consumed;
1013 self.pending.drain(..consumed);
1014 self.enforce_max_input()?;
1015 out_pos += (z.total_out() - before_out) as usize;
1016
1017 match status {
1018 Status::StreamEnd => {
1019 if out_pos != expected_size {
1020 return Err(Error::CorruptObject(format!(
1021 "decompressed size mismatch: got {out_pos}, want {expected_size}"
1022 )));
1023 }
1024 return Ok(out);
1025 }
1026 Status::Ok | Status::BufError => {
1027 if consumed == 0 && !eof {
1028 let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
1029 if n == 0 {
1030 eof = true;
1031 } else {
1032 self.pending.extend_from_slice(&scratch[..n]);
1033 }
1034 } else if eof && self.pending.is_empty() && out_pos != expected_size {
1035 return Err(Error::CorruptObject(format!(
1036 "pack stream truncated (zlib) at offset {}",
1037 self.stream_pos
1038 )));
1039 }
1040 }
1041 }
1042 }
1043 }
1044
1045 fn finalize_hasher(
1047 &self,
1048 ) -> sha1::digest::generic_array::GenericArray<u8, sha1::digest::consts::U20> {
1049 self.pack_hasher.clone().finalize()
1050 }
1051
1052 fn read_trailer_20(&mut self) -> Result<[u8; 20]> {
1054 let mut b = [0u8; 20];
1055 if self.pending.len() >= 20 {
1056 b.copy_from_slice(&self.pending[..20]);
1057 self.pending.drain(..20);
1058 self.stream_pos += 20;
1059 self.enforce_max_input()?;
1060 return Ok(b);
1061 }
1062 let tail = self.pending.len();
1063 if tail > 0 {
1064 b[..tail].copy_from_slice(&self.pending[..]);
1065 self.pending.clear();
1066 }
1067 self.inner
1068 .read_exact(&mut b[tail..])
1069 .map_err(|e| io_to_corrupt_eof(e, self.stream_pos, "trailer"))?;
1070 self.stream_pos += 20;
1071 self.enforce_max_input()?;
1072 Ok(b)
1073 }
1074}
1075
1076pub fn apply_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
1090 let mut pos = 0usize;
1091
1092 let src_size = read_delta_varint(delta, &mut pos)?;
1093 if src_size != base.len() {
1094 return Err(Error::CorruptObject(format!(
1095 "delta source size {src_size} != base size {}",
1096 base.len()
1097 )));
1098 }
1099 let dest_size = read_delta_varint(delta, &mut pos)?;
1100 let mut result = Vec::with_capacity(dest_size);
1101
1102 while pos < delta.len() {
1103 let cmd = delta[pos];
1104 pos += 1;
1105 if cmd == 0 {
1106 return Err(Error::CorruptObject(
1107 "reserved opcode 0 in delta stream".to_owned(),
1108 ));
1109 }
1110 if cmd & 0x80 != 0 {
1111 let mut offset = 0usize;
1114 let mut size = 0usize;
1115
1116 macro_rules! maybe_read_byte {
1117 ($flag:expr, $shift:expr, $target:expr) => {
1118 if cmd & $flag != 0 {
1119 let b = *delta.get(pos).ok_or_else(|| {
1120 Error::CorruptObject("truncated delta COPY operand".to_owned())
1121 })?;
1122 pos += 1;
1123 $target |= (b as usize) << $shift;
1124 }
1125 };
1126 }
1127
1128 maybe_read_byte!(0x01, 0, offset);
1129 maybe_read_byte!(0x02, 8, offset);
1130 maybe_read_byte!(0x04, 16, offset);
1131 maybe_read_byte!(0x08, 24, offset);
1132 maybe_read_byte!(0x10, 0, size);
1133 maybe_read_byte!(0x20, 8, size);
1134 maybe_read_byte!(0x40, 16, size);
1135
1136 if size == 0 {
1137 size = 0x10000;
1138 }
1139
1140 let end = offset.checked_add(size).ok_or_else(|| {
1141 Error::CorruptObject("delta COPY range overflows usize".to_owned())
1142 })?;
1143 let chunk = base.get(offset..end).ok_or_else(|| {
1144 Error::CorruptObject(format!(
1145 "delta COPY [{offset},{end}) out of range (base is {} bytes)",
1146 base.len()
1147 ))
1148 })?;
1149 result.extend_from_slice(chunk);
1150 } else {
1151 let n = cmd as usize;
1153 let chunk = delta
1154 .get(pos..pos + n)
1155 .ok_or_else(|| Error::CorruptObject("truncated delta INSERT data".to_owned()))?;
1156 result.extend_from_slice(chunk);
1157 pos += n;
1158 }
1159 }
1160
1161 if result.len() != dest_size {
1162 return Err(Error::CorruptObject(format!(
1163 "delta produced {} bytes but expected {dest_size}",
1164 result.len()
1165 )));
1166 }
1167
1168 Ok(result)
1169}
1170
1171fn read_delta_varint(data: &[u8], pos: &mut usize) -> Result<usize> {
1175 let mut value = 0usize;
1176 let mut shift = 0u32;
1177 loop {
1178 let b = *data
1179 .get(*pos)
1180 .ok_or_else(|| Error::CorruptObject("truncated delta varint".to_owned()))?;
1181 *pos += 1;
1182 value |= ((b & 0x7f) as usize) << shift;
1183 shift += 7;
1184 if b & 0x80 == 0 {
1185 break;
1186 }
1187 }
1188 Ok(value)
1189}
1190
1191#[cfg(test)]
1192mod tests {
1193 use super::*;
1194
1195 fn make_pack(objects: &[(ObjectKind, &[u8])]) -> Vec<u8> {
1198 use flate2::write::ZlibEncoder;
1199 use std::io::Write;
1200
1201 let mut entries: Vec<Vec<u8>> = Vec::new();
1202 for (kind, data) in objects {
1203 let type_code: u8 = match kind {
1204 ObjectKind::Commit => 1,
1205 ObjectKind::Tree => 2,
1206 ObjectKind::Blob => 3,
1207 ObjectKind::Tag => 4,
1208 };
1209 let mut header = Vec::new();
1211 let mut size = data.len();
1212 let first = ((type_code & 0x7) << 4) | (size & 0x0f) as u8;
1213 size >>= 4;
1214 if size > 0 {
1215 header.push(first | 0x80);
1216 while size > 0 {
1217 let b = (size & 0x7f) as u8;
1218 size >>= 7;
1219 header.push(if size > 0 { b | 0x80 } else { b });
1220 }
1221 } else {
1222 header.push(first);
1223 }
1224 let mut enc = ZlibEncoder::new(Vec::new(), flate2::Compression::default());
1226 enc.write_all(data).unwrap();
1227 let compressed = enc.finish().unwrap();
1228 let mut entry = header;
1229 entry.extend_from_slice(&compressed);
1230 entries.push(entry);
1231 }
1232
1233 let mut pack = Vec::new();
1235 pack.extend_from_slice(b"PACK");
1236 pack.extend_from_slice(&2u32.to_be_bytes());
1237 pack.extend_from_slice(&(objects.len() as u32).to_be_bytes());
1238 for entry in &entries {
1239 pack.extend_from_slice(entry);
1240 }
1241 let mut hasher = Sha1::new();
1242 hasher.update(&pack);
1243 let digest = hasher.finalize();
1244 pack.extend_from_slice(digest.as_slice());
1245 pack
1246 }
1247
1248 #[test]
1249 fn test_apply_delta_simple() {
1250 let base = b"hello";
1252 let mut delta = Vec::new();
1253 delta.push(5u8);
1255 delta.push(11u8);
1257 delta.push(0x80 | 0x01 | 0x10); delta.push(0u8); delta.push(5u8); delta.push(6u8);
1264 delta.extend_from_slice(b" world");
1265
1266 let result = apply_delta(base, &delta).unwrap();
1267 assert_eq!(result, b"hello world");
1268 }
1269
1270 #[test]
1271 fn test_apply_delta_insert_only() {
1272 let base = b"";
1273 let mut delta = Vec::new();
1274 delta.push(0u8); delta.push(5u8); delta.push(5u8); delta.extend_from_slice(b"hello");
1278
1279 let result = apply_delta(base, &delta).unwrap();
1280 assert_eq!(result, b"hello");
1281 }
1282
1283 #[test]
1284 fn test_apply_delta_copy_only() {
1285 let base = b"abcdef";
1286 let mut delta = Vec::new();
1287 delta.push(6u8); delta.push(3u8); delta.push(0x91u8);
1292 delta.push(2u8); delta.push(3u8); let result = apply_delta(base, &delta).unwrap();
1296 assert_eq!(result, b"cde");
1297 }
1298
1299 #[test]
1300 fn test_apply_delta_size_zero_means_65536() {
1301 let base = vec![0xABu8; 65536];
1303 let mut delta = Vec::new();
1304 delta.push(0x80 | (65536 & 0x7f) as u8); delta.push(0x80 | ((65536 >> 7) & 0x7f) as u8); delta.push(((65536 >> 14) & 0x7f) as u8); delta.push(0x80 | (65536 & 0x7f) as u8);
1310 delta.push(0x80 | ((65536 >> 7) & 0x7f) as u8);
1311 delta.push(((65536 >> 14) & 0x7f) as u8);
1312 delta.push(0x80u8);
1315
1316 let result = apply_delta(&base, &delta).unwrap();
1317 assert_eq!(result.len(), 65536);
1318 assert!(result.iter().all(|&b| b == 0xAB));
1319 }
1320
/// Unpacking a pack of two plain blobs stores both in the object database.
#[test]
fn test_unpack_objects_blobs() {
    use tempfile::TempDir;

    // Fresh, empty object directory for this test only.
    let scratch = TempDir::new().unwrap();
    let objects_dir = scratch.path().join("objects");
    std::fs::create_dir_all(&objects_dir).unwrap();
    let odb = Odb::new(&objects_dir);

    let pack = make_pack(&[
        (ObjectKind::Blob, b"hello\n"),
        (ObjectKind::Blob, b"world\n"),
    ]);

    let unpacked =
        unpack_objects(&mut pack.as_slice(), &odb, &UnpackOptions::default()).unwrap();
    assert_eq!(unpacked, 2);

    // Each blob must be readable under the id computed from its content.
    let hello_oid = Odb::hash_object_data(ObjectKind::Blob, b"hello\n");
    let world_oid = Odb::hash_object_data(ObjectKind::Blob, b"world\n");
    let hello = odb.read(&hello_oid).unwrap();
    let world = odb.read(&world_oid).unwrap();
    assert_eq!(hello.data, b"hello\n");
    assert_eq!(world.data, b"world\n");
}
1346
/// The empty tree must be written out as a real loose object file when unpacked.
#[test]
fn test_unpack_objects_empty_tree() {
    use tempfile::TempDir;

    let scratch = TempDir::new().unwrap();
    let objects_dir = scratch.path().join("objects");
    std::fs::create_dir_all(&objects_dir).unwrap();
    let odb = Odb::new(&objects_dir);

    let pack = make_pack(&[(ObjectKind::Tree, b"")]);
    let unpacked =
        unpack_objects(&mut pack.as_slice(), &odb, &UnpackOptions::default()).unwrap();
    assert_eq!(unpacked, 1);

    let empty_tree = Odb::hash_object_data(ObjectKind::Tree, b"");
    assert!(odb.exists(&empty_tree));

    // `exists` alone is not sufficient here: also check the file on disk.
    let loose_dir = objects_dir.join(empty_tree.loose_prefix());
    let loose_file = loose_dir.join(empty_tree.loose_suffix());
    assert!(
        loose_file.is_file(),
        "empty tree must be materialized as a loose object during unpack"
    );
}
1371
/// Strict reference verification ignores gitlink entries but still rejects a
/// tree whose regular-file entry points at an object that is not in the pack.
#[test]
fn test_strict_skips_gitlink_tree_entries() {
    use crate::index::{MODE_GITLINK, MODE_REGULAR};
    use crate::objects::{serialize_tree, TreeEntry};

    // Case 1: the only entry is a gitlink whose target is absent from the pack.
    let gitlink_entry = TreeEntry {
        mode: MODE_GITLINK,
        name: b"sub".to_vec(),
        oid: ObjectId::from_hex(&"7f".repeat(20)).unwrap(),
    };
    let gitlink_tree = serialize_tree(&[gitlink_entry]);
    let gitlink_tree_oid = Odb::hash_object_data(ObjectKind::Tree, &gitlink_tree);

    let mut gitlink_pack = HashMap::new();
    gitlink_pack.insert(gitlink_tree_oid, (ObjectKind::Tree, gitlink_tree));
    assert!(strict_verify_packed_references(None, &gitlink_pack).is_ok());

    // Case 2: a regular file entry whose blob is absent must be reported.
    let file_entry = TreeEntry {
        mode: MODE_REGULAR,
        name: b"file".to_vec(),
        oid: ObjectId::from_hex(&"ab".repeat(20)).unwrap(),
    };
    let file_tree = serialize_tree(&[file_entry]);
    let file_tree_oid = Odb::hash_object_data(ObjectKind::Tree, &file_tree);

    let mut file_pack = HashMap::new();
    file_pack.insert(file_tree_oid, (ObjectKind::Tree, file_tree));
    let verdict = strict_verify_packed_references(None, &file_pack);
    assert!(matches!(verdict, Err(Error::CorruptObject(_))));
}
1410
/// Test reader that serves `data` in pieces of at most `max_len` bytes per
/// `read` call, to exercise short reads.
struct ChunkedReader<'a> {
    /// Full byte stream to serve.
    data: &'a [u8],
    /// Index of the next byte in `data` that has not been handed out yet.
    pos: usize,
    /// Upper bound on the number of bytes returned by a single `read`.
    max_len: usize,
}

impl io::Read for ChunkedReader<'_> {
    /// Copies the next chunk into `buf` and returns its length.
    ///
    /// The chunk length is the smallest of: bytes remaining, `max_len`, and
    /// `buf.len()`. Returns `Ok(0)` once every byte has been served.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        // `get` yields `None` when `pos` is past the end and an empty slice
        // when it is exactly at the end; both mean end of stream.
        let unread = match self.data.get(self.pos..) {
            Some(rest) if !rest.is_empty() => rest,
            _ => return Ok(0),
        };

        let chunk_len = buf.len().min(self.max_len).min(unread.len());
        let (chunk, _) = unread.split_at(chunk_len);
        buf[..chunk_len].copy_from_slice(chunk);
        self.pos += chunk_len;
        Ok(chunk_len)
    }
}
1431
/// Feeding the pack in small chunks must unpack the same object as feeding it
/// from one contiguous buffer.
#[test]
fn test_unpack_objects_chunked_read_matches_full_buffer() {
    use tempfile::TempDir;

    let pack = make_pack(&[(ObjectKind::Blob, b"chunked-stream")]);
    let opts = UnpackOptions::default();
    let expected_oid = Odb::hash_object_data(ObjectKind::Blob, b"chunked-stream");

    // Baseline: the whole pack is available from a single slice.
    let full_tmp = TempDir::new().unwrap();
    let full_dir = full_tmp.path().join("objects");
    std::fs::create_dir_all(&full_dir).unwrap();
    let full_odb = Odb::new(&full_dir);
    let full_count = unpack_objects(&mut pack.as_slice(), &full_odb, &opts).unwrap();
    assert_eq!(full_count, 1);
    assert!(full_odb.exists(&expected_oid));

    // Same pack into a second, independent store, at most 8 bytes per read.
    let chunked_tmp = TempDir::new().unwrap();
    let chunked_dir = chunked_tmp.path().join("objects");
    std::fs::create_dir_all(&chunked_dir).unwrap();
    let chunked_odb = Odb::new(&chunked_dir);
    let mut source = ChunkedReader {
        data: pack.as_slice(),
        pos: 0,
        max_len: 8,
    };
    let chunked_count = unpack_objects(&mut source, &chunked_odb, &opts).unwrap();
    assert_eq!(chunked_count, 1);
    assert!(chunked_odb.exists(&expected_oid));
}
1461
/// With `dry_run` set, objects are counted but nothing reaches the object database.
#[test]
fn test_unpack_objects_dry_run_writes_nothing() {
    use tempfile::TempDir;

    let scratch = TempDir::new().unwrap();
    let objects_dir = scratch.path().join("objects");
    std::fs::create_dir_all(&objects_dir).unwrap();
    let odb = Odb::new(&objects_dir);

    let pack = make_pack(&[(ObjectKind::Blob, b"test content")]);

    // Only the two flags that differ from the defaults are spelled out.
    let opts = UnpackOptions {
        dry_run: true,
        quiet: true,
        ..Default::default()
    };

    let unpacked = unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap();
    assert_eq!(unpacked, 1);

    let blob_oid = Odb::hash_object_data(ObjectKind::Blob, b"test content");
    assert!(!odb.exists(&blob_oid));
}
1488
/// A stream that does not begin with the `PACK` magic is rejected.
#[test]
fn test_unpack_objects_bad_signature() {
    use tempfile::TempDir;

    let scratch = TempDir::new().unwrap();
    let objects_dir = scratch.path().join("objects");
    std::fs::create_dir_all(&objects_dir).unwrap();
    let odb = Odb::new(&objects_dir);

    // Well-formed header and trailer lengths, but the wrong 4-byte magic.
    let mut stream: Vec<u8> = Vec::with_capacity(32);
    stream.extend_from_slice(b"NOPE");
    stream.extend_from_slice(&2u32.to_be_bytes()); // version
    stream.extend_from_slice(&0u32.to_be_bytes()); // object count
    stream.extend_from_slice(&[0u8; 20]); // placeholder trailing checksum

    let failure =
        unpack_objects(&mut stream.as_slice(), &odb, &UnpackOptions::default()).unwrap_err();
    assert!(failure.to_string().contains("invalid signature"));
}
1503
/// Corrupting the last byte of the pack must surface as a checksum error.
#[test]
fn test_unpack_objects_checksum_mismatch() {
    use tempfile::TempDir;

    let scratch = TempDir::new().unwrap();
    let objects_dir = scratch.path().join("objects");
    std::fs::create_dir_all(&objects_dir).unwrap();
    let odb = Odb::new(&objects_dir);

    let mut pack = make_pack(&[(ObjectKind::Blob, b"data")]);
    // Invert every bit of the final byte of the stream.
    let last_byte = pack.last_mut().expect("generated pack is never empty");
    *last_byte ^= 0xFF;

    let failure =
        unpack_objects(&mut pack.as_slice(), &odb, &UnpackOptions::default()).unwrap_err();
    assert!(failure.to_string().contains("checksum"));
}
1521
/// A delta whose declared source size differs from the real base length is rejected.
#[test]
fn test_apply_delta_source_size_mismatch() {
    let base = b"hi";

    // The header claims a 3-byte source although `base` is only 2 bytes long;
    // the remainder is a valid "insert 2 bytes" instruction.
    let delta = [
        3u8,  // declared source size (wrong on purpose)
        2u8,  // target size
        2u8,  // insert two literal bytes
        b'h',
        b'i',
    ];

    let failure = apply_delta(base, &delta).unwrap_err();
    assert!(failure.to_string().contains("source size"));
}
1529}