1use std::borrow::Cow;
13use std::collections::HashMap;
14use std::io::{self, Read};
15
16use flate2::read::ZlibDecoder;
17use flate2::{Decompress, FlushDecompress, Status};
18use sha1::{Digest, Sha1};
19
20use crate::error::{Error, Result};
21use crate::gitmodules;
22use crate::index::MODE_GITLINK;
23use crate::objects::{parse_commit, parse_tag, parse_tree, Object, ObjectId, ObjectKind};
24use crate::odb::Odb;
25
/// Options controlling how `unpack_objects` processes a pack stream.
#[derive(Debug, Default)]
pub struct UnpackOptions {
    /// When set, object IDs are only computed by hashing; nothing is written to the
    /// object database.
    pub dry_run: bool,
    /// Not read by any code in this file.
    /// NOTE(review): presumably suppresses progress output in callers — confirm.
    pub quiet: bool,
    /// When set, `unpack_objects` additionally runs
    /// `gitmodules::verify_packed_dot_special` and the packed-reference check after all
    /// deltas are resolved.
    pub strict: bool,
    /// Optional upper bound on the number of stream bytes consumed; exceeding it makes
    /// the streaming reader fail with a corrupt-object error. `None` disables the check.
    pub max_input_bytes: Option<u64>,
}
41
/// A delta entry read from a pack whose base has not been applied yet.
///
/// The pack readers in this file set exactly one of `base_oid` / `base_offset`.
struct PendingDelta {
    /// Stream offset at which this delta entry's header starts.
    offset: usize,
    /// Base object ID, set for type-7 entries (base named by a 20-byte ID).
    base_oid: Option<ObjectId>,
    /// Stream offset of the base entry, set for type-6 entries (base named by a
    /// backwards distance from this entry).
    base_offset: Option<usize>,
    /// Inflated delta payload, later passed to `apply_delta`.
    delta_data: Vec<u8>,
}
54
55pub fn unpack_objects(reader: &mut dyn Read, odb: &Odb, opts: &UnpackOptions) -> Result<usize> {
70 const MAX_RETAIN_BYTES: usize = 1024 * 1024;
74
75 let mut rd = StreamingPackReader::new(reader, opts.max_input_bytes);
76
77 let sig = rd.read_exact_n(4)?;
79 if sig != b"PACK" {
80 return Err(Error::CorruptObject(
81 "not a pack stream: invalid signature".to_owned(),
82 ));
83 }
84 let version = rd.read_u32_be()?;
85 if version != 2 && version != 3 {
86 return Err(Error::CorruptObject(format!(
87 "unsupported pack version {version}"
88 )));
89 }
90 let nr_objects = rd.read_u32_be()? as usize;
91
92 let mut by_offset: HashMap<usize, PackedObjectEntry> = HashMap::new();
94 let mut by_oid: HashMap<ObjectId, PackedObjectEntry> = HashMap::new();
96
97 let mut pending: Vec<PendingDelta> = Vec::new();
98 let mut count = 0usize;
99
100 for _ in 0..nr_objects {
101 let obj_offset = rd.stream_pos();
102 let (type_code, size) = rd.read_type_size()?;
103
104 match type_code {
105 1..=4 => {
106 let kind = type_code_to_kind(type_code)?;
107 let data = rd.decompress(size)?;
108 let oid = write_or_hash(kind, &data, odb, opts.dry_run)?;
109 let entry = packed_entry_after_write(kind, data, oid, odb, opts, MAX_RETAIN_BYTES);
110 by_offset.insert(obj_offset, entry.clone());
111 by_oid.insert(oid, entry);
112 count += 1;
113 }
114 6 => {
115 let neg = rd.read_ofs_neg_offset()?;
117 let base_offset = obj_offset.checked_sub(neg).ok_or_else(|| {
118 Error::CorruptObject("ofs-delta base offset underflow".to_owned())
119 })?;
120 let delta_data = rd.decompress(size)?;
121 pending.push(PendingDelta {
122 offset: obj_offset,
123 base_oid: None,
124 base_offset: Some(base_offset),
125 delta_data,
126 });
127 }
128 7 => {
129 let base_bytes = rd.read_exact_n(20)?;
131 let base_oid = ObjectId::from_bytes(&base_bytes)?;
132 let delta_data = rd.decompress(size)?;
133 pending.push(PendingDelta {
134 offset: obj_offset,
135 base_oid: Some(base_oid),
136 base_offset: None,
137 delta_data,
138 });
139 }
140 other => {
141 return Err(Error::CorruptObject(format!(
142 "unknown packed-object type {other}"
143 )))
144 }
145 }
146 }
147
148 let digest = rd.finalize_hasher();
150 let trailing = rd.read_trailer_20()?;
151 if digest.as_slice() != trailing {
152 return Err(Error::CorruptObject(
153 "pack trailing checksum mismatch".to_owned(),
154 ));
155 }
156
157 let mut remaining = pending;
160 loop {
161 if remaining.is_empty() {
162 break;
163 }
164 let before = remaining.len();
165 let mut still_pending: Vec<PendingDelta> = Vec::new();
166
167 for delta in remaining {
168 let base_res: Option<Result<(ObjectKind, Cow<'_, [u8]>)>> =
169 if let Some(base_off) = delta.base_offset {
170 by_offset
171 .get(&base_off)
172 .map(|e| entry_object_bytes(e, odb).map(|d| (e.kind(), d)))
173 } else if let Some(ref base_id) = delta.base_oid {
174 if let Some(e) = by_oid.get(base_id) {
175 Some(entry_object_bytes(e, odb).map(|d| (e.kind(), d)))
176 } else if !opts.dry_run {
177 odb.read(base_id)
178 .ok()
179 .map(|obj| Ok((obj.kind, Cow::Owned(obj.data))))
180 } else {
181 None
182 }
183 } else {
184 None
185 };
186
187 match base_res {
188 Some(Ok((base_kind, base_data))) => {
189 let result = apply_delta(base_data.as_ref(), &delta.delta_data)?;
190 let oid = write_or_hash(base_kind, &result, odb, opts.dry_run)?;
191 let new_entry = packed_entry_after_write(
192 base_kind,
193 result,
194 oid,
195 odb,
196 opts,
197 MAX_RETAIN_BYTES,
198 );
199 by_offset.insert(delta.offset, new_entry.clone());
200 by_oid.insert(oid, new_entry);
201 count += 1;
202 }
203 Some(Err(e)) => return Err(e),
204 None => still_pending.push(delta),
205 }
206 }
207
208 remaining = still_pending;
209 if remaining.len() == before {
210 return Err(Error::CorruptObject(format!(
211 "{} delta(s) could not be resolved",
212 remaining.len()
213 )));
214 }
215 }
216
217 if opts.strict {
218 let mut dot_fsck_map: HashMap<ObjectId, (ObjectKind, Vec<u8>)> =
219 HashMap::with_capacity(by_oid.len());
220 for (oid, entry) in &by_oid {
221 let kind = entry.kind();
222 let data = match entry {
223 PackedObjectEntry::InMemory { data, .. } => data.clone(),
224 PackedObjectEntry::BlobOnDisk { oid: blob_oid } => odb.read(blob_oid)?.data,
225 };
226 dot_fsck_map.insert(*oid, (kind, data));
227 }
228 gitmodules::verify_packed_dot_special(&dot_fsck_map)?;
229 strict_verify_packed_references_map(Some(odb), &by_oid)?;
230 }
231
232 Ok(count)
233}
234
/// How an object unpacked by `unpack_objects` is kept available as a delta base.
#[derive(Debug, Clone)]
enum PackedObjectEntry {
    /// The object's kind and full contents, held in memory.
    InMemory { kind: ObjectKind, data: Vec<u8> },
    /// A blob whose contents were dropped from memory; they are read back from the
    /// object database by ID when needed.
    BlobOnDisk { oid: ObjectId },
}
241
242impl PackedObjectEntry {
243 fn kind(&self) -> ObjectKind {
244 match self {
245 PackedObjectEntry::InMemory { kind, .. } => *kind,
246 PackedObjectEntry::BlobOnDisk { .. } => ObjectKind::Blob,
247 }
248 }
249}
250
251fn packed_entry_after_write(
252 kind: ObjectKind,
253 data: Vec<u8>,
254 oid: ObjectId,
255 _odb: &Odb,
256 opts: &UnpackOptions,
257 max_retain: usize,
258) -> PackedObjectEntry {
259 if !opts.dry_run && kind == ObjectKind::Blob && data.len() > max_retain {
260 PackedObjectEntry::BlobOnDisk { oid }
261 } else {
262 PackedObjectEntry::InMemory { kind, data }
263 }
264}
265
266fn entry_object_bytes<'a>(entry: &'a PackedObjectEntry, odb: &Odb) -> Result<Cow<'a, [u8]>> {
267 match entry {
268 PackedObjectEntry::InMemory { data, .. } => Ok(Cow::Borrowed(data.as_slice())),
269 PackedObjectEntry::BlobOnDisk { oid } => Ok(Cow::Owned(odb.read(oid)?.data)),
270 }
271}
272
273fn strict_verify_packed_references_map(
274 odb: Option<&Odb>,
275 pack: &HashMap<ObjectId, PackedObjectEntry>,
276) -> Result<()> {
277 for entry in pack.values() {
278 match entry {
279 PackedObjectEntry::BlobOnDisk { .. } => {}
280 PackedObjectEntry::InMemory { kind, data } => match kind {
281 ObjectKind::Tree => {
282 for e in parse_tree(data)? {
283 if e.mode == MODE_GITLINK {
288 continue;
289 }
290 if !strict_ref_resolves_map(&e.oid, pack, odb) {
291 return Err(Error::CorruptObject(format!(
292 "strict: missing object {} referenced by tree",
293 e.oid.to_hex()
294 )));
295 }
296 }
297 }
298 ObjectKind::Commit => {
299 let c = parse_commit(data)?;
300 if !strict_ref_resolves_map(&c.tree, pack, odb) {
301 return Err(Error::CorruptObject(format!(
302 "strict: missing tree {} referenced by commit",
303 c.tree.to_hex()
304 )));
305 }
306 for p in &c.parents {
307 if !strict_ref_resolves_map(p, pack, odb) {
308 return Err(Error::CorruptObject(format!(
309 "strict: missing parent {} referenced by commit",
310 p.to_hex()
311 )));
312 }
313 }
314 }
315 ObjectKind::Tag => {
316 let t = parse_tag(data)?;
317 if !strict_ref_resolves_map(&t.object, pack, odb) {
318 return Err(Error::CorruptObject(format!(
319 "strict: missing object {} referenced by tag",
320 t.object.to_hex()
321 )));
322 }
323 }
324 ObjectKind::Blob => {}
325 },
326 }
327 }
328 Ok(())
329}
330
331fn strict_ref_resolves_map(
332 oid: &ObjectId,
333 pack: &HashMap<ObjectId, PackedObjectEntry>,
334 odb: Option<&Odb>,
335) -> bool {
336 pack.contains_key(oid) || odb.is_some_and(|o| o.exists(oid))
337}
338
339fn strict_ref_resolves(
340 oid: &ObjectId,
341 pack: &std::collections::HashMap<ObjectId, (ObjectKind, Vec<u8>)>,
342 odb: Option<&Odb>,
343) -> bool {
344 pack.contains_key(oid) || odb.is_some_and(|o| o.exists(oid))
345}
346
347pub fn strict_verify_packed_references(
353 odb: Option<&Odb>,
354 pack: &HashMap<ObjectId, (ObjectKind, Vec<u8>)>,
355) -> Result<()> {
356 for (kind, data) in pack.values() {
357 match kind {
358 ObjectKind::Tree => {
359 for e in parse_tree(data)? {
360 if e.mode == MODE_GITLINK {
365 continue;
366 }
367 if !strict_ref_resolves(&e.oid, pack, odb) {
368 return Err(Error::CorruptObject(format!(
369 "strict: missing object {} referenced by tree",
370 e.oid.to_hex()
371 )));
372 }
373 }
374 }
375 ObjectKind::Commit => {
376 let c = parse_commit(data)?;
377 if !strict_ref_resolves(&c.tree, pack, odb) {
378 return Err(Error::CorruptObject(format!(
379 "strict: missing tree {} referenced by commit",
380 c.tree.to_hex()
381 )));
382 }
383 for p in &c.parents {
384 if !strict_ref_resolves(p, pack, odb) {
385 return Err(Error::CorruptObject(format!(
386 "strict: missing parent {} referenced by commit",
387 p.to_hex()
388 )));
389 }
390 }
391 }
392 ObjectKind::Tag => {
393 let t = parse_tag(data)?;
394 if !strict_ref_resolves(&t.object, pack, odb) {
395 return Err(Error::CorruptObject(format!(
396 "strict: missing object {} referenced by tag",
397 t.object.to_hex()
398 )));
399 }
400 }
401 ObjectKind::Blob => {}
402 }
403 }
404 Ok(())
405}
406
407pub fn pack_bytes_to_object_map(data: &[u8], odb: &Odb) -> Result<HashMap<ObjectId, Object>> {
414 let rd = PackReader::new(data.to_vec());
415 build_pack_object_map(rd, odb)
416}
417
418fn build_pack_object_map(mut rd: PackReader, odb: &Odb) -> Result<HashMap<ObjectId, Object>> {
419 let sig = rd.read_exact(4)?;
420 if sig != b"PACK" {
421 return Err(Error::CorruptObject(
422 "not a pack stream: invalid signature".to_owned(),
423 ));
424 }
425 let version = rd.read_u32_be()?;
426 if version != 2 && version != 3 {
427 return Err(Error::CorruptObject(format!(
428 "unsupported pack version {version}"
429 )));
430 }
431 let nr_objects = rd.read_u32_be()? as usize;
432
433 let mut by_offset: HashMap<usize, (ObjectKind, Vec<u8>)> = HashMap::new();
434 let mut by_oid: HashMap<ObjectId, (ObjectKind, Vec<u8>)> = HashMap::new();
435 let mut pending: Vec<PendingDelta> = Vec::new();
436
437 fn base_from_pack_or_odb(
438 by_oid: &HashMap<ObjectId, (ObjectKind, Vec<u8>)>,
439 odb: &Odb,
440 id: &ObjectId,
441 ) -> Option<(ObjectKind, Vec<u8>)> {
442 if let Some(e) = by_oid.get(id) {
443 return Some(e.clone());
444 }
445 odb.read(id).ok().map(|o| (o.kind, o.data))
446 }
447
448 for _ in 0..nr_objects {
449 let obj_offset = rd.pos;
450 let (type_code, size) = rd.read_type_size()?;
451
452 match type_code {
453 1..=4 => {
454 let kind = type_code_to_kind(type_code)?;
455 let data = rd.decompress(size)?;
456 let oid = Odb::hash_object_data(kind, &data);
457 by_offset.insert(obj_offset, (kind, data.clone()));
458 by_oid.insert(oid, (kind, data));
459 }
460 6 => {
461 let neg = rd.read_ofs_neg_offset()?;
462 let base_offset = obj_offset.checked_sub(neg).ok_or_else(|| {
463 Error::CorruptObject("ofs-delta base offset underflow".to_owned())
464 })?;
465 let delta_data = rd.decompress(size)?;
466 pending.push(PendingDelta {
467 offset: obj_offset,
468 base_oid: None,
469 base_offset: Some(base_offset),
470 delta_data,
471 });
472 }
473 7 => {
474 let base_bytes = rd.read_exact(20)?;
475 let base_oid = ObjectId::from_bytes(base_bytes)?;
476 let delta_data = rd.decompress(size)?;
477 pending.push(PendingDelta {
478 offset: obj_offset,
479 base_oid: Some(base_oid),
480 base_offset: None,
481 delta_data,
482 });
483 }
484 other => {
485 return Err(Error::CorruptObject(format!(
486 "unknown packed-object type {other}"
487 )))
488 }
489 }
490 }
491
492 let consumed = rd.pos;
493 {
494 let mut hasher = Sha1::new();
495 hasher.update(&rd.data[..consumed]);
496 let digest = hasher.finalize();
497 let trailing = rd.read_exact(20)?;
498 if digest.as_slice() != trailing {
499 return Err(Error::CorruptObject(
500 "pack trailing checksum mismatch".to_owned(),
501 ));
502 }
503 }
504
505 let mut remaining = pending;
506 loop {
507 if remaining.is_empty() {
508 break;
509 }
510 let before = remaining.len();
511 let mut still_pending: Vec<PendingDelta> = Vec::new();
512
513 for delta in remaining {
514 let base = if let Some(base_off) = delta.base_offset {
515 by_offset.get(&base_off).cloned()
516 } else if let Some(ref base_id) = delta.base_oid {
517 base_from_pack_or_odb(&by_oid, odb, base_id)
518 } else {
519 None
520 };
521
522 if let Some((base_kind, base_data)) = base {
523 let result = apply_delta(&base_data, &delta.delta_data)?;
524 let oid = Odb::hash_object_data(base_kind, &result);
525 by_offset.insert(delta.offset, (base_kind, result.clone()));
526 by_oid.insert(oid, (base_kind, result));
527 } else {
528 still_pending.push(delta);
529 }
530 }
531
532 remaining = still_pending;
533 if remaining.len() == before {
534 return Err(Error::CorruptObject(format!(
535 "{} delta(s) could not be resolved",
536 remaining.len()
537 )));
538 }
539 }
540
541 Ok(by_oid
542 .into_iter()
543 .map(|(oid, (kind, data))| (oid, Object::new(kind, data)))
544 .collect())
545}
546
547fn write_or_hash(kind: ObjectKind, data: &[u8], odb: &Odb, dry_run: bool) -> Result<ObjectId> {
550 if dry_run {
551 Ok(Odb::hash_object_data(kind, data))
552 } else {
553 odb.write_local(kind, data)
556 }
557}
558
559fn type_code_to_kind(code: u8) -> Result<ObjectKind> {
561 match code {
562 1 => Ok(ObjectKind::Commit),
563 2 => Ok(ObjectKind::Tree),
564 3 => Ok(ObjectKind::Blob),
565 4 => Ok(ObjectKind::Tag),
566 _ => Err(Error::CorruptObject(format!(
567 "type code {code} is not a regular object type"
568 ))),
569 }
570}
571
/// Cursor over a complete pack held in memory.
struct PackReader {
    /// The pack bytes, including the trailing checksum.
    data: Vec<u8>,
    /// Index into `data` of the next unread byte.
    pos: usize,
}
577
578impl PackReader {
579 fn new(data: Vec<u8>) -> Self {
580 Self { data, pos: 0 }
581 }
582
583 fn read_exact(&mut self, n: usize) -> Result<&[u8]> {
586 if self.pos + n > self.data.len() {
587 return Err(Error::CorruptObject(format!(
588 "pack stream truncated: need {n} bytes at offset {}",
589 self.pos
590 )));
591 }
592 let slice = &self.data[self.pos..self.pos + n];
593 self.pos += n;
594 Ok(slice)
595 }
596
597 fn read_byte(&mut self) -> Result<u8> {
599 if self.pos >= self.data.len() {
600 return Err(Error::CorruptObject(
601 "unexpected end of pack stream".to_owned(),
602 ));
603 }
604 let b = self.data[self.pos];
605 self.pos += 1;
606 Ok(b)
607 }
608
609 fn read_u32_be(&mut self) -> Result<u32> {
611 let bytes = self.read_exact(4)?;
612 Ok(u32::from_be_bytes(bytes.try_into().map_err(|_| {
613 Error::CorruptObject("u32 read failed".to_owned())
614 })?))
615 }
616
617 fn read_type_size(&mut self) -> Result<(u8, usize)> {
622 let c = self.read_byte()?;
623 let type_code = (c >> 4) & 0x7;
624 let mut size = (c & 0x0f) as usize;
625 let mut shift = 4u32;
626 let mut cur = c;
627 while cur & 0x80 != 0 {
628 cur = self.read_byte()?;
629 size |= ((cur & 0x7f) as usize) << shift;
630 shift += 7;
631 }
632 Ok((type_code, size))
633 }
634
635 fn read_ofs_neg_offset(&mut self) -> Result<usize> {
640 let mut c = self.read_byte()?;
641 let mut value = (c & 0x7f) as usize;
642 while c & 0x80 != 0 {
643 c = self.read_byte()?;
644 value = (value + 1) << 7 | (c & 0x7f) as usize;
645 }
646 Ok(value)
647 }
648
649 fn decompress(&mut self, expected_size: usize) -> Result<Vec<u8>> {
654 let slice = &self.data[self.pos..];
655 let mut decoder = ZlibDecoder::new(slice);
656 let mut out = Vec::with_capacity(expected_size);
657 decoder
658 .read_to_end(&mut out)
659 .map_err(|e| Error::Zlib(e.to_string()))?;
660 if out.len() != expected_size {
661 return Err(Error::CorruptObject(format!(
662 "decompressed {} bytes but expected {}",
663 out.len(),
664 expected_size
665 )));
666 }
667 self.pos += decoder.total_in() as usize;
668 Ok(out)
669 }
670}
671
672fn io_to_corrupt_eof(e: io::Error, stream_pos: usize, context: &str) -> Error {
673 if e.kind() == io::ErrorKind::UnexpectedEof {
674 Error::CorruptObject(format!(
675 "pack stream truncated ({context}) at offset {stream_pos}"
676 ))
677 } else {
678 Error::Io(e)
679 }
680}
681
/// Incremental pack reader over an arbitrary `Read` source.
struct StreamingPackReader<'a> {
    /// The underlying byte source.
    inner: &'a mut dyn Read,
    /// Running SHA-1 over the bytes consumed by the header, entry and zlib reads; the
    /// trailer read does not feed it.
    pack_hasher: Sha1,
    /// Number of stream bytes consumed so far.
    stream_pos: usize,
    /// Optional limit that `stream_pos` may not exceed.
    max_input_bytes: Option<u64>,
    /// Bytes already read from `inner` but not yet consumed; they are neither hashed
    /// nor counted in `stream_pos` until a later read consumes them.
    pending: Vec<u8>,
}
696
697impl<'a> StreamingPackReader<'a> {
698 fn new(inner: &'a mut dyn Read, max_input_bytes: Option<u64>) -> Self {
699 Self {
700 inner,
701 pack_hasher: Sha1::new(),
702 stream_pos: 0,
703 max_input_bytes,
704 pending: Vec::new(),
705 }
706 }
707
708 fn stream_pos(&self) -> usize {
709 self.stream_pos
710 }
711
712 fn enforce_max_input(&self) -> Result<()> {
713 if let Some(limit) = self.max_input_bytes {
714 let pos = u64::try_from(self.stream_pos)
715 .map_err(|_| Error::CorruptObject("pack stream position overflow".to_owned()))?;
716 if pos > limit {
717 return Err(Error::CorruptObject(
718 "pack exceeds maximum allowed size".to_owned(),
719 ));
720 }
721 }
722 Ok(())
723 }
724
725 fn read_from_source(&mut self, buf: &mut [u8]) -> Result<usize> {
727 let n = if !self.pending.is_empty() {
728 let take = buf.len().min(self.pending.len());
729 buf[..take].copy_from_slice(&self.pending[..take]);
730 self.pending.drain(..take);
731 take
732 } else {
733 self.inner.read(buf).map_err(Error::Io)?
734 };
735 if n > 0 {
736 self.pack_hasher.update(&buf[..n]);
737 self.stream_pos += n;
738 self.enforce_max_input()?;
739 }
740 Ok(n)
741 }
742
743 fn read_byte(&mut self) -> Result<u8> {
744 let mut b = [0u8; 1];
745 let n = self.read_from_source(&mut b)?;
746 if n == 0 {
747 return Err(Error::CorruptObject(format!(
748 "pack stream truncated (read byte) at offset {}",
749 self.stream_pos
750 )));
751 }
752 Ok(b[0])
753 }
754
755 fn read_exact_n(&mut self, n: usize) -> Result<Vec<u8>> {
756 let mut v = vec![0u8; n];
757 let mut got = 0usize;
758 while got < n {
759 let m = self.read_from_source(&mut v[got..n])?;
760 if m == 0 {
761 return Err(Error::CorruptObject(format!(
762 "pack stream truncated (read exact) at offset {}",
763 self.stream_pos
764 )));
765 }
766 got += m;
767 }
768 Ok(v)
769 }
770
771 fn read_u32_be(&mut self) -> Result<u32> {
772 let mut b = [0u8; 4];
773 let mut got = 0usize;
774 while got < 4 {
775 let m = self.read_from_source(&mut b[got..4])?;
776 if m == 0 {
777 return Err(Error::CorruptObject(format!(
778 "pack stream truncated (read u32) at offset {}",
779 self.stream_pos
780 )));
781 }
782 got += m;
783 }
784 Ok(u32::from_be_bytes(b))
785 }
786
787 fn read_type_size(&mut self) -> Result<(u8, usize)> {
788 let c = self.read_byte()?;
789 let type_code = (c >> 4) & 0x7;
790 let mut size = (c & 0x0f) as usize;
791 let mut shift = 4u32;
792 let mut cur = c;
793 while cur & 0x80 != 0 {
794 cur = self.read_byte()?;
795 size |= ((cur & 0x7f) as usize) << shift;
796 shift += 7;
797 }
798 Ok((type_code, size))
799 }
800
801 fn read_ofs_neg_offset(&mut self) -> Result<usize> {
802 let mut c = self.read_byte()?;
803 let mut value = (c & 0x7f) as usize;
804 while c & 0x80 != 0 {
805 c = self.read_byte()?;
806 value = (value + 1) << 7 | (c & 0x7f) as usize;
807 }
808 Ok(value)
809 }
810
811 fn decompress(&mut self, expected_size: usize) -> Result<Vec<u8>> {
821 if expected_size == 0 {
825 const CHUNK: usize = 64 * 1024;
826 let mut scratch = [0u8; CHUNK];
827 loop {
828 let mut cursor = std::io::Cursor::new(self.pending.as_slice());
829 let mut z = ZlibDecoder::new(&mut cursor);
830 let mut sink = [0u8; 1];
831 match z.read(&mut sink) {
832 Ok(0) => {
833 let consumed = z.total_in() as usize;
834 if consumed > self.pending.len() {
835 return Err(Error::CorruptObject(
836 "zlib total_in exceeds pending buffer".to_owned(),
837 ));
838 }
839 if consumed == 0 {
840 let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
841 if n == 0 {
842 return Err(Error::CorruptObject(format!(
843 "pack stream truncated (zlib) at offset {}",
844 self.stream_pos
845 )));
846 }
847 self.pending.extend_from_slice(&scratch[..n]);
848 continue;
849 }
850 self.pack_hasher.update(&self.pending[..consumed]);
851 self.stream_pos += consumed;
852 self.pending.drain(..consumed);
853 self.enforce_max_input()?;
854 return Ok(Vec::new());
855 }
856 Ok(_) => {
857 return Err(Error::CorruptObject(
858 "0-byte packed object inflated to non-empty output".to_owned(),
859 ));
860 }
861 Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => {
862 let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
863 if n == 0 {
864 return Err(Error::CorruptObject(format!(
865 "pack stream truncated (zlib) at offset {}",
866 self.stream_pos
867 )));
868 }
869 self.pending.extend_from_slice(&scratch[..n]);
870 }
871 Err(e) => return Err(Error::Zlib(e.to_string())),
872 }
873 }
874 }
875
876 const CHUNK: usize = 64 * 1024;
877 let mut scratch = [0u8; CHUNK];
878
879 let mut out = vec![0u8; expected_size];
880 let mut z = Decompress::new(true);
881 let mut out_pos = 0usize;
882 let mut eof = false;
883 loop {
884 if self.pending.is_empty() && !eof {
885 let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
886 if n == 0 {
887 eof = true;
888 } else {
889 self.pending.extend_from_slice(&scratch[..n]);
890 }
891 }
892
893 let flush = if eof && self.pending.is_empty() {
894 FlushDecompress::Finish
895 } else {
896 FlushDecompress::None
897 };
898
899 let before_in = z.total_in();
900 let before_out = z.total_out();
901 let status = z
902 .decompress(self.pending.as_slice(), &mut out[out_pos..], flush)
903 .map_err(|e| Error::Zlib(e.to_string()))?;
904 let consumed = (z.total_in() - before_in) as usize;
905 if consumed > self.pending.len() {
906 return Err(Error::CorruptObject(
907 "zlib consumed more than pending buffer".to_owned(),
908 ));
909 }
910 self.pack_hasher.update(&self.pending[..consumed]);
911 self.stream_pos += consumed;
912 self.pending.drain(..consumed);
913 self.enforce_max_input()?;
914 out_pos += (z.total_out() - before_out) as usize;
915
916 match status {
917 Status::StreamEnd => {
918 if out_pos != expected_size {
919 return Err(Error::CorruptObject(format!(
920 "decompressed size mismatch: got {out_pos}, want {expected_size}"
921 )));
922 }
923 return Ok(out);
924 }
925 Status::Ok | Status::BufError => {
926 if consumed == 0 && !eof {
927 let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
928 if n == 0 {
929 eof = true;
930 } else {
931 self.pending.extend_from_slice(&scratch[..n]);
932 }
933 } else if eof && self.pending.is_empty() && out_pos != expected_size {
934 return Err(Error::CorruptObject(format!(
935 "pack stream truncated (zlib) at offset {}",
936 self.stream_pos
937 )));
938 }
939 }
940 }
941 }
942 }
943
944 fn finalize_hasher(
946 &self,
947 ) -> sha1::digest::generic_array::GenericArray<u8, sha1::digest::consts::U20> {
948 self.pack_hasher.clone().finalize()
949 }
950
951 fn read_trailer_20(&mut self) -> Result<[u8; 20]> {
953 let mut b = [0u8; 20];
954 if self.pending.len() >= 20 {
955 b.copy_from_slice(&self.pending[..20]);
956 self.pending.drain(..20);
957 self.stream_pos += 20;
958 self.enforce_max_input()?;
959 return Ok(b);
960 }
961 let tail = self.pending.len();
962 if tail > 0 {
963 b[..tail].copy_from_slice(&self.pending[..]);
964 self.pending.clear();
965 }
966 self.inner
967 .read_exact(&mut b[tail..])
968 .map_err(|e| io_to_corrupt_eof(e, self.stream_pos, "trailer"))?;
969 self.stream_pos += 20;
970 self.enforce_max_input()?;
971 Ok(b)
972 }
973}
974
975pub fn apply_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
989 let mut pos = 0usize;
990
991 let src_size = read_delta_varint(delta, &mut pos)?;
992 if src_size != base.len() {
993 return Err(Error::CorruptObject(format!(
994 "delta source size {src_size} != base size {}",
995 base.len()
996 )));
997 }
998 let dest_size = read_delta_varint(delta, &mut pos)?;
999 let mut result = Vec::with_capacity(dest_size);
1000
1001 while pos < delta.len() {
1002 let cmd = delta[pos];
1003 pos += 1;
1004 if cmd == 0 {
1005 return Err(Error::CorruptObject(
1006 "reserved opcode 0 in delta stream".to_owned(),
1007 ));
1008 }
1009 if cmd & 0x80 != 0 {
1010 let mut offset = 0usize;
1013 let mut size = 0usize;
1014
1015 macro_rules! maybe_read_byte {
1016 ($flag:expr, $shift:expr, $target:expr) => {
1017 if cmd & $flag != 0 {
1018 let b = *delta.get(pos).ok_or_else(|| {
1019 Error::CorruptObject("truncated delta COPY operand".to_owned())
1020 })?;
1021 pos += 1;
1022 $target |= (b as usize) << $shift;
1023 }
1024 };
1025 }
1026
1027 maybe_read_byte!(0x01, 0, offset);
1028 maybe_read_byte!(0x02, 8, offset);
1029 maybe_read_byte!(0x04, 16, offset);
1030 maybe_read_byte!(0x08, 24, offset);
1031 maybe_read_byte!(0x10, 0, size);
1032 maybe_read_byte!(0x20, 8, size);
1033 maybe_read_byte!(0x40, 16, size);
1034
1035 if size == 0 {
1036 size = 0x10000;
1037 }
1038
1039 let end = offset.checked_add(size).ok_or_else(|| {
1040 Error::CorruptObject("delta COPY range overflows usize".to_owned())
1041 })?;
1042 let chunk = base.get(offset..end).ok_or_else(|| {
1043 Error::CorruptObject(format!(
1044 "delta COPY [{offset},{end}) out of range (base is {} bytes)",
1045 base.len()
1046 ))
1047 })?;
1048 result.extend_from_slice(chunk);
1049 } else {
1050 let n = cmd as usize;
1052 let chunk = delta
1053 .get(pos..pos + n)
1054 .ok_or_else(|| Error::CorruptObject("truncated delta INSERT data".to_owned()))?;
1055 result.extend_from_slice(chunk);
1056 pos += n;
1057 }
1058 }
1059
1060 if result.len() != dest_size {
1061 return Err(Error::CorruptObject(format!(
1062 "delta produced {} bytes but expected {dest_size}",
1063 result.len()
1064 )));
1065 }
1066
1067 Ok(result)
1068}
1069
1070fn read_delta_varint(data: &[u8], pos: &mut usize) -> Result<usize> {
1074 let mut value = 0usize;
1075 let mut shift = 0u32;
1076 loop {
1077 let b = *data
1078 .get(*pos)
1079 .ok_or_else(|| Error::CorruptObject("truncated delta varint".to_owned()))?;
1080 *pos += 1;
1081 value |= ((b & 0x7f) as usize) << shift;
1082 shift += 7;
1083 if b & 0x80 == 0 {
1084 break;
1085 }
1086 }
1087 Ok(value)
1088}
1089
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates an empty object database in a fresh temporary directory.
    ///
    /// The `TempDir` guard is returned so the directory outlives the test body; the
    /// path of the `objects` directory is returned for tests that inspect it.
    fn scratch_odb() -> (tempfile::TempDir, std::path::PathBuf, Odb) {
        let tmp = tempfile::TempDir::new().unwrap();
        let objects_dir = tmp.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);
        (tmp, objects_dir, odb)
    }

    /// Builds a version-2 pack holding `objects` as plain (non-delta) entries, followed
    /// by the SHA-1 trailer.
    fn make_pack(objects: &[(ObjectKind, &[u8])]) -> Vec<u8> {
        use flate2::write::ZlibEncoder;
        use std::io::Write;

        let mut pack = Vec::new();
        pack.extend_from_slice(b"PACK");
        pack.extend_from_slice(&2u32.to_be_bytes());
        pack.extend_from_slice(&(objects.len() as u32).to_be_bytes());

        for (kind, data) in objects {
            let type_code: u8 = match kind {
                ObjectKind::Commit => 1,
                ObjectKind::Tree => 2,
                ObjectKind::Blob => 3,
                ObjectKind::Tag => 4,
            };

            // Entry header: type plus low 4 size bits, then 7 size bits per byte; every
            // byte except the last has its high bit set.
            let mut rest = data.len();
            let mut head = ((type_code & 0x7) << 4) | (rest & 0x0f) as u8;
            rest >>= 4;
            while rest > 0 {
                pack.push(head | 0x80);
                head = (rest & 0x7f) as u8;
                rest >>= 7;
            }
            pack.push(head);

            let mut enc = ZlibEncoder::new(Vec::new(), flate2::Compression::default());
            enc.write_all(data).unwrap();
            pack.extend_from_slice(&enc.finish().unwrap());
        }

        let mut hasher = Sha1::new();
        hasher.update(&pack);
        let digest = hasher.finalize();
        pack.extend_from_slice(digest.as_slice());
        pack
    }

    #[test]
    fn test_apply_delta_simple() {
        let base = b"hello";
        // Sizes: source 5, result 11. COPY offset 0 size 5, then INSERT 6 bytes.
        let mut delta = vec![5u8, 11u8, 0x80 | 0x01 | 0x10, 0u8, 5u8, 6u8];
        delta.extend_from_slice(b" world");

        let result = apply_delta(base, &delta).unwrap();
        assert_eq!(result, b"hello world");
    }

    #[test]
    fn test_apply_delta_insert_only() {
        let base = b"";
        // Sizes: source 0, result 5. INSERT 5 bytes.
        let mut delta = vec![0u8, 5u8, 5u8];
        delta.extend_from_slice(b"hello");

        let result = apply_delta(base, &delta).unwrap();
        assert_eq!(result, b"hello");
    }

    #[test]
    fn test_apply_delta_copy_only() {
        let base = b"abcdef";
        // Sizes: source 6, result 3. COPY offset 2 size 3.
        let delta = vec![6u8, 3u8, 0x91u8, 2u8, 3u8];

        let result = apply_delta(base, &delta).unwrap();
        assert_eq!(result, b"cde");
    }

    #[test]
    fn test_apply_delta_size_zero_means_65536() {
        let base = vec![0xABu8; 65536];
        // 65536 as a three-byte varint, used for both the source and the result size.
        let size_varint = [
            0x80 | (65536 & 0x7f) as u8,
            0x80 | ((65536 >> 7) & 0x7f) as u8,
            ((65536 >> 14) & 0x7f) as u8,
        ];
        let mut delta = Vec::new();
        delta.extend_from_slice(&size_varint);
        delta.extend_from_slice(&size_varint);
        // COPY with no offset and no size bytes.
        delta.push(0x80u8);

        let result = apply_delta(&base, &delta).unwrap();
        assert_eq!(result.len(), 65536);
        assert!(result.iter().all(|&b| b == 0xAB));
    }

    #[test]
    fn test_unpack_objects_blobs() {
        let (_tmp, _objects_dir, odb) = scratch_odb();

        let pack = make_pack(&[
            (ObjectKind::Blob, b"hello\n"),
            (ObjectKind::Blob, b"world\n"),
        ]);

        let opts = UnpackOptions::default();
        let count = unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap();
        assert_eq!(count, 2);

        let first = odb
            .read(&Odb::hash_object_data(ObjectKind::Blob, b"hello\n"))
            .unwrap();
        let second = odb
            .read(&Odb::hash_object_data(ObjectKind::Blob, b"world\n"))
            .unwrap();
        assert_eq!(first.data, b"hello\n");
        assert_eq!(second.data, b"world\n");
    }

    #[test]
    fn test_unpack_objects_empty_tree() {
        let (_tmp, objects_dir, odb) = scratch_odb();

        let pack = make_pack(&[(ObjectKind::Tree, b"")]);
        let opts = UnpackOptions::default();
        assert_eq!(
            unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap(),
            1
        );

        let oid = Odb::hash_object_data(ObjectKind::Tree, b"");
        assert!(odb.exists(&oid));
        let loose = objects_dir
            .join(oid.loose_prefix())
            .join(oid.loose_suffix());
        assert!(
            loose.is_file(),
            "empty tree must be materialized as a loose object during unpack"
        );
    }

    #[test]
    fn test_strict_skips_gitlink_tree_entries() {
        use crate::index::{MODE_GITLINK, MODE_REGULAR};
        use crate::objects::{serialize_tree, TreeEntry};

        // A tree whose only entry is a gitlink to an object that is not in the pack.
        let submodule_oid = ObjectId::from_hex(&"7f".repeat(20)).unwrap();
        let tree_data = serialize_tree(&[TreeEntry {
            mode: MODE_GITLINK,
            name: b"sub".to_vec(),
            oid: submodule_oid,
        }]);
        let tree_oid = Odb::hash_object_data(ObjectKind::Tree, &tree_data);

        let mut pack = HashMap::new();
        pack.insert(tree_oid, (ObjectKind::Tree, tree_data.clone()));
        assert!(strict_verify_packed_references(None, &pack).is_ok());

        // The same situation with a regular-file entry must be rejected.
        let bad_tree = serialize_tree(&[TreeEntry {
            mode: MODE_REGULAR,
            name: b"file".to_vec(),
            oid: ObjectId::from_hex(&"ab".repeat(20)).unwrap(),
        }]);
        let bad_oid = Odb::hash_object_data(ObjectKind::Tree, &bad_tree);
        let mut bad_pack = HashMap::new();
        bad_pack.insert(bad_oid, (ObjectKind::Tree, bad_tree));
        assert!(matches!(
            strict_verify_packed_references(None, &bad_pack),
            Err(Error::CorruptObject(_))
        ));
    }

    /// Reader that hands out at most `max_len` bytes per `read` call.
    struct ChunkedReader<'a> {
        data: &'a [u8],
        pos: usize,
        max_len: usize,
    }

    impl io::Read for ChunkedReader<'_> {
        fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
            if self.pos >= self.data.len() {
                return Ok(0);
            }
            let left = self.data.len() - self.pos;
            let take = left.min(self.max_len).min(buf.len());
            let end = self.pos + take;
            buf[..take].copy_from_slice(&self.data[self.pos..end]);
            self.pos = end;
            Ok(take)
        }
    }

    #[test]
    fn test_unpack_objects_chunked_read_matches_full_buffer() {
        let pack = make_pack(&[(ObjectKind::Blob, b"chunked-stream")]);
        let opts = UnpackOptions::default();
        let oid = Odb::hash_object_data(ObjectKind::Blob, b"chunked-stream");

        // Whole pack available in one read.
        let (_tmp, _objects_dir, odb) = scratch_odb();
        assert_eq!(
            unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap(),
            1
        );
        assert!(odb.exists(&oid));

        // Same pack delivered eight bytes at a time.
        let (_tmp2, _objects_dir2, odb2) = scratch_odb();
        let mut chunked = ChunkedReader {
            data: pack.as_slice(),
            pos: 0,
            max_len: 8,
        };
        assert_eq!(unpack_objects(&mut chunked, &odb2, &opts).unwrap(), 1);
        assert!(odb2.exists(&oid));
    }

    #[test]
    fn test_unpack_objects_dry_run_writes_nothing() {
        let (_tmp, _objects_dir, odb) = scratch_odb();

        let pack = make_pack(&[(ObjectKind::Blob, b"test content")]);

        let opts = UnpackOptions {
            dry_run: true,
            quiet: true,
            ..UnpackOptions::default()
        };
        let count = unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap();
        assert_eq!(count, 1);

        let oid = Odb::hash_object_data(ObjectKind::Blob, b"test content");
        assert!(!odb.exists(&oid));
    }

    #[test]
    fn test_unpack_objects_bad_signature() {
        let (_tmp, _objects_dir, odb) = scratch_odb();

        let mut bad = b"NOPE\x00\x00\x00\x02\x00\x00\x00\x00".to_vec();
        bad.extend_from_slice(&[0u8; 20]);
        let opts = UnpackOptions::default();
        let err = unpack_objects(&mut bad.as_slice(), &odb, &opts).unwrap_err();
        assert!(err.to_string().contains("invalid signature"));
    }

    #[test]
    fn test_unpack_objects_checksum_mismatch() {
        let (_tmp, _objects_dir, odb) = scratch_odb();

        // Flip the last trailer byte.
        let mut pack = make_pack(&[(ObjectKind::Blob, b"data")]);
        let last = pack.len() - 1;
        pack[last] ^= 0xFF;

        let opts = UnpackOptions::default();
        let err = unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap_err();
        assert!(err.to_string().contains("checksum"));
    }

    #[test]
    fn test_apply_delta_source_size_mismatch() {
        let base = b"hi";
        // Declares a 3-byte source although the base has 2 bytes.
        let delta = [3u8, 2u8, 2u8, b'h', b'i'];
        let err = apply_delta(base, &delta).unwrap_err();
        assert!(err.to_string().contains("source size"));
    }
}