1use std::borrow::Cow;
13use std::collections::HashMap;
14use std::io::{self, Read};
15
16use flate2::read::ZlibDecoder;
17use flate2::{Decompress, FlushDecompress, Status};
18use sha1::{Digest, Sha1};
19
20use crate::error::{Error, Result};
21use crate::gitmodules;
22use crate::objects::{parse_commit, parse_tag, parse_tree, Object, ObjectId, ObjectKind};
23use crate::odb::Odb;
24
/// Behavior switches for `unpack_objects`.
#[derive(Debug, Default)]
pub struct UnpackOptions {
    // Parse, hash and count objects but write nothing to the object database.
    pub dry_run: bool,
    // Suppress informational output. Not consulted anywhere in this module.
    pub quiet: bool,
    // After unpacking, run the project-specific ".git*" entry checks and
    // verify that every referenced object resolves in the pack or the ODB.
    pub strict: bool,
    // Upper bound on total pack bytes consumed; `None` means unlimited.
    pub max_input_bytes: Option<u64>,
}
40
/// A delta entry read from the pack whose base object was not yet available;
/// resolved later in a fixpoint loop once its base has been reconstructed.
struct PendingDelta {
    // Pack-stream offset at which this entry's header started.
    offset: usize,
    // Base object id (REF_DELTA, type 7); `None` for OFS_DELTA entries.
    base_oid: Option<ObjectId>,
    // Absolute pack offset of the base (OFS_DELTA, type 6); `None` for REF_DELTA.
    base_offset: Option<usize>,
    // Inflated (but not yet applied) delta instruction stream.
    delta_data: Vec<u8>,
}
53
/// Stream a pack from `reader`, storing every contained object in `odb`
/// (skipped when `opts.dry_run`) and resolving OFS/REF deltas in memory.
///
/// Returns the number of objects unpacked. Fails on a bad `PACK` signature,
/// an unsupported version, a trailer SHA-1 mismatch, deltas whose bases never
/// appear, or — with `opts.strict` — dangling object references.
pub fn unpack_objects(reader: &mut dyn Read, odb: &Odb, opts: &UnpackOptions) -> Result<usize> {
    // Blobs larger than this are not kept in memory for delta resolution;
    // they are re-read from the ODB if a later delta needs them.
    const MAX_RETAIN_BYTES: usize = 1024 * 1024;

    let mut rd = StreamingPackReader::new(reader, opts.max_input_bytes);

    // Pack header: 4-byte "PACK" signature, version, object count.
    let sig = rd.read_exact_n(4)?;
    if sig != b"PACK" {
        return Err(Error::CorruptObject(
            "not a pack stream: invalid signature".to_owned(),
        ));
    }
    let version = rd.read_u32_be()?;
    if version != 2 && version != 3 {
        return Err(Error::CorruptObject(format!(
            "unsupported pack version {version}"
        )));
    }
    let nr_objects = rd.read_u32_be()? as usize;

    // Objects seen so far, addressable by pack offset (for OFS deltas) and
    // by object id (for REF deltas).
    let mut by_offset: HashMap<usize, PackedObjectEntry> = HashMap::new();
    let mut by_oid: HashMap<ObjectId, PackedObjectEntry> = HashMap::new();

    // Deltas whose base has not been seen yet.
    let mut pending: Vec<PendingDelta> = Vec::new();
    let mut count = 0usize;

    for _ in 0..nr_objects {
        // Capture the offset before the entry header is consumed; OFS deltas
        // are expressed relative to it.
        let obj_offset = rd.stream_pos();
        let (type_code, size) = rd.read_type_size()?;

        match type_code {
            // Plain (non-delta) object: commit, tree, blob or tag.
            1..=4 => {
                let kind = type_code_to_kind(type_code)?;
                let data = rd.decompress(size)?;
                let oid = write_or_hash(kind, &data, odb, opts.dry_run)?;
                let entry = packed_entry_after_write(kind, data, oid, odb, opts, MAX_RETAIN_BYTES);
                by_offset.insert(obj_offset, entry.clone());
                by_oid.insert(oid, entry);
                count += 1;
            }
            // OFS_DELTA: base identified by a negative offset from this entry.
            6 => {
                let neg = rd.read_ofs_neg_offset()?;
                let base_offset = obj_offset.checked_sub(neg).ok_or_else(|| {
                    Error::CorruptObject("ofs-delta base offset underflow".to_owned())
                })?;
                let delta_data = rd.decompress(size)?;
                pending.push(PendingDelta {
                    offset: obj_offset,
                    base_oid: None,
                    base_offset: Some(base_offset),
                    delta_data,
                });
            }
            // REF_DELTA: base identified by a raw 20-byte object id.
            7 => {
                let base_bytes = rd.read_exact_n(20)?;
                let base_oid = ObjectId::from_bytes(&base_bytes)?;
                let delta_data = rd.decompress(size)?;
                pending.push(PendingDelta {
                    offset: obj_offset,
                    base_oid: Some(base_oid),
                    base_offset: None,
                    delta_data,
                });
            }
            other => {
                return Err(Error::CorruptObject(format!(
                    "unknown packed-object type {other}"
                )))
            }
        }
    }

    // The trailer is the SHA-1 of everything before it; finalize before
    // reading the trailer so its own bytes are excluded from the digest.
    let digest = rd.finalize_hasher();
    let trailing = rd.read_trailer_20()?;
    if digest.as_slice() != trailing {
        return Err(Error::CorruptObject(
            "pack trailing checksum mismatch".to_owned(),
        ));
    }

    // Resolve deltas to a fixpoint: each pass applies every delta whose base
    // is now known; a pass with no progress means unresolvable deltas.
    let mut remaining = pending;
    loop {
        if remaining.is_empty() {
            break;
        }
        let before = remaining.len();
        let mut still_pending: Vec<PendingDelta> = Vec::new();

        for delta in remaining {
            // Locate the base: by pack offset, by id within the pack, or —
            // when actually writing — by id in the existing ODB (thin packs).
            let base_res: Option<Result<(ObjectKind, Cow<'_, [u8]>)>> =
                if let Some(base_off) = delta.base_offset {
                    by_offset
                        .get(&base_off)
                        .map(|e| entry_object_bytes(e, odb).map(|d| (e.kind(), d)))
                } else if let Some(ref base_id) = delta.base_oid {
                    if let Some(e) = by_oid.get(base_id) {
                        Some(entry_object_bytes(e, odb).map(|d| (e.kind(), d)))
                    } else if !opts.dry_run {
                        odb.read(base_id)
                            .ok()
                            .map(|obj| Ok((obj.kind, Cow::Owned(obj.data))))
                    } else {
                        None
                    }
                } else {
                    None
                };

            match base_res {
                Some(Ok((base_kind, base_data))) => {
                    // A delta result has the same kind as its base.
                    let result = apply_delta(base_data.as_ref(), &delta.delta_data)?;
                    let oid = write_or_hash(base_kind, &result, odb, opts.dry_run)?;
                    let new_entry = packed_entry_after_write(
                        base_kind,
                        result,
                        oid,
                        odb,
                        opts,
                        MAX_RETAIN_BYTES,
                    );
                    by_offset.insert(delta.offset, new_entry.clone());
                    by_oid.insert(oid, new_entry);
                    count += 1;
                }
                Some(Err(e)) => return Err(e),
                None => still_pending.push(delta),
            }
        }

        remaining = still_pending;
        if remaining.len() == before {
            return Err(Error::CorruptObject(format!(
                "{} delta(s) could not be resolved",
                remaining.len()
            )));
        }
    }

    if opts.strict {
        // Materialize every object (re-reading spilled blobs from the ODB)
        // for the strict-mode checks below.
        let mut dot_fsck_map: HashMap<ObjectId, (ObjectKind, Vec<u8>)> =
            HashMap::with_capacity(by_oid.len());
        for (oid, entry) in &by_oid {
            let kind = entry.kind();
            let data = match entry {
                PackedObjectEntry::InMemory { data, .. } => data.clone(),
                PackedObjectEntry::BlobOnDisk { oid: blob_oid } => odb.read(blob_oid)?.data,
            };
            dot_fsck_map.insert(*oid, (kind, data));
        }
        // Project-specific ".git*" entry checks, then dangling-reference
        // detection against the pack and the ODB.
        gitmodules::verify_packed_dot_special(&dot_fsck_map)?;
        strict_verify_packed_references_map(Some(odb), &by_oid)?;
    }

    Ok(count)
}
233
/// How a just-unpacked object is retained for resolving later deltas.
#[derive(Debug, Clone)]
enum PackedObjectEntry {
    // Object bytes kept in memory (small objects, or everything in dry-run).
    InMemory { kind: ObjectKind, data: Vec<u8> },
    // Large blob already written to the ODB; re-read on demand.
    BlobOnDisk { oid: ObjectId },
}
240
241impl PackedObjectEntry {
242 fn kind(&self) -> ObjectKind {
243 match self {
244 PackedObjectEntry::InMemory { kind, .. } => *kind,
245 PackedObjectEntry::BlobOnDisk { .. } => ObjectKind::Blob,
246 }
247 }
248}
249
250fn packed_entry_after_write(
251 kind: ObjectKind,
252 data: Vec<u8>,
253 oid: ObjectId,
254 _odb: &Odb,
255 opts: &UnpackOptions,
256 max_retain: usize,
257) -> PackedObjectEntry {
258 if !opts.dry_run && kind == ObjectKind::Blob && data.len() > max_retain {
259 PackedObjectEntry::BlobOnDisk { oid }
260 } else {
261 PackedObjectEntry::InMemory { kind, data }
262 }
263}
264
265fn entry_object_bytes<'a>(entry: &'a PackedObjectEntry, odb: &Odb) -> Result<Cow<'a, [u8]>> {
266 match entry {
267 PackedObjectEntry::InMemory { data, .. } => Ok(Cow::Borrowed(data.as_slice())),
268 PackedObjectEntry::BlobOnDisk { oid } => Ok(Cow::Owned(odb.read(oid)?.data)),
269 }
270}
271
272fn strict_verify_packed_references_map(
273 odb: Option<&Odb>,
274 pack: &HashMap<ObjectId, PackedObjectEntry>,
275) -> Result<()> {
276 for entry in pack.values() {
277 match entry {
278 PackedObjectEntry::BlobOnDisk { .. } => {}
279 PackedObjectEntry::InMemory { kind, data } => match kind {
280 ObjectKind::Tree => {
281 for e in parse_tree(data)? {
282 if !strict_ref_resolves_map(&e.oid, pack, odb) {
283 return Err(Error::CorruptObject(format!(
284 "strict: missing object {} referenced by tree",
285 e.oid.to_hex()
286 )));
287 }
288 }
289 }
290 ObjectKind::Commit => {
291 let c = parse_commit(data)?;
292 if !strict_ref_resolves_map(&c.tree, pack, odb) {
293 return Err(Error::CorruptObject(format!(
294 "strict: missing tree {} referenced by commit",
295 c.tree.to_hex()
296 )));
297 }
298 for p in &c.parents {
299 if !strict_ref_resolves_map(p, pack, odb) {
300 return Err(Error::CorruptObject(format!(
301 "strict: missing parent {} referenced by commit",
302 p.to_hex()
303 )));
304 }
305 }
306 }
307 ObjectKind::Tag => {
308 let t = parse_tag(data)?;
309 if !strict_ref_resolves_map(&t.object, pack, odb) {
310 return Err(Error::CorruptObject(format!(
311 "strict: missing object {} referenced by tag",
312 t.object.to_hex()
313 )));
314 }
315 }
316 ObjectKind::Blob => {}
317 },
318 }
319 }
320 Ok(())
321}
322
323fn strict_ref_resolves_map(
324 oid: &ObjectId,
325 pack: &HashMap<ObjectId, PackedObjectEntry>,
326 odb: Option<&Odb>,
327) -> bool {
328 pack.contains_key(oid) || odb.is_some_and(|o| o.exists(oid))
329}
330
331fn strict_ref_resolves(
332 oid: &ObjectId,
333 pack: &std::collections::HashMap<ObjectId, (ObjectKind, Vec<u8>)>,
334 odb: Option<&Odb>,
335) -> bool {
336 pack.contains_key(oid) || odb.is_some_and(|o| o.exists(oid))
337}
338
339pub fn strict_verify_packed_references(
345 odb: Option<&Odb>,
346 pack: &HashMap<ObjectId, (ObjectKind, Vec<u8>)>,
347) -> Result<()> {
348 for (kind, data) in pack.values() {
349 match kind {
350 ObjectKind::Tree => {
351 for e in parse_tree(data)? {
352 if !strict_ref_resolves(&e.oid, pack, odb) {
353 return Err(Error::CorruptObject(format!(
354 "strict: missing object {} referenced by tree",
355 e.oid.to_hex()
356 )));
357 }
358 }
359 }
360 ObjectKind::Commit => {
361 let c = parse_commit(data)?;
362 if !strict_ref_resolves(&c.tree, pack, odb) {
363 return Err(Error::CorruptObject(format!(
364 "strict: missing tree {} referenced by commit",
365 c.tree.to_hex()
366 )));
367 }
368 for p in &c.parents {
369 if !strict_ref_resolves(p, pack, odb) {
370 return Err(Error::CorruptObject(format!(
371 "strict: missing parent {} referenced by commit",
372 p.to_hex()
373 )));
374 }
375 }
376 }
377 ObjectKind::Tag => {
378 let t = parse_tag(data)?;
379 if !strict_ref_resolves(&t.object, pack, odb) {
380 return Err(Error::CorruptObject(format!(
381 "strict: missing object {} referenced by tag",
382 t.object.to_hex()
383 )));
384 }
385 }
386 ObjectKind::Blob => {}
387 }
388 }
389 Ok(())
390}
391
392pub fn pack_bytes_to_object_map(data: &[u8], odb: &Odb) -> Result<HashMap<ObjectId, Object>> {
399 let rd = PackReader::new(data.to_vec());
400 build_pack_object_map(rd, odb)
401}
402
/// Decode a fully buffered pack into an object map keyed by id, resolving
/// deltas against the pack itself or (for REF-delta bases) the ODB. This is
/// a read-only view: nothing is written to the object database.
fn build_pack_object_map(mut rd: PackReader, odb: &Odb) -> Result<HashMap<ObjectId, Object>> {
    // Pack header: "PACK" signature, version 2 or 3, object count.
    let sig = rd.read_exact(4)?;
    if sig != b"PACK" {
        return Err(Error::CorruptObject(
            "not a pack stream: invalid signature".to_owned(),
        ));
    }
    let version = rd.read_u32_be()?;
    if version != 2 && version != 3 {
        return Err(Error::CorruptObject(format!(
            "unsupported pack version {version}"
        )));
    }
    let nr_objects = rd.read_u32_be()? as usize;

    // Decoded objects indexed by pack offset (for OFS deltas) and by id (for
    // REF deltas); deltas whose base is not yet known wait in `pending`.
    let mut by_offset: HashMap<usize, (ObjectKind, Vec<u8>)> = HashMap::new();
    let mut by_oid: HashMap<ObjectId, (ObjectKind, Vec<u8>)> = HashMap::new();
    let mut pending: Vec<PendingDelta> = Vec::new();

    // REF-delta bases may live in this pack or already in the ODB (thin pack).
    fn base_from_pack_or_odb(
        by_oid: &HashMap<ObjectId, (ObjectKind, Vec<u8>)>,
        odb: &Odb,
        id: &ObjectId,
    ) -> Option<(ObjectKind, Vec<u8>)> {
        if let Some(e) = by_oid.get(id) {
            return Some(e.clone());
        }
        odb.read(id).ok().map(|o| (o.kind, o.data))
    }

    for _ in 0..nr_objects {
        // Offset of the entry header; OFS deltas point back relative to it.
        let obj_offset = rd.pos;
        let (type_code, size) = rd.read_type_size()?;

        match type_code {
            // Plain (non-delta) object.
            1..=4 => {
                let kind = type_code_to_kind(type_code)?;
                let data = rd.decompress(size)?;
                let oid = Odb::hash_object_data(kind, &data);
                by_offset.insert(obj_offset, (kind, data.clone()));
                by_oid.insert(oid, (kind, data));
            }
            // OFS_DELTA: base located by a negative offset from this entry.
            6 => {
                let neg = rd.read_ofs_neg_offset()?;
                let base_offset = obj_offset.checked_sub(neg).ok_or_else(|| {
                    Error::CorruptObject("ofs-delta base offset underflow".to_owned())
                })?;
                let delta_data = rd.decompress(size)?;
                pending.push(PendingDelta {
                    offset: obj_offset,
                    base_oid: None,
                    base_offset: Some(base_offset),
                    delta_data,
                });
            }
            // REF_DELTA: base named by a raw 20-byte object id.
            7 => {
                let base_bytes = rd.read_exact(20)?;
                let base_oid = ObjectId::from_bytes(base_bytes)?;
                let delta_data = rd.decompress(size)?;
                pending.push(PendingDelta {
                    offset: obj_offset,
                    base_oid: Some(base_oid),
                    base_offset: None,
                    delta_data,
                });
            }
            other => {
                return Err(Error::CorruptObject(format!(
                    "unknown packed-object type {other}"
                )))
            }
        }
    }

    // Verify the trailer: SHA-1 over every byte that precedes it.
    let consumed = rd.pos;
    {
        let mut hasher = Sha1::new();
        hasher.update(&rd.data[..consumed]);
        let digest = hasher.finalize();
        let trailing = rd.read_exact(20)?;
        if digest.as_slice() != trailing {
            return Err(Error::CorruptObject(
                "pack trailing checksum mismatch".to_owned(),
            ));
        }
    }

    // Resolve deltas to a fixpoint; a pass without progress means some base
    // can never be found.
    let mut remaining = pending;
    loop {
        if remaining.is_empty() {
            break;
        }
        let before = remaining.len();
        let mut still_pending: Vec<PendingDelta> = Vec::new();

        for delta in remaining {
            let base = if let Some(base_off) = delta.base_offset {
                by_offset.get(&base_off).cloned()
            } else if let Some(ref base_id) = delta.base_oid {
                base_from_pack_or_odb(&by_oid, odb, base_id)
            } else {
                None
            };

            if let Some((base_kind, base_data)) = base {
                // A delta result inherits its base's kind.
                let result = apply_delta(&base_data, &delta.delta_data)?;
                let oid = Odb::hash_object_data(base_kind, &result);
                by_offset.insert(delta.offset, (base_kind, result.clone()));
                by_oid.insert(oid, (base_kind, result));
            } else {
                still_pending.push(delta);
            }
        }

        remaining = still_pending;
        if remaining.len() == before {
            return Err(Error::CorruptObject(format!(
                "{} delta(s) could not be resolved",
                remaining.len()
            )));
        }
    }

    Ok(by_oid
        .into_iter()
        .map(|(oid, (kind, data))| (oid, Object::new(kind, data)))
        .collect())
}
531
532fn write_or_hash(kind: ObjectKind, data: &[u8], odb: &Odb, dry_run: bool) -> Result<ObjectId> {
535 if dry_run {
536 Ok(Odb::hash_object_data(kind, data))
537 } else {
538 odb.write_local(kind, data)
541 }
542}
543
544fn type_code_to_kind(code: u8) -> Result<ObjectKind> {
546 match code {
547 1 => Ok(ObjectKind::Commit),
548 2 => Ok(ObjectKind::Tree),
549 3 => Ok(ObjectKind::Blob),
550 4 => Ok(ObjectKind::Tag),
551 _ => Err(Error::CorruptObject(format!(
552 "type code {code} is not a regular object type"
553 ))),
554 }
555}
556
/// Cursor over a fully in-memory pack byte buffer.
struct PackReader {
    // Entire pack stream, including header and trailer.
    data: Vec<u8>,
    // Current read position within `data`.
    pos: usize,
}
562
impl PackReader {
    fn new(data: Vec<u8>) -> Self {
        Self { data, pos: 0 }
    }

    /// Borrow the next `n` bytes and advance; errors (instead of panicking)
    /// when fewer than `n` bytes remain.
    fn read_exact(&mut self, n: usize) -> Result<&[u8]> {
        if self.pos + n > self.data.len() {
            return Err(Error::CorruptObject(format!(
                "pack stream truncated: need {n} bytes at offset {}",
                self.pos
            )));
        }
        let slice = &self.data[self.pos..self.pos + n];
        self.pos += n;
        Ok(slice)
    }

    /// Return the next byte and advance, or error at end of buffer.
    fn read_byte(&mut self) -> Result<u8> {
        if self.pos >= self.data.len() {
            return Err(Error::CorruptObject(
                "unexpected end of pack stream".to_owned(),
            ));
        }
        let b = self.data[self.pos];
        self.pos += 1;
        Ok(b)
    }

    /// Read a big-endian u32 (used for the header's version and count).
    fn read_u32_be(&mut self) -> Result<u32> {
        let bytes = self.read_exact(4)?;
        Ok(u32::from_be_bytes(bytes.try_into().map_err(|_| {
            Error::CorruptObject("u32 read failed".to_owned())
        })?))
    }

    /// Decode a pack entry header: type code in bits 4-6 of the first byte,
    /// size as base-128 starting with the first byte's low nibble.
    ///
    /// NOTE(review): `shift` grows unchecked; a hostile header with many
    /// continuation bytes would overflow the shift (panic in debug builds).
    /// Confirm whether input size limits bound this upstream.
    fn read_type_size(&mut self) -> Result<(u8, usize)> {
        let c = self.read_byte()?;
        let type_code = (c >> 4) & 0x7;
        let mut size = (c & 0x0f) as usize;
        let mut shift = 4u32;
        let mut cur = c;
        while cur & 0x80 != 0 {
            cur = self.read_byte()?;
            size |= ((cur & 0x7f) as usize) << shift;
            shift += 7;
        }
        Ok((type_code, size))
    }

    /// Decode an OFS_DELTA relative offset: big-endian base-128 with a +1
    /// bias applied before each continuation byte.
    ///
    /// NOTE(review): `(value + 1) << 7` is unchecked; a long continuation
    /// chain could overflow usize (panic in debug builds) — verify limits.
    fn read_ofs_neg_offset(&mut self) -> Result<usize> {
        let mut c = self.read_byte()?;
        let mut value = (c & 0x7f) as usize;
        while c & 0x80 != 0 {
            c = self.read_byte()?;
            value = (value + 1) << 7 | (c & 0x7f) as usize;
        }
        Ok(value)
    }

    /// Inflate one zlib stream starting at `pos`, requiring the inflated
    /// length to equal `expected_size`; advances `pos` by the number of
    /// compressed bytes the decoder consumed (`total_in`).
    fn decompress(&mut self, expected_size: usize) -> Result<Vec<u8>> {
        let slice = &self.data[self.pos..];
        let mut decoder = ZlibDecoder::new(slice);
        let mut out = Vec::with_capacity(expected_size);
        decoder
            .read_to_end(&mut out)
            .map_err(|e| Error::Zlib(e.to_string()))?;
        if out.len() != expected_size {
            return Err(Error::CorruptObject(format!(
                "decompressed {} bytes but expected {}",
                out.len(),
                expected_size
            )));
        }
        self.pos += decoder.total_in() as usize;
        Ok(out)
    }
}
656
657fn io_to_corrupt_eof(e: io::Error, stream_pos: usize, context: &str) -> Error {
658 if e.kind() == io::ErrorKind::UnexpectedEof {
659 Error::CorruptObject(format!(
660 "pack stream truncated ({context}) at offset {stream_pos}"
661 ))
662 } else {
663 Error::Io(e)
664 }
665}
666
/// Incremental pack reader over an arbitrary `Read`, hashing every consumed
/// byte with SHA-1 so the trailer checksum can be verified without buffering
/// the whole stream.
struct StreamingPackReader<'a> {
    inner: &'a mut dyn Read,
    // Running SHA-1 over all consumed (non-trailer) bytes.
    pack_hasher: Sha1,
    // Count of bytes consumed (hashed) so far.
    stream_pos: usize,
    // Optional cap on total pack size; enforced as bytes are consumed.
    max_input_bytes: Option<u64>,
    // Bytes over-read from `inner` during zlib inflation, not yet consumed.
    pending: Vec<u8>,
}
681
682impl<'a> StreamingPackReader<'a> {
683 fn new(inner: &'a mut dyn Read, max_input_bytes: Option<u64>) -> Self {
684 Self {
685 inner,
686 pack_hasher: Sha1::new(),
687 stream_pos: 0,
688 max_input_bytes,
689 pending: Vec::new(),
690 }
691 }
692
693 fn stream_pos(&self) -> usize {
694 self.stream_pos
695 }
696
697 fn enforce_max_input(&self) -> Result<()> {
698 if let Some(limit) = self.max_input_bytes {
699 let pos = u64::try_from(self.stream_pos)
700 .map_err(|_| Error::CorruptObject("pack stream position overflow".to_owned()))?;
701 if pos > limit {
702 return Err(Error::CorruptObject(
703 "pack exceeds maximum allowed size".to_owned(),
704 ));
705 }
706 }
707 Ok(())
708 }
709
710 fn read_from_source(&mut self, buf: &mut [u8]) -> Result<usize> {
712 let n = if !self.pending.is_empty() {
713 let take = buf.len().min(self.pending.len());
714 buf[..take].copy_from_slice(&self.pending[..take]);
715 self.pending.drain(..take);
716 take
717 } else {
718 self.inner.read(buf).map_err(Error::Io)?
719 };
720 if n > 0 {
721 self.pack_hasher.update(&buf[..n]);
722 self.stream_pos += n;
723 self.enforce_max_input()?;
724 }
725 Ok(n)
726 }
727
728 fn read_byte(&mut self) -> Result<u8> {
729 let mut b = [0u8; 1];
730 let n = self.read_from_source(&mut b)?;
731 if n == 0 {
732 return Err(Error::CorruptObject(format!(
733 "pack stream truncated (read byte) at offset {}",
734 self.stream_pos
735 )));
736 }
737 Ok(b[0])
738 }
739
740 fn read_exact_n(&mut self, n: usize) -> Result<Vec<u8>> {
741 let mut v = vec![0u8; n];
742 let mut got = 0usize;
743 while got < n {
744 let m = self.read_from_source(&mut v[got..n])?;
745 if m == 0 {
746 return Err(Error::CorruptObject(format!(
747 "pack stream truncated (read exact) at offset {}",
748 self.stream_pos
749 )));
750 }
751 got += m;
752 }
753 Ok(v)
754 }
755
756 fn read_u32_be(&mut self) -> Result<u32> {
757 let mut b = [0u8; 4];
758 let mut got = 0usize;
759 while got < 4 {
760 let m = self.read_from_source(&mut b[got..4])?;
761 if m == 0 {
762 return Err(Error::CorruptObject(format!(
763 "pack stream truncated (read u32) at offset {}",
764 self.stream_pos
765 )));
766 }
767 got += m;
768 }
769 Ok(u32::from_be_bytes(b))
770 }
771
772 fn read_type_size(&mut self) -> Result<(u8, usize)> {
773 let c = self.read_byte()?;
774 let type_code = (c >> 4) & 0x7;
775 let mut size = (c & 0x0f) as usize;
776 let mut shift = 4u32;
777 let mut cur = c;
778 while cur & 0x80 != 0 {
779 cur = self.read_byte()?;
780 size |= ((cur & 0x7f) as usize) << shift;
781 shift += 7;
782 }
783 Ok((type_code, size))
784 }
785
786 fn read_ofs_neg_offset(&mut self) -> Result<usize> {
787 let mut c = self.read_byte()?;
788 let mut value = (c & 0x7f) as usize;
789 while c & 0x80 != 0 {
790 c = self.read_byte()?;
791 value = (value + 1) << 7 | (c & 0x7f) as usize;
792 }
793 Ok(value)
794 }
795
796 fn decompress(&mut self, expected_size: usize) -> Result<Vec<u8>> {
806 if expected_size == 0 {
810 const CHUNK: usize = 64 * 1024;
811 let mut scratch = [0u8; CHUNK];
812 loop {
813 let mut cursor = std::io::Cursor::new(self.pending.as_slice());
814 let mut z = ZlibDecoder::new(&mut cursor);
815 let mut sink = [0u8; 1];
816 match z.read(&mut sink) {
817 Ok(0) => {
818 let consumed = z.total_in() as usize;
819 if consumed > self.pending.len() {
820 return Err(Error::CorruptObject(
821 "zlib total_in exceeds pending buffer".to_owned(),
822 ));
823 }
824 if consumed == 0 {
825 let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
826 if n == 0 {
827 return Err(Error::CorruptObject(format!(
828 "pack stream truncated (zlib) at offset {}",
829 self.stream_pos
830 )));
831 }
832 self.pending.extend_from_slice(&scratch[..n]);
833 continue;
834 }
835 self.pack_hasher.update(&self.pending[..consumed]);
836 self.stream_pos += consumed;
837 self.pending.drain(..consumed);
838 self.enforce_max_input()?;
839 return Ok(Vec::new());
840 }
841 Ok(_) => {
842 return Err(Error::CorruptObject(
843 "0-byte packed object inflated to non-empty output".to_owned(),
844 ));
845 }
846 Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => {
847 let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
848 if n == 0 {
849 return Err(Error::CorruptObject(format!(
850 "pack stream truncated (zlib) at offset {}",
851 self.stream_pos
852 )));
853 }
854 self.pending.extend_from_slice(&scratch[..n]);
855 }
856 Err(e) => return Err(Error::Zlib(e.to_string())),
857 }
858 }
859 }
860
861 const CHUNK: usize = 64 * 1024;
862 let mut scratch = [0u8; CHUNK];
863
864 let mut out = vec![0u8; expected_size];
865 let mut z = Decompress::new(true);
866 let mut out_pos = 0usize;
867 let mut eof = false;
868 loop {
869 if self.pending.is_empty() && !eof {
870 let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
871 if n == 0 {
872 eof = true;
873 } else {
874 self.pending.extend_from_slice(&scratch[..n]);
875 }
876 }
877
878 let flush = if eof && self.pending.is_empty() {
879 FlushDecompress::Finish
880 } else {
881 FlushDecompress::None
882 };
883
884 let before_in = z.total_in();
885 let before_out = z.total_out();
886 let status = z
887 .decompress(self.pending.as_slice(), &mut out[out_pos..], flush)
888 .map_err(|e| Error::Zlib(e.to_string()))?;
889 let consumed = (z.total_in() - before_in) as usize;
890 if consumed > self.pending.len() {
891 return Err(Error::CorruptObject(
892 "zlib consumed more than pending buffer".to_owned(),
893 ));
894 }
895 self.pack_hasher.update(&self.pending[..consumed]);
896 self.stream_pos += consumed;
897 self.pending.drain(..consumed);
898 self.enforce_max_input()?;
899 out_pos += (z.total_out() - before_out) as usize;
900
901 match status {
902 Status::StreamEnd => {
903 if out_pos != expected_size {
904 return Err(Error::CorruptObject(format!(
905 "decompressed size mismatch: got {out_pos}, want {expected_size}"
906 )));
907 }
908 return Ok(out);
909 }
910 Status::Ok | Status::BufError => {
911 if consumed == 0 && !eof {
912 let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
913 if n == 0 {
914 eof = true;
915 } else {
916 self.pending.extend_from_slice(&scratch[..n]);
917 }
918 } else if eof && self.pending.is_empty() && out_pos != expected_size {
919 return Err(Error::CorruptObject(format!(
920 "pack stream truncated (zlib) at offset {}",
921 self.stream_pos
922 )));
923 }
924 }
925 }
926 }
927 }
928
929 fn finalize_hasher(
931 &self,
932 ) -> sha1::digest::generic_array::GenericArray<u8, sha1::digest::consts::U20> {
933 self.pack_hasher.clone().finalize()
934 }
935
936 fn read_trailer_20(&mut self) -> Result<[u8; 20]> {
938 let mut b = [0u8; 20];
939 if self.pending.len() >= 20 {
940 b.copy_from_slice(&self.pending[..20]);
941 self.pending.drain(..20);
942 self.stream_pos += 20;
943 self.enforce_max_input()?;
944 return Ok(b);
945 }
946 let tail = self.pending.len();
947 if tail > 0 {
948 b[..tail].copy_from_slice(&self.pending[..]);
949 self.pending.clear();
950 }
951 self.inner
952 .read_exact(&mut b[tail..])
953 .map_err(|e| io_to_corrupt_eof(e, self.stream_pos, "trailer"))?;
954 self.stream_pos += 20;
955 self.enforce_max_input()?;
956 Ok(b)
957 }
958}
959
960pub fn apply_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
974 let mut pos = 0usize;
975
976 let src_size = read_delta_varint(delta, &mut pos)?;
977 if src_size != base.len() {
978 return Err(Error::CorruptObject(format!(
979 "delta source size {src_size} != base size {}",
980 base.len()
981 )));
982 }
983 let dest_size = read_delta_varint(delta, &mut pos)?;
984 let mut result = Vec::with_capacity(dest_size);
985
986 while pos < delta.len() {
987 let cmd = delta[pos];
988 pos += 1;
989 if cmd == 0 {
990 return Err(Error::CorruptObject(
991 "reserved opcode 0 in delta stream".to_owned(),
992 ));
993 }
994 if cmd & 0x80 != 0 {
995 let mut offset = 0usize;
998 let mut size = 0usize;
999
1000 macro_rules! maybe_read_byte {
1001 ($flag:expr, $shift:expr, $target:expr) => {
1002 if cmd & $flag != 0 {
1003 let b = *delta.get(pos).ok_or_else(|| {
1004 Error::CorruptObject("truncated delta COPY operand".to_owned())
1005 })?;
1006 pos += 1;
1007 $target |= (b as usize) << $shift;
1008 }
1009 };
1010 }
1011
1012 maybe_read_byte!(0x01, 0, offset);
1013 maybe_read_byte!(0x02, 8, offset);
1014 maybe_read_byte!(0x04, 16, offset);
1015 maybe_read_byte!(0x08, 24, offset);
1016 maybe_read_byte!(0x10, 0, size);
1017 maybe_read_byte!(0x20, 8, size);
1018 maybe_read_byte!(0x40, 16, size);
1019
1020 if size == 0 {
1021 size = 0x10000;
1022 }
1023
1024 let end = offset.checked_add(size).ok_or_else(|| {
1025 Error::CorruptObject("delta COPY range overflows usize".to_owned())
1026 })?;
1027 let chunk = base.get(offset..end).ok_or_else(|| {
1028 Error::CorruptObject(format!(
1029 "delta COPY [{offset},{end}) out of range (base is {} bytes)",
1030 base.len()
1031 ))
1032 })?;
1033 result.extend_from_slice(chunk);
1034 } else {
1035 let n = cmd as usize;
1037 let chunk = delta
1038 .get(pos..pos + n)
1039 .ok_or_else(|| Error::CorruptObject("truncated delta INSERT data".to_owned()))?;
1040 result.extend_from_slice(chunk);
1041 pos += n;
1042 }
1043 }
1044
1045 if result.len() != dest_size {
1046 return Err(Error::CorruptObject(format!(
1047 "delta produced {} bytes but expected {dest_size}",
1048 result.len()
1049 )));
1050 }
1051
1052 Ok(result)
1053}
1054
1055fn read_delta_varint(data: &[u8], pos: &mut usize) -> Result<usize> {
1059 let mut value = 0usize;
1060 let mut shift = 0u32;
1061 loop {
1062 let b = *data
1063 .get(*pos)
1064 .ok_or_else(|| Error::CorruptObject("truncated delta varint".to_owned()))?;
1065 *pos += 1;
1066 value |= ((b & 0x7f) as usize) << shift;
1067 shift += 7;
1068 if b & 0x80 == 0 {
1069 break;
1070 }
1071 }
1072 Ok(value)
1073}
1074
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a version-2 pack holding `objects` as plain (non-delta)
    /// entries, terminated with a valid SHA-1 trailer.
    fn make_pack(objects: &[(ObjectKind, &[u8])]) -> Vec<u8> {
        use flate2::write::ZlibEncoder;
        use std::io::Write;

        let mut entries: Vec<Vec<u8>> = Vec::new();
        for (kind, data) in objects {
            let type_code: u8 = match kind {
                ObjectKind::Commit => 1,
                ObjectKind::Tree => 2,
                ObjectKind::Blob => 3,
                ObjectKind::Tag => 4,
            };

            // Entry header: type in bits 4-6 of the first byte, size as
            // base-128 starting with the low nibble.
            let mut header = Vec::new();
            let mut size = data.len();
            let first = ((type_code & 0x7) << 4) | (size & 0x0f) as u8;
            size >>= 4;
            if size > 0 {
                header.push(first | 0x80);
                while size > 0 {
                    let b = (size & 0x7f) as u8;
                    size >>= 7;
                    header.push(if size > 0 { b | 0x80 } else { b });
                }
            } else {
                header.push(first);
            }

            let mut encoder = ZlibEncoder::new(Vec::new(), flate2::Compression::default());
            encoder.write_all(data).unwrap();
            let compressed = encoder.finish().unwrap();

            let mut entry = header;
            entry.extend_from_slice(&compressed);
            entries.push(entry);
        }

        // "PACK" magic, version 2, count, entries, SHA-1 trailer.
        let mut pack = Vec::new();
        pack.extend_from_slice(b"PACK");
        pack.extend_from_slice(&2u32.to_be_bytes());
        pack.extend_from_slice(&(objects.len() as u32).to_be_bytes());
        for entry in &entries {
            pack.extend_from_slice(entry);
        }
        let mut hasher = Sha1::new();
        hasher.update(&pack);
        let digest = hasher.finalize();
        pack.extend_from_slice(digest.as_slice());
        pack
    }

    #[test]
    fn test_apply_delta_simple() {
        let base = b"hello";
        let mut delta = Vec::new();
        delta.push(5u8); // source size
        delta.push(11u8); // destination size
        // COPY offset=0 size=5, then INSERT 6 literal bytes.
        delta.push(0x80 | 0x01 | 0x10);
        delta.push(0u8);
        delta.push(5u8);
        delta.push(6u8);
        delta.extend_from_slice(b" world");

        let result = apply_delta(base, &delta).unwrap();
        assert_eq!(result, b"hello world");
    }

    #[test]
    fn test_apply_delta_insert_only() {
        let base = b"";
        let mut delta = Vec::new();
        delta.push(0u8); // source size
        delta.push(5u8); // destination size
        delta.push(5u8); // INSERT 5 literal bytes
        delta.extend_from_slice(b"hello");

        let result = apply_delta(base, &delta).unwrap();
        assert_eq!(result, b"hello");
    }

    #[test]
    fn test_apply_delta_copy_only() {
        let base = b"abcdef";
        let mut delta = Vec::new();
        delta.push(6u8); // source size
        delta.push(3u8); // destination size
        delta.push(0x91u8); // COPY with offset byte 0 and size byte 0
        delta.push(2u8); // offset = 2
        delta.push(3u8); // size = 3

        let result = apply_delta(base, &delta).unwrap();
        assert_eq!(result, b"cde");
    }

    #[test]
    fn test_apply_delta_size_zero_means_65536() {
        let base = vec![0xABu8; 65536];
        let mut delta = Vec::new();
        // Source size varint (65536).
        delta.push(0x80 | (65536 & 0x7f) as u8);
        delta.push(0x80 | ((65536 >> 7) & 0x7f) as u8);
        delta.push(((65536 >> 14) & 0x7f) as u8);
        // Destination size varint (65536).
        delta.push(0x80 | (65536 & 0x7f) as u8);
        delta.push(0x80 | ((65536 >> 7) & 0x7f) as u8);
        delta.push(((65536 >> 14) & 0x7f) as u8);
        // COPY with no operand bytes: offset 0, encoded size 0 => 65536.
        delta.push(0x80u8);

        let result = apply_delta(&base, &delta).unwrap();
        assert_eq!(result.len(), 65536);
        assert!(result.iter().all(|&b| b == 0xAB));
    }

    #[test]
    fn test_unpack_objects_blobs() {
        use tempfile::TempDir;
        let tmp = TempDir::new().unwrap();
        let objects_dir = tmp.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);

        let pack = make_pack(&[
            (ObjectKind::Blob, b"hello\n"),
            (ObjectKind::Blob, b"world\n"),
        ]);

        let opts = UnpackOptions::default();
        let count = unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap();
        assert_eq!(count, 2);

        // Both blobs must read back under their content hashes.
        let oid1 = Odb::hash_object_data(ObjectKind::Blob, b"hello\n");
        let oid2 = Odb::hash_object_data(ObjectKind::Blob, b"world\n");
        let obj1 = odb.read(&oid1).unwrap();
        let obj2 = odb.read(&oid2).unwrap();
        assert_eq!(obj1.data, b"hello\n");
        assert_eq!(obj2.data, b"world\n");
    }

    #[test]
    fn test_unpack_objects_empty_tree() {
        use tempfile::TempDir;
        let tmp = TempDir::new().unwrap();
        let objects_dir = tmp.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);

        // A zero-length tree exercises the expected_size == 0 zlib path.
        let pack = make_pack(&[(ObjectKind::Tree, b"")]);
        let opts = UnpackOptions::default();
        assert_eq!(
            unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap(),
            1
        );
        let oid = Odb::hash_object_data(ObjectKind::Tree, b"");
        assert!(odb.exists(&oid));
    }

    /// Reader that hands out at most `max_len` bytes per call, to exercise
    /// the streaming reader's handling of short reads.
    struct ChunkedReader<'a> {
        data: &'a [u8],
        pos: usize,
        max_len: usize,
    }

    impl io::Read for ChunkedReader<'_> {
        fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
            if self.pos >= self.data.len() {
                return Ok(0);
            }
            let take = (self.data.len() - self.pos)
                .min(self.max_len)
                .min(buf.len());
            buf[..take].copy_from_slice(&self.data[self.pos..self.pos + take]);
            self.pos += take;
            Ok(take)
        }
    }

    #[test]
    fn test_unpack_objects_chunked_read_matches_full_buffer() {
        use tempfile::TempDir;
        let pack = make_pack(&[(ObjectKind::Blob, b"chunked-stream")]);
        let opts = UnpackOptions::default();
        let oid = Odb::hash_object_data(ObjectKind::Blob, b"chunked-stream");

        // Whole pack handed over in one slice.
        let tmp = TempDir::new().unwrap();
        let objects_dir = tmp.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);
        assert_eq!(
            unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap(),
            1
        );
        assert!(odb.exists(&oid));

        // The same pack trickled in 8-byte chunks must behave identically.
        let tmp2 = TempDir::new().unwrap();
        let objects_dir2 = tmp2.path().join("objects");
        std::fs::create_dir_all(&objects_dir2).unwrap();
        let odb2 = Odb::new(&objects_dir2);
        let mut chunked = ChunkedReader {
            data: pack.as_slice(),
            pos: 0,
            max_len: 8,
        };
        assert_eq!(unpack_objects(&mut chunked, &odb2, &opts).unwrap(), 1);
        assert!(odb2.exists(&oid));
    }

    #[test]
    fn test_unpack_objects_dry_run_writes_nothing() {
        use tempfile::TempDir;
        let tmp = TempDir::new().unwrap();
        let objects_dir = tmp.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);

        let pack = make_pack(&[(ObjectKind::Blob, b"test content")]);

        let opts = UnpackOptions {
            dry_run: true,
            quiet: true,
            strict: false,
            max_input_bytes: None,
        };
        let count = unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap();
        assert_eq!(count, 1);

        // Dry-run must count the object without materializing it.
        let oid = Odb::hash_object_data(ObjectKind::Blob, b"test content");
        assert!(!odb.exists(&oid));
    }

    #[test]
    fn test_unpack_objects_bad_signature() {
        use tempfile::TempDir;
        let tmp = TempDir::new().unwrap();
        let objects_dir = tmp.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);

        let mut bad = b"NOPE\x00\x00\x00\x02\x00\x00\x00\x00".to_vec();
        bad.extend_from_slice(&[0u8; 20]);
        let opts = UnpackOptions::default();
        let err = unpack_objects(&mut bad.as_slice(), &odb, &opts).unwrap_err();
        assert!(err.to_string().contains("invalid signature"));
    }

    #[test]
    fn test_unpack_objects_checksum_mismatch() {
        use tempfile::TempDir;
        let tmp = TempDir::new().unwrap();
        let objects_dir = tmp.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);

        // Corrupt the last trailer byte so the SHA-1 check must fail.
        let mut pack = make_pack(&[(ObjectKind::Blob, b"data")]);
        let n = pack.len();
        pack[n - 1] ^= 0xFF;

        let opts = UnpackOptions::default();
        let err = unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap_err();
        assert!(err.to_string().contains("checksum"));
    }

    #[test]
    fn test_apply_delta_source_size_mismatch() {
        let base = b"hi";
        // Declares a 3-byte source against a 2-byte base.
        let delta = [3u8, 2u8, 2u8, b'h', b'i'];
        let err = apply_delta(base, &delta).unwrap_err();
        assert!(err.to_string().contains("source size"));
    }
}