1use std::borrow::Cow;
13use std::collections::HashMap;
14use std::io::{self, Read};
15
16use flate2::read::ZlibDecoder;
17use sha1::{Digest, Sha1};
18
19use crate::error::{Error, Result};
20use crate::objects::{parse_commit, parse_tag, parse_tree, Object, ObjectId, ObjectKind};
21use crate::odb::Odb;
22
/// Flags controlling how [`unpack_objects`] processes a pack stream.
///
/// Every flag defaults to `false`. The type is a plain bundle of booleans, so it is cheap to
/// copy and can be compared directly.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct UnpackOptions {
    /// When set, objects are only hashed; nothing is written to the object database and
    /// ref-delta bases are not looked up in it either.
    pub dry_run: bool,
    /// Quiet flag. It is not consulted by the unpacking code in this file; presumably it is
    /// read by callers to suppress progress output (confirm against the call sites).
    pub quiet: bool,
    /// When set, the ids referenced by every in-memory commit, tree and tag of the pack must
    /// resolve to an object in the pack or in the object database.
    pub strict: bool,
}
33
34struct PendingDelta {
36 offset: usize,
39 base_oid: Option<ObjectId>,
41 base_offset: Option<usize>,
43 delta_data: Vec<u8>,
45}
46
47pub fn unpack_objects(reader: &mut dyn Read, odb: &Odb, opts: &UnpackOptions) -> Result<usize> {
62 const MAX_RETAIN_BYTES: usize = 1024 * 1024;
66
67 let mut rd = StreamingPackReader::new(reader);
68
69 let sig = rd.read_exact_n(4)?;
71 if sig != b"PACK" {
72 return Err(Error::CorruptObject(
73 "not a pack stream: invalid signature".to_owned(),
74 ));
75 }
76 let version = rd.read_u32_be()?;
77 if version != 2 && version != 3 {
78 return Err(Error::CorruptObject(format!(
79 "unsupported pack version {version}"
80 )));
81 }
82 let nr_objects = rd.read_u32_be()? as usize;
83
84 let mut by_offset: HashMap<usize, PackedObjectEntry> = HashMap::new();
86 let mut by_oid: HashMap<ObjectId, PackedObjectEntry> = HashMap::new();
88
89 let mut pending: Vec<PendingDelta> = Vec::new();
90 let mut count = 0usize;
91
92 for _ in 0..nr_objects {
93 let obj_offset = rd.stream_pos();
94 let (type_code, size) = rd.read_type_size()?;
95
96 match type_code {
97 1..=4 => {
98 let kind = type_code_to_kind(type_code)?;
99 let data = rd.decompress(size)?;
100 let oid = write_or_hash(kind, &data, odb, opts.dry_run)?;
101 let entry = packed_entry_after_write(kind, data, oid, odb, opts, MAX_RETAIN_BYTES);
102 by_offset.insert(obj_offset, entry.clone());
103 by_oid.insert(oid, entry);
104 count += 1;
105 }
106 6 => {
107 let neg = rd.read_ofs_neg_offset()?;
109 let base_offset = obj_offset.checked_sub(neg).ok_or_else(|| {
110 Error::CorruptObject("ofs-delta base offset underflow".to_owned())
111 })?;
112 let delta_data = rd.decompress(size)?;
113 pending.push(PendingDelta {
114 offset: obj_offset,
115 base_oid: None,
116 base_offset: Some(base_offset),
117 delta_data,
118 });
119 }
120 7 => {
121 let base_bytes = rd.read_exact_n(20)?;
123 let base_oid = ObjectId::from_bytes(&base_bytes)?;
124 let delta_data = rd.decompress(size)?;
125 pending.push(PendingDelta {
126 offset: obj_offset,
127 base_oid: Some(base_oid),
128 base_offset: None,
129 delta_data,
130 });
131 }
132 other => {
133 return Err(Error::CorruptObject(format!(
134 "unknown packed-object type {other}"
135 )))
136 }
137 }
138 }
139
140 let digest = rd.finalize_hasher();
142 let trailing = rd.read_trailer_20()?;
143 if digest.as_slice() != trailing {
144 return Err(Error::CorruptObject(
145 "pack trailing checksum mismatch".to_owned(),
146 ));
147 }
148
149 let mut remaining = pending;
152 loop {
153 if remaining.is_empty() {
154 break;
155 }
156 let before = remaining.len();
157 let mut still_pending: Vec<PendingDelta> = Vec::new();
158
159 for delta in remaining {
160 let base_res: Option<Result<(ObjectKind, Cow<'_, [u8]>)>> =
161 if let Some(base_off) = delta.base_offset {
162 by_offset
163 .get(&base_off)
164 .map(|e| entry_object_bytes(e, odb).map(|d| (e.kind(), d)))
165 } else if let Some(ref base_id) = delta.base_oid {
166 if let Some(e) = by_oid.get(base_id) {
167 Some(entry_object_bytes(e, odb).map(|d| (e.kind(), d)))
168 } else if !opts.dry_run {
169 odb.read(base_id)
170 .ok()
171 .map(|obj| Ok((obj.kind, Cow::Owned(obj.data))))
172 } else {
173 None
174 }
175 } else {
176 None
177 };
178
179 match base_res {
180 Some(Ok((base_kind, base_data))) => {
181 let result = apply_delta(base_data.as_ref(), &delta.delta_data)?;
182 let oid = write_or_hash(base_kind, &result, odb, opts.dry_run)?;
183 let new_entry = packed_entry_after_write(
184 base_kind,
185 result,
186 oid,
187 odb,
188 opts,
189 MAX_RETAIN_BYTES,
190 );
191 by_offset.insert(delta.offset, new_entry.clone());
192 by_oid.insert(oid, new_entry);
193 count += 1;
194 }
195 Some(Err(e)) => return Err(e),
196 None => still_pending.push(delta),
197 }
198 }
199
200 remaining = still_pending;
201 if remaining.len() == before {
202 return Err(Error::CorruptObject(format!(
203 "{} delta(s) could not be resolved",
204 remaining.len()
205 )));
206 }
207 }
208
209 if opts.strict {
210 strict_verify_packed_references_map(Some(odb), &by_oid)?;
211 }
212
213 Ok(count)
214}
215
216#[derive(Debug, Clone)]
218enum PackedObjectEntry {
219 InMemory { kind: ObjectKind, data: Vec<u8> },
220 BlobOnDisk { oid: ObjectId },
221}
222
223impl PackedObjectEntry {
224 fn kind(&self) -> ObjectKind {
225 match self {
226 PackedObjectEntry::InMemory { kind, .. } => *kind,
227 PackedObjectEntry::BlobOnDisk { .. } => ObjectKind::Blob,
228 }
229 }
230}
231
232fn packed_entry_after_write(
233 kind: ObjectKind,
234 data: Vec<u8>,
235 oid: ObjectId,
236 _odb: &Odb,
237 opts: &UnpackOptions,
238 max_retain: usize,
239) -> PackedObjectEntry {
240 if !opts.dry_run && kind == ObjectKind::Blob && data.len() > max_retain {
241 PackedObjectEntry::BlobOnDisk { oid }
242 } else {
243 PackedObjectEntry::InMemory { kind, data }
244 }
245}
246
247fn entry_object_bytes<'a>(entry: &'a PackedObjectEntry, odb: &Odb) -> Result<Cow<'a, [u8]>> {
248 match entry {
249 PackedObjectEntry::InMemory { data, .. } => Ok(Cow::Borrowed(data.as_slice())),
250 PackedObjectEntry::BlobOnDisk { oid } => Ok(Cow::Owned(odb.read(oid)?.data)),
251 }
252}
253
254fn strict_verify_packed_references_map(
255 odb: Option<&Odb>,
256 pack: &HashMap<ObjectId, PackedObjectEntry>,
257) -> Result<()> {
258 for entry in pack.values() {
259 match entry {
260 PackedObjectEntry::BlobOnDisk { .. } => {}
261 PackedObjectEntry::InMemory { kind, data } => match kind {
262 ObjectKind::Tree => {
263 for e in parse_tree(data)? {
264 if !strict_ref_resolves_map(&e.oid, pack, odb) {
265 return Err(Error::CorruptObject(format!(
266 "strict: missing object {} referenced by tree",
267 e.oid.to_hex()
268 )));
269 }
270 }
271 }
272 ObjectKind::Commit => {
273 let c = parse_commit(data)?;
274 if !strict_ref_resolves_map(&c.tree, pack, odb) {
275 return Err(Error::CorruptObject(format!(
276 "strict: missing tree {} referenced by commit",
277 c.tree.to_hex()
278 )));
279 }
280 for p in &c.parents {
281 if !strict_ref_resolves_map(p, pack, odb) {
282 return Err(Error::CorruptObject(format!(
283 "strict: missing parent {} referenced by commit",
284 p.to_hex()
285 )));
286 }
287 }
288 }
289 ObjectKind::Tag => {
290 let t = parse_tag(data)?;
291 if !strict_ref_resolves_map(&t.object, pack, odb) {
292 return Err(Error::CorruptObject(format!(
293 "strict: missing object {} referenced by tag",
294 t.object.to_hex()
295 )));
296 }
297 }
298 ObjectKind::Blob => {}
299 },
300 }
301 }
302 Ok(())
303}
304
305fn strict_ref_resolves_map(
306 oid: &ObjectId,
307 pack: &HashMap<ObjectId, PackedObjectEntry>,
308 odb: Option<&Odb>,
309) -> bool {
310 pack.contains_key(oid) || odb.is_some_and(|o| o.exists(oid))
311}
312
313fn strict_ref_resolves(
314 oid: &ObjectId,
315 pack: &std::collections::HashMap<ObjectId, (ObjectKind, Vec<u8>)>,
316 odb: Option<&Odb>,
317) -> bool {
318 pack.contains_key(oid) || odb.is_some_and(|o| o.exists(oid))
319}
320
321pub fn strict_verify_packed_references(
327 odb: Option<&Odb>,
328 pack: &HashMap<ObjectId, (ObjectKind, Vec<u8>)>,
329) -> Result<()> {
330 for (kind, data) in pack.values() {
331 match kind {
332 ObjectKind::Tree => {
333 for e in parse_tree(data)? {
334 if !strict_ref_resolves(&e.oid, pack, odb) {
335 return Err(Error::CorruptObject(format!(
336 "strict: missing object {} referenced by tree",
337 e.oid.to_hex()
338 )));
339 }
340 }
341 }
342 ObjectKind::Commit => {
343 let c = parse_commit(data)?;
344 if !strict_ref_resolves(&c.tree, pack, odb) {
345 return Err(Error::CorruptObject(format!(
346 "strict: missing tree {} referenced by commit",
347 c.tree.to_hex()
348 )));
349 }
350 for p in &c.parents {
351 if !strict_ref_resolves(p, pack, odb) {
352 return Err(Error::CorruptObject(format!(
353 "strict: missing parent {} referenced by commit",
354 p.to_hex()
355 )));
356 }
357 }
358 }
359 ObjectKind::Tag => {
360 let t = parse_tag(data)?;
361 if !strict_ref_resolves(&t.object, pack, odb) {
362 return Err(Error::CorruptObject(format!(
363 "strict: missing object {} referenced by tag",
364 t.object.to_hex()
365 )));
366 }
367 }
368 ObjectKind::Blob => {}
369 }
370 }
371 Ok(())
372}
373
374pub fn pack_bytes_to_object_map(data: &[u8], odb: &Odb) -> Result<HashMap<ObjectId, Object>> {
381 let rd = PackReader::new(data.to_vec());
382 build_pack_object_map(rd, odb)
383}
384
385fn build_pack_object_map(mut rd: PackReader, odb: &Odb) -> Result<HashMap<ObjectId, Object>> {
386 let sig = rd.read_exact(4)?;
387 if sig != b"PACK" {
388 return Err(Error::CorruptObject(
389 "not a pack stream: invalid signature".to_owned(),
390 ));
391 }
392 let version = rd.read_u32_be()?;
393 if version != 2 && version != 3 {
394 return Err(Error::CorruptObject(format!(
395 "unsupported pack version {version}"
396 )));
397 }
398 let nr_objects = rd.read_u32_be()? as usize;
399
400 let mut by_offset: HashMap<usize, (ObjectKind, Vec<u8>)> = HashMap::new();
401 let mut by_oid: HashMap<ObjectId, (ObjectKind, Vec<u8>)> = HashMap::new();
402 let mut pending: Vec<PendingDelta> = Vec::new();
403
404 fn base_from_pack_or_odb(
405 by_oid: &HashMap<ObjectId, (ObjectKind, Vec<u8>)>,
406 odb: &Odb,
407 id: &ObjectId,
408 ) -> Option<(ObjectKind, Vec<u8>)> {
409 if let Some(e) = by_oid.get(id) {
410 return Some(e.clone());
411 }
412 odb.read(id).ok().map(|o| (o.kind, o.data))
413 }
414
415 for _ in 0..nr_objects {
416 let obj_offset = rd.pos;
417 let (type_code, size) = rd.read_type_size()?;
418
419 match type_code {
420 1..=4 => {
421 let kind = type_code_to_kind(type_code)?;
422 let data = rd.decompress(size)?;
423 let oid = Odb::hash_object_data(kind, &data);
424 by_offset.insert(obj_offset, (kind, data.clone()));
425 by_oid.insert(oid, (kind, data));
426 }
427 6 => {
428 let neg = rd.read_ofs_neg_offset()?;
429 let base_offset = obj_offset.checked_sub(neg).ok_or_else(|| {
430 Error::CorruptObject("ofs-delta base offset underflow".to_owned())
431 })?;
432 let delta_data = rd.decompress(size)?;
433 pending.push(PendingDelta {
434 offset: obj_offset,
435 base_oid: None,
436 base_offset: Some(base_offset),
437 delta_data,
438 });
439 }
440 7 => {
441 let base_bytes = rd.read_exact(20)?;
442 let base_oid = ObjectId::from_bytes(base_bytes)?;
443 let delta_data = rd.decompress(size)?;
444 pending.push(PendingDelta {
445 offset: obj_offset,
446 base_oid: Some(base_oid),
447 base_offset: None,
448 delta_data,
449 });
450 }
451 other => {
452 return Err(Error::CorruptObject(format!(
453 "unknown packed-object type {other}"
454 )))
455 }
456 }
457 }
458
459 let consumed = rd.pos;
460 {
461 let mut hasher = Sha1::new();
462 hasher.update(&rd.data[..consumed]);
463 let digest = hasher.finalize();
464 let trailing = rd.read_exact(20)?;
465 if digest.as_slice() != trailing {
466 return Err(Error::CorruptObject(
467 "pack trailing checksum mismatch".to_owned(),
468 ));
469 }
470 }
471
472 let mut remaining = pending;
473 loop {
474 if remaining.is_empty() {
475 break;
476 }
477 let before = remaining.len();
478 let mut still_pending: Vec<PendingDelta> = Vec::new();
479
480 for delta in remaining {
481 let base = if let Some(base_off) = delta.base_offset {
482 by_offset.get(&base_off).cloned()
483 } else if let Some(ref base_id) = delta.base_oid {
484 base_from_pack_or_odb(&by_oid, odb, base_id)
485 } else {
486 None
487 };
488
489 if let Some((base_kind, base_data)) = base {
490 let result = apply_delta(&base_data, &delta.delta_data)?;
491 let oid = Odb::hash_object_data(base_kind, &result);
492 by_offset.insert(delta.offset, (base_kind, result.clone()));
493 by_oid.insert(oid, (base_kind, result));
494 } else {
495 still_pending.push(delta);
496 }
497 }
498
499 remaining = still_pending;
500 if remaining.len() == before {
501 return Err(Error::CorruptObject(format!(
502 "{} delta(s) could not be resolved",
503 remaining.len()
504 )));
505 }
506 }
507
508 Ok(by_oid
509 .into_iter()
510 .map(|(oid, (kind, data))| (oid, Object::new(kind, data)))
511 .collect())
512}
513
514fn write_or_hash(kind: ObjectKind, data: &[u8], odb: &Odb, dry_run: bool) -> Result<ObjectId> {
517 if dry_run {
518 Ok(Odb::hash_object_data(kind, data))
519 } else {
520 odb.write(kind, data)
521 }
522}
523
524fn type_code_to_kind(code: u8) -> Result<ObjectKind> {
526 match code {
527 1 => Ok(ObjectKind::Commit),
528 2 => Ok(ObjectKind::Tree),
529 3 => Ok(ObjectKind::Blob),
530 4 => Ok(ObjectKind::Tag),
531 _ => Err(Error::CorruptObject(format!(
532 "type code {code} is not a regular object type"
533 ))),
534 }
535}
536
537struct PackReader {
539 data: Vec<u8>,
540 pos: usize,
541}
542
543impl PackReader {
544 fn new(data: Vec<u8>) -> Self {
545 Self { data, pos: 0 }
546 }
547
548 fn read_exact(&mut self, n: usize) -> Result<&[u8]> {
551 if self.pos + n > self.data.len() {
552 return Err(Error::CorruptObject(format!(
553 "pack stream truncated: need {n} bytes at offset {}",
554 self.pos
555 )));
556 }
557 let slice = &self.data[self.pos..self.pos + n];
558 self.pos += n;
559 Ok(slice)
560 }
561
562 fn read_byte(&mut self) -> Result<u8> {
564 if self.pos >= self.data.len() {
565 return Err(Error::CorruptObject(
566 "unexpected end of pack stream".to_owned(),
567 ));
568 }
569 let b = self.data[self.pos];
570 self.pos += 1;
571 Ok(b)
572 }
573
574 fn read_u32_be(&mut self) -> Result<u32> {
576 let bytes = self.read_exact(4)?;
577 Ok(u32::from_be_bytes(bytes.try_into().map_err(|_| {
578 Error::CorruptObject("u32 read failed".to_owned())
579 })?))
580 }
581
582 fn read_type_size(&mut self) -> Result<(u8, usize)> {
587 let c = self.read_byte()?;
588 let type_code = (c >> 4) & 0x7;
589 let mut size = (c & 0x0f) as usize;
590 let mut shift = 4u32;
591 let mut cur = c;
592 while cur & 0x80 != 0 {
593 cur = self.read_byte()?;
594 size |= ((cur & 0x7f) as usize) << shift;
595 shift += 7;
596 }
597 Ok((type_code, size))
598 }
599
600 fn read_ofs_neg_offset(&mut self) -> Result<usize> {
605 let mut c = self.read_byte()?;
606 let mut value = (c & 0x7f) as usize;
607 while c & 0x80 != 0 {
608 c = self.read_byte()?;
609 value = (value + 1) << 7 | (c & 0x7f) as usize;
610 }
611 Ok(value)
612 }
613
614 fn decompress(&mut self, expected_size: usize) -> Result<Vec<u8>> {
619 let slice = &self.data[self.pos..];
620 let mut decoder = ZlibDecoder::new(slice);
621 let mut out = Vec::with_capacity(expected_size);
622 decoder
623 .read_to_end(&mut out)
624 .map_err(|e| Error::Zlib(e.to_string()))?;
625 if out.len() != expected_size {
626 return Err(Error::CorruptObject(format!(
627 "decompressed {} bytes but expected {}",
628 out.len(),
629 expected_size
630 )));
631 }
632 self.pos += decoder.total_in() as usize;
633 Ok(out)
634 }
635}
636
637fn io_to_corrupt_eof(e: io::Error, stream_pos: usize, context: &str) -> Error {
638 if e.kind() == io::ErrorKind::UnexpectedEof {
639 Error::CorruptObject(format!(
640 "pack stream truncated ({context}) at offset {stream_pos}"
641 ))
642 } else {
643 Error::Io(e)
644 }
645}
646
647struct StreamingPackReader<'a> {
653 inner: &'a mut dyn Read,
654 pack_hasher: Sha1,
655 stream_pos: usize,
656 pending: Vec<u8>,
659}
660
661impl<'a> StreamingPackReader<'a> {
662 fn new(inner: &'a mut dyn Read) -> Self {
663 Self {
664 inner,
665 pack_hasher: Sha1::new(),
666 stream_pos: 0,
667 pending: Vec::new(),
668 }
669 }
670
671 fn stream_pos(&self) -> usize {
672 self.stream_pos
673 }
674
675 fn read_from_source(&mut self, buf: &mut [u8]) -> Result<usize> {
677 let n = if !self.pending.is_empty() {
678 let take = buf.len().min(self.pending.len());
679 buf[..take].copy_from_slice(&self.pending[..take]);
680 self.pending.drain(..take);
681 take
682 } else {
683 self.inner.read(buf).map_err(Error::Io)?
684 };
685 if n > 0 {
686 self.pack_hasher.update(&buf[..n]);
687 self.stream_pos += n;
688 }
689 Ok(n)
690 }
691
692 fn read_byte(&mut self) -> Result<u8> {
693 let mut b = [0u8; 1];
694 let n = self.read_from_source(&mut b)?;
695 if n == 0 {
696 return Err(Error::CorruptObject(format!(
697 "pack stream truncated (read byte) at offset {}",
698 self.stream_pos
699 )));
700 }
701 Ok(b[0])
702 }
703
704 fn read_exact_n(&mut self, n: usize) -> Result<Vec<u8>> {
705 let mut v = vec![0u8; n];
706 let mut got = 0usize;
707 while got < n {
708 let m = self.read_from_source(&mut v[got..n])?;
709 if m == 0 {
710 return Err(Error::CorruptObject(format!(
711 "pack stream truncated (read exact) at offset {}",
712 self.stream_pos
713 )));
714 }
715 got += m;
716 }
717 Ok(v)
718 }
719
720 fn read_u32_be(&mut self) -> Result<u32> {
721 let mut b = [0u8; 4];
722 let mut got = 0usize;
723 while got < 4 {
724 let m = self.read_from_source(&mut b[got..4])?;
725 if m == 0 {
726 return Err(Error::CorruptObject(format!(
727 "pack stream truncated (read u32) at offset {}",
728 self.stream_pos
729 )));
730 }
731 got += m;
732 }
733 Ok(u32::from_be_bytes(b))
734 }
735
736 fn read_type_size(&mut self) -> Result<(u8, usize)> {
737 let c = self.read_byte()?;
738 let type_code = (c >> 4) & 0x7;
739 let mut size = (c & 0x0f) as usize;
740 let mut shift = 4u32;
741 let mut cur = c;
742 while cur & 0x80 != 0 {
743 cur = self.read_byte()?;
744 size |= ((cur & 0x7f) as usize) << shift;
745 shift += 7;
746 }
747 Ok((type_code, size))
748 }
749
750 fn read_ofs_neg_offset(&mut self) -> Result<usize> {
751 let mut c = self.read_byte()?;
752 let mut value = (c & 0x7f) as usize;
753 while c & 0x80 != 0 {
754 c = self.read_byte()?;
755 value = (value + 1) << 7 | (c & 0x7f) as usize;
756 }
757 Ok(value)
758 }
759
760 fn decompress(&mut self, expected_size: usize) -> Result<Vec<u8>> {
766 const CHUNK: usize = 64 * 1024;
767 let mut scratch = [0u8; CHUNK];
768 let mut out = vec![0u8; expected_size];
769 loop {
770 let mut cursor = std::io::Cursor::new(self.pending.as_slice());
771 let mut z = ZlibDecoder::new(&mut cursor);
772 match z.read_exact(&mut out) {
773 Ok(()) => {
774 let consumed = z.total_in() as usize;
775 if consumed > self.pending.len() {
776 return Err(Error::CorruptObject(
777 "zlib total_in exceeds pending buffer".to_owned(),
778 ));
779 }
780 self.pack_hasher.update(&self.pending[..consumed]);
781 self.stream_pos += consumed;
782 self.pending.drain(..consumed);
783 return Ok(out);
784 }
785 Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => {
786 let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
787 if n == 0 {
788 return Err(Error::CorruptObject(format!(
789 "pack stream truncated (zlib) at offset {}",
790 self.stream_pos
791 )));
792 }
793 self.pending.extend_from_slice(&scratch[..n]);
794 }
795 Err(e) => return Err(Error::Zlib(e.to_string())),
796 }
797 }
798 }
799
800 fn finalize_hasher(
802 &self,
803 ) -> sha1::digest::generic_array::GenericArray<u8, sha1::digest::consts::U20> {
804 self.pack_hasher.clone().finalize()
805 }
806
807 fn read_trailer_20(&mut self) -> Result<[u8; 20]> {
809 let mut b = [0u8; 20];
810 if self.pending.len() >= 20 {
811 b.copy_from_slice(&self.pending[..20]);
812 self.pending.drain(..20);
813 self.stream_pos += 20;
814 return Ok(b);
815 }
816 let tail = self.pending.len();
817 if tail > 0 {
818 b[..tail].copy_from_slice(&self.pending[..]);
819 self.pending.clear();
820 }
821 self.inner
822 .read_exact(&mut b[tail..])
823 .map_err(|e| io_to_corrupt_eof(e, self.stream_pos, "trailer"))?;
824 self.stream_pos += 20;
825 Ok(b)
826 }
827}
828
829pub fn apply_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
843 let mut pos = 0usize;
844
845 let src_size = read_delta_varint(delta, &mut pos)?;
846 if src_size != base.len() {
847 return Err(Error::CorruptObject(format!(
848 "delta source size {src_size} != base size {}",
849 base.len()
850 )));
851 }
852 let dest_size = read_delta_varint(delta, &mut pos)?;
853 let mut result = Vec::with_capacity(dest_size);
854
855 while pos < delta.len() {
856 let cmd = delta[pos];
857 pos += 1;
858 if cmd == 0 {
859 return Err(Error::CorruptObject(
860 "reserved opcode 0 in delta stream".to_owned(),
861 ));
862 }
863 if cmd & 0x80 != 0 {
864 let mut offset = 0usize;
867 let mut size = 0usize;
868
869 macro_rules! maybe_read_byte {
870 ($flag:expr, $shift:expr, $target:expr) => {
871 if cmd & $flag != 0 {
872 let b = *delta.get(pos).ok_or_else(|| {
873 Error::CorruptObject("truncated delta COPY operand".to_owned())
874 })?;
875 pos += 1;
876 $target |= (b as usize) << $shift;
877 }
878 };
879 }
880
881 maybe_read_byte!(0x01, 0, offset);
882 maybe_read_byte!(0x02, 8, offset);
883 maybe_read_byte!(0x04, 16, offset);
884 maybe_read_byte!(0x08, 24, offset);
885 maybe_read_byte!(0x10, 0, size);
886 maybe_read_byte!(0x20, 8, size);
887 maybe_read_byte!(0x40, 16, size);
888
889 if size == 0 {
890 size = 0x10000;
891 }
892
893 let end = offset.checked_add(size).ok_or_else(|| {
894 Error::CorruptObject("delta COPY range overflows usize".to_owned())
895 })?;
896 let chunk = base.get(offset..end).ok_or_else(|| {
897 Error::CorruptObject(format!(
898 "delta COPY [{offset},{end}) out of range (base is {} bytes)",
899 base.len()
900 ))
901 })?;
902 result.extend_from_slice(chunk);
903 } else {
904 let n = cmd as usize;
906 let chunk = delta
907 .get(pos..pos + n)
908 .ok_or_else(|| Error::CorruptObject("truncated delta INSERT data".to_owned()))?;
909 result.extend_from_slice(chunk);
910 pos += n;
911 }
912 }
913
914 if result.len() != dest_size {
915 return Err(Error::CorruptObject(format!(
916 "delta produced {} bytes but expected {dest_size}",
917 result.len()
918 )));
919 }
920
921 Ok(result)
922}
923
924fn read_delta_varint(data: &[u8], pos: &mut usize) -> Result<usize> {
928 let mut value = 0usize;
929 let mut shift = 0u32;
930 loop {
931 let b = *data
932 .get(*pos)
933 .ok_or_else(|| Error::CorruptObject("truncated delta varint".to_owned()))?;
934 *pos += 1;
935 value |= ((b & 0x7f) as usize) << shift;
936 shift += 7;
937 if b & 0x80 == 0 {
938 break;
939 }
940 }
941 Ok(value)
942}
943
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a version-2 pack holding `objects` as plain (non-delta) entries, followed by
    /// the SHA-1 trailer.
    fn make_pack(objects: &[(ObjectKind, &[u8])]) -> Vec<u8> {
        use flate2::write::ZlibEncoder;
        use std::io::Write;

        let mut pack = Vec::new();
        pack.extend_from_slice(b"PACK");
        pack.extend_from_slice(&2u32.to_be_bytes());
        pack.extend_from_slice(&(objects.len() as u32).to_be_bytes());

        for (kind, data) in objects {
            let type_code: u8 = match kind {
                ObjectKind::Commit => 1,
                ObjectKind::Tree => 2,
                ObjectKind::Blob => 3,
                ObjectKind::Tag => 4,
            };

            // Entry header: type and low four size bits first, then seven size bits per
            // byte; every byte except the last carries the continuation bit.
            let mut byte = ((type_code & 0x7) << 4) | (data.len() & 0x0f) as u8;
            let mut rest = data.len() >> 4;
            while rest > 0 {
                pack.push(byte | 0x80);
                byte = (rest & 0x7f) as u8;
                rest >>= 7;
            }
            pack.push(byte);

            let mut enc = ZlibEncoder::new(Vec::new(), flate2::Compression::default());
            enc.write_all(data).unwrap();
            pack.extend_from_slice(&enc.finish().unwrap());
        }

        let mut hasher = Sha1::new();
        hasher.update(&pack);
        let digest = hasher.finalize();
        pack.extend_from_slice(digest.as_slice());
        pack
    }

    #[test]
    fn test_apply_delta_simple() {
        let base = b"hello";
        // Sizes 5 -> 11, COPY offset 0 size 5, INSERT 6 bytes.
        let mut delta = vec![5u8, 11, 0x80 | 0x01 | 0x10, 0, 5, 6];
        delta.extend_from_slice(b" world");

        let result = apply_delta(base, &delta).unwrap();
        assert_eq!(result, b"hello world");
    }

    #[test]
    fn test_apply_delta_insert_only() {
        let base = b"";
        // Sizes 0 -> 5, INSERT 5 bytes.
        let mut delta = vec![0u8, 5, 5];
        delta.extend_from_slice(b"hello");

        let result = apply_delta(base, &delta).unwrap();
        assert_eq!(result, b"hello");
    }

    #[test]
    fn test_apply_delta_copy_only() {
        let base = b"abcdef";
        // Sizes 6 -> 3, COPY offset 2 size 3.
        let delta = vec![6u8, 3, 0x91, 2, 3];

        let result = apply_delta(base, &delta).unwrap();
        assert_eq!(result, b"cde");
    }

    #[test]
    fn test_apply_delta_size_zero_means_65536() {
        let base = vec![0xABu8; 65536];
        // 65536 as a varint is 0x80 0x80 0x04.
        let size_varint = [0x80u8, 0x80, 0x04];
        let mut delta = Vec::new();
        delta.extend_from_slice(&size_varint);
        delta.extend_from_slice(&size_varint);
        // COPY without operand bytes: offset 0, size 0.
        delta.push(0x80u8);

        let result = apply_delta(&base, &delta).unwrap();
        assert_eq!(result.len(), 65536);
        assert!(result.iter().all(|&b| b == 0xAB));
    }

    #[test]
    fn test_unpack_objects_blobs() {
        use tempfile::TempDir;
        let tmp = TempDir::new().unwrap();
        let objects_dir = tmp.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);

        let pack = make_pack(&[
            (ObjectKind::Blob, b"hello\n"),
            (ObjectKind::Blob, b"world\n"),
        ]);

        let opts = UnpackOptions::default();
        let count = unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap();
        assert_eq!(count, 2);

        let first_id = Odb::hash_object_data(ObjectKind::Blob, b"hello\n");
        let second_id = Odb::hash_object_data(ObjectKind::Blob, b"world\n");
        let first = odb.read(&first_id).unwrap();
        let second = odb.read(&second_id).unwrap();
        assert_eq!(first.data, b"hello\n");
        assert_eq!(second.data, b"world\n");
    }

    #[test]
    fn test_unpack_objects_dry_run_writes_nothing() {
        use tempfile::TempDir;
        let tmp = TempDir::new().unwrap();
        let objects_dir = tmp.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);

        let pack = make_pack(&[(ObjectKind::Blob, b"test content")]);

        let opts = UnpackOptions {
            dry_run: true,
            quiet: true,
            strict: false,
        };
        let count = unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap();
        assert_eq!(count, 1);

        let oid = Odb::hash_object_data(ObjectKind::Blob, b"test content");
        assert!(!odb.exists(&oid));
    }

    #[test]
    fn test_unpack_objects_bad_signature() {
        use tempfile::TempDir;
        let tmp = TempDir::new().unwrap();
        let objects_dir = tmp.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);

        let mut bad = b"NOPE\x00\x00\x00\x02\x00\x00\x00\x00".to_vec();
        bad.extend_from_slice(&[0u8; 20]);

        let opts = UnpackOptions::default();
        let err = unpack_objects(&mut bad.as_slice(), &odb, &opts).unwrap_err();
        assert!(err.to_string().contains("invalid signature"));
    }

    #[test]
    fn test_unpack_objects_checksum_mismatch() {
        use tempfile::TempDir;
        let tmp = TempDir::new().unwrap();
        let objects_dir = tmp.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);

        let mut pack = make_pack(&[(ObjectKind::Blob, b"data")]);
        // Corrupt the final trailer byte.
        let last = pack.len() - 1;
        pack[last] ^= 0xFF;

        let opts = UnpackOptions::default();
        let err = unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap_err();
        assert!(err.to_string().contains("checksum"));
    }

    #[test]
    fn test_apply_delta_source_size_mismatch() {
        let base = b"hi";
        // Declares a 3-byte source although the base has 2 bytes.
        let delta = [3u8, 2u8, 2u8, b'h', b'i'];

        let err = apply_delta(base, &delta).unwrap_err();
        assert!(err.to_string().contains("source size"));
    }
}