1use std::collections::HashMap;
9use std::io::Read;
10
11use flate2::read::ZlibDecoder;
12use sha1::{Digest, Sha1};
13
14use crate::error::{Error, Result};
15use crate::objects::{ObjectId, ObjectKind};
16use crate::odb::Odb;
17
/// Options controlling how [`unpack_objects`] processes a pack stream.
///
/// The struct is a pair of flags, so it is `Copy` and comparable; the
/// `Default` value has both flags cleared.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct UnpackOptions {
    /// When `true`, objects are only hashed: nothing is written to the object
    /// database and it is not consulted for delta bases.
    pub dry_run: bool,
    /// Requests quiet operation.
    ///
    /// NOTE(review): nothing in this file reads this flag; presumably it is
    /// consumed by callers — confirm.
    pub quiet: bool,
}
26
27struct PendingDelta {
29 offset: usize,
32 base_oid: Option<ObjectId>,
34 base_offset: Option<usize>,
36 delta_data: Vec<u8>,
38}
39
40pub fn unpack_objects(reader: &mut dyn Read, odb: &Odb, opts: &UnpackOptions) -> Result<usize> {
55 let mut raw = Vec::new();
56 reader.read_to_end(&mut raw).map_err(Error::Io)?;
57
58 let mut rd = PackReader::new(raw);
59
60 let sig = rd.read_exact(4)?;
62 if sig != b"PACK" {
63 return Err(Error::CorruptObject(
64 "not a pack stream: invalid signature".to_owned(),
65 ));
66 }
67 let version = rd.read_u32_be()?;
68 if version != 2 && version != 3 {
69 return Err(Error::CorruptObject(format!(
70 "unsupported pack version {version}"
71 )));
72 }
73 let nr_objects = rd.read_u32_be()? as usize;
74
75 let mut by_offset: HashMap<usize, (ObjectKind, Vec<u8>)> = HashMap::new();
79 let mut by_oid: HashMap<ObjectId, (ObjectKind, Vec<u8>)> = HashMap::new();
81
82 let mut pending: Vec<PendingDelta> = Vec::new();
83 let mut count = 0usize;
84
85 for _ in 0..nr_objects {
86 let obj_offset = rd.pos;
87 let (type_code, size) = rd.read_type_size()?;
88
89 match type_code {
90 1..=4 => {
91 let kind = type_code_to_kind(type_code)?;
92 let data = rd.decompress(size)?;
93 let oid = write_or_hash(kind, &data, odb, opts.dry_run)?;
94 by_offset.insert(obj_offset, (kind, data.clone()));
95 by_oid.insert(oid, (kind, data));
96 count += 1;
97 }
98 6 => {
99 let neg = rd.read_ofs_neg_offset()?;
101 let base_offset = obj_offset.checked_sub(neg).ok_or_else(|| {
102 Error::CorruptObject("ofs-delta base offset underflow".to_owned())
103 })?;
104 let delta_data = rd.decompress(size)?;
105 pending.push(PendingDelta {
106 offset: obj_offset,
107 base_oid: None,
108 base_offset: Some(base_offset),
109 delta_data,
110 });
111 }
112 7 => {
113 let base_bytes = rd.read_exact(20)?;
115 let base_oid = ObjectId::from_bytes(base_bytes)?;
116 let delta_data = rd.decompress(size)?;
117 pending.push(PendingDelta {
118 offset: obj_offset,
119 base_oid: Some(base_oid),
120 base_offset: None,
121 delta_data,
122 });
123 }
124 other => {
125 return Err(Error::CorruptObject(format!(
126 "unknown packed-object type {other}"
127 )))
128 }
129 }
130 }
131
132 let consumed = rd.pos;
134 {
135 let mut hasher = Sha1::new();
136 hasher.update(&rd.data[..consumed]);
137 let digest = hasher.finalize();
138 let trailing = rd.read_exact(20)?;
139 if digest.as_slice() != trailing {
140 return Err(Error::CorruptObject(
141 "pack trailing checksum mismatch".to_owned(),
142 ));
143 }
144 }
145
146 let mut remaining = pending;
149 loop {
150 if remaining.is_empty() {
151 break;
152 }
153 let before = remaining.len();
154 let mut still_pending: Vec<PendingDelta> = Vec::new();
155
156 for delta in remaining {
157 let base = if let Some(base_off) = delta.base_offset {
158 by_offset.get(&base_off).cloned()
159 } else if let Some(ref base_id) = delta.base_oid {
160 if let Some(entry) = by_oid.get(base_id) {
161 Some(entry.clone())
162 } else if !opts.dry_run {
163 odb.read(base_id).ok().map(|obj| (obj.kind, obj.data))
164 } else {
165 None
166 }
167 } else {
168 None
169 };
170
171 if let Some((base_kind, base_data)) = base {
172 let result = apply_delta(&base_data, &delta.delta_data)?;
173 let oid = write_or_hash(base_kind, &result, odb, opts.dry_run)?;
174 by_offset.insert(delta.offset, (base_kind, result.clone()));
175 by_oid.insert(oid, (base_kind, result));
176 count += 1;
177 } else {
178 still_pending.push(delta);
179 }
180 }
181
182 remaining = still_pending;
183 if remaining.len() == before {
184 return Err(Error::CorruptObject(format!(
185 "{} delta(s) could not be resolved",
186 remaining.len()
187 )));
188 }
189 }
190
191 Ok(count)
192}
193
194fn write_or_hash(kind: ObjectKind, data: &[u8], odb: &Odb, dry_run: bool) -> Result<ObjectId> {
197 if dry_run {
198 Ok(Odb::hash_object_data(kind, data))
199 } else {
200 odb.write(kind, data)
201 }
202}
203
204fn type_code_to_kind(code: u8) -> Result<ObjectKind> {
206 match code {
207 1 => Ok(ObjectKind::Commit),
208 2 => Ok(ObjectKind::Tree),
209 3 => Ok(ObjectKind::Blob),
210 4 => Ok(ObjectKind::Tag),
211 _ => Err(Error::CorruptObject(format!(
212 "type code {code} is not a regular object type"
213 ))),
214 }
215}
216
/// Cursor over a pack stream that is held entirely in memory.
struct PackReader {
    /// The complete pack stream.
    data: Vec<u8>,
    /// Index into `data` of the next unread byte.
    pos: usize,
}
222
223impl PackReader {
224 fn new(data: Vec<u8>) -> Self {
225 Self { data, pos: 0 }
226 }
227
228 fn read_exact(&mut self, n: usize) -> Result<&[u8]> {
231 if self.pos + n > self.data.len() {
232 return Err(Error::CorruptObject(format!(
233 "pack stream truncated: need {n} bytes at offset {}",
234 self.pos
235 )));
236 }
237 let slice = &self.data[self.pos..self.pos + n];
238 self.pos += n;
239 Ok(slice)
240 }
241
242 fn read_byte(&mut self) -> Result<u8> {
244 if self.pos >= self.data.len() {
245 return Err(Error::CorruptObject(
246 "unexpected end of pack stream".to_owned(),
247 ));
248 }
249 let b = self.data[self.pos];
250 self.pos += 1;
251 Ok(b)
252 }
253
254 fn read_u32_be(&mut self) -> Result<u32> {
256 let bytes = self.read_exact(4)?;
257 Ok(u32::from_be_bytes(bytes.try_into().map_err(|_| {
258 Error::CorruptObject("u32 read failed".to_owned())
259 })?))
260 }
261
262 fn read_type_size(&mut self) -> Result<(u8, usize)> {
267 let c = self.read_byte()?;
268 let type_code = (c >> 4) & 0x7;
269 let mut size = (c & 0x0f) as usize;
270 let mut shift = 4u32;
271 let mut cur = c;
272 while cur & 0x80 != 0 {
273 cur = self.read_byte()?;
274 size |= ((cur & 0x7f) as usize) << shift;
275 shift += 7;
276 }
277 Ok((type_code, size))
278 }
279
280 fn read_ofs_neg_offset(&mut self) -> Result<usize> {
285 let mut c = self.read_byte()?;
286 let mut value = (c & 0x7f) as usize;
287 while c & 0x80 != 0 {
288 c = self.read_byte()?;
289 value = (value + 1) << 7 | (c & 0x7f) as usize;
290 }
291 Ok(value)
292 }
293
294 fn decompress(&mut self, expected_size: usize) -> Result<Vec<u8>> {
299 let slice = &self.data[self.pos..];
300 let mut decoder = ZlibDecoder::new(slice);
301 let mut out = Vec::with_capacity(expected_size);
302 decoder
303 .read_to_end(&mut out)
304 .map_err(|e| Error::Zlib(e.to_string()))?;
305 if out.len() != expected_size {
306 return Err(Error::CorruptObject(format!(
307 "decompressed {} bytes but expected {}",
308 out.len(),
309 expected_size
310 )));
311 }
312 self.pos += decoder.total_in() as usize;
313 Ok(out)
314 }
315}
316
317pub fn apply_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
331 let mut pos = 0usize;
332
333 let src_size = read_delta_varint(delta, &mut pos)?;
334 if src_size != base.len() {
335 return Err(Error::CorruptObject(format!(
336 "delta source size {src_size} != base size {}",
337 base.len()
338 )));
339 }
340 let dest_size = read_delta_varint(delta, &mut pos)?;
341 let mut result = Vec::with_capacity(dest_size);
342
343 while pos < delta.len() {
344 let cmd = delta[pos];
345 pos += 1;
346 if cmd == 0 {
347 return Err(Error::CorruptObject(
348 "reserved opcode 0 in delta stream".to_owned(),
349 ));
350 }
351 if cmd & 0x80 != 0 {
352 let mut offset = 0usize;
355 let mut size = 0usize;
356
357 macro_rules! maybe_read_byte {
358 ($flag:expr, $shift:expr, $target:expr) => {
359 if cmd & $flag != 0 {
360 let b = *delta.get(pos).ok_or_else(|| {
361 Error::CorruptObject("truncated delta COPY operand".to_owned())
362 })?;
363 pos += 1;
364 $target |= (b as usize) << $shift;
365 }
366 };
367 }
368
369 maybe_read_byte!(0x01, 0, offset);
370 maybe_read_byte!(0x02, 8, offset);
371 maybe_read_byte!(0x04, 16, offset);
372 maybe_read_byte!(0x08, 24, offset);
373 maybe_read_byte!(0x10, 0, size);
374 maybe_read_byte!(0x20, 8, size);
375 maybe_read_byte!(0x40, 16, size);
376
377 if size == 0 {
378 size = 0x10000;
379 }
380
381 let end = offset.checked_add(size).ok_or_else(|| {
382 Error::CorruptObject("delta COPY range overflows usize".to_owned())
383 })?;
384 let chunk = base.get(offset..end).ok_or_else(|| {
385 Error::CorruptObject(format!(
386 "delta COPY [{offset},{end}) out of range (base is {} bytes)",
387 base.len()
388 ))
389 })?;
390 result.extend_from_slice(chunk);
391 } else {
392 let n = cmd as usize;
394 let chunk = delta
395 .get(pos..pos + n)
396 .ok_or_else(|| Error::CorruptObject("truncated delta INSERT data".to_owned()))?;
397 result.extend_from_slice(chunk);
398 pos += n;
399 }
400 }
401
402 if result.len() != dest_size {
403 return Err(Error::CorruptObject(format!(
404 "delta produced {} bytes but expected {dest_size}",
405 result.len()
406 )));
407 }
408
409 Ok(result)
410}
411
412fn read_delta_varint(data: &[u8], pos: &mut usize) -> Result<usize> {
416 let mut value = 0usize;
417 let mut shift = 0u32;
418 loop {
419 let b = *data
420 .get(*pos)
421 .ok_or_else(|| Error::CorruptObject("truncated delta varint".to_owned()))?;
422 *pos += 1;
423 value |= ((b & 0x7f) as usize) << shift;
424 shift += 7;
425 if b & 0x80 == 0 {
426 break;
427 }
428 }
429 Ok(value)
430}
431
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `body` against an object database rooted in a fresh temporary
    /// directory, which is removed again once `body` returns.
    fn with_temp_odb(body: impl FnOnce(&Odb)) {
        let tmp = tempfile::TempDir::new().unwrap();
        let objects_dir = tmp.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);
        body(&odb);
    }

    /// Builds a version-2 pack holding `objects` as full (non-delta) entries,
    /// terminated by the SHA-1 of everything before it.
    fn make_pack(objects: &[(ObjectKind, &[u8])]) -> Vec<u8> {
        use flate2::write::ZlibEncoder;
        use std::io::Write;

        let mut pack = Vec::new();
        pack.extend_from_slice(b"PACK");
        pack.extend_from_slice(&2u32.to_be_bytes());
        pack.extend_from_slice(&(objects.len() as u32).to_be_bytes());

        for (kind, data) in objects {
            let type_code: u8 = match kind {
                ObjectKind::Commit => 1,
                ObjectKind::Tree => 2,
                ObjectKind::Blob => 3,
                ObjectKind::Tag => 4,
            };

            // Entry header: type plus low 4 size bits, then 7-bit size groups;
            // every byte except the last carries the continuation bit.
            let mut rest = data.len() >> 4;
            let mut byte = ((type_code & 0x7) << 4) | (data.len() & 0x0f) as u8;
            while rest > 0 {
                pack.push(byte | 0x80);
                byte = (rest & 0x7f) as u8;
                rest >>= 7;
            }
            pack.push(byte);

            let mut enc = ZlibEncoder::new(Vec::new(), flate2::Compression::default());
            enc.write_all(data).unwrap();
            pack.extend_from_slice(&enc.finish().unwrap());
        }

        let mut hasher = Sha1::new();
        hasher.update(&pack);
        let digest = hasher.finalize();
        pack.extend_from_slice(digest.as_slice());
        pack
    }

    #[test]
    fn test_apply_delta_simple() {
        // src size 5, dest size 11, COPY(offset 0, size 5), INSERT 6 bytes.
        let mut delta = vec![5u8, 11, 0x80 | 0x01 | 0x10, 0, 5, 6];
        delta.extend_from_slice(b" world");

        let result = apply_delta(b"hello", &delta).unwrap();
        assert_eq!(result, b"hello world");
    }

    #[test]
    fn test_apply_delta_insert_only() {
        // src size 0, dest size 5, INSERT 5 bytes.
        let mut delta = vec![0u8, 5, 5];
        delta.extend_from_slice(b"hello");

        let result = apply_delta(b"", &delta).unwrap();
        assert_eq!(result, b"hello");
    }

    #[test]
    fn test_apply_delta_copy_only() {
        // src size 6, dest size 3, COPY(offset 2, size 3).
        let delta = vec![6u8, 3, 0x91, 2, 3];

        let result = apply_delta(b"abcdef", &delta).unwrap();
        assert_eq!(result, b"cde");
    }

    #[test]
    fn test_apply_delta_size_zero_means_65536() {
        let base = vec![0xABu8; 65536];

        // 65536 as a three-byte varint, used for both source and dest size.
        let size_varint = [
            0x80 | (65536 & 0x7f) as u8,
            0x80 | ((65536 >> 7) & 0x7f) as u8,
            ((65536 >> 14) & 0x7f) as u8,
        ];
        let mut delta = Vec::new();
        delta.extend_from_slice(&size_varint);
        delta.extend_from_slice(&size_varint);
        // COPY with no operand bytes: offset 0, size 0.
        delta.push(0x80u8);

        let result = apply_delta(&base, &delta).unwrap();
        assert_eq!(result.len(), 65536);
        assert!(result.iter().all(|&b| b == 0xAB));
    }

    #[test]
    fn test_unpack_objects_blobs() {
        with_temp_odb(|odb| {
            let pack = make_pack(&[
                (ObjectKind::Blob, b"hello\n"),
                (ObjectKind::Blob, b"world\n"),
            ]);

            let opts = UnpackOptions::default();
            let count = unpack_objects(&mut pack.as_slice(), odb, &opts).unwrap();
            assert_eq!(count, 2);

            let oid1 = Odb::hash_object_data(ObjectKind::Blob, b"hello\n");
            let oid2 = Odb::hash_object_data(ObjectKind::Blob, b"world\n");
            let obj1 = odb.read(&oid1).unwrap();
            let obj2 = odb.read(&oid2).unwrap();
            assert_eq!(obj1.data, b"hello\n");
            assert_eq!(obj2.data, b"world\n");
        });
    }

    #[test]
    fn test_unpack_objects_dry_run_writes_nothing() {
        with_temp_odb(|odb| {
            let pack = make_pack(&[(ObjectKind::Blob, b"test content")]);

            let opts = UnpackOptions {
                dry_run: true,
                quiet: true,
            };
            let count = unpack_objects(&mut pack.as_slice(), odb, &opts).unwrap();
            assert_eq!(count, 1);

            let oid = Odb::hash_object_data(ObjectKind::Blob, b"test content");
            assert!(!odb.exists(&oid));
        });
    }

    #[test]
    fn test_unpack_objects_bad_signature() {
        with_temp_odb(|odb| {
            let mut bad = b"NOPE\x00\x00\x00\x02\x00\x00\x00\x00".to_vec();
            bad.extend_from_slice(&[0u8; 20]);

            let opts = UnpackOptions::default();
            let err = unpack_objects(&mut bad.as_slice(), odb, &opts).unwrap_err();
            assert!(err.to_string().contains("invalid signature"));
        });
    }

    #[test]
    fn test_unpack_objects_checksum_mismatch() {
        with_temp_odb(|odb| {
            let mut pack = make_pack(&[(ObjectKind::Blob, b"data")]);
            // Corrupt the final byte of the trailing checksum.
            let n = pack.len();
            pack[n - 1] ^= 0xFF;

            let opts = UnpackOptions::default();
            let err = unpack_objects(&mut pack.as_slice(), odb, &opts).unwrap_err();
            assert!(err.to_string().contains("checksum"));
        });
    }

    #[test]
    fn test_apply_delta_source_size_mismatch() {
        // The delta declares a 3-byte source but the base has only 2 bytes.
        let delta = [3u8, 2u8, 2u8, b'h', b'i'];

        let err = apply_delta(b"hi", &delta).unwrap_err();
        assert!(err.to_string().contains("source size"));
    }
}