Skip to main content

grit_lib/
unpack_objects.rs

1//! `unpack-objects`: unpack a pack stream into loose objects.
2//!
3//! Reads a pack-format byte stream, validates the trailing checksum, and
4//! writes each object as a loose file in the object database.  Delta objects
5//! (both `OFS_DELTA` and `REF_DELTA`) are resolved against already-unpacked
6//! objects or objects already present in the ODB.
7//!
8//! Large blobs are written to the ODB and dropped from the in-memory maps so
9//! cloning multi-gigabyte repositories does not require holding the full pack
10//! in RAM (streaming read + bounded retention).
11
12use std::borrow::Cow;
13use std::collections::HashMap;
14use std::io::{self, Read};
15
16use flate2::read::ZlibDecoder;
17use flate2::{Decompress, FlushDecompress, Status};
18use sha1::{Digest, Sha1};
19
20use crate::error::{Error, Result};
21use crate::gitmodules;
22use crate::index::MODE_GITLINK;
23use crate::objects::{parse_commit, parse_tag, parse_tree, Object, ObjectId, ObjectKind};
24use crate::odb::Odb;
25
/// Options controlling `unpack-objects` behaviour.
///
/// The derived [`Default`] turns every flag off and leaves the input size unlimited.
#[derive(Debug, Default)]
pub struct UnpackOptions {
    /// Validate and decompress objects but do not write them to the ODB.
    pub dry_run: bool,
    /// Suppress informational output.
    pub quiet: bool,
    /// Reject packs whose commits/trees/tags reference missing objects.
    pub strict: bool,
    /// Maximum number of raw pack bytes that may be consumed (including the 20-byte trailer).
    ///
    /// Matches Git's `unpack-objects --max-input-size` / `receive.maxInputSize`: bytes consumed
    /// from the pack stream are counted, and unpacking fails as soon as the count exceeds the
    /// limit. `None` means no limit.
    pub max_input_bytes: Option<u64>,
}
41
/// A delta that could not yet be resolved because its base was not yet known.
///
/// The pack parsers in this file always set exactly one of `base_oid` / `base_offset`,
/// depending on whether the entry was a `REF_DELTA` or an `OFS_DELTA`.
struct PendingDelta {
    /// Byte offset of this object in the pack stream (used to anchor
    /// `OFS_DELTA` back-references from later objects).
    offset: usize,
    /// For `REF_DELTA`: SHA-1 of the base object.
    base_oid: Option<ObjectId>,
    /// For `OFS_DELTA`: absolute byte offset of the base object.
    base_offset: Option<usize>,
    /// Decompressed delta data.
    delta_data: Vec<u8>,
}
54
55/// Unpack a pack stream from `reader` into `odb`.
56///
57/// Reads the complete pack from `reader`, validates the trailing SHA-1
58/// checksum, unpacks all objects (including full delta-chain resolution), and —
59/// unless [`UnpackOptions::dry_run`] is set — writes each object to `odb`.
60///
61/// Returns the total number of objects processed.
62///
63/// # Errors
64///
65/// - [`Error::CorruptObject`] — invalid pack format, checksum mismatch, or
66///   unresolvable delta chains.
67/// - [`Error::Io`] — I/O failure reading from `reader`.
68/// - [`Error::Zlib`] — decompression failure.
69pub fn unpack_objects(reader: &mut dyn Read, odb: &Odb, opts: &UnpackOptions) -> Result<usize> {
70    /// Blobs larger than this stay on disk only (after write) so huge packs do
71    /// not retain every blob in RAM. Smaller objects are kept for delta bases
72    /// and `--strict` graph walks without extra ODB reads.
73    const MAX_RETAIN_BYTES: usize = 1024 * 1024;
74
75    let mut rd = StreamingPackReader::new(reader, opts.max_input_bytes);
76
77    // Validate magic and version.
78    let sig = rd.read_exact_n(4)?;
79    if sig != b"PACK" {
80        return Err(Error::CorruptObject(
81            "not a pack stream: invalid signature".to_owned(),
82        ));
83    }
84    let version = rd.read_u32_be()?;
85    if version != 2 && version != 3 {
86        return Err(Error::CorruptObject(format!(
87            "unsupported pack version {version}"
88        )));
89    }
90    let nr_objects = rd.read_u32_be()? as usize;
91
92    // pack-stream offset → resolved object (see [`PackedObjectEntry`]).
93    let mut by_offset: HashMap<usize, PackedObjectEntry> = HashMap::new();
94    // ObjectId → in-pack object for REF_DELTA resolution and strict checks.
95    let mut by_oid: HashMap<ObjectId, PackedObjectEntry> = HashMap::new();
96
97    let mut pending: Vec<PendingDelta> = Vec::new();
98    let mut count = 0usize;
99
100    for _ in 0..nr_objects {
101        let obj_offset = rd.stream_pos();
102        let (type_code, size) = rd.read_type_size()?;
103
104        match type_code {
105            1..=4 => {
106                let kind = type_code_to_kind(type_code)?;
107                let data = rd.decompress(size)?;
108                let oid = write_or_hash(kind, &data, odb, opts.dry_run)?;
109                let entry = packed_entry_after_write(kind, data, oid, odb, opts, MAX_RETAIN_BYTES);
110                by_offset.insert(obj_offset, entry.clone());
111                by_oid.insert(oid, entry);
112                count += 1;
113            }
114            6 => {
115                // OFS_DELTA: base at a negative encoded offset from this object.
116                let neg = rd.read_ofs_neg_offset()?;
117                let base_offset = obj_offset.checked_sub(neg).ok_or_else(|| {
118                    Error::CorruptObject("ofs-delta base offset underflow".to_owned())
119                })?;
120                let delta_data = rd.decompress(size)?;
121                pending.push(PendingDelta {
122                    offset: obj_offset,
123                    base_oid: None,
124                    base_offset: Some(base_offset),
125                    delta_data,
126                });
127            }
128            7 => {
129                // REF_DELTA: base identified by its SHA-1.
130                let base_bytes = rd.read_exact_n(20)?;
131                let base_oid = ObjectId::from_bytes(&base_bytes)?;
132                let delta_data = rd.decompress(size)?;
133                pending.push(PendingDelta {
134                    offset: obj_offset,
135                    base_oid: Some(base_oid),
136                    base_offset: None,
137                    delta_data,
138                });
139            }
140            other => {
141                return Err(Error::CorruptObject(format!(
142                    "unknown packed-object type {other}"
143                )))
144            }
145        }
146    }
147
148    // Trailing pack checksum (SHA-1 of all preceding bytes); not included in the hash.
149    let digest = rd.finalize_hasher();
150    let trailing = rd.read_trailer_20()?;
151    if digest.as_slice() != trailing {
152        return Err(Error::CorruptObject(
153            "pack trailing checksum mismatch".to_owned(),
154        ));
155    }
156
157    // Resolve pending deltas iteratively.  Each pass resolves all deltas whose
158    // base is now known; repeat until none remain or we stall (corrupt pack).
159    let mut remaining = pending;
160    loop {
161        if remaining.is_empty() {
162            break;
163        }
164        let before = remaining.len();
165        let mut still_pending: Vec<PendingDelta> = Vec::new();
166
167        for delta in remaining {
168            let base_res: Option<Result<(ObjectKind, Cow<'_, [u8]>)>> =
169                if let Some(base_off) = delta.base_offset {
170                    by_offset
171                        .get(&base_off)
172                        .map(|e| entry_object_bytes(e, odb).map(|d| (e.kind(), d)))
173                } else if let Some(ref base_id) = delta.base_oid {
174                    if let Some(e) = by_oid.get(base_id) {
175                        Some(entry_object_bytes(e, odb).map(|d| (e.kind(), d)))
176                    } else if !opts.dry_run {
177                        odb.read(base_id)
178                            .ok()
179                            .map(|obj| Ok((obj.kind, Cow::Owned(obj.data))))
180                    } else {
181                        None
182                    }
183                } else {
184                    None
185                };
186
187            match base_res {
188                Some(Ok((base_kind, base_data))) => {
189                    let result = apply_delta(base_data.as_ref(), &delta.delta_data)?;
190                    let oid = write_or_hash(base_kind, &result, odb, opts.dry_run)?;
191                    let new_entry = packed_entry_after_write(
192                        base_kind,
193                        result,
194                        oid,
195                        odb,
196                        opts,
197                        MAX_RETAIN_BYTES,
198                    );
199                    by_offset.insert(delta.offset, new_entry.clone());
200                    by_oid.insert(oid, new_entry);
201                    count += 1;
202                }
203                Some(Err(e)) => return Err(e),
204                None => still_pending.push(delta),
205            }
206        }
207
208        remaining = still_pending;
209        if remaining.len() == before {
210            return Err(Error::CorruptObject(format!(
211                "{} delta(s) could not be resolved",
212                remaining.len()
213            )));
214        }
215    }
216
217    if opts.strict {
218        let mut dot_fsck_map: HashMap<ObjectId, (ObjectKind, Vec<u8>)> =
219            HashMap::with_capacity(by_oid.len());
220        for (oid, entry) in &by_oid {
221            let kind = entry.kind();
222            let data = match entry {
223                PackedObjectEntry::InMemory { data, .. } => data.clone(),
224                PackedObjectEntry::BlobOnDisk { oid: blob_oid } => odb.read(blob_oid)?.data,
225            };
226            dot_fsck_map.insert(*oid, (kind, data));
227        }
228        gitmodules::verify_packed_dot_special(&dot_fsck_map)?;
229        strict_verify_packed_references_map(Some(odb), &by_oid)?;
230    }
231
232    Ok(count)
233}
234
235/// Resolved non-delta object: either full bytes in memory or a large blob on disk.
236#[derive(Debug, Clone)]
237enum PackedObjectEntry {
238    InMemory { kind: ObjectKind, data: Vec<u8> },
239    BlobOnDisk { oid: ObjectId },
240}
241
242impl PackedObjectEntry {
243    fn kind(&self) -> ObjectKind {
244        match self {
245            PackedObjectEntry::InMemory { kind, .. } => *kind,
246            PackedObjectEntry::BlobOnDisk { .. } => ObjectKind::Blob,
247        }
248    }
249}
250
251fn packed_entry_after_write(
252    kind: ObjectKind,
253    data: Vec<u8>,
254    oid: ObjectId,
255    _odb: &Odb,
256    opts: &UnpackOptions,
257    max_retain: usize,
258) -> PackedObjectEntry {
259    if !opts.dry_run && kind == ObjectKind::Blob && data.len() > max_retain {
260        PackedObjectEntry::BlobOnDisk { oid }
261    } else {
262        PackedObjectEntry::InMemory { kind, data }
263    }
264}
265
266fn entry_object_bytes<'a>(entry: &'a PackedObjectEntry, odb: &Odb) -> Result<Cow<'a, [u8]>> {
267    match entry {
268        PackedObjectEntry::InMemory { data, .. } => Ok(Cow::Borrowed(data.as_slice())),
269        PackedObjectEntry::BlobOnDisk { oid } => Ok(Cow::Owned(odb.read(oid)?.data)),
270    }
271}
272
273fn strict_verify_packed_references_map(
274    odb: Option<&Odb>,
275    pack: &HashMap<ObjectId, PackedObjectEntry>,
276) -> Result<()> {
277    for entry in pack.values() {
278        match entry {
279            PackedObjectEntry::BlobOnDisk { .. } => {}
280            PackedObjectEntry::InMemory { kind, data } => match kind {
281                ObjectKind::Tree => {
282                    for e in parse_tree(data)? {
283                        // Gitlink (submodule) entries point at commits that live
284                        // in the submodule repository, not the superproject's
285                        // pack/ODB. Skip them in the connectivity walk, matching
286                        // upstream git (git/fsck.c:374 `if (S_ISGITLINK) continue;`).
287                        if e.mode == MODE_GITLINK {
288                            continue;
289                        }
290                        if !strict_ref_resolves_map(&e.oid, pack, odb) {
291                            return Err(Error::CorruptObject(format!(
292                                "strict: missing object {} referenced by tree",
293                                e.oid.to_hex()
294                            )));
295                        }
296                    }
297                }
298                ObjectKind::Commit => {
299                    let c = parse_commit(data)?;
300                    if !strict_ref_resolves_map(&c.tree, pack, odb) {
301                        return Err(Error::CorruptObject(format!(
302                            "strict: missing tree {} referenced by commit",
303                            c.tree.to_hex()
304                        )));
305                    }
306                    for p in &c.parents {
307                        if !strict_ref_resolves_map(p, pack, odb) {
308                            return Err(Error::CorruptObject(format!(
309                                "strict: missing parent {} referenced by commit",
310                                p.to_hex()
311                            )));
312                        }
313                    }
314                }
315                ObjectKind::Tag => {
316                    let t = parse_tag(data)?;
317                    if !strict_ref_resolves_map(&t.object, pack, odb) {
318                        return Err(Error::CorruptObject(format!(
319                            "strict: missing object {} referenced by tag",
320                            t.object.to_hex()
321                        )));
322                    }
323                }
324                ObjectKind::Blob => {}
325            },
326        }
327    }
328    Ok(())
329}
330
331fn strict_ref_resolves_map(
332    oid: &ObjectId,
333    pack: &HashMap<ObjectId, PackedObjectEntry>,
334    odb: Option<&Odb>,
335) -> bool {
336    pack.contains_key(oid) || odb.is_some_and(|o| o.exists(oid))
337}
338
339fn strict_ref_resolves(
340    oid: &ObjectId,
341    pack: &std::collections::HashMap<ObjectId, (ObjectKind, Vec<u8>)>,
342    odb: Option<&Odb>,
343) -> bool {
344    pack.contains_key(oid) || odb.is_some_and(|o| o.exists(oid))
345}
346
347/// Verifies that references from commits, trees, and tags resolve to objects present in `pack`
348/// or, when `odb` is [`Some`], to loose objects in that database.
349///
350/// Use [`None`] for `odb` when indexing or unpacking in a context with no repository (Git allows
351/// `index-pack --strict` outside a work tree when the pack is self-contained).
352pub fn strict_verify_packed_references(
353    odb: Option<&Odb>,
354    pack: &HashMap<ObjectId, (ObjectKind, Vec<u8>)>,
355) -> Result<()> {
356    for (kind, data) in pack.values() {
357        match kind {
358            ObjectKind::Tree => {
359                for e in parse_tree(data)? {
360                    // Gitlink (submodule) entries point at commits that live in
361                    // the submodule repository, not this pack/ODB. Skip them in
362                    // the connectivity walk, matching upstream git
363                    // (git/fsck.c:374 `if (S_ISGITLINK) continue;`).
364                    if e.mode == MODE_GITLINK {
365                        continue;
366                    }
367                    if !strict_ref_resolves(&e.oid, pack, odb) {
368                        return Err(Error::CorruptObject(format!(
369                            "strict: missing object {} referenced by tree",
370                            e.oid.to_hex()
371                        )));
372                    }
373                }
374            }
375            ObjectKind::Commit => {
376                let c = parse_commit(data)?;
377                if !strict_ref_resolves(&c.tree, pack, odb) {
378                    return Err(Error::CorruptObject(format!(
379                        "strict: missing tree {} referenced by commit",
380                        c.tree.to_hex()
381                    )));
382                }
383                for p in &c.parents {
384                    if !strict_ref_resolves(p, pack, odb) {
385                        return Err(Error::CorruptObject(format!(
386                            "strict: missing parent {} referenced by commit",
387                            p.to_hex()
388                        )));
389                    }
390                }
391            }
392            ObjectKind::Tag => {
393                let t = parse_tag(data)?;
394                if !strict_ref_resolves(&t.object, pack, odb) {
395                    return Err(Error::CorruptObject(format!(
396                        "strict: missing object {} referenced by tag",
397                        t.object.to_hex()
398                    )));
399                }
400            }
401            ObjectKind::Blob => {}
402        }
403    }
404    Ok(())
405}
406
407/// Parse a pack byte stream and return every resolved object (after delta resolution) keyed by OID.
408///
409/// Does not write to any object database. Used for receive-pack connectivity checks before
410/// applying a push to the permanent ODB.
411///
412/// Thin-pack bases may be resolved from `odb` when they are not present in the pack.
413pub fn pack_bytes_to_object_map(data: &[u8], odb: &Odb) -> Result<HashMap<ObjectId, Object>> {
414    let rd = PackReader::new(data.to_vec());
415    build_pack_object_map(rd, odb)
416}
417
418fn build_pack_object_map(mut rd: PackReader, odb: &Odb) -> Result<HashMap<ObjectId, Object>> {
419    let sig = rd.read_exact(4)?;
420    if sig != b"PACK" {
421        return Err(Error::CorruptObject(
422            "not a pack stream: invalid signature".to_owned(),
423        ));
424    }
425    let version = rd.read_u32_be()?;
426    if version != 2 && version != 3 {
427        return Err(Error::CorruptObject(format!(
428            "unsupported pack version {version}"
429        )));
430    }
431    let nr_objects = rd.read_u32_be()? as usize;
432
433    let mut by_offset: HashMap<usize, (ObjectKind, Vec<u8>)> = HashMap::new();
434    let mut by_oid: HashMap<ObjectId, (ObjectKind, Vec<u8>)> = HashMap::new();
435    let mut pending: Vec<PendingDelta> = Vec::new();
436
437    fn base_from_pack_or_odb(
438        by_oid: &HashMap<ObjectId, (ObjectKind, Vec<u8>)>,
439        odb: &Odb,
440        id: &ObjectId,
441    ) -> Option<(ObjectKind, Vec<u8>)> {
442        if let Some(e) = by_oid.get(id) {
443            return Some(e.clone());
444        }
445        odb.read(id).ok().map(|o| (o.kind, o.data))
446    }
447
448    for _ in 0..nr_objects {
449        let obj_offset = rd.pos;
450        let (type_code, size) = rd.read_type_size()?;
451
452        match type_code {
453            1..=4 => {
454                let kind = type_code_to_kind(type_code)?;
455                let data = rd.decompress(size)?;
456                let oid = Odb::hash_object_data(kind, &data);
457                by_offset.insert(obj_offset, (kind, data.clone()));
458                by_oid.insert(oid, (kind, data));
459            }
460            6 => {
461                let neg = rd.read_ofs_neg_offset()?;
462                let base_offset = obj_offset.checked_sub(neg).ok_or_else(|| {
463                    Error::CorruptObject("ofs-delta base offset underflow".to_owned())
464                })?;
465                let delta_data = rd.decompress(size)?;
466                pending.push(PendingDelta {
467                    offset: obj_offset,
468                    base_oid: None,
469                    base_offset: Some(base_offset),
470                    delta_data,
471                });
472            }
473            7 => {
474                let base_bytes = rd.read_exact(20)?;
475                let base_oid = ObjectId::from_bytes(base_bytes)?;
476                let delta_data = rd.decompress(size)?;
477                pending.push(PendingDelta {
478                    offset: obj_offset,
479                    base_oid: Some(base_oid),
480                    base_offset: None,
481                    delta_data,
482                });
483            }
484            other => {
485                return Err(Error::CorruptObject(format!(
486                    "unknown packed-object type {other}"
487                )))
488            }
489        }
490    }
491
492    let consumed = rd.pos;
493    {
494        let mut hasher = Sha1::new();
495        hasher.update(&rd.data[..consumed]);
496        let digest = hasher.finalize();
497        let trailing = rd.read_exact(20)?;
498        if digest.as_slice() != trailing {
499            return Err(Error::CorruptObject(
500                "pack trailing checksum mismatch".to_owned(),
501            ));
502        }
503    }
504
505    let mut remaining = pending;
506    loop {
507        if remaining.is_empty() {
508            break;
509        }
510        let before = remaining.len();
511        let mut still_pending: Vec<PendingDelta> = Vec::new();
512
513        for delta in remaining {
514            let base = if let Some(base_off) = delta.base_offset {
515                by_offset.get(&base_off).cloned()
516            } else if let Some(ref base_id) = delta.base_oid {
517                base_from_pack_or_odb(&by_oid, odb, base_id)
518            } else {
519                None
520            };
521
522            if let Some((base_kind, base_data)) = base {
523                let result = apply_delta(&base_data, &delta.delta_data)?;
524                let oid = Odb::hash_object_data(base_kind, &result);
525                by_offset.insert(delta.offset, (base_kind, result.clone()));
526                by_oid.insert(oid, (base_kind, result));
527            } else {
528                still_pending.push(delta);
529            }
530        }
531
532        remaining = still_pending;
533        if remaining.len() == before {
534            return Err(Error::CorruptObject(format!(
535                "{} delta(s) could not be resolved",
536                remaining.len()
537            )));
538        }
539    }
540
541    Ok(by_oid
542        .into_iter()
543        .map(|(oid, (kind, data))| (oid, Object::new(kind, data)))
544        .collect())
545}
546
547/// Either write `data` as a loose object (if `!dry_run`) or just compute its
548/// [`ObjectId`] without touching the filesystem.
549fn write_or_hash(kind: ObjectKind, data: &[u8], odb: &Odb, dry_run: bool) -> Result<ObjectId> {
550    if dry_run {
551        Ok(Odb::hash_object_data(kind, data))
552    } else {
553        // Always materialize into this ODB: objects reachable only via alternates must still be
554        // written locally (matches git unpack-objects; t5519-push-alternates).
555        odb.write_local(kind, data)
556    }
557}
558
559/// Convert a pack object type code to an [`ObjectKind`].
560fn type_code_to_kind(code: u8) -> Result<ObjectKind> {
561    match code {
562        1 => Ok(ObjectKind::Commit),
563        2 => Ok(ObjectKind::Tree),
564        3 => Ok(ObjectKind::Blob),
565        4 => Ok(ObjectKind::Tag),
566        _ => Err(Error::CorruptObject(format!(
567            "type code {code} is not a regular object type"
568        ))),
569    }
570}
571
572/// Low-level cursor over a buffered pack byte stream (in-memory pack parsing).
573struct PackReader {
574    data: Vec<u8>,
575    pos: usize,
576}
577
578impl PackReader {
579    fn new(data: Vec<u8>) -> Self {
580        Self { data, pos: 0 }
581    }
582
583    /// Read exactly `n` bytes and advance the cursor, returning a slice into
584    /// the internal buffer.
585    fn read_exact(&mut self, n: usize) -> Result<&[u8]> {
586        if self.pos + n > self.data.len() {
587            return Err(Error::CorruptObject(format!(
588                "pack stream truncated: need {n} bytes at offset {}",
589                self.pos
590            )));
591        }
592        let slice = &self.data[self.pos..self.pos + n];
593        self.pos += n;
594        Ok(slice)
595    }
596
597    /// Read a single byte and advance the cursor.
598    fn read_byte(&mut self) -> Result<u8> {
599        if self.pos >= self.data.len() {
600            return Err(Error::CorruptObject(
601                "unexpected end of pack stream".to_owned(),
602            ));
603        }
604        let b = self.data[self.pos];
605        self.pos += 1;
606        Ok(b)
607    }
608
609    /// Read a big-endian `u32`.
610    fn read_u32_be(&mut self) -> Result<u32> {
611        let bytes = self.read_exact(4)?;
612        Ok(u32::from_be_bytes(bytes.try_into().map_err(|_| {
613            Error::CorruptObject("u32 read failed".to_owned())
614        })?))
615    }
616
617    /// Read the packed-object type + size header (variable-length big-endian
618    /// encoding with the type in bits 4-6 of the first byte).
619    ///
620    /// Returns `(type_code, uncompressed_size)`.
621    fn read_type_size(&mut self) -> Result<(u8, usize)> {
622        let c = self.read_byte()?;
623        let type_code = (c >> 4) & 0x7;
624        let mut size = (c & 0x0f) as usize;
625        let mut shift = 4u32;
626        let mut cur = c;
627        while cur & 0x80 != 0 {
628            cur = self.read_byte()?;
629            size |= ((cur & 0x7f) as usize) << shift;
630            shift += 7;
631        }
632        Ok((type_code, size))
633    }
634
635    /// Read an `OFS_DELTA` negative-offset value.
636    ///
637    /// The encoding uses a big-endian variable-length integer with a +1 bias
638    /// on each continuation byte, yielding values ≥ 1.
639    fn read_ofs_neg_offset(&mut self) -> Result<usize> {
640        let mut c = self.read_byte()?;
641        let mut value = (c & 0x7f) as usize;
642        while c & 0x80 != 0 {
643            c = self.read_byte()?;
644            value = (value + 1) << 7 | (c & 0x7f) as usize;
645        }
646        Ok(value)
647    }
648
649    /// Decompress zlib-compressed data starting at the current cursor position.
650    ///
651    /// Advances the cursor by exactly the number of compressed bytes consumed.
652    /// Returns an error if the decompressed length differs from `expected_size`.
653    fn decompress(&mut self, expected_size: usize) -> Result<Vec<u8>> {
654        let slice = &self.data[self.pos..];
655        let mut decoder = ZlibDecoder::new(slice);
656        let mut out = Vec::with_capacity(expected_size);
657        decoder
658            .read_to_end(&mut out)
659            .map_err(|e| Error::Zlib(e.to_string()))?;
660        if out.len() != expected_size {
661            return Err(Error::CorruptObject(format!(
662                "decompressed {} bytes but expected {}",
663                out.len(),
664                expected_size
665            )));
666        }
667        self.pos += decoder.total_in() as usize;
668        Ok(out)
669    }
670}
671
672fn io_to_corrupt_eof(e: io::Error, stream_pos: usize, context: &str) -> Error {
673    if e.kind() == io::ErrorKind::UnexpectedEof {
674        Error::CorruptObject(format!(
675            "pack stream truncated ({context}) at offset {stream_pos}"
676        ))
677    } else {
678        Error::Io(e)
679    }
680}
681
/// Streaming cursor over a pack file: hashes body bytes incrementally (no full-buffer read).
///
/// Raw pack bytes are either consumed as object headers (via [`Self::read_byte`]) or as zlib
/// payloads.  Zlib decoders may read ahead; overflow bytes stay in [`Self::pending`] so the next
/// object header or zlib stream starts at the correct offset.
struct StreamingPackReader<'a> {
    /// Underlying byte source for the pack.
    inner: &'a mut dyn Read,
    /// Running SHA-1 over the pack bytes consumed so far.
    pack_hasher: Sha1,
    /// Number of pack bytes consumed so far; also the offset of the next unread byte.
    stream_pos: usize,
    /// Optional cap on `stream_pos`, checked by `enforce_max_input`.
    max_input_bytes: Option<u64>,
    /// Bytes already read from `inner` but not yet consumed by the current parsing step.
    ///
    /// These bytes are not part of the pack hash yet: `read_from_source` feeds them to
    /// `pack_hasher` (and advances `stream_pos`) only when it drains them from this buffer.
    pending: Vec<u8>,
}
696
697impl<'a> StreamingPackReader<'a> {
698    fn new(inner: &'a mut dyn Read, max_input_bytes: Option<u64>) -> Self {
699        Self {
700            inner,
701            pack_hasher: Sha1::new(),
702            stream_pos: 0,
703            max_input_bytes,
704            pending: Vec::new(),
705        }
706    }
707
708    fn stream_pos(&self) -> usize {
709        self.stream_pos
710    }
711
712    fn enforce_max_input(&self) -> Result<()> {
713        if let Some(limit) = self.max_input_bytes {
714            let pos = u64::try_from(self.stream_pos)
715                .map_err(|_| Error::CorruptObject("pack stream position overflow".to_owned()))?;
716            if pos > limit {
717                return Err(Error::CorruptObject(
718                    "pack exceeds maximum allowed size".to_owned(),
719                ));
720            }
721        }
722        Ok(())
723    }
724
725    /// Read pack-body bytes (hashed). Used for headers and non-zlib payload reads only.
726    fn read_from_source(&mut self, buf: &mut [u8]) -> Result<usize> {
727        let n = if !self.pending.is_empty() {
728            let take = buf.len().min(self.pending.len());
729            buf[..take].copy_from_slice(&self.pending[..take]);
730            self.pending.drain(..take);
731            take
732        } else {
733            self.inner.read(buf).map_err(Error::Io)?
734        };
735        if n > 0 {
736            self.pack_hasher.update(&buf[..n]);
737            self.stream_pos += n;
738            self.enforce_max_input()?;
739        }
740        Ok(n)
741    }
742
743    fn read_byte(&mut self) -> Result<u8> {
744        let mut b = [0u8; 1];
745        let n = self.read_from_source(&mut b)?;
746        if n == 0 {
747            return Err(Error::CorruptObject(format!(
748                "pack stream truncated (read byte) at offset {}",
749                self.stream_pos
750            )));
751        }
752        Ok(b[0])
753    }
754
755    fn read_exact_n(&mut self, n: usize) -> Result<Vec<u8>> {
756        let mut v = vec![0u8; n];
757        let mut got = 0usize;
758        while got < n {
759            let m = self.read_from_source(&mut v[got..n])?;
760            if m == 0 {
761                return Err(Error::CorruptObject(format!(
762                    "pack stream truncated (read exact) at offset {}",
763                    self.stream_pos
764                )));
765            }
766            got += m;
767        }
768        Ok(v)
769    }
770
771    fn read_u32_be(&mut self) -> Result<u32> {
772        let mut b = [0u8; 4];
773        let mut got = 0usize;
774        while got < 4 {
775            let m = self.read_from_source(&mut b[got..4])?;
776            if m == 0 {
777                return Err(Error::CorruptObject(format!(
778                    "pack stream truncated (read u32) at offset {}",
779                    self.stream_pos
780                )));
781            }
782            got += m;
783        }
784        Ok(u32::from_be_bytes(b))
785    }
786
787    fn read_type_size(&mut self) -> Result<(u8, usize)> {
788        let c = self.read_byte()?;
789        let type_code = (c >> 4) & 0x7;
790        let mut size = (c & 0x0f) as usize;
791        let mut shift = 4u32;
792        let mut cur = c;
793        while cur & 0x80 != 0 {
794            cur = self.read_byte()?;
795            size |= ((cur & 0x7f) as usize) << shift;
796            shift += 7;
797        }
798        Ok((type_code, size))
799    }
800
801    fn read_ofs_neg_offset(&mut self) -> Result<usize> {
802        let mut c = self.read_byte()?;
803        let mut value = (c & 0x7f) as usize;
804        while c & 0x80 != 0 {
805            c = self.read_byte()?;
806            value = (value + 1) << 7 | (c & 0x7f) as usize;
807        }
808        Ok(value)
809    }
810
811    /// Pull zlib-compressed bytes until one object inflates to `expected_size` bytes.
812    ///
813    /// Bytes read from `inner` into `pending` are not hashed until we know how many belong to the
814    /// zlib stream (`total_in()`). Lookahead past the zlib end (including the 20-byte pack
815    /// trailer) must never be fed to the pack checksum.
816    ///
817    /// When the pack arrives in small chunks (e.g. side-band-64k from `upload-pack`), `flate2` may
818    /// return an error before the full deflate stream is in `pending`. Retry after reading more
819    /// from `inner` (same idea as [`PackReader::decompress`], which sees the whole zlib at once).
820    fn decompress(&mut self, expected_size: usize) -> Result<Vec<u8>> {
821        // `Read::read_exact` into an empty buffer returns `Ok` immediately without touching the
822        // decoder, so a 0-byte packed object would leave the zlib header in `pending` and desync
823        // the pack stream (bundle / clone unpack). Always run the zlib decoder once.
824        if expected_size == 0 {
825            const CHUNK: usize = 64 * 1024;
826            let mut scratch = [0u8; CHUNK];
827            loop {
828                let mut cursor = std::io::Cursor::new(self.pending.as_slice());
829                let mut z = ZlibDecoder::new(&mut cursor);
830                let mut sink = [0u8; 1];
831                match z.read(&mut sink) {
832                    Ok(0) => {
833                        let consumed = z.total_in() as usize;
834                        if consumed > self.pending.len() {
835                            return Err(Error::CorruptObject(
836                                "zlib total_in exceeds pending buffer".to_owned(),
837                            ));
838                        }
839                        if consumed == 0 {
840                            let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
841                            if n == 0 {
842                                return Err(Error::CorruptObject(format!(
843                                    "pack stream truncated (zlib) at offset {}",
844                                    self.stream_pos
845                                )));
846                            }
847                            self.pending.extend_from_slice(&scratch[..n]);
848                            continue;
849                        }
850                        self.pack_hasher.update(&self.pending[..consumed]);
851                        self.stream_pos += consumed;
852                        self.pending.drain(..consumed);
853                        self.enforce_max_input()?;
854                        return Ok(Vec::new());
855                    }
856                    Ok(_) => {
857                        return Err(Error::CorruptObject(
858                            "0-byte packed object inflated to non-empty output".to_owned(),
859                        ));
860                    }
861                    Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => {
862                        let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
863                        if n == 0 {
864                            return Err(Error::CorruptObject(format!(
865                                "pack stream truncated (zlib) at offset {}",
866                                self.stream_pos
867                            )));
868                        }
869                        self.pending.extend_from_slice(&scratch[..n]);
870                    }
871                    Err(e) => return Err(Error::Zlib(e.to_string())),
872                }
873            }
874        }
875
876        const CHUNK: usize = 64 * 1024;
877        let mut scratch = [0u8; CHUNK];
878
879        let mut out = vec![0u8; expected_size];
880        let mut z = Decompress::new(true);
881        let mut out_pos = 0usize;
882        let mut eof = false;
883        loop {
884            if self.pending.is_empty() && !eof {
885                let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
886                if n == 0 {
887                    eof = true;
888                } else {
889                    self.pending.extend_from_slice(&scratch[..n]);
890                }
891            }
892
893            let flush = if eof && self.pending.is_empty() {
894                FlushDecompress::Finish
895            } else {
896                FlushDecompress::None
897            };
898
899            let before_in = z.total_in();
900            let before_out = z.total_out();
901            let status = z
902                .decompress(self.pending.as_slice(), &mut out[out_pos..], flush)
903                .map_err(|e| Error::Zlib(e.to_string()))?;
904            let consumed = (z.total_in() - before_in) as usize;
905            if consumed > self.pending.len() {
906                return Err(Error::CorruptObject(
907                    "zlib consumed more than pending buffer".to_owned(),
908                ));
909            }
910            self.pack_hasher.update(&self.pending[..consumed]);
911            self.stream_pos += consumed;
912            self.pending.drain(..consumed);
913            self.enforce_max_input()?;
914            out_pos += (z.total_out() - before_out) as usize;
915
916            match status {
917                Status::StreamEnd => {
918                    if out_pos != expected_size {
919                        return Err(Error::CorruptObject(format!(
920                            "decompressed size mismatch: got {out_pos}, want {expected_size}"
921                        )));
922                    }
923                    return Ok(out);
924                }
925                Status::Ok | Status::BufError => {
926                    if consumed == 0 && !eof {
927                        let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
928                        if n == 0 {
929                            eof = true;
930                        } else {
931                            self.pending.extend_from_slice(&scratch[..n]);
932                        }
933                    } else if eof && self.pending.is_empty() && out_pos != expected_size {
934                        return Err(Error::CorruptObject(format!(
935                            "pack stream truncated (zlib) at offset {}",
936                            self.stream_pos
937                        )));
938                    }
939                }
940            }
941        }
942    }
943
944    /// SHA-1 over all pack bytes read so far (objects only; trailer not yet read).
945    fn finalize_hasher(
946        &self,
947    ) -> sha1::digest::generic_array::GenericArray<u8, sha1::digest::consts::U20> {
948        self.pack_hasher.clone().finalize()
949    }
950
951    /// Trailing pack checksum; not included in [`Self::finalize_hasher`].
952    fn read_trailer_20(&mut self) -> Result<[u8; 20]> {
953        let mut b = [0u8; 20];
954        if self.pending.len() >= 20 {
955            b.copy_from_slice(&self.pending[..20]);
956            self.pending.drain(..20);
957            self.stream_pos += 20;
958            self.enforce_max_input()?;
959            return Ok(b);
960        }
961        let tail = self.pending.len();
962        if tail > 0 {
963            b[..tail].copy_from_slice(&self.pending[..]);
964            self.pending.clear();
965        }
966        self.inner
967            .read_exact(&mut b[tail..])
968            .map_err(|e| io_to_corrupt_eof(e, self.stream_pos, "trailer"))?;
969        self.stream_pos += 20;
970        self.enforce_max_input()?;
971        Ok(b)
972    }
973}
974
975/// Apply a git "patch delta" to `base`, producing the patched result.
976///
977/// The delta binary format is:
978/// 1. Source size: variable-length little-endian integer (must equal
979///    `base.len()`).
980/// 2. Destination size: variable-length little-endian integer.
981/// 3. A sequence of COPY (MSB set) and INSERT (MSB clear) instructions.
982///
983/// # Errors
984///
985/// Returns [`Error::CorruptObject`] if the delta is malformed, the source-size
986/// field does not match `base.len()`, or the result length does not match the
987/// declared destination size.
988pub fn apply_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
989    let mut pos = 0usize;
990
991    let src_size = read_delta_varint(delta, &mut pos)?;
992    if src_size != base.len() {
993        return Err(Error::CorruptObject(format!(
994            "delta source size {src_size} != base size {}",
995            base.len()
996        )));
997    }
998    let dest_size = read_delta_varint(delta, &mut pos)?;
999    let mut result = Vec::with_capacity(dest_size);
1000
1001    while pos < delta.len() {
1002        let cmd = delta[pos];
1003        pos += 1;
1004        if cmd == 0 {
1005            return Err(Error::CorruptObject(
1006                "reserved opcode 0 in delta stream".to_owned(),
1007            ));
1008        }
1009        if cmd & 0x80 != 0 {
1010            // COPY instruction: up to 4 offset bytes (bits 0-3) and up to 3
1011            // size bytes (bits 4-6) are present, each controlled by a flag bit.
1012            let mut offset = 0usize;
1013            let mut size = 0usize;
1014
1015            macro_rules! maybe_read_byte {
1016                ($flag:expr, $shift:expr, $target:expr) => {
1017                    if cmd & $flag != 0 {
1018                        let b = *delta.get(pos).ok_or_else(|| {
1019                            Error::CorruptObject("truncated delta COPY operand".to_owned())
1020                        })?;
1021                        pos += 1;
1022                        $target |= (b as usize) << $shift;
1023                    }
1024                };
1025            }
1026
1027            maybe_read_byte!(0x01, 0, offset);
1028            maybe_read_byte!(0x02, 8, offset);
1029            maybe_read_byte!(0x04, 16, offset);
1030            maybe_read_byte!(0x08, 24, offset);
1031            maybe_read_byte!(0x10, 0, size);
1032            maybe_read_byte!(0x20, 8, size);
1033            maybe_read_byte!(0x40, 16, size);
1034
1035            if size == 0 {
1036                size = 0x10000;
1037            }
1038
1039            let end = offset.checked_add(size).ok_or_else(|| {
1040                Error::CorruptObject("delta COPY range overflows usize".to_owned())
1041            })?;
1042            let chunk = base.get(offset..end).ok_or_else(|| {
1043                Error::CorruptObject(format!(
1044                    "delta COPY [{offset},{end}) out of range (base is {} bytes)",
1045                    base.len()
1046                ))
1047            })?;
1048            result.extend_from_slice(chunk);
1049        } else {
1050            // INSERT instruction: copy the next `cmd` literal bytes verbatim.
1051            let n = cmd as usize;
1052            let chunk = delta
1053                .get(pos..pos + n)
1054                .ok_or_else(|| Error::CorruptObject("truncated delta INSERT data".to_owned()))?;
1055            result.extend_from_slice(chunk);
1056            pos += n;
1057        }
1058    }
1059
1060    if result.len() != dest_size {
1061        return Err(Error::CorruptObject(format!(
1062            "delta produced {} bytes but expected {dest_size}",
1063            result.len()
1064        )));
1065    }
1066
1067    Ok(result)
1068}
1069
1070/// Read a variable-length little-endian integer from `data` starting at `*pos`.
1071///
1072/// Advances `*pos` past the consumed bytes.
1073fn read_delta_varint(data: &[u8], pos: &mut usize) -> Result<usize> {
1074    let mut value = 0usize;
1075    let mut shift = 0u32;
1076    loop {
1077        let b = *data
1078            .get(*pos)
1079            .ok_or_else(|| Error::CorruptObject("truncated delta varint".to_owned()))?;
1080        *pos += 1;
1081        value |= ((b & 0x7f) as usize) << shift;
1082        shift += 7;
1083        if b & 0x80 == 0 {
1084            break;
1085        }
1086    }
1087    Ok(value)
1088}
1089
#[cfg(test)]
mod tests {
    use super::*;

    /// Assemble a minimal version-2 pack holding `objects` as non-delta entries.
    ///
    /// Layout: `PACK`, version, object count, then per object the variable-length
    /// type/size header followed by the zlib-compressed payload, and finally the SHA-1 of
    /// everything written before it.
    fn make_pack(objects: &[(ObjectKind, &[u8])]) -> Vec<u8> {
        use flate2::write::ZlibEncoder;
        use std::io::Write;

        let mut pack = Vec::new();
        pack.extend_from_slice(b"PACK");
        pack.extend_from_slice(&2u32.to_be_bytes());
        pack.extend_from_slice(&(objects.len() as u32).to_be_bytes());

        for (kind, data) in objects {
            let type_code: u8 = match kind {
                ObjectKind::Commit => 1,
                ObjectKind::Tree => 2,
                ObjectKind::Blob => 3,
                ObjectKind::Tag => 4,
            };

            // Header: type plus the low four size bits, then seven size bits per byte
            // with the high bit marking "more follows".
            let mut rest = data.len() >> 4;
            let mut lead = ((type_code & 0x7) << 4) | (data.len() & 0x0f) as u8;
            if rest > 0 {
                lead |= 0x80;
            }
            pack.push(lead);
            while rest > 0 {
                let low = (rest & 0x7f) as u8;
                rest >>= 7;
                pack.push(if rest > 0 { low | 0x80 } else { low });
            }

            // Payload: zlib-compressed object data.
            let mut enc = ZlibEncoder::new(Vec::new(), flate2::Compression::default());
            enc.write_all(data).unwrap();
            pack.extend_from_slice(&enc.finish().unwrap());
        }

        let mut hasher = Sha1::new();
        hasher.update(&pack);
        let trailer = hasher.finalize();
        pack.extend_from_slice(trailer.as_slice());
        pack
    }

    #[test]
    fn test_apply_delta_simple() {
        // Copy all of "hello" from the base, then insert " world".
        let delta: Vec<u8> = [
            &[5u8, 11][..],   // src_size = 5, dest_size = 11
            &[0x91, 0, 5][..], // COPY (0x80 | offset byte 0 | size byte 0): offset 0, size 5
            &[6][..],         // INSERT 6 literal bytes
            &b" world"[..],
        ]
        .concat();

        let patched = apply_delta(b"hello", &delta).unwrap();
        assert_eq!(patched, b"hello world");
    }

    #[test]
    fn test_apply_delta_insert_only() {
        // src_size = 0, dest_size = 5, INSERT 5 literal bytes.
        let mut delta = vec![0u8, 5, 5];
        delta.extend_from_slice(b"hello");

        let patched = apply_delta(b"", &delta).unwrap();
        assert_eq!(patched, b"hello");
    }

    #[test]
    fn test_apply_delta_copy_only() {
        // src_size = 6, dest_size = 3, COPY base[2..5] (cmd 0x91: offset 2, size 3).
        let delta = [6u8, 3, 0x91, 2, 3];

        let patched = apply_delta(b"abcdef", &delta).unwrap();
        assert_eq!(patched, b"cde");
    }

    #[test]
    fn test_apply_delta_size_zero_means_65536() {
        // A COPY whose size bytes are all absent (or zero) copies 0x10000 = 65536 bytes.
        let base = vec![0xABu8; 65536];

        // 65536 as a three-byte little-endian varint: 0x80, 0x80, 0x04.
        let size_varint = [
            0x80 | (65536 & 0x7f) as u8,
            0x80 | ((65536 >> 7) & 0x7f) as u8,
            ((65536 >> 14) & 0x7f) as u8,
        ];

        let mut delta = Vec::new();
        delta.extend_from_slice(&size_varint); // src_size
        delta.extend_from_slice(&size_varint); // dest_size
        delta.push(0x80u8); // COPY with no offset/size bytes: offset 0, size 0 -> 65536

        let patched = apply_delta(&base, &delta).unwrap();
        assert_eq!(patched.len(), 65536);
        assert!(patched.iter().all(|&b| b == 0xAB));
    }

    #[test]
    fn test_unpack_objects_blobs() {
        use tempfile::TempDir;
        let scratch = TempDir::new().unwrap();
        let objects_dir = scratch.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);

        let first: &[u8] = b"hello\n";
        let second: &[u8] = b"world\n";
        let pack = make_pack(&[(ObjectKind::Blob, first), (ObjectKind::Blob, second)]);

        let opts = UnpackOptions::default();
        let unpacked = unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap();
        assert_eq!(unpacked, 2);

        // Both blobs must be readable back from the object database.
        let first_oid = Odb::hash_object_data(ObjectKind::Blob, first);
        let second_oid = Odb::hash_object_data(ObjectKind::Blob, second);
        assert_eq!(odb.read(&first_oid).unwrap().data, b"hello\n");
        assert_eq!(odb.read(&second_oid).unwrap().data, b"world\n");
    }

    #[test]
    fn test_unpack_objects_empty_tree() {
        use tempfile::TempDir;
        let scratch = TempDir::new().unwrap();
        let objects_dir = scratch.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);

        let pack = make_pack(&[(ObjectKind::Tree, b"")]);
        let opts = UnpackOptions::default();
        assert_eq!(
            unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap(),
            1
        );

        let oid = Odb::hash_object_data(ObjectKind::Tree, b"");
        assert!(odb.exists(&oid));
        let loose_path = objects_dir
            .join(oid.loose_prefix())
            .join(oid.loose_suffix());
        assert!(
            loose_path.is_file(),
            "empty tree must be materialized as a loose object during unpack"
        );
    }

    #[test]
    fn test_strict_skips_gitlink_tree_entries() {
        use crate::index::{MODE_GITLINK, MODE_REGULAR};
        use crate::objects::{serialize_tree, TreeEntry};

        // A tree whose only entry is a gitlink to a commit that is in neither the pack
        // nor the ODB (it belongs to the submodule repository).
        let gitlink_tree = serialize_tree(&[TreeEntry {
            mode: MODE_GITLINK,
            name: b"sub".to_vec(),
            oid: ObjectId::from_hex(&"7f".repeat(20)).unwrap(),
        }]);
        let gitlink_tree_oid = Odb::hash_object_data(ObjectKind::Tree, &gitlink_tree);

        // Strict connectivity must not report the gitlink target as missing.
        let packed = HashMap::from([(gitlink_tree_oid, (ObjectKind::Tree, gitlink_tree))]);
        assert!(strict_verify_packed_references(None, &packed).is_ok());

        // Regression guard: a regular-file entry pointing at an absent blob must still
        // be reported as a strict connectivity error.
        let dangling_tree = serialize_tree(&[TreeEntry {
            mode: MODE_REGULAR,
            name: b"file".to_vec(),
            oid: ObjectId::from_hex(&"ab".repeat(20)).unwrap(),
        }]);
        let dangling_oid = Odb::hash_object_data(ObjectKind::Tree, &dangling_tree);
        let dangling = HashMap::from([(dangling_oid, (ObjectKind::Tree, dangling_tree))]);
        assert!(matches!(
            strict_verify_packed_references(None, &dangling),
            Err(Error::CorruptObject(_))
        ));
    }

    /// `Read` that hands out at most `max_len` bytes per call (simulates side-band chunking).
    struct ChunkedReader<'a> {
        data: &'a [u8],
        pos: usize,
        max_len: usize,
    }

    impl io::Read for ChunkedReader<'_> {
        fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
            let remaining = self.data.len().saturating_sub(self.pos);
            if remaining == 0 {
                return Ok(0);
            }
            let take = remaining.min(self.max_len).min(buf.len());
            let next = self.pos + take;
            buf[..take].copy_from_slice(&self.data[self.pos..next]);
            self.pos = next;
            Ok(take)
        }
    }

    #[test]
    fn test_unpack_objects_chunked_read_matches_full_buffer() {
        use tempfile::TempDir;
        let payload: &[u8] = b"chunked-stream";
        let pack = make_pack(&[(ObjectKind::Blob, payload)]);
        let opts = UnpackOptions::default();
        let oid = Odb::hash_object_data(ObjectKind::Blob, payload);

        // Baseline: the whole pack is available in a single buffer.
        let whole_tmp = TempDir::new().unwrap();
        let whole_dir = whole_tmp.path().join("objects");
        std::fs::create_dir_all(&whole_dir).unwrap();
        let whole_odb = Odb::new(&whole_dir);
        assert_eq!(
            unpack_objects(&mut pack.as_slice(), &whole_odb, &opts).unwrap(),
            1
        );
        assert!(whole_odb.exists(&oid));

        // Same pack, delivered at most eight bytes per read.
        let chunked_tmp = TempDir::new().unwrap();
        let chunked_dir = chunked_tmp.path().join("objects");
        std::fs::create_dir_all(&chunked_dir).unwrap();
        let chunked_odb = Odb::new(&chunked_dir);
        let mut reader = ChunkedReader {
            data: pack.as_slice(),
            pos: 0,
            max_len: 8,
        };
        assert_eq!(unpack_objects(&mut reader, &chunked_odb, &opts).unwrap(), 1);
        assert!(chunked_odb.exists(&oid));
    }

    #[test]
    fn test_unpack_objects_dry_run_writes_nothing() {
        use tempfile::TempDir;
        let scratch = TempDir::new().unwrap();
        let objects_dir = scratch.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);

        let payload: &[u8] = b"test content";
        let pack = make_pack(&[(ObjectKind::Blob, payload)]);

        let opts = UnpackOptions {
            dry_run: true,
            quiet: true,
            ..UnpackOptions::default()
        };
        let unpacked = unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap();
        assert_eq!(unpacked, 1);

        // A dry run counts the object but must not store it.
        let oid = Odb::hash_object_data(ObjectKind::Blob, payload);
        assert!(!odb.exists(&oid));
    }

    #[test]
    fn test_unpack_objects_bad_signature() {
        use tempfile::TempDir;
        let scratch = TempDir::new().unwrap();
        let objects_dir = scratch.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);

        // Wrong magic, version 2, zero objects, followed by an all-zero 20-byte trailer.
        let mut bogus = Vec::from(&b"NOPE\x00\x00\x00\x02\x00\x00\x00\x00"[..]);
        bogus.resize(bogus.len() + 20, 0u8);

        let opts = UnpackOptions::default();
        let err = unpack_objects(&mut bogus.as_slice(), &odb, &opts).unwrap_err();
        assert!(err.to_string().contains("invalid signature"));
    }

    #[test]
    fn test_unpack_objects_checksum_mismatch() {
        use tempfile::TempDir;
        let scratch = TempDir::new().unwrap();
        let objects_dir = scratch.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);

        let mut pack = make_pack(&[(ObjectKind::Blob, b"data")]);
        // Flip every bit of the final trailer byte so the checksum no longer matches.
        let last = pack.len() - 1;
        pack[last] ^= 0xFF;

        let opts = UnpackOptions::default();
        let err = unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap_err();
        assert!(err.to_string().contains("checksum"));
    }

    #[test]
    fn test_apply_delta_source_size_mismatch() {
        // The delta declares src_size = 3, but the base is only two bytes long.
        let delta = [3u8, 2u8, 2u8, b'h', b'i'];
        let err = apply_delta(b"hi", &delta).unwrap_err();
        assert!(err.to_string().contains("source size"));
    }
}