// grit_lib/unpack_objects.rs
1//! `unpack-objects`: unpack a pack stream into loose objects.
2//!
3//! Reads a pack-format byte stream, validates the trailing checksum, and
4//! writes each object as a loose file in the object database.  Delta objects
5//! (both `OFS_DELTA` and `REF_DELTA`) are resolved against already-unpacked
6//! objects or objects already present in the ODB.
7//!
8//! Large blobs are written to the ODB and dropped from the in-memory maps so
9//! cloning multi-gigabyte repositories does not require holding the full pack
10//! in RAM (streaming read + bounded retention).
11
12use std::borrow::Cow;
13use std::collections::HashMap;
14use std::io::{self, Read};
15
16use flate2::read::ZlibDecoder;
17use flate2::{Decompress, FlushDecompress, Status};
18use sha1::{Digest, Sha1};
19
20use crate::error::{Error, Result};
21use crate::gitmodules;
22use crate::objects::{parse_commit, parse_tag, parse_tree, Object, ObjectId, ObjectKind};
23use crate::odb::Odb;
24
25/// Options controlling `unpack-objects` behaviour.
/// Options controlling `unpack-objects` behaviour.
#[derive(Debug, Default)]
pub struct UnpackOptions {
    /// Validate and decompress objects but do not write them to the ODB.
    pub dry_run: bool,
    /// Suppress informational output.
    ///
    /// NOTE(review): not consulted anywhere in this module's visible code —
    /// presumably honoured by callers that print progress; confirm.
    pub quiet: bool,
    /// Reject packs whose commits/trees/tags reference missing objects.
    pub strict: bool,
    /// Maximum number of raw pack bytes that may be consumed (including the 20-byte trailer).
    ///
    /// Matches Git's `unpack-objects --max-input-size` / `receive.maxInputSize`: counts every
    /// byte read from the pack stream after crossing the limit. `None` means no limit.
    pub max_input_bytes: Option<u64>,
}
40
/// A delta that could not yet be resolved because its base was not yet known.
///
/// Invariant: exactly one of `base_oid` (`REF_DELTA`) or `base_offset`
/// (`OFS_DELTA`) is `Some`; both constructors in this module uphold this.
struct PendingDelta {
    /// Byte offset of this object in the pack stream (used to anchor
    /// `OFS_DELTA` back-references from later objects).
    offset: usize,
    /// For `REF_DELTA`: SHA-1 of the base object.
    base_oid: Option<ObjectId>,
    /// For `OFS_DELTA`: absolute byte offset of the base object.
    base_offset: Option<usize>,
    /// Decompressed delta data (base may still be compressed/unresolved).
    delta_data: Vec<u8>,
}
53
/// Unpack a pack stream from `reader` into `odb`.
///
/// Reads the complete pack from `reader`, validates the trailing SHA-1
/// checksum, unpacks all objects (including full delta-chain resolution), and —
/// unless [`UnpackOptions::dry_run`] is set — writes each object to `odb`.
///
/// Returns the total number of objects processed.
///
/// # Errors
///
/// - [`Error::CorruptObject`] — invalid pack format, checksum mismatch, or
///   unresolvable delta chains.
/// - [`Error::Io`] — I/O failure reading from `reader`.
/// - [`Error::Zlib`] — decompression failure.
pub fn unpack_objects(reader: &mut dyn Read, odb: &Odb, opts: &UnpackOptions) -> Result<usize> {
    /// Blobs larger than this stay on disk only (after write) so huge packs do
    /// not retain every blob in RAM. Smaller objects are kept for delta bases
    /// and `--strict` graph walks without extra ODB reads.
    const MAX_RETAIN_BYTES: usize = 1024 * 1024;

    let mut rd = StreamingPackReader::new(reader, opts.max_input_bytes);

    // Validate magic and version.
    let sig = rd.read_exact_n(4)?;
    if sig != b"PACK" {
        return Err(Error::CorruptObject(
            "not a pack stream: invalid signature".to_owned(),
        ));
    }
    let version = rd.read_u32_be()?;
    if version != 2 && version != 3 {
        return Err(Error::CorruptObject(format!(
            "unsupported pack version {version}"
        )));
    }
    let nr_objects = rd.read_u32_be()? as usize;

    // pack-stream offset → resolved object (see [`PackedObjectEntry`]).
    let mut by_offset: HashMap<usize, PackedObjectEntry> = HashMap::new();
    // ObjectId → in-pack object for REF_DELTA resolution and strict checks.
    let mut by_oid: HashMap<ObjectId, PackedObjectEntry> = HashMap::new();

    let mut pending: Vec<PendingDelta> = Vec::new();
    let mut count = 0usize;

    // Pass 1: stream every object header + payload. Non-delta objects are
    // written (or hashed, in dry-run) immediately; deltas are queued until
    // their base is known.
    for _ in 0..nr_objects {
        let obj_offset = rd.stream_pos();
        let (type_code, size) = rd.read_type_size()?;

        match type_code {
            1..=4 => {
                // Commit / tree / blob / tag stored whole.
                let kind = type_code_to_kind(type_code)?;
                let data = rd.decompress(size)?;
                let oid = write_or_hash(kind, &data, odb, opts.dry_run)?;
                let entry = packed_entry_after_write(kind, data, oid, odb, opts, MAX_RETAIN_BYTES);
                by_offset.insert(obj_offset, entry.clone());
                by_oid.insert(oid, entry);
                count += 1;
            }
            6 => {
                // OFS_DELTA: base at a negative encoded offset from this object.
                let neg = rd.read_ofs_neg_offset()?;
                let base_offset = obj_offset.checked_sub(neg).ok_or_else(|| {
                    Error::CorruptObject("ofs-delta base offset underflow".to_owned())
                })?;
                let delta_data = rd.decompress(size)?;
                pending.push(PendingDelta {
                    offset: obj_offset,
                    base_oid: None,
                    base_offset: Some(base_offset),
                    delta_data,
                });
            }
            7 => {
                // REF_DELTA: base identified by its SHA-1.
                let base_bytes = rd.read_exact_n(20)?;
                let base_oid = ObjectId::from_bytes(&base_bytes)?;
                let delta_data = rd.decompress(size)?;
                pending.push(PendingDelta {
                    offset: obj_offset,
                    base_oid: Some(base_oid),
                    base_offset: None,
                    delta_data,
                });
            }
            other => {
                // 0 and 5 are reserved in the pack format.
                return Err(Error::CorruptObject(format!(
                    "unknown packed-object type {other}"
                )))
            }
        }
    }

    // Trailing pack checksum (SHA-1 of all preceding bytes); not included in the hash.
    let digest = rd.finalize_hasher();
    let trailing = rd.read_trailer_20()?;
    if digest.as_slice() != trailing {
        return Err(Error::CorruptObject(
            "pack trailing checksum mismatch".to_owned(),
        ));
    }

    // Resolve pending deltas iteratively.  Each pass resolves all deltas whose
    // base is now known; repeat until none remain or we stall (corrupt pack).
    // Delta chains of depth d complete in at most d passes.
    let mut remaining = pending;
    loop {
        if remaining.is_empty() {
            break;
        }
        let before = remaining.len();
        let mut still_pending: Vec<PendingDelta> = Vec::new();

        for delta in remaining {
            // Locate the base: OFS_DELTA by pack offset, REF_DELTA by oid
            // (falling back to the ODB for thin packs).  `None` means "not
            // known yet"; retry next pass.
            let base_res: Option<Result<(ObjectKind, Cow<'_, [u8]>)>> =
                if let Some(base_off) = delta.base_offset {
                    by_offset
                        .get(&base_off)
                        .map(|e| entry_object_bytes(e, odb).map(|d| (e.kind(), d)))
                } else if let Some(ref base_id) = delta.base_oid {
                    if let Some(e) = by_oid.get(base_id) {
                        Some(entry_object_bytes(e, odb).map(|d| (e.kind(), d)))
                    } else if !opts.dry_run {
                        // Thin-pack base outside the pack: read from the ODB.
                        // Skipped in dry-run, so thin packs fail resolution
                        // there — NOTE(review): confirm this is intended.
                        odb.read(base_id)
                            .ok()
                            .map(|obj| Ok((obj.kind, Cow::Owned(obj.data))))
                    } else {
                        None
                    }
                } else {
                    None
                };

            match base_res {
                Some(Ok((base_kind, base_data))) => {
                    // A delta result has the same kind as its base.
                    let result = apply_delta(base_data.as_ref(), &delta.delta_data)?;
                    let oid = write_or_hash(base_kind, &result, odb, opts.dry_run)?;
                    let new_entry = packed_entry_after_write(
                        base_kind,
                        result,
                        oid,
                        odb,
                        opts,
                        MAX_RETAIN_BYTES,
                    );
                    by_offset.insert(delta.offset, new_entry.clone());
                    by_oid.insert(oid, new_entry);
                    count += 1;
                }
                Some(Err(e)) => return Err(e),
                None => still_pending.push(delta),
            }
        }

        remaining = still_pending;
        // No progress in a full pass ⇒ the remaining bases can never appear.
        if remaining.len() == before {
            return Err(Error::CorruptObject(format!(
                "{} delta(s) could not be resolved",
                remaining.len()
            )));
        }
    }

    if opts.strict {
        // Materialize (kind, bytes) for every unpacked object — spilled blobs
        // are re-read from the ODB — then run the .gitmodules/special-file
        // checks and the reference-connectivity walk.
        let mut dot_fsck_map: HashMap<ObjectId, (ObjectKind, Vec<u8>)> =
            HashMap::with_capacity(by_oid.len());
        for (oid, entry) in &by_oid {
            let kind = entry.kind();
            let data = match entry {
                PackedObjectEntry::InMemory { data, .. } => data.clone(),
                PackedObjectEntry::BlobOnDisk { oid: blob_oid } => odb.read(blob_oid)?.data,
            };
            dot_fsck_map.insert(*oid, (kind, data));
        }
        gitmodules::verify_packed_dot_special(&dot_fsck_map)?;
        strict_verify_packed_references_map(Some(odb), &by_oid)?;
    }

    Ok(count)
}
233
234/// Resolved non-delta object: either full bytes in memory or a large blob on disk.
235#[derive(Debug, Clone)]
236enum PackedObjectEntry {
237    InMemory { kind: ObjectKind, data: Vec<u8> },
238    BlobOnDisk { oid: ObjectId },
239}
240
241impl PackedObjectEntry {
242    fn kind(&self) -> ObjectKind {
243        match self {
244            PackedObjectEntry::InMemory { kind, .. } => *kind,
245            PackedObjectEntry::BlobOnDisk { .. } => ObjectKind::Blob,
246        }
247    }
248}
249
250fn packed_entry_after_write(
251    kind: ObjectKind,
252    data: Vec<u8>,
253    oid: ObjectId,
254    _odb: &Odb,
255    opts: &UnpackOptions,
256    max_retain: usize,
257) -> PackedObjectEntry {
258    if !opts.dry_run && kind == ObjectKind::Blob && data.len() > max_retain {
259        PackedObjectEntry::BlobOnDisk { oid }
260    } else {
261        PackedObjectEntry::InMemory { kind, data }
262    }
263}
264
265fn entry_object_bytes<'a>(entry: &'a PackedObjectEntry, odb: &Odb) -> Result<Cow<'a, [u8]>> {
266    match entry {
267        PackedObjectEntry::InMemory { data, .. } => Ok(Cow::Borrowed(data.as_slice())),
268        PackedObjectEntry::BlobOnDisk { oid } => Ok(Cow::Owned(odb.read(oid)?.data)),
269    }
270}
271
272fn strict_verify_packed_references_map(
273    odb: Option<&Odb>,
274    pack: &HashMap<ObjectId, PackedObjectEntry>,
275) -> Result<()> {
276    for entry in pack.values() {
277        match entry {
278            PackedObjectEntry::BlobOnDisk { .. } => {}
279            PackedObjectEntry::InMemory { kind, data } => match kind {
280                ObjectKind::Tree => {
281                    for e in parse_tree(data)? {
282                        if !strict_ref_resolves_map(&e.oid, pack, odb) {
283                            return Err(Error::CorruptObject(format!(
284                                "strict: missing object {} referenced by tree",
285                                e.oid.to_hex()
286                            )));
287                        }
288                    }
289                }
290                ObjectKind::Commit => {
291                    let c = parse_commit(data)?;
292                    if !strict_ref_resolves_map(&c.tree, pack, odb) {
293                        return Err(Error::CorruptObject(format!(
294                            "strict: missing tree {} referenced by commit",
295                            c.tree.to_hex()
296                        )));
297                    }
298                    for p in &c.parents {
299                        if !strict_ref_resolves_map(p, pack, odb) {
300                            return Err(Error::CorruptObject(format!(
301                                "strict: missing parent {} referenced by commit",
302                                p.to_hex()
303                            )));
304                        }
305                    }
306                }
307                ObjectKind::Tag => {
308                    let t = parse_tag(data)?;
309                    if !strict_ref_resolves_map(&t.object, pack, odb) {
310                        return Err(Error::CorruptObject(format!(
311                            "strict: missing object {} referenced by tag",
312                            t.object.to_hex()
313                        )));
314                    }
315                }
316                ObjectKind::Blob => {}
317            },
318        }
319    }
320    Ok(())
321}
322
323fn strict_ref_resolves_map(
324    oid: &ObjectId,
325    pack: &HashMap<ObjectId, PackedObjectEntry>,
326    odb: Option<&Odb>,
327) -> bool {
328    pack.contains_key(oid) || odb.is_some_and(|o| o.exists(oid))
329}
330
331fn strict_ref_resolves(
332    oid: &ObjectId,
333    pack: &std::collections::HashMap<ObjectId, (ObjectKind, Vec<u8>)>,
334    odb: Option<&Odb>,
335) -> bool {
336    pack.contains_key(oid) || odb.is_some_and(|o| o.exists(oid))
337}
338
339/// Verifies that references from commits, trees, and tags resolve to objects present in `pack`
340/// or, when `odb` is [`Some`], to loose objects in that database.
341///
342/// Use [`None`] for `odb` when indexing or unpacking in a context with no repository (Git allows
343/// `index-pack --strict` outside a work tree when the pack is self-contained).
344pub fn strict_verify_packed_references(
345    odb: Option<&Odb>,
346    pack: &HashMap<ObjectId, (ObjectKind, Vec<u8>)>,
347) -> Result<()> {
348    for (kind, data) in pack.values() {
349        match kind {
350            ObjectKind::Tree => {
351                for e in parse_tree(data)? {
352                    if !strict_ref_resolves(&e.oid, pack, odb) {
353                        return Err(Error::CorruptObject(format!(
354                            "strict: missing object {} referenced by tree",
355                            e.oid.to_hex()
356                        )));
357                    }
358                }
359            }
360            ObjectKind::Commit => {
361                let c = parse_commit(data)?;
362                if !strict_ref_resolves(&c.tree, pack, odb) {
363                    return Err(Error::CorruptObject(format!(
364                        "strict: missing tree {} referenced by commit",
365                        c.tree.to_hex()
366                    )));
367                }
368                for p in &c.parents {
369                    if !strict_ref_resolves(p, pack, odb) {
370                        return Err(Error::CorruptObject(format!(
371                            "strict: missing parent {} referenced by commit",
372                            p.to_hex()
373                        )));
374                    }
375                }
376            }
377            ObjectKind::Tag => {
378                let t = parse_tag(data)?;
379                if !strict_ref_resolves(&t.object, pack, odb) {
380                    return Err(Error::CorruptObject(format!(
381                        "strict: missing object {} referenced by tag",
382                        t.object.to_hex()
383                    )));
384                }
385            }
386            ObjectKind::Blob => {}
387        }
388    }
389    Ok(())
390}
391
392/// Parse a pack byte stream and return every resolved object (after delta resolution) keyed by OID.
393///
394/// Does not write to any object database. Used for receive-pack connectivity checks before
395/// applying a push to the permanent ODB.
396///
397/// Thin-pack bases may be resolved from `odb` when they are not present in the pack.
398pub fn pack_bytes_to_object_map(data: &[u8], odb: &Odb) -> Result<HashMap<ObjectId, Object>> {
399    let rd = PackReader::new(data.to_vec());
400    build_pack_object_map(rd, odb)
401}
402
/// In-memory counterpart of [`unpack_objects`]: parse the whole pack out of
/// `rd`, resolve all deltas (thin-pack bases via `odb`), and return every
/// object keyed by oid. Nothing is written to disk.
fn build_pack_object_map(mut rd: PackReader, odb: &Odb) -> Result<HashMap<ObjectId, Object>> {
    // Header: magic, version (2 or 3), object count.
    let sig = rd.read_exact(4)?;
    if sig != b"PACK" {
        return Err(Error::CorruptObject(
            "not a pack stream: invalid signature".to_owned(),
        ));
    }
    let version = rd.read_u32_be()?;
    if version != 2 && version != 3 {
        return Err(Error::CorruptObject(format!(
            "unsupported pack version {version}"
        )));
    }
    let nr_objects = rd.read_u32_be()? as usize;

    // pack offset → resolved object (OFS_DELTA bases).
    let mut by_offset: HashMap<usize, (ObjectKind, Vec<u8>)> = HashMap::new();
    // oid → resolved object (REF_DELTA bases; also the return value).
    let mut by_oid: HashMap<ObjectId, (ObjectKind, Vec<u8>)> = HashMap::new();
    let mut pending: Vec<PendingDelta> = Vec::new();

    /// REF_DELTA base lookup: prefer an object from this pack, else fall back
    /// to the ODB (thin pack).
    fn base_from_pack_or_odb(
        by_oid: &HashMap<ObjectId, (ObjectKind, Vec<u8>)>,
        odb: &Odb,
        id: &ObjectId,
    ) -> Option<(ObjectKind, Vec<u8>)> {
        if let Some(e) = by_oid.get(id) {
            return Some(e.clone());
        }
        odb.read(id).ok().map(|o| (o.kind, o.data))
    }

    // Pass 1: decode every entry; queue deltas whose base is not yet known.
    for _ in 0..nr_objects {
        let obj_offset = rd.pos;
        let (type_code, size) = rd.read_type_size()?;

        match type_code {
            1..=4 => {
                // Whole commit / tree / blob / tag.
                let kind = type_code_to_kind(type_code)?;
                let data = rd.decompress(size)?;
                let oid = Odb::hash_object_data(kind, &data);
                by_offset.insert(obj_offset, (kind, data.clone()));
                by_oid.insert(oid, (kind, data));
            }
            6 => {
                // OFS_DELTA: base located backwards from this entry's offset.
                let neg = rd.read_ofs_neg_offset()?;
                let base_offset = obj_offset.checked_sub(neg).ok_or_else(|| {
                    Error::CorruptObject("ofs-delta base offset underflow".to_owned())
                })?;
                let delta_data = rd.decompress(size)?;
                pending.push(PendingDelta {
                    offset: obj_offset,
                    base_oid: None,
                    base_offset: Some(base_offset),
                    delta_data,
                });
            }
            7 => {
                // REF_DELTA: base named by SHA-1.
                let base_bytes = rd.read_exact(20)?;
                let base_oid = ObjectId::from_bytes(base_bytes)?;
                let delta_data = rd.decompress(size)?;
                pending.push(PendingDelta {
                    offset: obj_offset,
                    base_oid: Some(base_oid),
                    base_offset: None,
                    delta_data,
                });
            }
            other => {
                return Err(Error::CorruptObject(format!(
                    "unknown packed-object type {other}"
                )))
            }
        }
    }

    // Trailer: SHA-1 over everything consumed so far must match the final
    // 20 bytes of the stream.
    let consumed = rd.pos;
    {
        let mut hasher = Sha1::new();
        hasher.update(&rd.data[..consumed]);
        let digest = hasher.finalize();
        let trailing = rd.read_exact(20)?;
        if digest.as_slice() != trailing {
            return Err(Error::CorruptObject(
                "pack trailing checksum mismatch".to_owned(),
            ));
        }
    }

    // Pass 2+: fixed-point delta resolution; a pass with no progress means
    // the remaining bases can never appear (corrupt or non-thin-resolvable).
    let mut remaining = pending;
    loop {
        if remaining.is_empty() {
            break;
        }
        let before = remaining.len();
        let mut still_pending: Vec<PendingDelta> = Vec::new();

        for delta in remaining {
            let base = if let Some(base_off) = delta.base_offset {
                by_offset.get(&base_off).cloned()
            } else if let Some(ref base_id) = delta.base_oid {
                base_from_pack_or_odb(&by_oid, odb, base_id)
            } else {
                None
            };

            if let Some((base_kind, base_data)) = base {
                // A delta result inherits its base's kind.
                let result = apply_delta(&base_data, &delta.delta_data)?;
                let oid = Odb::hash_object_data(base_kind, &result);
                by_offset.insert(delta.offset, (base_kind, result.clone()));
                by_oid.insert(oid, (base_kind, result));
            } else {
                still_pending.push(delta);
            }
        }

        remaining = still_pending;
        if remaining.len() == before {
            return Err(Error::CorruptObject(format!(
                "{} delta(s) could not be resolved",
                remaining.len()
            )));
        }
    }

    Ok(by_oid
        .into_iter()
        .map(|(oid, (kind, data))| (oid, Object::new(kind, data)))
        .collect())
}
531
532/// Either write `data` as a loose object (if `!dry_run`) or just compute its
533/// [`ObjectId`] without touching the filesystem.
534fn write_or_hash(kind: ObjectKind, data: &[u8], odb: &Odb, dry_run: bool) -> Result<ObjectId> {
535    if dry_run {
536        Ok(Odb::hash_object_data(kind, data))
537    } else {
538        // Always materialize into this ODB: objects reachable only via alternates must still be
539        // written locally (matches git unpack-objects; t5519-push-alternates).
540        odb.write_local(kind, data)
541    }
542}
543
544/// Convert a pack object type code to an [`ObjectKind`].
545fn type_code_to_kind(code: u8) -> Result<ObjectKind> {
546    match code {
547        1 => Ok(ObjectKind::Commit),
548        2 => Ok(ObjectKind::Tree),
549        3 => Ok(ObjectKind::Blob),
550        4 => Ok(ObjectKind::Tag),
551        _ => Err(Error::CorruptObject(format!(
552            "type code {code} is not a regular object type"
553        ))),
554    }
555}
556
557/// Low-level cursor over a buffered pack byte stream (in-memory pack parsing).
/// Low-level cursor over a buffered pack byte stream (in-memory pack parsing).
struct PackReader {
    // Entire pack, including the trailing 20-byte SHA-1 checksum.
    data: Vec<u8>,
    // Offset of the next unread byte; invariant: pos <= data.len().
    pos: usize,
}
562
563impl PackReader {
564    fn new(data: Vec<u8>) -> Self {
565        Self { data, pos: 0 }
566    }
567
568    /// Read exactly `n` bytes and advance the cursor, returning a slice into
569    /// the internal buffer.
570    fn read_exact(&mut self, n: usize) -> Result<&[u8]> {
571        if self.pos + n > self.data.len() {
572            return Err(Error::CorruptObject(format!(
573                "pack stream truncated: need {n} bytes at offset {}",
574                self.pos
575            )));
576        }
577        let slice = &self.data[self.pos..self.pos + n];
578        self.pos += n;
579        Ok(slice)
580    }
581
582    /// Read a single byte and advance the cursor.
583    fn read_byte(&mut self) -> Result<u8> {
584        if self.pos >= self.data.len() {
585            return Err(Error::CorruptObject(
586                "unexpected end of pack stream".to_owned(),
587            ));
588        }
589        let b = self.data[self.pos];
590        self.pos += 1;
591        Ok(b)
592    }
593
594    /// Read a big-endian `u32`.
595    fn read_u32_be(&mut self) -> Result<u32> {
596        let bytes = self.read_exact(4)?;
597        Ok(u32::from_be_bytes(bytes.try_into().map_err(|_| {
598            Error::CorruptObject("u32 read failed".to_owned())
599        })?))
600    }
601
602    /// Read the packed-object type + size header (variable-length big-endian
603    /// encoding with the type in bits 4-6 of the first byte).
604    ///
605    /// Returns `(type_code, uncompressed_size)`.
606    fn read_type_size(&mut self) -> Result<(u8, usize)> {
607        let c = self.read_byte()?;
608        let type_code = (c >> 4) & 0x7;
609        let mut size = (c & 0x0f) as usize;
610        let mut shift = 4u32;
611        let mut cur = c;
612        while cur & 0x80 != 0 {
613            cur = self.read_byte()?;
614            size |= ((cur & 0x7f) as usize) << shift;
615            shift += 7;
616        }
617        Ok((type_code, size))
618    }
619
620    /// Read an `OFS_DELTA` negative-offset value.
621    ///
622    /// The encoding uses a big-endian variable-length integer with a +1 bias
623    /// on each continuation byte, yielding values ≥ 1.
624    fn read_ofs_neg_offset(&mut self) -> Result<usize> {
625        let mut c = self.read_byte()?;
626        let mut value = (c & 0x7f) as usize;
627        while c & 0x80 != 0 {
628            c = self.read_byte()?;
629            value = (value + 1) << 7 | (c & 0x7f) as usize;
630        }
631        Ok(value)
632    }
633
634    /// Decompress zlib-compressed data starting at the current cursor position.
635    ///
636    /// Advances the cursor by exactly the number of compressed bytes consumed.
637    /// Returns an error if the decompressed length differs from `expected_size`.
638    fn decompress(&mut self, expected_size: usize) -> Result<Vec<u8>> {
639        let slice = &self.data[self.pos..];
640        let mut decoder = ZlibDecoder::new(slice);
641        let mut out = Vec::with_capacity(expected_size);
642        decoder
643            .read_to_end(&mut out)
644            .map_err(|e| Error::Zlib(e.to_string()))?;
645        if out.len() != expected_size {
646            return Err(Error::CorruptObject(format!(
647                "decompressed {} bytes but expected {}",
648                out.len(),
649                expected_size
650            )));
651        }
652        self.pos += decoder.total_in() as usize;
653        Ok(out)
654    }
655}
656
657fn io_to_corrupt_eof(e: io::Error, stream_pos: usize, context: &str) -> Error {
658    if e.kind() == io::ErrorKind::UnexpectedEof {
659        Error::CorruptObject(format!(
660            "pack stream truncated ({context}) at offset {stream_pos}"
661        ))
662    } else {
663        Error::Io(e)
664    }
665}
666
/// Streaming cursor over a pack file: hashes body bytes incrementally (no full-buffer read).
///
/// Raw pack bytes are either consumed as object headers (via [`Self::read_byte`]) or as zlib
/// payloads.  Zlib decoders may read ahead; overflow bytes stay in [`Self::pending`] so the next
/// object header or zlib stream starts at the correct offset.
struct StreamingPackReader<'a> {
    /// Underlying pack byte source (socket, file, bundle, ...).
    inner: &'a mut dyn Read,
    /// Running SHA-1 over every consumed byte; checked against the trailer.
    pack_hasher: Sha1,
    /// Count of bytes consumed so far (= current pack offset).
    stream_pos: usize,
    /// Optional cap on consumed bytes (`--max-input-size`); `None` = unlimited.
    max_input_bytes: Option<u64>,
    /// Compressed (or other) bytes already read from `inner` and hashed but not yet consumed by
    /// the current parsing step.
    pending: Vec<u8>,
}
681
682impl<'a> StreamingPackReader<'a> {
683    fn new(inner: &'a mut dyn Read, max_input_bytes: Option<u64>) -> Self {
684        Self {
685            inner,
686            pack_hasher: Sha1::new(),
687            stream_pos: 0,
688            max_input_bytes,
689            pending: Vec::new(),
690        }
691    }
692
693    fn stream_pos(&self) -> usize {
694        self.stream_pos
695    }
696
697    fn enforce_max_input(&self) -> Result<()> {
698        if let Some(limit) = self.max_input_bytes {
699            let pos = u64::try_from(self.stream_pos)
700                .map_err(|_| Error::CorruptObject("pack stream position overflow".to_owned()))?;
701            if pos > limit {
702                return Err(Error::CorruptObject(
703                    "pack exceeds maximum allowed size".to_owned(),
704                ));
705            }
706        }
707        Ok(())
708    }
709
710    /// Read pack-body bytes (hashed). Used for headers and non-zlib payload reads only.
711    fn read_from_source(&mut self, buf: &mut [u8]) -> Result<usize> {
712        let n = if !self.pending.is_empty() {
713            let take = buf.len().min(self.pending.len());
714            buf[..take].copy_from_slice(&self.pending[..take]);
715            self.pending.drain(..take);
716            take
717        } else {
718            self.inner.read(buf).map_err(Error::Io)?
719        };
720        if n > 0 {
721            self.pack_hasher.update(&buf[..n]);
722            self.stream_pos += n;
723            self.enforce_max_input()?;
724        }
725        Ok(n)
726    }
727
728    fn read_byte(&mut self) -> Result<u8> {
729        let mut b = [0u8; 1];
730        let n = self.read_from_source(&mut b)?;
731        if n == 0 {
732            return Err(Error::CorruptObject(format!(
733                "pack stream truncated (read byte) at offset {}",
734                self.stream_pos
735            )));
736        }
737        Ok(b[0])
738    }
739
740    fn read_exact_n(&mut self, n: usize) -> Result<Vec<u8>> {
741        let mut v = vec![0u8; n];
742        let mut got = 0usize;
743        while got < n {
744            let m = self.read_from_source(&mut v[got..n])?;
745            if m == 0 {
746                return Err(Error::CorruptObject(format!(
747                    "pack stream truncated (read exact) at offset {}",
748                    self.stream_pos
749                )));
750            }
751            got += m;
752        }
753        Ok(v)
754    }
755
756    fn read_u32_be(&mut self) -> Result<u32> {
757        let mut b = [0u8; 4];
758        let mut got = 0usize;
759        while got < 4 {
760            let m = self.read_from_source(&mut b[got..4])?;
761            if m == 0 {
762                return Err(Error::CorruptObject(format!(
763                    "pack stream truncated (read u32) at offset {}",
764                    self.stream_pos
765                )));
766            }
767            got += m;
768        }
769        Ok(u32::from_be_bytes(b))
770    }
771
772    fn read_type_size(&mut self) -> Result<(u8, usize)> {
773        let c = self.read_byte()?;
774        let type_code = (c >> 4) & 0x7;
775        let mut size = (c & 0x0f) as usize;
776        let mut shift = 4u32;
777        let mut cur = c;
778        while cur & 0x80 != 0 {
779            cur = self.read_byte()?;
780            size |= ((cur & 0x7f) as usize) << shift;
781            shift += 7;
782        }
783        Ok((type_code, size))
784    }
785
786    fn read_ofs_neg_offset(&mut self) -> Result<usize> {
787        let mut c = self.read_byte()?;
788        let mut value = (c & 0x7f) as usize;
789        while c & 0x80 != 0 {
790            c = self.read_byte()?;
791            value = (value + 1) << 7 | (c & 0x7f) as usize;
792        }
793        Ok(value)
794    }
795
    /// Pull zlib-compressed bytes until one object inflates to `expected_size` bytes.
    ///
    /// Bytes read from `inner` into `pending` are not hashed until we know how many belong to the
    /// zlib stream (`total_in()`). Lookahead past the zlib end (including the 20-byte pack
    /// trailer) must never be fed to the pack checksum.
    ///
    /// When the pack arrives in small chunks (e.g. side-band-64k from `upload-pack`), `flate2` may
    /// return an error before the full deflate stream is in `pending`. Retry after reading more
    /// from `inner` (same idea as [`PackReader::decompress`], which sees the whole zlib at once).
    fn decompress(&mut self, expected_size: usize) -> Result<Vec<u8>> {
        // `Read::read_exact` into an empty buffer returns `Ok` immediately without touching the
        // decoder, so a 0-byte packed object would leave the zlib header in `pending` and desync
        // the pack stream (bundle / clone unpack). Always run the zlib decoder once.
        if expected_size == 0 {
            const CHUNK: usize = 64 * 1024;
            let mut scratch = [0u8; CHUNK];
            loop {
                // A fresh decoder per attempt: if the zlib stream is still
                // incomplete we retry from scratch after buffering more input.
                let mut cursor = std::io::Cursor::new(self.pending.as_slice());
                let mut z = ZlibDecoder::new(&mut cursor);
                let mut sink = [0u8; 1];
                match z.read(&mut sink) {
                    // `Ok(0)` means the zlib stream ended producing no output
                    // — exactly the empty object we expect here.
                    Ok(0) => {
                        let consumed = z.total_in() as usize;
                        if consumed > self.pending.len() {
                            return Err(Error::CorruptObject(
                                "zlib total_in exceeds pending buffer".to_owned(),
                            ));
                        }
                        // `consumed == 0` means the decoder saw no input at
                        // all (empty `pending`); buffer more bytes and retry.
                        if consumed == 0 {
                            let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
                            if n == 0 {
                                return Err(Error::CorruptObject(format!(
                                    "pack stream truncated (zlib) at offset {}",
                                    self.stream_pos
                                )));
                            }
                            self.pending.extend_from_slice(&scratch[..n]);
                            continue;
                        }
                        // Hash and count only the bytes that belonged to this
                        // zlib stream; lookahead stays in `pending`.
                        self.pack_hasher.update(&self.pending[..consumed]);
                        self.stream_pos += consumed;
                        self.pending.drain(..consumed);
                        self.enforce_max_input()?;
                        return Ok(Vec::new());
                    }
                    Ok(_) => {
                        return Err(Error::CorruptObject(
                            "0-byte packed object inflated to non-empty output".to_owned(),
                        ));
                    }
                    // Partial zlib stream in `pending` (chunked transport):
                    // read more from `inner` and retry.
                    Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => {
                        let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
                        if n == 0 {
                            return Err(Error::CorruptObject(format!(
                                "pack stream truncated (zlib) at offset {}",
                                self.stream_pos
                            )));
                        }
                        self.pending.extend_from_slice(&scratch[..n]);
                    }
                    Err(e) => return Err(Error::Zlib(e.to_string())),
                }
            }
        }

        const CHUNK: usize = 64 * 1024;
        let mut scratch = [0u8; CHUNK];

        let mut out = vec![0u8; expected_size];
        let mut z = Decompress::new(true);
        let mut out_pos = 0usize;
        let mut eof = false;
        loop {
            // Keep the decoder fed: top up `pending` whenever it runs dry.
            if self.pending.is_empty() && !eof {
                let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
                if n == 0 {
                    eof = true;
                } else {
                    self.pending.extend_from_slice(&scratch[..n]);
                }
            }

            // Only signal `Finish` once no further input can possibly arrive;
            // otherwise a chunk boundary would look like a truncated stream.
            let flush = if eof && self.pending.is_empty() {
                FlushDecompress::Finish
            } else {
                FlushDecompress::None
            };

            let before_in = z.total_in();
            let before_out = z.total_out();
            let status = z
                .decompress(self.pending.as_slice(), &mut out[out_pos..], flush)
                .map_err(|e| Error::Zlib(e.to_string()))?;
            let consumed = (z.total_in() - before_in) as usize;
            if consumed > self.pending.len() {
                return Err(Error::CorruptObject(
                    "zlib consumed more than pending buffer".to_owned(),
                ));
            }
            // Hash and count exactly the bytes the decoder consumed from the
            // pack stream; unconsumed lookahead must stay out of the checksum.
            self.pack_hasher.update(&self.pending[..consumed]);
            self.stream_pos += consumed;
            self.pending.drain(..consumed);
            self.enforce_max_input()?;
            out_pos += (z.total_out() - before_out) as usize;

            match status {
                Status::StreamEnd => {
                    if out_pos != expected_size {
                        return Err(Error::CorruptObject(format!(
                            "decompressed size mismatch: got {out_pos}, want {expected_size}"
                        )));
                    }
                    return Ok(out);
                }
                Status::Ok | Status::BufError => {
                    // No progress and not at EOF: the decoder is starved, so
                    // pull another chunk from `inner` before trying again.
                    if consumed == 0 && !eof {
                        let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
                        if n == 0 {
                            eof = true;
                        } else {
                            self.pending.extend_from_slice(&scratch[..n]);
                        }
                    } else if eof && self.pending.is_empty() && out_pos != expected_size {
                        return Err(Error::CorruptObject(format!(
                            "pack stream truncated (zlib) at offset {}",
                            self.stream_pos
                        )));
                    }
                }
            }
        }
    }
928
929    /// SHA-1 over all pack bytes read so far (objects only; trailer not yet read).
930    fn finalize_hasher(
931        &self,
932    ) -> sha1::digest::generic_array::GenericArray<u8, sha1::digest::consts::U20> {
933        self.pack_hasher.clone().finalize()
934    }
935
936    /// Trailing pack checksum; not included in [`Self::finalize_hasher`].
937    fn read_trailer_20(&mut self) -> Result<[u8; 20]> {
938        let mut b = [0u8; 20];
939        if self.pending.len() >= 20 {
940            b.copy_from_slice(&self.pending[..20]);
941            self.pending.drain(..20);
942            self.stream_pos += 20;
943            self.enforce_max_input()?;
944            return Ok(b);
945        }
946        let tail = self.pending.len();
947        if tail > 0 {
948            b[..tail].copy_from_slice(&self.pending[..]);
949            self.pending.clear();
950        }
951        self.inner
952            .read_exact(&mut b[tail..])
953            .map_err(|e| io_to_corrupt_eof(e, self.stream_pos, "trailer"))?;
954        self.stream_pos += 20;
955        self.enforce_max_input()?;
956        Ok(b)
957    }
958}
959
960/// Apply a git "patch delta" to `base`, producing the patched result.
961///
962/// The delta binary format is:
963/// 1. Source size: variable-length little-endian integer (must equal
964///    `base.len()`).
965/// 2. Destination size: variable-length little-endian integer.
966/// 3. A sequence of COPY (MSB set) and INSERT (MSB clear) instructions.
967///
968/// # Errors
969///
970/// Returns [`Error::CorruptObject`] if the delta is malformed, the source-size
971/// field does not match `base.len()`, or the result length does not match the
972/// declared destination size.
973pub fn apply_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
974    let mut pos = 0usize;
975
976    let src_size = read_delta_varint(delta, &mut pos)?;
977    if src_size != base.len() {
978        return Err(Error::CorruptObject(format!(
979            "delta source size {src_size} != base size {}",
980            base.len()
981        )));
982    }
983    let dest_size = read_delta_varint(delta, &mut pos)?;
984    let mut result = Vec::with_capacity(dest_size);
985
986    while pos < delta.len() {
987        let cmd = delta[pos];
988        pos += 1;
989        if cmd == 0 {
990            return Err(Error::CorruptObject(
991                "reserved opcode 0 in delta stream".to_owned(),
992            ));
993        }
994        if cmd & 0x80 != 0 {
995            // COPY instruction: up to 4 offset bytes (bits 0-3) and up to 3
996            // size bytes (bits 4-6) are present, each controlled by a flag bit.
997            let mut offset = 0usize;
998            let mut size = 0usize;
999
1000            macro_rules! maybe_read_byte {
1001                ($flag:expr, $shift:expr, $target:expr) => {
1002                    if cmd & $flag != 0 {
1003                        let b = *delta.get(pos).ok_or_else(|| {
1004                            Error::CorruptObject("truncated delta COPY operand".to_owned())
1005                        })?;
1006                        pos += 1;
1007                        $target |= (b as usize) << $shift;
1008                    }
1009                };
1010            }
1011
1012            maybe_read_byte!(0x01, 0, offset);
1013            maybe_read_byte!(0x02, 8, offset);
1014            maybe_read_byte!(0x04, 16, offset);
1015            maybe_read_byte!(0x08, 24, offset);
1016            maybe_read_byte!(0x10, 0, size);
1017            maybe_read_byte!(0x20, 8, size);
1018            maybe_read_byte!(0x40, 16, size);
1019
1020            if size == 0 {
1021                size = 0x10000;
1022            }
1023
1024            let end = offset.checked_add(size).ok_or_else(|| {
1025                Error::CorruptObject("delta COPY range overflows usize".to_owned())
1026            })?;
1027            let chunk = base.get(offset..end).ok_or_else(|| {
1028                Error::CorruptObject(format!(
1029                    "delta COPY [{offset},{end}) out of range (base is {} bytes)",
1030                    base.len()
1031                ))
1032            })?;
1033            result.extend_from_slice(chunk);
1034        } else {
1035            // INSERT instruction: copy the next `cmd` literal bytes verbatim.
1036            let n = cmd as usize;
1037            let chunk = delta
1038                .get(pos..pos + n)
1039                .ok_or_else(|| Error::CorruptObject("truncated delta INSERT data".to_owned()))?;
1040            result.extend_from_slice(chunk);
1041            pos += n;
1042        }
1043    }
1044
1045    if result.len() != dest_size {
1046        return Err(Error::CorruptObject(format!(
1047            "delta produced {} bytes but expected {dest_size}",
1048            result.len()
1049        )));
1050    }
1051
1052    Ok(result)
1053}
1054
1055/// Read a variable-length little-endian integer from `data` starting at `*pos`.
1056///
1057/// Advances `*pos` past the consumed bytes.
1058fn read_delta_varint(data: &[u8], pos: &mut usize) -> Result<usize> {
1059    let mut value = 0usize;
1060    let mut shift = 0u32;
1061    loop {
1062        let b = *data
1063            .get(*pos)
1064            .ok_or_else(|| Error::CorruptObject("truncated delta varint".to_owned()))?;
1065        *pos += 1;
1066        value |= ((b & 0x7f) as usize) << shift;
1067        shift += 7;
1068        if b & 0x80 == 0 {
1069            break;
1070        }
1071    }
1072    Ok(value)
1073}
1074
#[cfg(test)]
mod tests {
    use super::*;

    // Helper: build a minimal pack from a list of (kind, data) pairs.
    // Returns the raw pack bytes.
    //
    // Layout produced: "PACK" signature, version 2, big-endian object count,
    // then per object a type+size varint header followed by the zlib-deflated
    // payload, and finally the SHA-1 of everything preceding as the trailer.
    fn make_pack(objects: &[(ObjectKind, &[u8])]) -> Vec<u8> {
        use flate2::write::ZlibEncoder;
        use std::io::Write;

        let mut entries: Vec<Vec<u8>> = Vec::new();
        for (kind, data) in objects {
            let type_code: u8 = match kind {
                ObjectKind::Commit => 1,
                ObjectKind::Tree => 2,
                ObjectKind::Blob => 3,
                ObjectKind::Tag => 4,
            };
            // Encode type+size header: low 4 size bits share the first byte
            // with the 3-bit type; remaining size bits continue base-128.
            let mut header = Vec::new();
            let mut size = data.len();
            let first = ((type_code & 0x7) << 4) | (size & 0x0f) as u8;
            size >>= 4;
            if size > 0 {
                header.push(first | 0x80);
                while size > 0 {
                    let b = (size & 0x7f) as u8;
                    size >>= 7;
                    header.push(if size > 0 { b | 0x80 } else { b });
                }
            } else {
                header.push(first);
            }
            // zlib-compress data.
            let mut enc = ZlibEncoder::new(Vec::new(), flate2::Compression::default());
            enc.write_all(data).unwrap();
            let compressed = enc.finish().unwrap();
            let mut entry = header;
            entry.extend_from_slice(&compressed);
            entries.push(entry);
        }

        // Assemble: PACK + version(2) + count + entries + SHA-1.
        let mut pack = Vec::new();
        pack.extend_from_slice(b"PACK");
        pack.extend_from_slice(&2u32.to_be_bytes());
        pack.extend_from_slice(&(objects.len() as u32).to_be_bytes());
        for entry in &entries {
            pack.extend_from_slice(entry);
        }
        let mut hasher = Sha1::new();
        hasher.update(&pack);
        let digest = hasher.finalize();
        pack.extend_from_slice(digest.as_slice());
        pack
    }

    // COPY + INSERT in one delta: reuse the whole base, then append literals.
    #[test]
    fn test_apply_delta_simple() {
        // Build a trivial delta: insert "hello world".
        let base = b"hello";
        let mut delta = Vec::new();
        // src_size = 5
        delta.push(5u8);
        // dest_size = 11
        delta.push(11u8);
        // COPY instruction: copy base[0..5]
        // cmd = 0x80 | 0x01 (offset present, byte 0) | 0x10 (size byte 0)
        delta.push(0x80 | 0x01 | 0x10); // 0x91
        delta.push(0u8); // offset = 0
        delta.push(5u8); // size = 5
                         // INSERT " world" (6 bytes)
        delta.push(6u8);
        delta.extend_from_slice(b" world");

        let result = apply_delta(base, &delta).unwrap();
        assert_eq!(result, b"hello world");
    }

    // Pure-INSERT delta against an empty base.
    #[test]
    fn test_apply_delta_insert_only() {
        let base = b"";
        let mut delta = Vec::new();
        delta.push(0u8); // src_size = 0
        delta.push(5u8); // dest_size = 5
        delta.push(5u8); // INSERT 5 bytes
        delta.extend_from_slice(b"hello");

        let result = apply_delta(base, &delta).unwrap();
        assert_eq!(result, b"hello");
    }

    // Pure-COPY delta with a non-zero offset into the base.
    #[test]
    fn test_apply_delta_copy_only() {
        let base = b"abcdef";
        let mut delta = Vec::new();
        delta.push(6u8); // src_size = 6
        delta.push(3u8); // dest_size = 3
                         // COPY base[2..5]: offset=2, size=3
                         // cmd = 0x80 | 0x01 | 0x10
        delta.push(0x91u8);
        delta.push(2u8); // offset = 2
        delta.push(3u8); // size = 3

        let result = apply_delta(base, &delta).unwrap();
        assert_eq!(result, b"cde");
    }

    // Pack-format special case: a COPY whose size bytes are all absent/zero
    // means 0x10000 bytes, not zero.
    #[test]
    fn test_apply_delta_size_zero_means_65536() {
        // A COPY with size bytes all zero means 0x10000 = 65536.
        let base = vec![0xABu8; 65536];
        let mut delta = Vec::new();
        // src_size = 65536, encoded as 3 bytes little-endian varint
        delta.push(0x80 | (65536 & 0x7f) as u8); // 0
        delta.push(0x80 | ((65536 >> 7) & 0x7f) as u8); // 0x80
        delta.push(((65536 >> 14) & 0x7f) as u8); // 4
                                                  // dest_size = 65536, same
        delta.push(0x80 | (65536 & 0x7f) as u8);
        delta.push(0x80 | ((65536 >> 7) & 0x7f) as u8);
        delta.push(((65536 >> 14) & 0x7f) as u8);
        // COPY: offset=0 (no offset bytes), size=0 (no size bytes) → means 0x10000
        // cmd = 0x80 (no offset/size bytes present at all → offset=0, size=0→65536)
        delta.push(0x80u8);

        let result = apply_delta(&base, &delta).unwrap();
        assert_eq!(result.len(), 65536);
        assert!(result.iter().all(|&b| b == 0xAB));
    }

    // Happy path: two blobs unpack to loose objects readable from the ODB.
    #[test]
    fn test_unpack_objects_blobs() {
        use tempfile::TempDir;
        let tmp = TempDir::new().unwrap();
        let objects_dir = tmp.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);

        let pack = make_pack(&[
            (ObjectKind::Blob, b"hello\n"),
            (ObjectKind::Blob, b"world\n"),
        ]);

        let opts = UnpackOptions::default();
        let count = unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap();
        assert_eq!(count, 2);

        // Verify both blobs can be read back.
        let oid1 = Odb::hash_object_data(ObjectKind::Blob, b"hello\n");
        let oid2 = Odb::hash_object_data(ObjectKind::Blob, b"world\n");
        let obj1 = odb.read(&oid1).unwrap();
        let obj2 = odb.read(&oid2).unwrap();
        assert_eq!(obj1.data, b"hello\n");
        assert_eq!(obj2.data, b"world\n");
    }

    // Zero-byte object: exercises the `expected_size == 0` decompress path.
    #[test]
    fn test_unpack_objects_empty_tree() {
        use tempfile::TempDir;
        let tmp = TempDir::new().unwrap();
        let objects_dir = tmp.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);

        let pack = make_pack(&[(ObjectKind::Tree, b"")]);
        let opts = UnpackOptions::default();
        assert_eq!(
            unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap(),
            1
        );
        let oid = Odb::hash_object_data(ObjectKind::Tree, b"");
        assert!(odb.exists(&oid));
    }

    /// `Read` that returns at most `max_len` bytes per call (simulates side-band chunking).
    struct ChunkedReader<'a> {
        data: &'a [u8],
        pos: usize,
        max_len: usize,
    }

    impl io::Read for ChunkedReader<'_> {
        fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
            if self.pos >= self.data.len() {
                return Ok(0);
            }
            let take = (self.data.len() - self.pos)
                .min(self.max_len)
                .min(buf.len());
            buf[..take].copy_from_slice(&self.data[self.pos..self.pos + take]);
            self.pos += take;
            Ok(take)
        }
    }

    // Same pack, delivered whole vs. in 8-byte chunks, must unpack identically
    // (regression guard for the decompress retry-on-short-read logic).
    #[test]
    fn test_unpack_objects_chunked_read_matches_full_buffer() {
        use tempfile::TempDir;
        let pack = make_pack(&[(ObjectKind::Blob, b"chunked-stream")]);
        let opts = UnpackOptions::default();
        let oid = Odb::hash_object_data(ObjectKind::Blob, b"chunked-stream");

        let tmp = TempDir::new().unwrap();
        let objects_dir = tmp.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);
        assert_eq!(
            unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap(),
            1
        );
        assert!(odb.exists(&oid));

        let tmp2 = TempDir::new().unwrap();
        let objects_dir2 = tmp2.path().join("objects");
        std::fs::create_dir_all(&objects_dir2).unwrap();
        let odb2 = Odb::new(&objects_dir2);
        let mut chunked = ChunkedReader {
            data: pack.as_slice(),
            pos: 0,
            max_len: 8,
        };
        assert_eq!(unpack_objects(&mut chunked, &odb2, &opts).unwrap(), 1);
        assert!(odb2.exists(&oid));
    }

    // `dry_run` still validates and counts objects but must not touch the ODB.
    #[test]
    fn test_unpack_objects_dry_run_writes_nothing() {
        use tempfile::TempDir;
        let tmp = TempDir::new().unwrap();
        let objects_dir = tmp.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);

        let pack = make_pack(&[(ObjectKind::Blob, b"test content")]);

        let opts = UnpackOptions {
            dry_run: true,
            quiet: true,
            strict: false,
            max_input_bytes: None,
        };
        let count = unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap();
        assert_eq!(count, 1);

        // Nothing should be written.
        let oid = Odb::hash_object_data(ObjectKind::Blob, b"test content");
        assert!(!odb.exists(&oid));
    }

    // A stream without the "PACK" magic must be rejected up front.
    #[test]
    fn test_unpack_objects_bad_signature() {
        use tempfile::TempDir;
        let tmp = TempDir::new().unwrap();
        let objects_dir = tmp.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);

        let mut bad = b"NOPE\x00\x00\x00\x02\x00\x00\x00\x00".to_vec();
        bad.extend_from_slice(&[0u8; 20]);
        let opts = UnpackOptions::default();
        let err = unpack_objects(&mut bad.as_slice(), &odb, &opts).unwrap_err();
        assert!(err.to_string().contains("invalid signature"));
    }

    // Flipping one trailer byte must fail the whole-pack checksum validation.
    #[test]
    fn test_unpack_objects_checksum_mismatch() {
        use tempfile::TempDir;
        let tmp = TempDir::new().unwrap();
        let objects_dir = tmp.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);

        let mut pack = make_pack(&[(ObjectKind::Blob, b"data")]);
        // Corrupt the trailing checksum.
        let n = pack.len();
        pack[n - 1] ^= 0xFF;

        let opts = UnpackOptions::default();
        let err = unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap_err();
        assert!(err.to_string().contains("checksum"));
    }

    // Declared source size must match the actual base length exactly.
    #[test]
    fn test_apply_delta_source_size_mismatch() {
        let base = b"hi";
        let delta = [3u8, 2u8, 2u8, b'h', b'i']; // src_size=3 != base.len()=2
        let err = apply_delta(base, &delta).unwrap_err();
        assert!(err.to_string().contains("source size"));
    }
}