Skip to main content

grit_lib/
unpack_objects.rs

1//! `unpack-objects`: unpack a pack stream into loose objects.
2//!
3//! Reads a pack-format byte stream, validates the trailing checksum, and
4//! writes each object as a loose file in the object database.  Delta objects
5//! (both `OFS_DELTA` and `REF_DELTA`) are resolved against already-unpacked
6//! objects or objects already present in the ODB.
7//!
8//! Large blobs are written to the ODB and dropped from the in-memory maps so
9//! cloning multi-gigabyte repositories does not require holding the full pack
10//! in RAM (streaming read + bounded retention).
11
12use std::borrow::Cow;
13use std::collections::{HashMap, HashSet};
14use std::io::{self, Read};
15
16use flate2::read::ZlibDecoder;
17use flate2::{Decompress, FlushDecompress, Status};
18use sha1::{Digest, Sha1};
19
20use crate::error::{Error, Result};
21use crate::gitmodules;
22use crate::index::MODE_GITLINK;
23use crate::objects::{parse_commit, parse_tag, parse_tree, Object, ObjectId, ObjectKind};
24use crate::odb::Odb;
25
26/// Options controlling `unpack-objects` behaviour.
27#[derive(Debug, Default)]
28pub struct UnpackOptions {
29    /// Validate and decompress objects but do not write them to the ODB.
30    pub dry_run: bool,
31    /// Suppress informational output.
32    pub quiet: bool,
33    /// Reject packs whose commits/trees/tags reference missing objects.
34    pub strict: bool,
35    /// Object IDs that strict connectivity may treat as promised by a configured promisor remote.
36    pub allowed_missing: HashSet<ObjectId>,
37    /// Whether strict connectivity may tolerate references to missing objects in a promisor repo.
38    pub allow_promisor_missing_references: bool,
39    /// Maximum number of raw pack bytes that may be consumed (including the 20-byte trailer).
40    ///
41    /// Matches Git's `unpack-objects --max-input-size` / `receive.maxInputSize`: counts every
42    /// byte read from the pack stream after crossing the limit. `None` means no limit.
43    pub max_input_bytes: Option<u64>,
44    /// Commit OIDs that are shallow boundaries (grafts): their parents are intentionally absent and
45    /// must not be required during the `--strict` connectivity walk.
46    ///
47    /// Mirrors `unpack-objects --shallow-file <file>` in upstream `receive-pack`, where the shallow
48    /// file lists the commits whose parent objects were deliberately not transferred.
49    pub shallow_boundaries: HashSet<ObjectId>,
50}
51
52/// A delta that could not yet be resolved because its base was not yet known.
53struct PendingDelta {
54    /// Byte offset of this object in the pack stream (used to anchor
55    /// `OFS_DELTA` back-references from later objects).
56    offset: usize,
57    /// For `REF_DELTA`: SHA-1 of the base object.
58    base_oid: Option<ObjectId>,
59    /// For `OFS_DELTA`: absolute byte offset of the base object.
60    base_offset: Option<usize>,
61    /// Decompressed delta data.
62    delta_data: Vec<u8>,
63}
64
65/// Unpack a pack stream from `reader` into `odb`.
66///
67/// Reads the complete pack from `reader`, validates the trailing SHA-1
68/// checksum, unpacks all objects (including full delta-chain resolution), and —
69/// unless [`UnpackOptions::dry_run`] is set — writes each object to `odb`.
70///
71/// Returns the total number of objects processed.
72///
73/// # Errors
74///
75/// - [`Error::CorruptObject`] — invalid pack format, checksum mismatch, or
76///   unresolvable delta chains.
77/// - [`Error::Io`] — I/O failure reading from `reader`.
78/// - [`Error::Zlib`] — decompression failure.
79pub fn unpack_objects(reader: &mut dyn Read, odb: &Odb, opts: &UnpackOptions) -> Result<usize> {
80    /// Blobs larger than this stay on disk only (after write) so huge packs do
81    /// not retain every blob in RAM. Smaller objects are kept for delta bases
82    /// and `--strict` graph walks without extra ODB reads.
83    const MAX_RETAIN_BYTES: usize = 1024 * 1024;
84
85    let mut rd = StreamingPackReader::new(reader, opts.max_input_bytes);
86
87    // Validate magic and version.
88    let sig = rd.read_exact_n(4)?;
89    if sig != b"PACK" {
90        return Err(Error::CorruptObject(
91            "not a pack stream: invalid signature".to_owned(),
92        ));
93    }
94    let version = rd.read_u32_be()?;
95    if version != 2 && version != 3 {
96        return Err(Error::CorruptObject(format!(
97            "unsupported pack version {version}"
98        )));
99    }
100    let nr_objects = rd.read_u32_be()? as usize;
101
102    // pack-stream offset → resolved object (see [`PackedObjectEntry`]).
103    let mut by_offset: HashMap<usize, PackedObjectEntry> = HashMap::new();
104    // ObjectId → in-pack object for REF_DELTA resolution and strict checks.
105    let mut by_oid: HashMap<ObjectId, PackedObjectEntry> = HashMap::new();
106
107    let mut pending: Vec<PendingDelta> = Vec::new();
108    let mut count = 0usize;
109
110    for _ in 0..nr_objects {
111        let obj_offset = rd.stream_pos();
112        let (type_code, size) = rd.read_type_size()?;
113
114        match type_code {
115            1..=4 => {
116                let kind = type_code_to_kind(type_code)?;
117                let data = rd.decompress(size)?;
118                let oid = write_or_hash(kind, &data, odb, opts.dry_run)?;
119                let entry = packed_entry_after_write(kind, data, oid, odb, opts, MAX_RETAIN_BYTES);
120                by_offset.insert(obj_offset, entry.clone());
121                by_oid.insert(oid, entry);
122                count += 1;
123            }
124            6 => {
125                // OFS_DELTA: base at a negative encoded offset from this object.
126                let neg = rd.read_ofs_neg_offset()?;
127                let base_offset = obj_offset.checked_sub(neg).ok_or_else(|| {
128                    Error::CorruptObject("ofs-delta base offset underflow".to_owned())
129                })?;
130                let delta_data = rd.decompress(size)?;
131                pending.push(PendingDelta {
132                    offset: obj_offset,
133                    base_oid: None,
134                    base_offset: Some(base_offset),
135                    delta_data,
136                });
137            }
138            7 => {
139                // REF_DELTA: base identified by its SHA-1.
140                let base_bytes = rd.read_exact_n(20)?;
141                let base_oid = ObjectId::from_bytes(&base_bytes)?;
142                let delta_data = rd.decompress(size)?;
143                pending.push(PendingDelta {
144                    offset: obj_offset,
145                    base_oid: Some(base_oid),
146                    base_offset: None,
147                    delta_data,
148                });
149            }
150            other => {
151                return Err(Error::CorruptObject(format!(
152                    "unknown packed-object type {other}"
153                )))
154            }
155        }
156    }
157
158    // Trailing pack checksum (SHA-1 of all preceding bytes); not included in the hash.
159    let digest = rd.finalize_hasher();
160    let trailing = rd.read_trailer_20()?;
161    if digest.as_slice() != trailing {
162        return Err(Error::CorruptObject(
163            "pack trailing checksum mismatch".to_owned(),
164        ));
165    }
166
167    // Resolve pending deltas iteratively.  Each pass resolves all deltas whose
168    // base is now known; repeat until none remain or we stall (corrupt pack).
169    let mut remaining = pending;
170    loop {
171        if remaining.is_empty() {
172            break;
173        }
174        let before = remaining.len();
175        let mut still_pending: Vec<PendingDelta> = Vec::new();
176
177        for delta in remaining {
178            let base_res: Option<Result<(ObjectKind, Cow<'_, [u8]>)>> =
179                if let Some(base_off) = delta.base_offset {
180                    by_offset
181                        .get(&base_off)
182                        .map(|e| entry_object_bytes(e, odb).map(|d| (e.kind(), d)))
183                } else if let Some(ref base_id) = delta.base_oid {
184                    if let Some(e) = by_oid.get(base_id) {
185                        Some(entry_object_bytes(e, odb).map(|d| (e.kind(), d)))
186                    } else if !opts.dry_run {
187                        odb.read(base_id)
188                            .ok()
189                            .map(|obj| Ok((obj.kind, Cow::Owned(obj.data))))
190                    } else {
191                        None
192                    }
193                } else {
194                    None
195                };
196
197            match base_res {
198                Some(Ok((base_kind, base_data))) => {
199                    let result = apply_delta(base_data.as_ref(), &delta.delta_data)?;
200                    let oid = write_or_hash(base_kind, &result, odb, opts.dry_run)?;
201                    let new_entry = packed_entry_after_write(
202                        base_kind,
203                        result,
204                        oid,
205                        odb,
206                        opts,
207                        MAX_RETAIN_BYTES,
208                    );
209                    by_offset.insert(delta.offset, new_entry.clone());
210                    by_oid.insert(oid, new_entry);
211                    count += 1;
212                }
213                Some(Err(e)) => return Err(e),
214                None => still_pending.push(delta),
215            }
216        }
217
218        remaining = still_pending;
219        if remaining.len() == before {
220            return Err(Error::CorruptObject(format!(
221                "{} delta(s) could not be resolved",
222                remaining.len()
223            )));
224        }
225    }
226
227    if opts.strict {
228        let mut dot_fsck_map: HashMap<ObjectId, (ObjectKind, Vec<u8>)> =
229            HashMap::with_capacity(by_oid.len());
230        for (oid, entry) in &by_oid {
231            let kind = entry.kind();
232            let data = match entry {
233                PackedObjectEntry::InMemory { data, .. } => data.clone(),
234                PackedObjectEntry::BlobOnDisk { oid: blob_oid } => odb.read(blob_oid)?.data,
235            };
236            dot_fsck_map.insert(*oid, (kind, data));
237        }
238        gitmodules::verify_packed_dot_special(&dot_fsck_map)?;
239        strict_verify_packed_references_map(
240            Some(odb),
241            &by_oid,
242            &opts.allowed_missing,
243            opts.allow_promisor_missing_references,
244            &opts.shallow_boundaries,
245        )?;
246    }
247
248    Ok(count)
249}
250
251/// Resolved non-delta object: either full bytes in memory or a large blob on disk.
252#[derive(Debug, Clone)]
253enum PackedObjectEntry {
254    InMemory { kind: ObjectKind, data: Vec<u8> },
255    BlobOnDisk { oid: ObjectId },
256}
257
258impl PackedObjectEntry {
259    fn kind(&self) -> ObjectKind {
260        match self {
261            PackedObjectEntry::InMemory { kind, .. } => *kind,
262            PackedObjectEntry::BlobOnDisk { .. } => ObjectKind::Blob,
263        }
264    }
265}
266
267fn packed_entry_after_write(
268    kind: ObjectKind,
269    data: Vec<u8>,
270    oid: ObjectId,
271    _odb: &Odb,
272    opts: &UnpackOptions,
273    max_retain: usize,
274) -> PackedObjectEntry {
275    if !opts.dry_run && kind == ObjectKind::Blob && data.len() > max_retain {
276        PackedObjectEntry::BlobOnDisk { oid }
277    } else {
278        PackedObjectEntry::InMemory { kind, data }
279    }
280}
281
282fn entry_object_bytes<'a>(entry: &'a PackedObjectEntry, odb: &Odb) -> Result<Cow<'a, [u8]>> {
283    match entry {
284        PackedObjectEntry::InMemory { data, .. } => Ok(Cow::Borrowed(data.as_slice())),
285        PackedObjectEntry::BlobOnDisk { oid } => Ok(Cow::Owned(odb.read(oid)?.data)),
286    }
287}
288
289fn strict_verify_packed_references_map(
290    odb: Option<&Odb>,
291    pack: &HashMap<ObjectId, PackedObjectEntry>,
292    allowed_missing: &HashSet<ObjectId>,
293    allow_promisor_missing_references: bool,
294    shallow_boundaries: &HashSet<ObjectId>,
295) -> Result<()> {
296    for (oid, entry) in pack {
297        match entry {
298            PackedObjectEntry::BlobOnDisk { .. } => {}
299            PackedObjectEntry::InMemory { kind, data } => match kind {
300                ObjectKind::Tree => {
301                    for e in parse_tree(data)? {
302                        // Gitlink (submodule) entries point at commits that live
303                        // in the submodule repository, not the superproject's
304                        // pack/ODB. Skip them in the connectivity walk, matching
305                        // upstream git (git/fsck.c:374 `if (S_ISGITLINK) continue;`).
306                        if e.mode == MODE_GITLINK {
307                            continue;
308                        }
309                        if !strict_ref_resolves_map(
310                            &e.oid,
311                            pack,
312                            odb,
313                            allowed_missing,
314                            allow_promisor_missing_references,
315                        ) {
316                            return Err(Error::CorruptObject(format!(
317                                "strict: missing object {} referenced by tree",
318                                e.oid.to_hex()
319                            )));
320                        }
321                    }
322                }
323                ObjectKind::Commit => {
324                    let c = parse_commit(data)?;
325                    if !strict_ref_resolves_map(
326                        &c.tree,
327                        pack,
328                        odb,
329                        allowed_missing,
330                        allow_promisor_missing_references,
331                    ) {
332                        return Err(Error::CorruptObject(format!(
333                            "strict: missing tree {} referenced by commit",
334                            c.tree.to_hex()
335                        )));
336                    }
337                    // A commit recorded as a shallow boundary (graft) has its parents intentionally
338                    // absent — skip parent connectivity for it, matching unpack-objects run with a
339                    // `--shallow-file` listing this commit.
340                    if shallow_boundaries.contains(oid) {
341                        continue;
342                    }
343                    for p in &c.parents {
344                        if !strict_ref_resolves_map(
345                            p,
346                            pack,
347                            odb,
348                            allowed_missing,
349                            allow_promisor_missing_references,
350                        ) {
351                            return Err(Error::CorruptObject(format!(
352                                "strict: missing parent {} referenced by commit",
353                                p.to_hex()
354                            )));
355                        }
356                    }
357                }
358                ObjectKind::Tag => {
359                    let t = parse_tag(data)?;
360                    if !strict_ref_resolves_map(
361                        &t.object,
362                        pack,
363                        odb,
364                        allowed_missing,
365                        allow_promisor_missing_references,
366                    ) {
367                        return Err(Error::CorruptObject(format!(
368                            "strict: missing object {} referenced by tag",
369                            t.object.to_hex()
370                        )));
371                    }
372                }
373                ObjectKind::Blob => {}
374            },
375        }
376    }
377    Ok(())
378}
379
380fn strict_ref_resolves_map(
381    oid: &ObjectId,
382    pack: &HashMap<ObjectId, PackedObjectEntry>,
383    odb: Option<&Odb>,
384    allowed_missing: &HashSet<ObjectId>,
385    allow_promisor_missing_references: bool,
386) -> bool {
387    pack.contains_key(oid)
388        || allowed_missing.contains(oid)
389        || odb.is_some_and(|o| o.exists(oid))
390        || allow_promisor_missing_references
391}
392
393fn strict_ref_resolves(
394    oid: &ObjectId,
395    pack: &std::collections::HashMap<ObjectId, (ObjectKind, Vec<u8>)>,
396    odb: Option<&Odb>,
397) -> bool {
398    pack.contains_key(oid) || odb.is_some_and(|o| o.exists(oid))
399}
400
401/// Verifies that references from commits, trees, and tags resolve to objects present in `pack`
402/// or, when `odb` is [`Some`], to loose objects in that database.
403///
404/// Use [`None`] for `odb` when indexing or unpacking in a context with no repository (Git allows
405/// `index-pack --strict` outside a work tree when the pack is self-contained).
406pub fn strict_verify_packed_references(
407    odb: Option<&Odb>,
408    pack: &HashMap<ObjectId, (ObjectKind, Vec<u8>)>,
409) -> Result<()> {
410    for (kind, data) in pack.values() {
411        match kind {
412            ObjectKind::Tree => {
413                for e in parse_tree(data)? {
414                    // Gitlink (submodule) entries point at commits that live in
415                    // the submodule repository, not this pack/ODB. Skip them in
416                    // the connectivity walk, matching upstream git
417                    // (git/fsck.c:374 `if (S_ISGITLINK) continue;`).
418                    if e.mode == MODE_GITLINK {
419                        continue;
420                    }
421                    if !strict_ref_resolves(&e.oid, pack, odb) {
422                        return Err(Error::CorruptObject(format!(
423                            "strict: missing object {} referenced by tree",
424                            e.oid.to_hex()
425                        )));
426                    }
427                }
428            }
429            ObjectKind::Commit => {
430                let c = parse_commit(data)?;
431                if !strict_ref_resolves(&c.tree, pack, odb) {
432                    return Err(Error::CorruptObject(format!(
433                        "strict: missing tree {} referenced by commit",
434                        c.tree.to_hex()
435                    )));
436                }
437                for p in &c.parents {
438                    if !strict_ref_resolves(p, pack, odb) {
439                        return Err(Error::CorruptObject(format!(
440                            "strict: missing parent {} referenced by commit",
441                            p.to_hex()
442                        )));
443                    }
444                }
445            }
446            ObjectKind::Tag => {
447                let t = parse_tag(data)?;
448                if !strict_ref_resolves(&t.object, pack, odb) {
449                    return Err(Error::CorruptObject(format!(
450                        "strict: missing object {} referenced by tag",
451                        t.object.to_hex()
452                    )));
453                }
454            }
455            ObjectKind::Blob => {}
456        }
457    }
458    Ok(())
459}
460
461/// Whether `data` is a *thin* pack — i.e. it contains a `ref-delta` (type 7) whose base object is
462/// not itself present in the pack. `git pack-objects --thin` produces such packs; a receiver that
463/// rejects thin packs (`receive-pack --reject-thin-pack-for-testing`) uses this to refuse them.
464///
465/// Conservative: any parse error makes this return `false` (treat as non-thin) so a malformed pack
466/// is handled by the normal ingestion path rather than mislabeled.
467pub fn pack_is_thin(data: &[u8]) -> bool {
468    pack_is_thin_inner(data).unwrap_or(false)
469}
470
471fn pack_is_thin_inner(data: &[u8]) -> Result<bool> {
472    let mut rd = PackReader::new(data.to_vec());
473    if rd.read_exact(4)? != b"PACK" {
474        return Ok(false);
475    }
476    let _version = rd.read_u32_be()?;
477    let nr_objects = rd.read_u32_be()? as usize;
478
479    let mut in_pack: HashSet<ObjectId> = HashSet::new();
480    let mut ref_delta_bases: Vec<ObjectId> = Vec::new();
481    for _ in 0..nr_objects {
482        let obj_offset = rd.pos;
483        let (type_code, size) = rd.read_type_size()?;
484        match type_code {
485            1..=4 => {
486                let kind = type_code_to_kind(type_code)?;
487                let obj_data = rd.decompress(size)?;
488                in_pack.insert(Odb::hash_object_data(kind, &obj_data));
489            }
490            6 => {
491                // ofs-delta: base is always in-pack (referenced by relative offset).
492                let _neg = rd.read_ofs_neg_offset()?;
493                let _ = obj_offset;
494                let _ = rd.decompress(size)?;
495            }
496            7 => {
497                let base_bytes = rd.read_exact(20)?;
498                ref_delta_bases.push(ObjectId::from_bytes(base_bytes)?);
499                let _ = rd.decompress(size)?;
500            }
501            _ => return Ok(false),
502        }
503    }
504    // Thin iff any ref-delta points at a base that is not packed alongside it.
505    Ok(ref_delta_bases.iter().any(|b| !in_pack.contains(b)))
506}
507
508/// Parse a pack byte stream and return every resolved object (after delta resolution) keyed by OID.
509///
510/// Does not write to any object database. Used for receive-pack connectivity checks before
511/// applying a push to the permanent ODB.
512///
513/// Thin-pack bases may be resolved from `odb` when they are not present in the pack.
514pub fn pack_bytes_to_object_map(data: &[u8], odb: &Odb) -> Result<HashMap<ObjectId, Object>> {
515    let rd = PackReader::new(data.to_vec());
516    build_pack_object_map(rd, odb)
517}
518
519fn build_pack_object_map(mut rd: PackReader, odb: &Odb) -> Result<HashMap<ObjectId, Object>> {
520    let sig = rd.read_exact(4)?;
521    if sig != b"PACK" {
522        return Err(Error::CorruptObject(
523            "not a pack stream: invalid signature".to_owned(),
524        ));
525    }
526    let version = rd.read_u32_be()?;
527    if version != 2 && version != 3 {
528        return Err(Error::CorruptObject(format!(
529            "unsupported pack version {version}"
530        )));
531    }
532    let nr_objects = rd.read_u32_be()? as usize;
533
534    let mut by_offset: HashMap<usize, (ObjectKind, Vec<u8>)> = HashMap::new();
535    let mut by_oid: HashMap<ObjectId, (ObjectKind, Vec<u8>)> = HashMap::new();
536    let mut pending: Vec<PendingDelta> = Vec::new();
537
538    fn base_from_pack_or_odb(
539        by_oid: &HashMap<ObjectId, (ObjectKind, Vec<u8>)>,
540        odb: &Odb,
541        id: &ObjectId,
542    ) -> Option<(ObjectKind, Vec<u8>)> {
543        if let Some(e) = by_oid.get(id) {
544            return Some(e.clone());
545        }
546        odb.read(id).ok().map(|o| (o.kind, o.data))
547    }
548
549    for _ in 0..nr_objects {
550        let obj_offset = rd.pos;
551        let (type_code, size) = rd.read_type_size()?;
552
553        match type_code {
554            1..=4 => {
555                let kind = type_code_to_kind(type_code)?;
556                let data = rd.decompress(size)?;
557                let oid = Odb::hash_object_data(kind, &data);
558                by_offset.insert(obj_offset, (kind, data.clone()));
559                by_oid.insert(oid, (kind, data));
560            }
561            6 => {
562                let neg = rd.read_ofs_neg_offset()?;
563                let base_offset = obj_offset.checked_sub(neg).ok_or_else(|| {
564                    Error::CorruptObject("ofs-delta base offset underflow".to_owned())
565                })?;
566                let delta_data = rd.decompress(size)?;
567                pending.push(PendingDelta {
568                    offset: obj_offset,
569                    base_oid: None,
570                    base_offset: Some(base_offset),
571                    delta_data,
572                });
573            }
574            7 => {
575                let base_bytes = rd.read_exact(20)?;
576                let base_oid = ObjectId::from_bytes(base_bytes)?;
577                let delta_data = rd.decompress(size)?;
578                pending.push(PendingDelta {
579                    offset: obj_offset,
580                    base_oid: Some(base_oid),
581                    base_offset: None,
582                    delta_data,
583                });
584            }
585            other => {
586                return Err(Error::CorruptObject(format!(
587                    "unknown packed-object type {other}"
588                )))
589            }
590        }
591    }
592
593    let consumed = rd.pos;
594    {
595        let mut hasher = Sha1::new();
596        hasher.update(&rd.data[..consumed]);
597        let digest = hasher.finalize();
598        let trailing = rd.read_exact(20)?;
599        if digest.as_slice() != trailing {
600            return Err(Error::CorruptObject(
601                "pack trailing checksum mismatch".to_owned(),
602            ));
603        }
604    }
605
606    let mut remaining = pending;
607    loop {
608        if remaining.is_empty() {
609            break;
610        }
611        let before = remaining.len();
612        let mut still_pending: Vec<PendingDelta> = Vec::new();
613
614        for delta in remaining {
615            let base = if let Some(base_off) = delta.base_offset {
616                by_offset.get(&base_off).cloned()
617            } else if let Some(ref base_id) = delta.base_oid {
618                base_from_pack_or_odb(&by_oid, odb, base_id)
619            } else {
620                None
621            };
622
623            if let Some((base_kind, base_data)) = base {
624                let result = apply_delta(&base_data, &delta.delta_data)?;
625                let oid = Odb::hash_object_data(base_kind, &result);
626                by_offset.insert(delta.offset, (base_kind, result.clone()));
627                by_oid.insert(oid, (base_kind, result));
628            } else {
629                still_pending.push(delta);
630            }
631        }
632
633        remaining = still_pending;
634        if remaining.len() == before {
635            return Err(Error::CorruptObject(format!(
636                "{} delta(s) could not be resolved",
637                remaining.len()
638            )));
639        }
640    }
641
642    Ok(by_oid
643        .into_iter()
644        .map(|(oid, (kind, data))| (oid, Object::new(kind, data)))
645        .collect())
646}
647
648/// Either write `data` as a loose object (if `!dry_run`) or just compute its
649/// [`ObjectId`] without touching the filesystem.
650fn write_or_hash(kind: ObjectKind, data: &[u8], odb: &Odb, dry_run: bool) -> Result<ObjectId> {
651    if dry_run {
652        Ok(Odb::hash_object_data(kind, data))
653    } else {
654        // Always materialize into this ODB: objects reachable only via alternates must still be
655        // written locally (matches git unpack-objects; t5519-push-alternates).
656        odb.write_local(kind, data)
657    }
658}
659
660/// Convert a pack object type code to an [`ObjectKind`].
661fn type_code_to_kind(code: u8) -> Result<ObjectKind> {
662    match code {
663        1 => Ok(ObjectKind::Commit),
664        2 => Ok(ObjectKind::Tree),
665        3 => Ok(ObjectKind::Blob),
666        4 => Ok(ObjectKind::Tag),
667        _ => Err(Error::CorruptObject(format!(
668            "type code {code} is not a regular object type"
669        ))),
670    }
671}
672
673/// Low-level cursor over a buffered pack byte stream (in-memory pack parsing).
674struct PackReader {
675    data: Vec<u8>,
676    pos: usize,
677}
678
679impl PackReader {
680    fn new(data: Vec<u8>) -> Self {
681        Self { data, pos: 0 }
682    }
683
684    /// Read exactly `n` bytes and advance the cursor, returning a slice into
685    /// the internal buffer.
686    fn read_exact(&mut self, n: usize) -> Result<&[u8]> {
687        if self.pos + n > self.data.len() {
688            return Err(Error::CorruptObject(format!(
689                "pack stream truncated: need {n} bytes at offset {}",
690                self.pos
691            )));
692        }
693        let slice = &self.data[self.pos..self.pos + n];
694        self.pos += n;
695        Ok(slice)
696    }
697
698    /// Read a single byte and advance the cursor.
699    fn read_byte(&mut self) -> Result<u8> {
700        if self.pos >= self.data.len() {
701            return Err(Error::CorruptObject(
702                "unexpected end of pack stream".to_owned(),
703            ));
704        }
705        let b = self.data[self.pos];
706        self.pos += 1;
707        Ok(b)
708    }
709
710    /// Read a big-endian `u32`.
711    fn read_u32_be(&mut self) -> Result<u32> {
712        let bytes = self.read_exact(4)?;
713        Ok(u32::from_be_bytes(bytes.try_into().map_err(|_| {
714            Error::CorruptObject("u32 read failed".to_owned())
715        })?))
716    }
717
718    /// Read the packed-object type + size header (variable-length big-endian
719    /// encoding with the type in bits 4-6 of the first byte).
720    ///
721    /// Returns `(type_code, uncompressed_size)`.
722    fn read_type_size(&mut self) -> Result<(u8, usize)> {
723        let c = self.read_byte()?;
724        let type_code = (c >> 4) & 0x7;
725        let mut size = (c & 0x0f) as usize;
726        let mut shift = 4u32;
727        let mut cur = c;
728        while cur & 0x80 != 0 {
729            cur = self.read_byte()?;
730            size |= ((cur & 0x7f) as usize) << shift;
731            shift += 7;
732        }
733        Ok((type_code, size))
734    }
735
736    /// Read an `OFS_DELTA` negative-offset value.
737    ///
738    /// The encoding uses a big-endian variable-length integer with a +1 bias
739    /// on each continuation byte, yielding values ≥ 1.
740    fn read_ofs_neg_offset(&mut self) -> Result<usize> {
741        let mut c = self.read_byte()?;
742        let mut value = (c & 0x7f) as usize;
743        while c & 0x80 != 0 {
744            c = self.read_byte()?;
745            value = (value + 1) << 7 | (c & 0x7f) as usize;
746        }
747        Ok(value)
748    }
749
750    /// Decompress zlib-compressed data starting at the current cursor position.
751    ///
752    /// Advances the cursor by exactly the number of compressed bytes consumed.
753    /// Returns an error if the decompressed length differs from `expected_size`.
754    fn decompress(&mut self, expected_size: usize) -> Result<Vec<u8>> {
755        let slice = &self.data[self.pos..];
756        let mut decoder = ZlibDecoder::new(slice);
757        let mut out = Vec::with_capacity(expected_size);
758        decoder
759            .read_to_end(&mut out)
760            .map_err(|e| Error::Zlib(e.to_string()))?;
761        if out.len() != expected_size {
762            return Err(Error::CorruptObject(format!(
763                "decompressed {} bytes but expected {}",
764                out.len(),
765                expected_size
766            )));
767        }
768        self.pos += decoder.total_in() as usize;
769        Ok(out)
770    }
771}
772
773fn io_to_corrupt_eof(e: io::Error, stream_pos: usize, context: &str) -> Error {
774    if e.kind() == io::ErrorKind::UnexpectedEof {
775        Error::CorruptObject(format!(
776            "pack stream truncated ({context}) at offset {stream_pos}"
777        ))
778    } else {
779        Error::Io(e)
780    }
781}
782
/// Streaming cursor over a pack file: hashes body bytes incrementally (no full-buffer read).
///
/// Raw pack bytes are either consumed as object headers (via [`Self::read_byte`]) or as zlib
/// payloads.  Zlib decoders may read ahead; overflow bytes stay in [`Self::pending`] so the next
/// object header or zlib stream starts at the correct offset.
struct StreamingPackReader<'a> {
    /// Underlying byte source the pack is pulled from.
    inner: &'a mut dyn Read,
    /// Running SHA-1 over every pack byte consumed so far; the 20-byte trailer is never fed in.
    pack_hasher: Sha1,
    /// Number of pack bytes consumed so far (read-ahead parked in `pending` is not counted).
    stream_pos: usize,
    /// Optional cap on `stream_pos`; exceeding it is reported as an error. `None` disables it.
    max_input_bytes: Option<u64>,
    /// Bytes already read from `inner` but not yet consumed by a parsing step.
    ///
    /// They are neither hashed nor counted in `stream_pos` until they are consumed, because the
    /// read-ahead may include the pack trailer, which must stay out of the checksum.
    pending: Vec<u8>,
}
797
798impl<'a> StreamingPackReader<'a> {
799    fn new(inner: &'a mut dyn Read, max_input_bytes: Option<u64>) -> Self {
800        Self {
801            inner,
802            pack_hasher: Sha1::new(),
803            stream_pos: 0,
804            max_input_bytes,
805            pending: Vec::new(),
806        }
807    }
808
    /// Number of pack bytes consumed so far.
    fn stream_pos(&self) -> usize {
        self.stream_pos
    }
812
813    fn enforce_max_input(&self) -> Result<()> {
814        if let Some(limit) = self.max_input_bytes {
815            let pos = u64::try_from(self.stream_pos)
816                .map_err(|_| Error::CorruptObject("pack stream position overflow".to_owned()))?;
817            if pos > limit {
818                return Err(Error::CorruptObject(
819                    "pack exceeds maximum allowed size".to_owned(),
820                ));
821            }
822        }
823        Ok(())
824    }
825
826    /// Read pack-body bytes (hashed). Used for headers and non-zlib payload reads only.
827    fn read_from_source(&mut self, buf: &mut [u8]) -> Result<usize> {
828        let n = if !self.pending.is_empty() {
829            let take = buf.len().min(self.pending.len());
830            buf[..take].copy_from_slice(&self.pending[..take]);
831            self.pending.drain(..take);
832            take
833        } else {
834            self.inner.read(buf).map_err(Error::Io)?
835        };
836        if n > 0 {
837            self.pack_hasher.update(&buf[..n]);
838            self.stream_pos += n;
839            self.enforce_max_input()?;
840        }
841        Ok(n)
842    }
843
844    fn read_byte(&mut self) -> Result<u8> {
845        let mut b = [0u8; 1];
846        let n = self.read_from_source(&mut b)?;
847        if n == 0 {
848            return Err(Error::CorruptObject(format!(
849                "pack stream truncated (read byte) at offset {}",
850                self.stream_pos
851            )));
852        }
853        Ok(b[0])
854    }
855
856    fn read_exact_n(&mut self, n: usize) -> Result<Vec<u8>> {
857        let mut v = vec![0u8; n];
858        let mut got = 0usize;
859        while got < n {
860            let m = self.read_from_source(&mut v[got..n])?;
861            if m == 0 {
862                return Err(Error::CorruptObject(format!(
863                    "pack stream truncated (read exact) at offset {}",
864                    self.stream_pos
865                )));
866            }
867            got += m;
868        }
869        Ok(v)
870    }
871
872    fn read_u32_be(&mut self) -> Result<u32> {
873        let mut b = [0u8; 4];
874        let mut got = 0usize;
875        while got < 4 {
876            let m = self.read_from_source(&mut b[got..4])?;
877            if m == 0 {
878                return Err(Error::CorruptObject(format!(
879                    "pack stream truncated (read u32) at offset {}",
880                    self.stream_pos
881                )));
882            }
883            got += m;
884        }
885        Ok(u32::from_be_bytes(b))
886    }
887
888    fn read_type_size(&mut self) -> Result<(u8, usize)> {
889        let c = self.read_byte()?;
890        let type_code = (c >> 4) & 0x7;
891        let mut size = (c & 0x0f) as usize;
892        let mut shift = 4u32;
893        let mut cur = c;
894        while cur & 0x80 != 0 {
895            cur = self.read_byte()?;
896            size |= ((cur & 0x7f) as usize) << shift;
897            shift += 7;
898        }
899        Ok((type_code, size))
900    }
901
902    fn read_ofs_neg_offset(&mut self) -> Result<usize> {
903        let mut c = self.read_byte()?;
904        let mut value = (c & 0x7f) as usize;
905        while c & 0x80 != 0 {
906            c = self.read_byte()?;
907            value = (value + 1) << 7 | (c & 0x7f) as usize;
908        }
909        Ok(value)
910    }
911
912    /// Pull zlib-compressed bytes until one object inflates to `expected_size` bytes.
913    ///
914    /// Bytes read from `inner` into `pending` are not hashed until we know how many belong to the
915    /// zlib stream (`total_in()`). Lookahead past the zlib end (including the 20-byte pack
916    /// trailer) must never be fed to the pack checksum.
917    ///
918    /// When the pack arrives in small chunks (e.g. side-band-64k from `upload-pack`), `flate2` may
919    /// return an error before the full deflate stream is in `pending`. Retry after reading more
920    /// from `inner` (same idea as [`PackReader::decompress`], which sees the whole zlib at once).
921    fn decompress(&mut self, expected_size: usize) -> Result<Vec<u8>> {
922        // `Read::read_exact` into an empty buffer returns `Ok` immediately without touching the
923        // decoder, so a 0-byte packed object would leave the zlib header in `pending` and desync
924        // the pack stream (bundle / clone unpack). Always run the zlib decoder once.
925        if expected_size == 0 {
926            const CHUNK: usize = 64 * 1024;
927            let mut scratch = [0u8; CHUNK];
928            loop {
929                let mut cursor = std::io::Cursor::new(self.pending.as_slice());
930                let mut z = ZlibDecoder::new(&mut cursor);
931                let mut sink = [0u8; 1];
932                match z.read(&mut sink) {
933                    Ok(0) => {
934                        let consumed = z.total_in() as usize;
935                        if consumed > self.pending.len() {
936                            return Err(Error::CorruptObject(
937                                "zlib total_in exceeds pending buffer".to_owned(),
938                            ));
939                        }
940                        if consumed == 0 {
941                            let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
942                            if n == 0 {
943                                return Err(Error::CorruptObject(format!(
944                                    "pack stream truncated (zlib) at offset {}",
945                                    self.stream_pos
946                                )));
947                            }
948                            self.pending.extend_from_slice(&scratch[..n]);
949                            continue;
950                        }
951                        self.pack_hasher.update(&self.pending[..consumed]);
952                        self.stream_pos += consumed;
953                        self.pending.drain(..consumed);
954                        self.enforce_max_input()?;
955                        return Ok(Vec::new());
956                    }
957                    Ok(_) => {
958                        return Err(Error::CorruptObject(
959                            "0-byte packed object inflated to non-empty output".to_owned(),
960                        ));
961                    }
962                    Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => {
963                        let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
964                        if n == 0 {
965                            return Err(Error::CorruptObject(format!(
966                                "pack stream truncated (zlib) at offset {}",
967                                self.stream_pos
968                            )));
969                        }
970                        self.pending.extend_from_slice(&scratch[..n]);
971                    }
972                    Err(e) => return Err(Error::Zlib(e.to_string())),
973                }
974            }
975        }
976
977        const CHUNK: usize = 64 * 1024;
978        let mut scratch = [0u8; CHUNK];
979
980        let mut out = vec![0u8; expected_size];
981        let mut z = Decompress::new(true);
982        let mut out_pos = 0usize;
983        let mut eof = false;
984        loop {
985            if self.pending.is_empty() && !eof {
986                let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
987                if n == 0 {
988                    eof = true;
989                } else {
990                    self.pending.extend_from_slice(&scratch[..n]);
991                }
992            }
993
994            let flush = if eof && self.pending.is_empty() {
995                FlushDecompress::Finish
996            } else {
997                FlushDecompress::None
998            };
999
1000            let before_in = z.total_in();
1001            let before_out = z.total_out();
1002            let status = z
1003                .decompress(self.pending.as_slice(), &mut out[out_pos..], flush)
1004                .map_err(|e| Error::Zlib(e.to_string()))?;
1005            let consumed = (z.total_in() - before_in) as usize;
1006            if consumed > self.pending.len() {
1007                return Err(Error::CorruptObject(
1008                    "zlib consumed more than pending buffer".to_owned(),
1009                ));
1010            }
1011            self.pack_hasher.update(&self.pending[..consumed]);
1012            self.stream_pos += consumed;
1013            self.pending.drain(..consumed);
1014            self.enforce_max_input()?;
1015            out_pos += (z.total_out() - before_out) as usize;
1016
1017            match status {
1018                Status::StreamEnd => {
1019                    if out_pos != expected_size {
1020                        return Err(Error::CorruptObject(format!(
1021                            "decompressed size mismatch: got {out_pos}, want {expected_size}"
1022                        )));
1023                    }
1024                    return Ok(out);
1025                }
1026                Status::Ok | Status::BufError => {
1027                    if consumed == 0 && !eof {
1028                        let n = self.inner.read(&mut scratch).map_err(Error::Io)?;
1029                        if n == 0 {
1030                            eof = true;
1031                        } else {
1032                            self.pending.extend_from_slice(&scratch[..n]);
1033                        }
1034                    } else if eof && self.pending.is_empty() && out_pos != expected_size {
1035                        return Err(Error::CorruptObject(format!(
1036                            "pack stream truncated (zlib) at offset {}",
1037                            self.stream_pos
1038                        )));
1039                    }
1040                }
1041            }
1042        }
1043    }
1044
1045    /// SHA-1 over all pack bytes read so far (objects only; trailer not yet read).
1046    fn finalize_hasher(
1047        &self,
1048    ) -> sha1::digest::generic_array::GenericArray<u8, sha1::digest::consts::U20> {
1049        self.pack_hasher.clone().finalize()
1050    }
1051
1052    /// Trailing pack checksum; not included in [`Self::finalize_hasher`].
1053    fn read_trailer_20(&mut self) -> Result<[u8; 20]> {
1054        let mut b = [0u8; 20];
1055        if self.pending.len() >= 20 {
1056            b.copy_from_slice(&self.pending[..20]);
1057            self.pending.drain(..20);
1058            self.stream_pos += 20;
1059            self.enforce_max_input()?;
1060            return Ok(b);
1061        }
1062        let tail = self.pending.len();
1063        if tail > 0 {
1064            b[..tail].copy_from_slice(&self.pending[..]);
1065            self.pending.clear();
1066        }
1067        self.inner
1068            .read_exact(&mut b[tail..])
1069            .map_err(|e| io_to_corrupt_eof(e, self.stream_pos, "trailer"))?;
1070        self.stream_pos += 20;
1071        self.enforce_max_input()?;
1072        Ok(b)
1073    }
1074}
1075
1076/// Apply a git "patch delta" to `base`, producing the patched result.
1077///
1078/// The delta binary format is:
1079/// 1. Source size: variable-length little-endian integer (must equal
1080///    `base.len()`).
1081/// 2. Destination size: variable-length little-endian integer.
1082/// 3. A sequence of COPY (MSB set) and INSERT (MSB clear) instructions.
1083///
1084/// # Errors
1085///
1086/// Returns [`Error::CorruptObject`] if the delta is malformed, the source-size
1087/// field does not match `base.len()`, or the result length does not match the
1088/// declared destination size.
1089pub fn apply_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
1090    let mut pos = 0usize;
1091
1092    let src_size = read_delta_varint(delta, &mut pos)?;
1093    if src_size != base.len() {
1094        return Err(Error::CorruptObject(format!(
1095            "delta source size {src_size} != base size {}",
1096            base.len()
1097        )));
1098    }
1099    let dest_size = read_delta_varint(delta, &mut pos)?;
1100    let mut result = Vec::with_capacity(dest_size);
1101
1102    while pos < delta.len() {
1103        let cmd = delta[pos];
1104        pos += 1;
1105        if cmd == 0 {
1106            return Err(Error::CorruptObject(
1107                "reserved opcode 0 in delta stream".to_owned(),
1108            ));
1109        }
1110        if cmd & 0x80 != 0 {
1111            // COPY instruction: up to 4 offset bytes (bits 0-3) and up to 3
1112            // size bytes (bits 4-6) are present, each controlled by a flag bit.
1113            let mut offset = 0usize;
1114            let mut size = 0usize;
1115
1116            macro_rules! maybe_read_byte {
1117                ($flag:expr, $shift:expr, $target:expr) => {
1118                    if cmd & $flag != 0 {
1119                        let b = *delta.get(pos).ok_or_else(|| {
1120                            Error::CorruptObject("truncated delta COPY operand".to_owned())
1121                        })?;
1122                        pos += 1;
1123                        $target |= (b as usize) << $shift;
1124                    }
1125                };
1126            }
1127
1128            maybe_read_byte!(0x01, 0, offset);
1129            maybe_read_byte!(0x02, 8, offset);
1130            maybe_read_byte!(0x04, 16, offset);
1131            maybe_read_byte!(0x08, 24, offset);
1132            maybe_read_byte!(0x10, 0, size);
1133            maybe_read_byte!(0x20, 8, size);
1134            maybe_read_byte!(0x40, 16, size);
1135
1136            if size == 0 {
1137                size = 0x10000;
1138            }
1139
1140            let end = offset.checked_add(size).ok_or_else(|| {
1141                Error::CorruptObject("delta COPY range overflows usize".to_owned())
1142            })?;
1143            let chunk = base.get(offset..end).ok_or_else(|| {
1144                Error::CorruptObject(format!(
1145                    "delta COPY [{offset},{end}) out of range (base is {} bytes)",
1146                    base.len()
1147                ))
1148            })?;
1149            result.extend_from_slice(chunk);
1150        } else {
1151            // INSERT instruction: copy the next `cmd` literal bytes verbatim.
1152            let n = cmd as usize;
1153            let chunk = delta
1154                .get(pos..pos + n)
1155                .ok_or_else(|| Error::CorruptObject("truncated delta INSERT data".to_owned()))?;
1156            result.extend_from_slice(chunk);
1157            pos += n;
1158        }
1159    }
1160
1161    if result.len() != dest_size {
1162        return Err(Error::CorruptObject(format!(
1163            "delta produced {} bytes but expected {dest_size}",
1164            result.len()
1165        )));
1166    }
1167
1168    Ok(result)
1169}
1170
1171/// Read a variable-length little-endian integer from `data` starting at `*pos`.
1172///
1173/// Advances `*pos` past the consumed bytes.
1174fn read_delta_varint(data: &[u8], pos: &mut usize) -> Result<usize> {
1175    let mut value = 0usize;
1176    let mut shift = 0u32;
1177    loop {
1178        let b = *data
1179            .get(*pos)
1180            .ok_or_else(|| Error::CorruptObject("truncated delta varint".to_owned()))?;
1181        *pos += 1;
1182        value |= ((b & 0x7f) as usize) << shift;
1183        shift += 7;
1184        if b & 0x80 == 0 {
1185            break;
1186        }
1187    }
1188    Ok(value)
1189}
1190
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a minimal version-2 pack from a list of `(kind, data)` pairs.
    ///
    /// Every object is stored undeltified (type+size header followed by its
    /// zlib-compressed data). Returns the raw pack bytes, including the
    /// trailing SHA-1 over everything that precedes it.
    fn make_pack(objects: &[(ObjectKind, &[u8])]) -> Vec<u8> {
        use flate2::write::ZlibEncoder;
        use std::io::Write;

        let mut entries: Vec<Vec<u8>> = Vec::new();
        for (kind, data) in objects {
            let type_code: u8 = match kind {
                ObjectKind::Commit => 1,
                ObjectKind::Tree => 2,
                ObjectKind::Blob => 3,
                ObjectKind::Tag => 4,
            };
            // Encode the type+size header: the first byte carries the type and
            // the low 4 size bits, later bytes carry 7 size bits each, and the
            // high bit marks "more bytes follow".
            let mut header = Vec::new();
            let mut size = data.len();
            let first = ((type_code & 0x7) << 4) | (size & 0x0f) as u8;
            size >>= 4;
            if size > 0 {
                header.push(first | 0x80);
                while size > 0 {
                    let b = (size & 0x7f) as u8;
                    size >>= 7;
                    header.push(if size > 0 { b | 0x80 } else { b });
                }
            } else {
                header.push(first);
            }
            // zlib-compress the object data.
            let mut enc = ZlibEncoder::new(Vec::new(), flate2::Compression::default());
            enc.write_all(data).unwrap();
            let compressed = enc.finish().unwrap();
            let mut entry = header;
            entry.extend_from_slice(&compressed);
            entries.push(entry);
        }

        // Assemble: PACK + version(2) + count + entries + SHA-1.
        let mut pack = Vec::new();
        pack.extend_from_slice(b"PACK");
        pack.extend_from_slice(&2u32.to_be_bytes());
        pack.extend_from_slice(&(objects.len() as u32).to_be_bytes());
        for entry in &entries {
            pack.extend_from_slice(entry);
        }
        let mut hasher = Sha1::new();
        hasher.update(&pack);
        let digest = hasher.finalize();
        pack.extend_from_slice(digest.as_slice());
        pack
    }

    /// A COPY of the whole base followed by an INSERT yields "hello world".
    #[test]
    fn test_apply_delta_simple() {
        // Build a small delta: copy "hello" from the base, then insert " world".
        let base = b"hello";
        let mut delta = Vec::new();
        // src_size = 5
        delta.push(5u8);
        // dest_size = 11
        delta.push(11u8);
        // COPY instruction: copy base[0..5]
        // cmd = 0x80 | 0x01 (offset present, byte 0) | 0x10 (size byte 0)
        delta.push(0x80 | 0x01 | 0x10); // 0x91
        delta.push(0u8); // offset = 0
        delta.push(5u8); // size = 5
        // INSERT " world" (6 bytes)
        delta.push(6u8);
        delta.extend_from_slice(b" world");

        let result = apply_delta(base, &delta).unwrap();
        assert_eq!(result, b"hello world");
    }

    /// A delta made of a single INSERT works against an empty base.
    #[test]
    fn test_apply_delta_insert_only() {
        let base = b"";
        let mut delta = Vec::new();
        delta.push(0u8); // src_size = 0
        delta.push(5u8); // dest_size = 5
        delta.push(5u8); // INSERT 5 bytes
        delta.extend_from_slice(b"hello");

        let result = apply_delta(base, &delta).unwrap();
        assert_eq!(result, b"hello");
    }

    /// A delta made of a single COPY extracts a sub-range of the base.
    #[test]
    fn test_apply_delta_copy_only() {
        let base = b"abcdef";
        let mut delta = Vec::new();
        delta.push(6u8); // src_size = 6
        delta.push(3u8); // dest_size = 3
        // COPY base[2..5]: offset=2, size=3
        // cmd = 0x80 | 0x01 | 0x10
        delta.push(0x91u8);
        delta.push(2u8); // offset = 2
        delta.push(3u8); // size = 3

        let result = apply_delta(base, &delta).unwrap();
        assert_eq!(result, b"cde");
    }

    /// A COPY whose size operand is absent (or zero) copies 0x10000 bytes.
    #[test]
    fn test_apply_delta_size_zero_means_65536() {
        // A COPY with size bytes all zero means 0x10000 = 65536.
        let base = vec![0xABu8; 65536];
        let mut delta = Vec::new();
        // src_size = 65536, encoded as 3 bytes little-endian varint
        delta.push(0x80 | (65536 & 0x7f) as u8); // payload 0, continuation bit set
        delta.push(0x80 | ((65536 >> 7) & 0x7f) as u8); // payload 0, continuation bit set
        delta.push(((65536 >> 14) & 0x7f) as u8); // payload 4, final byte
        // dest_size = 65536, same encoding
        delta.push(0x80 | (65536 & 0x7f) as u8);
        delta.push(0x80 | ((65536 >> 7) & 0x7f) as u8);
        delta.push(((65536 >> 14) & 0x7f) as u8);
        // COPY: offset=0 (no offset bytes), size=0 (no size bytes) → means 0x10000
        // cmd = 0x80 (no offset/size bytes present at all → offset=0, size=0→65536)
        delta.push(0x80u8);

        let result = apply_delta(&base, &delta).unwrap();
        assert_eq!(result.len(), 65536);
        assert!(result.iter().all(|&b| b == 0xAB));
    }

    /// Two blobs in one pack are both unpacked and readable from the ODB.
    #[test]
    fn test_unpack_objects_blobs() {
        use tempfile::TempDir;
        let tmp = TempDir::new().unwrap();
        let objects_dir = tmp.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);

        let pack = make_pack(&[
            (ObjectKind::Blob, b"hello\n"),
            (ObjectKind::Blob, b"world\n"),
        ]);

        let opts = UnpackOptions::default();
        let count = unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap();
        assert_eq!(count, 2);

        // Verify both blobs can be read back.
        let oid1 = Odb::hash_object_data(ObjectKind::Blob, b"hello\n");
        let oid2 = Odb::hash_object_data(ObjectKind::Blob, b"world\n");
        let obj1 = odb.read(&oid1).unwrap();
        let obj2 = odb.read(&oid2).unwrap();
        assert_eq!(obj1.data, b"hello\n");
        assert_eq!(obj2.data, b"world\n");
    }

    /// A 0-byte object (the empty tree) is unpacked and written as a loose file.
    #[test]
    fn test_unpack_objects_empty_tree() {
        use tempfile::TempDir;
        let tmp = TempDir::new().unwrap();
        let objects_dir = tmp.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);

        let pack = make_pack(&[(ObjectKind::Tree, b"")]);
        let opts = UnpackOptions::default();
        assert_eq!(
            unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap(),
            1
        );
        let oid = Odb::hash_object_data(ObjectKind::Tree, b"");
        assert!(odb.exists(&oid));
        let loose = objects_dir
            .join(oid.loose_prefix())
            .join(oid.loose_suffix());
        assert!(
            loose.is_file(),
            "empty tree must be materialized as a loose object during unpack"
        );
    }

    /// Strict connectivity ignores gitlink entries but still flags a missing blob.
    #[test]
    fn test_strict_skips_gitlink_tree_entries() {
        use crate::index::{MODE_GITLINK, MODE_REGULAR};
        use crate::objects::{serialize_tree, TreeEntry};

        // A submodule commit oid that is NOT in the pack/ODB (lives in the
        // submodule repository, like a 160000 gitlink target on push).
        let submodule_oid = ObjectId::from_hex(&"7f".repeat(20)).unwrap();

        // Superproject tree referencing the submodule via a gitlink entry.
        let tree_data = serialize_tree(&[TreeEntry {
            mode: MODE_GITLINK,
            name: b"sub".to_vec(),
            oid: submodule_oid,
        }]);
        let tree_oid = Odb::hash_object_data(ObjectKind::Tree, &tree_data);

        // Strict connectivity must NOT flag the gitlink target as missing,
        // matching upstream git (git/fsck.c skips S_ISGITLINK entries).
        let mut pack = HashMap::new();
        pack.insert(tree_oid, (ObjectKind::Tree, tree_data.clone()));
        assert!(strict_verify_packed_references(None, &pack).is_ok());

        // Regression guard: a non-gitlink (regular file) entry pointing at an
        // absent blob must still be reported as a strict connectivity error.
        let bad_tree = serialize_tree(&[TreeEntry {
            mode: MODE_REGULAR,
            name: b"file".to_vec(),
            oid: ObjectId::from_hex(&"ab".repeat(20)).unwrap(),
        }]);
        let bad_oid = Odb::hash_object_data(ObjectKind::Tree, &bad_tree);
        let mut bad_pack = HashMap::new();
        bad_pack.insert(bad_oid, (ObjectKind::Tree, bad_tree));
        assert!(matches!(
            strict_verify_packed_references(None, &bad_pack),
            Err(Error::CorruptObject(_))
        ));
    }

    /// `Read` that returns at most `max_len` bytes per call (simulates side-band chunking).
    struct ChunkedReader<'a> {
        /// Full byte sequence to hand out.
        data: &'a [u8],
        /// Offset of the next byte to return.
        pos: usize,
        /// Upper bound on the number of bytes returned by a single `read` call.
        max_len: usize,
    }

    impl io::Read for ChunkedReader<'_> {
        fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
            if self.pos >= self.data.len() {
                return Ok(0);
            }
            // Never hand out more than the remaining data, the per-call cap,
            // or the caller's buffer can hold.
            let take = (self.data.len() - self.pos)
                .min(self.max_len)
                .min(buf.len());
            buf[..take].copy_from_slice(&self.data[self.pos..self.pos + take]);
            self.pos += take;
            Ok(take)
        }
    }

    /// Unpacking succeeds both from a contiguous slice and from 8-byte reads.
    #[test]
    fn test_unpack_objects_chunked_read_matches_full_buffer() {
        use tempfile::TempDir;
        let pack = make_pack(&[(ObjectKind::Blob, b"chunked-stream")]);
        let opts = UnpackOptions::default();
        let oid = Odb::hash_object_data(ObjectKind::Blob, b"chunked-stream");

        // Baseline: the whole pack is available in one contiguous slice.
        let tmp = TempDir::new().unwrap();
        let objects_dir = tmp.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);
        assert_eq!(
            unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap(),
            1
        );
        assert!(odb.exists(&oid));

        // Same pack delivered at most 8 bytes per read, into a fresh ODB.
        let tmp2 = TempDir::new().unwrap();
        let objects_dir2 = tmp2.path().join("objects");
        std::fs::create_dir_all(&objects_dir2).unwrap();
        let odb2 = Odb::new(&objects_dir2);
        let mut chunked = ChunkedReader {
            data: pack.as_slice(),
            pos: 0,
            max_len: 8,
        };
        assert_eq!(unpack_objects(&mut chunked, &odb2, &opts).unwrap(), 1);
        assert!(odb2.exists(&oid));
    }

    /// With `dry_run` set the object is counted but never written to the ODB.
    #[test]
    fn test_unpack_objects_dry_run_writes_nothing() {
        use tempfile::TempDir;
        let tmp = TempDir::new().unwrap();
        let objects_dir = tmp.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);

        let pack = make_pack(&[(ObjectKind::Blob, b"test content")]);

        let opts = UnpackOptions {
            dry_run: true,
            quiet: true,
            strict: false,
            allowed_missing: Default::default(),
            allow_promisor_missing_references: false,
            max_input_bytes: None,
            ..Default::default()
        };
        let count = unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap();
        assert_eq!(count, 1);

        // Nothing should be written.
        let oid = Odb::hash_object_data(ObjectKind::Blob, b"test content");
        assert!(!odb.exists(&oid));
    }

    /// A stream that does not start with `PACK` is rejected.
    #[test]
    fn test_unpack_objects_bad_signature() {
        use tempfile::TempDir;
        let tmp = TempDir::new().unwrap();
        let objects_dir = tmp.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);

        // Wrong magic, then version 2, zero objects and an all-zero trailer.
        let mut bad = b"NOPE\x00\x00\x00\x02\x00\x00\x00\x00".to_vec();
        bad.extend_from_slice(&[0u8; 20]);
        let opts = UnpackOptions::default();
        let err = unpack_objects(&mut bad.as_slice(), &odb, &opts).unwrap_err();
        assert!(err.to_string().contains("invalid signature"));
    }

    /// A pack whose trailing SHA-1 does not match its contents is rejected.
    #[test]
    fn test_unpack_objects_checksum_mismatch() {
        use tempfile::TempDir;
        let tmp = TempDir::new().unwrap();
        let objects_dir = tmp.path().join("objects");
        std::fs::create_dir_all(&objects_dir).unwrap();
        let odb = Odb::new(&objects_dir);

        let mut pack = make_pack(&[(ObjectKind::Blob, b"data")]);
        // Corrupt the trailing checksum.
        let n = pack.len();
        pack[n - 1] ^= 0xFF;

        let opts = UnpackOptions::default();
        let err = unpack_objects(&mut pack.as_slice(), &odb, &opts).unwrap_err();
        assert!(err.to_string().contains("checksum"));
    }

    /// A delta whose declared source size differs from the base length is rejected.
    #[test]
    fn test_apply_delta_source_size_mismatch() {
        let base = b"hi";
        let delta = [3u8, 2u8, 2u8, b'h', b'i']; // src_size=3 != base.len()=2
        let err = apply_delta(base, &delta).unwrap_err();
        assert!(err.to_string().contains("source size"));
    }
}