Skip to main content

grit_lib/
pack.rs

1//! Pack and pack-index helpers for object counting and verification.
2//!
3//! This module implements a focused subset of pack functionality required by
4//! `count-objects`, `verify-pack`, and `show-index`.
5
6use crate::error::{Error, Result};
7use crate::objects::{Object, ObjectId, ObjectKind};
8use crate::odb::Odb;
9use crate::unpack_objects::apply_delta;
10use flate2::read::ZlibDecoder;
11use sha1::{Digest, Sha1};
12use std::collections::{BTreeMap, HashMap, HashSet};
13use std::fs;
14use std::io;
15use std::io::Read;
16use std::path::{Path, PathBuf};
17
18/// A parsed entry from an index file.
19#[derive(Debug, Clone)]
20pub struct PackIndexEntry {
21    /// Object identifier.
22    pub oid: ObjectId,
23    /// Byte offset of the object in the corresponding `.pack`.
24    pub offset: u64,
25}
26
27/// Parsed data from a `.idx` file (version 2).
28#[derive(Debug, Clone)]
29pub struct PackIndex {
30    /// Absolute path to the `.idx` file.
31    pub idx_path: PathBuf,
32    /// Absolute path to the `.pack` file.
33    pub pack_path: PathBuf,
34    /// Parsed entries in index order.
35    pub entries: Vec<PackIndexEntry>,
36}
37
38/// A single entry produced by `show-index`, with an optional CRC32.
39///
40/// Version-1 index files do not store CRC32 values; `crc32` is `None` for
41/// those entries.  Version-2 index files always carry a CRC32.
42#[derive(Debug, Clone)]
43pub struct ShowIndexEntry {
44    /// Object identifier.
45    pub oid: ObjectId,
46    /// Byte offset of the object in the corresponding `.pack` file.
47    pub offset: u64,
48    /// CRC32 of the compressed object data (v2 only).
49    pub crc32: Option<u32>,
50}
51
52/// Parse a pack index from a reader (e.g. stdin) and return all entries in
53/// index order.
54///
55/// Both version-1 (legacy) and version-2 index formats are supported.  Only
56/// SHA-1 (20-byte hash) objects are supported; pass `hash_size = 20`.
57///
58/// # Errors
59///
60/// Returns [`Error::CorruptObject`] when the data cannot be parsed as a valid
61/// pack index.
62pub fn show_index_entries(reader: &mut dyn Read, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
63    let mut buf = Vec::new();
64    reader.read_to_end(&mut buf).map_err(Error::Io)?;
65
66    if buf.len() < 8 {
67        return Err(Error::CorruptObject(
68            "unable to read header: index file too small".to_owned(),
69        ));
70    }
71
72    let mut pos = 0usize;
73    let first_u32 = read_u32_be(&buf, &mut pos)?;
74
75    const PACK_IDX_SIGNATURE: u32 = 0xff74_4f63;
76
77    if first_u32 == PACK_IDX_SIGNATURE {
78        // Version 2 (or higher): read version word, then 256-entry fanout.
79        let version = read_u32_be(&buf, &mut pos)?;
80        if version != 2 {
81            return Err(Error::CorruptObject(format!(
82                "unknown index version: {version}"
83            )));
84        }
85        show_index_v2(&buf, &mut pos, hash_size)
86    } else {
87        // Version 1: the two u32s we already started reading are the first two
88        // fanout entries.  Re-read the whole fanout from the top.
89        pos = 0;
90        show_index_v1(&buf, &mut pos, hash_size)
91    }
92}
93
94/// Parse version-1 pack index entries from `buf`.
95fn show_index_v1(buf: &[u8], pos: &mut usize, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
96    if buf.len() < 256 * 4 {
97        return Err(Error::CorruptObject(
98            "unable to read index: v1 fanout too short".to_owned(),
99        ));
100    }
101    let mut fanout = [0u32; 256];
102    for slot in &mut fanout {
103        *slot = read_u32_be(buf, pos)?;
104    }
105    let object_count = fanout[255] as usize;
106
107    let mut entries = Vec::with_capacity(object_count);
108    for i in 0..object_count {
109        // Each record: 4-byte big-endian offset + hash_size-byte OID.
110        if *pos + 4 + hash_size > buf.len() {
111            return Err(Error::CorruptObject(format!(
112                "unable to read entry {i}/{object_count}: truncated"
113            )));
114        }
115        let offset = read_u32_be(buf, pos)? as u64;
116        let oid = ObjectId::from_bytes(&buf[*pos..*pos + hash_size])?;
117        *pos += hash_size;
118        entries.push(ShowIndexEntry {
119            oid,
120            offset,
121            crc32: None,
122        });
123    }
124    Ok(entries)
125}
126
127/// Parse version-2 pack index entries from `buf` starting after the magic and
128/// version words (fanout table is next).
129fn show_index_v2(buf: &[u8], pos: &mut usize, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
130    if buf.len() < *pos + 256 * 4 {
131        return Err(Error::CorruptObject(
132            "unable to read index: v2 fanout too short".to_owned(),
133        ));
134    }
135    let mut fanout = [0u32; 256];
136    for slot in &mut fanout {
137        *slot = read_u32_be(buf, pos)?;
138    }
139    let object_count = fanout[255] as usize;
140
141    // OID table.
142    let mut oids = Vec::with_capacity(object_count);
143    for i in 0..object_count {
144        if *pos + hash_size > buf.len() {
145            return Err(Error::CorruptObject(format!(
146                "unable to read sha1 {i}/{object_count}: truncated"
147            )));
148        }
149        let oid = ObjectId::from_bytes(&buf[*pos..*pos + hash_size])?;
150        *pos += hash_size;
151        oids.push(oid);
152    }
153
154    // CRC32 table.
155    let mut crcs = Vec::with_capacity(object_count);
156    for i in 0..object_count {
157        if *pos + 4 > buf.len() {
158            return Err(Error::CorruptObject(format!(
159                "unable to read crc {i}/{object_count}: truncated"
160            )));
161        }
162        crcs.push(read_u32_be(buf, pos)?);
163    }
164
165    // 32-bit offset table.
166    let mut offsets32 = Vec::with_capacity(object_count);
167    let mut large_count = 0usize;
168    for i in 0..object_count {
169        if *pos + 4 > buf.len() {
170            return Err(Error::CorruptObject(format!(
171                "unable to read 32b offset {i}/{object_count}: truncated"
172            )));
173        }
174        let v = read_u32_be(buf, pos)?;
175        if (v & 0x8000_0000) != 0 {
176            large_count += 1;
177        }
178        offsets32.push(v);
179    }
180
181    // 64-bit large-offset table.
182    let mut large_offsets = Vec::with_capacity(large_count);
183    for i in 0..large_count {
184        if *pos + 8 > buf.len() {
185            return Err(Error::CorruptObject(format!(
186                "unable to read 64b offset {i}: truncated"
187            )));
188        }
189        large_offsets.push(read_u64_be(buf, pos)?);
190    }
191
192    let mut next_large = 0usize;
193    let mut entries = Vec::with_capacity(object_count);
194    for (i, oid) in oids.into_iter().enumerate() {
195        let raw = offsets32[i];
196        let offset = if (raw & 0x8000_0000) == 0 {
197            raw as u64
198        } else {
199            let idx = (raw & 0x7fff_ffff) as usize;
200            if idx != next_large {
201                return Err(Error::CorruptObject(format!(
202                    "inconsistent 64b offset index at entry {i}"
203                )));
204            }
205            let off = large_offsets.get(next_large).copied().ok_or_else(|| {
206                Error::CorruptObject(format!("missing large offset entry {next_large}"))
207            })?;
208            next_large += 1;
209            off
210        };
211        entries.push(ShowIndexEntry {
212            oid,
213            offset,
214            crc32: Some(crcs[i]),
215        });
216    }
217    Ok(entries)
218}
219
220/// Basic information about local packs.
221#[derive(Debug, Clone, Default)]
222pub struct LocalPackInfo {
223    /// Number of valid local packs.
224    pub pack_count: usize,
225    /// Total objects across all valid local packs.
226    pub object_count: usize,
227    /// Combined on-disk bytes of `.pack` + `.idx`.
228    pub size_bytes: u64,
229    /// Set of all object IDs present in local packs.
230    pub object_ids: HashSet<ObjectId>,
231}
232
233/// Read all valid `.idx` files in `objects/pack`.
234///
235/// # Errors
236///
237/// Returns [`Error::Io`] for directory-level failures. Individual invalid pack
238/// pairs are skipped.
239pub fn read_local_pack_indexes(objects_dir: &Path) -> Result<Vec<PackIndex>> {
240    let pack_dir = objects_dir.join("pack");
241    let rd = match fs::read_dir(&pack_dir) {
242        Ok(rd) => rd,
243        Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Vec::new()),
244        Err(err) => return Err(Error::Io(err)),
245    };
246
247    let mut out = Vec::new();
248    for entry in rd {
249        let entry = entry.map_err(Error::Io)?;
250        let path = entry.path();
251        if path.extension().and_then(|s| s.to_str()) != Some("idx") {
252            continue;
253        }
254        if let Ok(idx) = read_pack_index(&path) {
255            out.push(idx);
256        }
257    }
258    Ok(out)
259}
260
261/// Collect aggregate local pack metrics.
262///
263/// # Errors
264///
265/// Returns [`Error::Io`] when reading pack metadata fails.
266pub fn collect_local_pack_info(objects_dir: &Path) -> Result<LocalPackInfo> {
267    let indexes = read_local_pack_indexes(objects_dir)?;
268    let mut info = LocalPackInfo::default();
269    for idx in indexes {
270        let pack_meta = fs::metadata(&idx.pack_path).map_err(Error::Io)?;
271        let idx_meta = fs::metadata(&idx.idx_path).map_err(Error::Io)?;
272        info.pack_count += 1;
273        info.object_count += idx.entries.len();
274        info.size_bytes += pack_meta.len() + idx_meta.len();
275        for entry in idx.entries {
276            info.object_ids.insert(entry.oid);
277        }
278    }
279    Ok(info)
280}
281
282/// Parse a version-2 pack index file.
283///
284/// # Errors
285///
286/// Returns [`Error::CorruptObject`] when format checks fail.
287pub fn read_pack_index(idx_path: &Path) -> Result<PackIndex> {
288    let bytes = fs::read(idx_path).map_err(Error::Io)?;
289    if bytes.len() < 8 + 256 * 4 + 40 {
290        return Err(Error::CorruptObject(format!(
291            "index file {} is too small",
292            idx_path.display()
293        )));
294    }
295
296    let mut pos = 0usize;
297    let magic = &bytes[pos..pos + 4];
298    pos += 4;
299    if magic != [0xff, b't', b'O', b'c'] {
300        return Err(Error::CorruptObject(format!(
301            "unsupported idx signature in {}",
302            idx_path.display()
303        )));
304    }
305    let version = read_u32_be(&bytes, &mut pos)?;
306    if version != 2 {
307        return Err(Error::CorruptObject(format!(
308            "unsupported idx version {} in {}",
309            version,
310            idx_path.display()
311        )));
312    }
313
314    let mut fanout = [0u32; 256];
315    for slot in &mut fanout {
316        *slot = read_u32_be(&bytes, &mut pos)?;
317    }
318    let object_count = fanout[255] as usize;
319
320    let need = pos
321        .saturating_add(object_count * 20)
322        .saturating_add(object_count * 4)
323        .saturating_add(object_count * 4)
324        .saturating_add(40);
325    if bytes.len() < need {
326        return Err(Error::CorruptObject(format!(
327            "truncated idx file {}",
328            idx_path.display()
329        )));
330    }
331
332    let mut oids = Vec::with_capacity(object_count);
333    for _ in 0..object_count {
334        let oid = ObjectId::from_bytes(&bytes[pos..pos + 20])?;
335        pos += 20;
336        oids.push(oid);
337    }
338
339    // Skip CRC table.
340    pos += object_count * 4;
341
342    let mut offsets32 = Vec::with_capacity(object_count);
343    let mut large_count = 0usize;
344    for _ in 0..object_count {
345        let v = read_u32_be(&bytes, &mut pos)?;
346        if (v & 0x8000_0000) != 0 {
347            large_count += 1;
348        }
349        offsets32.push(v);
350    }
351
352    if bytes.len() < pos + large_count * 8 + 40 {
353        return Err(Error::CorruptObject(format!(
354            "truncated large offset table in {}",
355            idx_path.display()
356        )));
357    }
358    let mut large_offsets = Vec::with_capacity(large_count);
359    for _ in 0..large_count {
360        large_offsets.push(read_u64_be(&bytes, &mut pos)?);
361    }
362
363    let mut next_large = 0usize;
364    let mut entries = Vec::with_capacity(object_count);
365    for (i, oid) in oids.into_iter().enumerate() {
366        let raw = offsets32[i];
367        let offset = if (raw & 0x8000_0000) == 0 {
368            raw as u64
369        } else {
370            let off = large_offsets.get(next_large).copied().ok_or_else(|| {
371                Error::CorruptObject(format!("bad large offset index in {}", idx_path.display()))
372            })?;
373            next_large += 1;
374            off
375        };
376        entries.push(PackIndexEntry { oid, offset });
377    }
378
379    let mut pack_path = idx_path.to_path_buf();
380    pack_path.set_extension("pack");
381
382    // Trailing 20 bytes are SHA-1 over all preceding index bytes (Git format).
383    if bytes.len() < 20 {
384        return Err(Error::CorruptObject(format!(
385            "index file {} missing checksum",
386            idx_path.display()
387        )));
388    }
389    let idx_body_end = bytes.len() - 20;
390    let mut h = Sha1::new();
391    h.update(&bytes[..idx_body_end]);
392    let digest = h.finalize();
393    if digest.as_slice() != &bytes[idx_body_end..] {
394        return Err(Error::CorruptObject(format!(
395            "index checksum mismatch for {}",
396            idx_path.display()
397        )));
398    }
399
400    Ok(PackIndex {
401        idx_path: idx_path.to_path_buf(),
402        pack_path,
403        entries,
404    })
405}
406
/// Kind of entry stored in a pack, as recorded in the entry's header.
///
/// The first four variants are complete objects; the last two are deltas
/// that must be applied to a base object.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PackedType {
    /// A complete commit.
    Commit,
    /// A complete tree.
    Tree,
    /// A complete blob.
    Blob,
    /// A complete annotated tag.
    Tag,
    /// Delta whose base is named by a pack offset.
    OfsDelta,
    /// Delta whose base is named by an object id.
    RefDelta,
}

impl PackedType {
    /// Name of this type as it is printed by `verify-pack -v`.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        match self {
            PackedType::OfsDelta => "ofs-delta",
            PackedType::RefDelta => "ref-delta",
            PackedType::Commit => "commit",
            PackedType::Tree => "tree",
            PackedType::Blob => "blob",
            PackedType::Tag => "tag",
        }
    }
}
438
439/// A decoded object header record used by `verify-pack`.
440#[derive(Debug, Clone)]
441pub struct VerifyObjectRecord {
442    /// Object ID from the index.
443    pub oid: ObjectId,
444    /// Type from the pack stream header.
445    pub packed_type: PackedType,
446    /// Uncompressed object size from the pack header.
447    pub size: u64,
448    /// Total bytes in pack occupied by this object slot.
449    pub size_in_pack: u64,
450    /// Offset in pack file.
451    pub offset: u64,
452    /// Delta chain depth, if deltified.
453    pub depth: Option<u64>,
454    /// Base object for ref-delta objects.
455    pub base_oid: Option<ObjectId>,
456}
457
458/// Verify one pack/index pair and optionally return object records.
459///
460/// # Errors
461///
462/// Returns [`Error::CorruptObject`] when the index or pack are malformed.
463pub fn verify_pack_and_collect(idx_path: &Path) -> Result<Vec<VerifyObjectRecord>> {
464    let idx = read_pack_index(idx_path)?;
465    let idx_file_bytes = fs::read(idx_path).map_err(Error::Io)?;
466    let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
467    if pack_bytes.len() < 12 + 20 {
468        return Err(Error::CorruptObject(format!(
469            "pack file {} is too small",
470            idx.pack_path.display()
471        )));
472    }
473    let pack_end = pack_bytes.len() - 20;
474    {
475        let mut h = Sha1::new();
476        h.update(&pack_bytes[..pack_end]);
477        let digest = h.finalize();
478        if digest.as_slice() != &pack_bytes[pack_end..] {
479            return Err(Error::CorruptObject(format!(
480                "pack trailing checksum mismatch for {}",
481                idx.pack_path.display()
482            )));
483        }
484    }
485    if idx_file_bytes.len() >= 40 {
486        let embedded = &idx_file_bytes[idx_file_bytes.len() - 40..idx_file_bytes.len() - 20];
487        if embedded != &pack_bytes[pack_end..] {
488            return Err(Error::CorruptObject(format!(
489                "pack checksum in index does not match {}",
490                idx.pack_path.display()
491            )));
492        }
493    }
494    if &pack_bytes[0..4] != b"PACK" {
495        return Err(Error::CorruptObject(format!(
496            "pack file {} has invalid signature",
497            idx.pack_path.display()
498        )));
499    }
500    let version = u32::from_be_bytes(pack_bytes[4..8].try_into().unwrap_or([0, 0, 0, 0]));
501    if version != 2 && version != 3 {
502        return Err(Error::CorruptObject(format!(
503            "unsupported pack version {} in {}",
504            version,
505            idx.pack_path.display()
506        )));
507    }
508    let count = u32::from_be_bytes(pack_bytes[8..12].try_into().unwrap_or([0, 0, 0, 0])) as usize;
509    if count != idx.entries.len() {
510        return Err(Error::CorruptObject(format!(
511            "pack/index object count mismatch for {}",
512            idx.pack_path.display()
513        )));
514    }
515
516    let mut by_offset: BTreeMap<u64, ObjectId> = BTreeMap::new();
517    for entry in &idx.entries {
518        by_offset.insert(entry.offset, entry.oid);
519    }
520    let offsets: Vec<u64> = by_offset.keys().copied().collect();
521    if offsets.is_empty() {
522        return Ok(Vec::new());
523    }
524
525    let mut by_oid: HashMap<ObjectId, usize> = HashMap::new();
526    let mut records: Vec<VerifyObjectRecord> = Vec::with_capacity(offsets.len());
527    for (i, offset) in offsets.iter().copied().enumerate() {
528        let oid = by_offset.get(&offset).copied().ok_or_else(|| {
529            Error::CorruptObject(format!("missing object id for offset {}", offset))
530        })?;
531        let next_off = offsets
532            .get(i + 1)
533            .copied()
534            .unwrap_or((pack_bytes.len() - 20) as u64);
535        if next_off <= offset || next_off > (pack_bytes.len() - 20) as u64 {
536            return Err(Error::CorruptObject(format!(
537                "invalid object boundaries at offset {} in {}",
538                offset,
539                idx.pack_path.display()
540            )));
541        }
542        let mut p = offset as usize;
543        let (packed_type, size) = parse_pack_object_header(&pack_bytes, &mut p)?;
544        let mut base_oid = None;
545        let mut depth = None;
546
547        match packed_type {
548            PackedType::RefDelta => {
549                if p + 20 > pack_bytes.len() {
550                    return Err(Error::CorruptObject(format!(
551                        "truncated ref-delta base at offset {}",
552                        offset
553                    )));
554                }
555                base_oid = Some(ObjectId::from_bytes(&pack_bytes[p..p + 20])?);
556            }
557            PackedType::OfsDelta => {
558                let base_offset = parse_ofs_delta_base(&pack_bytes, &mut p, offset)?;
559                let base_depth = records
560                    .iter()
561                    .find(|r| r.offset == base_offset)
562                    .and_then(|r| r.depth)
563                    .unwrap_or(0);
564                depth = Some(base_depth + 1);
565            }
566            PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {}
567        }
568
569        let size_in_pack = next_off - offset;
570        records.push(VerifyObjectRecord {
571            oid,
572            packed_type,
573            size,
574            size_in_pack,
575            offset,
576            depth,
577            base_oid,
578        });
579        by_oid.insert(oid, i);
580    }
581
582    // Fill ref-delta depths in a second pass once all base objects are known.
583    for i in 0..records.len() {
584        if records[i].packed_type != PackedType::RefDelta {
585            continue;
586        }
587        let base = records[i]
588            .base_oid
589            .ok_or_else(|| Error::CorruptObject("ref-delta missing base oid".to_owned()))?;
590        let base_depth = by_oid
591            .get(&base)
592            .and_then(|idx| records.get(*idx))
593            .and_then(|r| r.depth)
594            .unwrap_or(0);
595        records[i].depth = Some(base_depth + 1);
596    }
597
598    // Confirm each index OID matches the resolved object bytes (catches swapped .idx/.pack pairs).
599    for entry in &idx.entries {
600        let obj = read_object_from_pack(&idx, &entry.oid)?;
601        let computed = Odb::hash_object_data(obj.kind, &obj.data);
602        if computed != entry.oid {
603            return Err(Error::CorruptObject(format!(
604                "pack object hash mismatch at offset {} (index says {})",
605                entry.offset, entry.oid
606            )));
607        }
608    }
609
610    Ok(records)
611}
612
613/// Read alternates recursively, deduplicated in discovery order.
614///
615/// # Errors
616///
617/// Returns [`Error::Io`] when alternate files cannot be read.
618pub fn read_alternates_recursive(objects_dir: &Path) -> Result<Vec<PathBuf>> {
619    let mut visited = HashSet::new();
620    let mut out = Vec::new();
621    read_alternates_inner(objects_dir, &mut visited, &mut out, 0)?;
622    Ok(out)
623}
624
625/// Maximum alternate chain depth (git uses 5).
626const MAX_ALTERNATE_DEPTH: usize = 5;
627
628fn read_alternates_inner(
629    objects_dir: &Path,
630    visited: &mut HashSet<PathBuf>,
631    out: &mut Vec<PathBuf>,
632    depth: usize,
633) -> Result<()> {
634    if depth > MAX_ALTERNATE_DEPTH {
635        return Ok(());
636    }
637    let canonical = canonical_or_self(objects_dir);
638    let alt_file = canonical.join("info").join("alternates");
639    let text = match fs::read_to_string(&alt_file) {
640        Ok(text) => text,
641        Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(()),
642        Err(err) => return Err(Error::Io(err)),
643    };
644
645    for raw in text.lines() {
646        let line = raw.trim();
647        if line.is_empty() {
648            continue;
649        }
650        let candidate = if Path::new(line).is_absolute() {
651            PathBuf::from(line)
652        } else {
653            canonical.join(line)
654        };
655        let candidate = canonical_or_self(&candidate);
656        if visited.insert(candidate.clone()) {
657            out.push(candidate.clone());
658            read_alternates_inner(&candidate, visited, out, depth + 1)?;
659        }
660    }
661    Ok(())
662}
663
/// Canonicalise `path`, falling back to `path` unchanged when that fails
/// (for example because it does not exist).
fn canonical_or_self(path: &Path) -> PathBuf {
    match fs::canonicalize(path) {
        Ok(resolved) => resolved,
        Err(_) => path.to_path_buf(),
    }
}
667
668/// Convert a [`PackedType`] to an [`ObjectKind`] for non-delta types.
669fn packed_type_to_kind(pt: PackedType) -> Result<ObjectKind> {
670    match pt {
671        PackedType::Commit => Ok(ObjectKind::Commit),
672        PackedType::Tree => Ok(ObjectKind::Tree),
673        PackedType::Blob => Ok(ObjectKind::Blob),
674        PackedType::Tag => Ok(ObjectKind::Tag),
675        PackedType::OfsDelta | PackedType::RefDelta => Err(Error::CorruptObject(
676            "cannot convert delta type to object kind directly".to_owned(),
677        )),
678    }
679}
680
681/// Decompress zlib data from a byte slice starting at `pos`.
682///
683/// Returns the decompressed data and advances `pos` past the consumed
684/// compressed bytes.
685fn decompress_pack_data(bytes: &[u8], pos: &mut usize, expected_size: u64) -> Result<Vec<u8>> {
686    let slice = &bytes[*pos..];
687    let mut decoder = ZlibDecoder::new(slice);
688    let mut out = Vec::with_capacity(expected_size as usize);
689    decoder
690        .read_to_end(&mut out)
691        .map_err(|e| Error::Zlib(e.to_string()))?;
692    *pos += decoder.total_in() as usize;
693    Ok(out)
694}
695
696/// Read and fully resolve one object from a pack file given its offset.
697///
698/// Handles OFS_DELTA and REF_DELTA by recursively reading the base object.
699/// The `idx` is used for REF_DELTA resolution (to find a base by OID).
700fn read_pack_object_at(
701    pack_bytes: &[u8],
702    offset: u64,
703    idx: &PackIndex,
704    depth: usize,
705) -> Result<(ObjectKind, Vec<u8>)> {
706    if depth > 50 {
707        return Err(Error::CorruptObject(
708            "delta chain too deep (>50)".to_owned(),
709        ));
710    }
711    let mut pos = offset as usize;
712    let (packed_type, size) = parse_pack_object_header(pack_bytes, &mut pos)?;
713
714    match packed_type {
715        PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {
716            let data = decompress_pack_data(pack_bytes, &mut pos, size)?;
717            let kind = packed_type_to_kind(packed_type)?;
718            Ok((kind, data))
719        }
720        PackedType::OfsDelta => {
721            let base_offset = parse_ofs_delta_base(pack_bytes, &mut pos, offset)?;
722            let delta_data = decompress_pack_data(pack_bytes, &mut pos, size)?;
723            let (base_kind, base_data) =
724                read_pack_object_at(pack_bytes, base_offset, idx, depth + 1)?;
725            let result = apply_delta(&base_data, &delta_data)?;
726            Ok((base_kind, result))
727        }
728        PackedType::RefDelta => {
729            if pos + 20 > pack_bytes.len() {
730                return Err(Error::CorruptObject(
731                    "truncated ref-delta base OID".to_owned(),
732                ));
733            }
734            let base_oid = ObjectId::from_bytes(&pack_bytes[pos..pos + 20])?;
735            pos += 20;
736            let delta_data = decompress_pack_data(pack_bytes, &mut pos, size)?;
737            // Find the base in the same pack index
738            let base_entry = idx
739                .entries
740                .iter()
741                .find(|e| e.oid == base_oid)
742                .ok_or_else(|| {
743                    Error::CorruptObject(format!("ref-delta base {} not found in pack", base_oid))
744                })?;
745            let (base_kind, base_data) =
746                read_pack_object_at(pack_bytes, base_entry.offset, idx, depth + 1)?;
747            let result = apply_delta(&base_data, &delta_data)?;
748            Ok((base_kind, result))
749        }
750    }
751}
752
753/// Read an object from a pack file by its OID.
754///
755/// Searches the given pack index for the OID, then reads and decompresses
756/// the object from the corresponding pack file, resolving delta chains.
757///
758/// # Errors
759///
760/// Returns [`Error::ObjectNotFound`] if the OID is not in this pack.
761pub fn read_object_from_pack(idx: &PackIndex, oid: &ObjectId) -> Result<Object> {
762    let entry = idx
763        .entries
764        .iter()
765        .find(|e| e.oid == *oid)
766        .ok_or_else(|| Error::ObjectNotFound(oid.to_hex()))?;
767
768    let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
769    let (kind, data) = read_pack_object_at(&pack_bytes, entry.offset, idx, 0)?;
770    Ok(Object::new(kind, data))
771}
772
773/// Search all pack indexes in `objects_dir` for the given OID and read it.
774///
775/// # Errors
776///
777/// Returns [`Error::ObjectNotFound`] if no pack contains the OID.
778pub fn read_object_from_packs(objects_dir: &Path, oid: &ObjectId) -> Result<Object> {
779    let indexes = read_local_pack_indexes(objects_dir)?;
780    for idx in &indexes {
781        if idx.entries.iter().any(|e| e.oid == *oid) {
782            return read_object_from_pack(idx, oid);
783        }
784    }
785    Err(Error::ObjectNotFound(oid.to_hex()))
786}
787
788/// When `oid` is stored as a delta in a pack, return its delta base object id.
789/// Returns [`None`] for loose objects and for non-delta packed objects.
790/// If `oid` is stored as `REF_DELTA` or `OFS_DELTA` in a local pack and its base OID is in
791/// `packed_set`, return the base OID and the **uncompressed** delta payload (Git binary delta).
792///
793/// Callers re-zlib when writing a new pack so we do not depend on copying raw deflate streams.
794///
795/// # Errors
796///
797/// Returns [`Error::CorruptObject`] when the pack stream is malformed.
798pub fn packed_ref_delta_reuse_slice(
799    objects_dir: &Path,
800    oid: &ObjectId,
801    packed_set: &HashSet<ObjectId>,
802) -> Result<Option<(ObjectId, Vec<u8>)>> {
803    let mut indexes = read_local_pack_indexes(objects_dir)?;
804    sort_pack_indexes_oldest_first(&mut indexes);
805    for idx in indexes {
806        let Some(entry) = idx.entries.iter().find(|e| e.oid == *oid) else {
807            continue;
808        };
809        let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
810        let mut p = entry.offset as usize;
811        let (packed_type, _size) = parse_pack_object_header(&pack_bytes, &mut p)?;
812        let base = match packed_type {
813            PackedType::RefDelta => {
814                if p + 20 > pack_bytes.len() {
815                    return Err(Error::CorruptObject(
816                        "truncated ref-delta base oid while scanning for reuse".to_owned(),
817                    ));
818                }
819                let oid = ObjectId::from_bytes(&pack_bytes[p..p + 20])?;
820                p += 20;
821                oid
822            }
823            PackedType::OfsDelta => {
824                let base_off = parse_ofs_delta_base(&pack_bytes, &mut p, entry.offset)?;
825                let Some(base_entry) = idx.entries.iter().find(|e| e.offset == base_off) else {
826                    continue;
827                };
828                base_entry.oid
829            }
830            _ => {
831                // Same OID may exist as a full object in an older pack and as a delta in a newer
832                // one; keep scanning packs.
833                continue;
834            }
835        };
836        if !packed_set.contains(&base) {
837            continue;
838        }
839        let zlib_start = p;
840        let mut end_pos = zlib_start;
841        if skip_one_pack_object(&pack_bytes, &mut end_pos, entry.offset).is_err() {
842            continue;
843        }
844        let compressed = &pack_bytes[zlib_start..end_pos];
845        let mut dec = ZlibDecoder::new(compressed);
846        let mut delta = Vec::new();
847        if dec.read_to_end(&mut delta).is_err() {
848            continue;
849        }
850        return Ok(Some((base, delta)));
851    }
852    Ok(None)
853}
854
855/// Prefer older packs when the same OID exists as a full object in a fresh repack and as a delta
856/// in an earlier thin pack (t5316).
857fn sort_pack_indexes_oldest_first(indexes: &mut [PackIndex]) {
858    indexes.sort_by(|a, b| {
859        let ta = fs::metadata(&a.pack_path)
860            .and_then(|m| m.modified())
861            .unwrap_or(std::time::SystemTime::UNIX_EPOCH);
862        let tb = fs::metadata(&b.pack_path)
863            .and_then(|m| m.modified())
864            .unwrap_or(std::time::SystemTime::UNIX_EPOCH);
865        ta.cmp(&tb).then_with(|| a.pack_path.cmp(&b.pack_path))
866    });
867}
868
869pub fn packed_delta_base_oid(objects_dir: &Path, oid: &ObjectId) -> Result<Option<ObjectId>> {
870    let mut indexes = read_local_pack_indexes(objects_dir)?;
871    sort_pack_indexes_oldest_first(&mut indexes);
872    for idx in &indexes {
873        let Some(entry) = idx.entries.iter().find(|e| e.oid == *oid) else {
874            continue;
875        };
876        let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
877        let mut p = entry.offset as usize;
878        let (packed_type, _) = parse_pack_object_header(&pack_bytes, &mut p)?;
879        match packed_type {
880            PackedType::RefDelta => {
881                if p + 20 > pack_bytes.len() {
882                    return Err(Error::CorruptObject("truncated ref-delta base".to_owned()));
883                }
884                return Ok(Some(ObjectId::from_bytes(&pack_bytes[p..p + 20])?));
885            }
886            PackedType::OfsDelta => {
887                let base_off = parse_ofs_delta_base(&pack_bytes, &mut p, entry.offset)?;
888                return Ok(idx
889                    .entries
890                    .iter()
891                    .find(|e| e.offset == base_off)
892                    .map(|e| e.oid));
893            }
894            _ => continue,
895        }
896    }
897    Ok(None)
898}
899
900fn parse_pack_object_header(bytes: &[u8], pos: &mut usize) -> Result<(PackedType, u64)> {
901    let first = *bytes.get(*pos).ok_or_else(|| {
902        Error::CorruptObject("unexpected end of pack header while decoding object".to_owned())
903    })?;
904    *pos += 1;
905
906    let type_code = (first >> 4) & 0x7;
907    let mut size = (first & 0x0f) as u64;
908    let mut shift = 4u32;
909    let mut c = first;
910    while (c & 0x80) != 0 {
911        c = *bytes.get(*pos).ok_or_else(|| {
912            Error::CorruptObject("unexpected end of variable size header".to_owned())
913        })?;
914        *pos += 1;
915        size |= ((c & 0x7f) as u64) << shift;
916        shift += 7;
917    }
918
919    let packed_type = match type_code {
920        1 => PackedType::Commit,
921        2 => PackedType::Tree,
922        3 => PackedType::Blob,
923        4 => PackedType::Tag,
924        6 => PackedType::OfsDelta,
925        7 => PackedType::RefDelta,
926        _ => {
927            return Err(Error::CorruptObject(format!(
928                "unsupported packed object type {}",
929                type_code
930            )))
931        }
932    };
933    Ok((packed_type, size))
934}
935
936fn parse_ofs_delta_base(bytes: &[u8], pos: &mut usize, this_offset: u64) -> Result<u64> {
937    let mut c = *bytes
938        .get(*pos)
939        .ok_or_else(|| Error::CorruptObject("truncated ofs-delta header".to_owned()))?;
940    *pos += 1;
941    let mut value = (c & 0x7f) as u64;
942    while (c & 0x80) != 0 {
943        c = *bytes
944            .get(*pos)
945            .ok_or_else(|| Error::CorruptObject("truncated ofs-delta header".to_owned()))?;
946        *pos += 1;
947        value = ((value + 1) << 7) | (c & 0x7f) as u64;
948    }
949    this_offset
950        .checked_sub(value)
951        .ok_or_else(|| Error::CorruptObject("invalid ofs-delta base offset".to_owned()))
952}
953
954/// Advance `pos` past one packed object (including zlib payload).
955///
956/// `object_start_offset` is the byte offset of this object within the pack file
957/// (used for `OFS_DELTA` base resolution).
958pub fn skip_one_pack_object(bytes: &[u8], pos: &mut usize, object_start_offset: u64) -> Result<()> {
959    let (packed_type, size) = parse_pack_object_header(bytes, pos)?;
960    match packed_type {
961        PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {
962            let mut dec = ZlibDecoder::new(&bytes[*pos..]);
963            let mut tmp = Vec::with_capacity(size as usize);
964            dec.read_to_end(&mut tmp)
965                .map_err(|e| Error::Zlib(e.to_string()))?;
966            *pos += dec.total_in() as usize;
967        }
968        PackedType::RefDelta => {
969            if *pos + 20 > bytes.len() {
970                return Err(Error::CorruptObject("truncated ref-delta base oid".into()));
971            }
972            *pos += 20;
973            let mut dec = ZlibDecoder::new(&bytes[*pos..]);
974            let mut tmp = Vec::with_capacity(size as usize);
975            dec.read_to_end(&mut tmp)
976                .map_err(|e| Error::Zlib(e.to_string()))?;
977            *pos += dec.total_in() as usize;
978        }
979        PackedType::OfsDelta => {
980            let _base_off = parse_ofs_delta_base(bytes, pos, object_start_offset)?;
981            let mut dec = ZlibDecoder::new(&bytes[*pos..]);
982            let mut tmp = Vec::with_capacity(size as usize);
983            dec.read_to_end(&mut tmp)
984                .map_err(|e| Error::Zlib(e.to_string()))?;
985            *pos += dec.total_in() as usize;
986        }
987    }
988    Ok(())
989}
990
991fn read_u32_be(bytes: &[u8], pos: &mut usize) -> Result<u32> {
992    if bytes.len() < *pos + 4 {
993        return Err(Error::CorruptObject(
994            "unexpected end of idx while reading u32".to_owned(),
995        ));
996    }
997    let v = u32::from_be_bytes(
998        bytes[*pos..*pos + 4]
999            .try_into()
1000            .map_err(|_| Error::CorruptObject("failed to parse u32".to_owned()))?,
1001    );
1002    *pos += 4;
1003    Ok(v)
1004}
1005
1006fn read_u64_be(bytes: &[u8], pos: &mut usize) -> Result<u64> {
1007    if bytes.len() < *pos + 8 {
1008        return Err(Error::CorruptObject(
1009            "unexpected end of idx while reading u64".to_owned(),
1010        ));
1011    }
1012    let v = u64::from_be_bytes(
1013        bytes[*pos..*pos + 8]
1014            .try_into()
1015            .map_err(|_| Error::CorruptObject("failed to parse u64".to_owned()))?,
1016    );
1017    *pos += 8;
1018    Ok(v)
1019}
1020
1021/// Read all object IDs from a `.idx` file.
1022pub fn read_idx_object_ids(idx_path: &Path) -> Result<Vec<ObjectId>> {
1023    let index = read_pack_index(idx_path)?;
1024    Ok(index.entries.into_iter().map(|e| e.oid).collect())
1025}