Skip to main content

grit_lib/
pack.rs

1//! Pack and pack-index helpers for object counting and verification.
2//!
3//! This module implements a focused subset of pack functionality required by
4//! `count-objects`, `verify-pack`, and `show-index`.
5
6use crate::error::{Error, Result};
7use crate::objects::{Object, ObjectId, ObjectKind};
8use crate::unpack_objects::apply_delta;
9use flate2::read::ZlibDecoder;
10use std::collections::{BTreeMap, HashMap, HashSet};
11use std::fs;
12use std::io;
13use std::io::Read;
14use std::path::{Path, PathBuf};
15
/// One object record decoded from a pack index file: the object's id paired
/// with the position of that object inside the companion pack.
#[derive(Debug, Clone)]
pub struct PackIndexEntry {
    /// Object identifier.
    pub oid: ObjectId,
    /// Byte offset of the object in the corresponding `.pack`.
    pub offset: u64,
}
24
/// In-memory view of one version-2 `.idx` file together with the path of the
/// `.pack` it describes.
#[derive(Debug, Clone)]
pub struct PackIndex {
    /// Path to the `.idx` file, stored exactly as it was handed to the parser
    /// (it is not canonicalized by this module).
    pub idx_path: PathBuf,
    /// Path to the companion `.pack` file: `idx_path` with its extension
    /// replaced by `pack`.
    pub pack_path: PathBuf,
    /// Parsed entries, in the order they are stored in the index.
    pub entries: Vec<PackIndexEntry>,
}
35
/// A single entry produced by `show-index`, with an optional CRC32.
///
/// Version-1 index files do not store CRC32 values, so `crc32` is `None` for
/// entries parsed from them.  Entries parsed from version-2 index files
/// always carry `Some(crc)`.
#[derive(Debug, Clone)]
pub struct ShowIndexEntry {
    /// Object identifier.
    pub oid: ObjectId,
    /// Byte offset of the object in the corresponding `.pack` file.
    pub offset: u64,
    /// CRC32 of the compressed object data (v2 only).
    pub crc32: Option<u32>,
}
49
50/// Parse a pack index from a reader (e.g. stdin) and return all entries in
51/// index order.
52///
53/// Both version-1 (legacy) and version-2 index formats are supported.  Only
54/// SHA-1 (20-byte hash) objects are supported; pass `hash_size = 20`.
55///
56/// # Errors
57///
58/// Returns [`Error::CorruptObject`] when the data cannot be parsed as a valid
59/// pack index.
60pub fn show_index_entries(reader: &mut dyn Read, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
61    let mut buf = Vec::new();
62    reader.read_to_end(&mut buf).map_err(Error::Io)?;
63
64    if buf.len() < 8 {
65        return Err(Error::CorruptObject(
66            "unable to read header: index file too small".to_owned(),
67        ));
68    }
69
70    let mut pos = 0usize;
71    let first_u32 = read_u32_be(&buf, &mut pos)?;
72
73    const PACK_IDX_SIGNATURE: u32 = 0xff74_4f63;
74
75    if first_u32 == PACK_IDX_SIGNATURE {
76        // Version 2 (or higher): read version word, then 256-entry fanout.
77        let version = read_u32_be(&buf, &mut pos)?;
78        if version != 2 {
79            return Err(Error::CorruptObject(format!(
80                "unknown index version: {version}"
81            )));
82        }
83        show_index_v2(&buf, &mut pos, hash_size)
84    } else {
85        // Version 1: the two u32s we already started reading are the first two
86        // fanout entries.  Re-read the whole fanout from the top.
87        pos = 0;
88        show_index_v1(&buf, &mut pos, hash_size)
89    }
90}
91
92/// Parse version-1 pack index entries from `buf`.
93fn show_index_v1(buf: &[u8], pos: &mut usize, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
94    if buf.len() < 256 * 4 {
95        return Err(Error::CorruptObject(
96            "unable to read index: v1 fanout too short".to_owned(),
97        ));
98    }
99    let mut fanout = [0u32; 256];
100    for slot in &mut fanout {
101        *slot = read_u32_be(buf, pos)?;
102    }
103    let object_count = fanout[255] as usize;
104
105    let mut entries = Vec::with_capacity(object_count);
106    for i in 0..object_count {
107        // Each record: 4-byte big-endian offset + hash_size-byte OID.
108        if *pos + 4 + hash_size > buf.len() {
109            return Err(Error::CorruptObject(format!(
110                "unable to read entry {i}/{object_count}: truncated"
111            )));
112        }
113        let offset = read_u32_be(buf, pos)? as u64;
114        let oid = ObjectId::from_bytes(&buf[*pos..*pos + hash_size])?;
115        *pos += hash_size;
116        entries.push(ShowIndexEntry {
117            oid,
118            offset,
119            crc32: None,
120        });
121    }
122    Ok(entries)
123}
124
125/// Parse version-2 pack index entries from `buf` starting after the magic and
126/// version words (fanout table is next).
127fn show_index_v2(buf: &[u8], pos: &mut usize, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
128    if buf.len() < *pos + 256 * 4 {
129        return Err(Error::CorruptObject(
130            "unable to read index: v2 fanout too short".to_owned(),
131        ));
132    }
133    let mut fanout = [0u32; 256];
134    for slot in &mut fanout {
135        *slot = read_u32_be(buf, pos)?;
136    }
137    let object_count = fanout[255] as usize;
138
139    // OID table.
140    let mut oids = Vec::with_capacity(object_count);
141    for i in 0..object_count {
142        if *pos + hash_size > buf.len() {
143            return Err(Error::CorruptObject(format!(
144                "unable to read sha1 {i}/{object_count}: truncated"
145            )));
146        }
147        let oid = ObjectId::from_bytes(&buf[*pos..*pos + hash_size])?;
148        *pos += hash_size;
149        oids.push(oid);
150    }
151
152    // CRC32 table.
153    let mut crcs = Vec::with_capacity(object_count);
154    for i in 0..object_count {
155        if *pos + 4 > buf.len() {
156            return Err(Error::CorruptObject(format!(
157                "unable to read crc {i}/{object_count}: truncated"
158            )));
159        }
160        crcs.push(read_u32_be(buf, pos)?);
161    }
162
163    // 32-bit offset table.
164    let mut offsets32 = Vec::with_capacity(object_count);
165    let mut large_count = 0usize;
166    for i in 0..object_count {
167        if *pos + 4 > buf.len() {
168            return Err(Error::CorruptObject(format!(
169                "unable to read 32b offset {i}/{object_count}: truncated"
170            )));
171        }
172        let v = read_u32_be(buf, pos)?;
173        if (v & 0x8000_0000) != 0 {
174            large_count += 1;
175        }
176        offsets32.push(v);
177    }
178
179    // 64-bit large-offset table.
180    let mut large_offsets = Vec::with_capacity(large_count);
181    for i in 0..large_count {
182        if *pos + 8 > buf.len() {
183            return Err(Error::CorruptObject(format!(
184                "unable to read 64b offset {i}: truncated"
185            )));
186        }
187        large_offsets.push(read_u64_be(buf, pos)?);
188    }
189
190    let mut next_large = 0usize;
191    let mut entries = Vec::with_capacity(object_count);
192    for (i, oid) in oids.into_iter().enumerate() {
193        let raw = offsets32[i];
194        let offset = if (raw & 0x8000_0000) == 0 {
195            raw as u64
196        } else {
197            let idx = (raw & 0x7fff_ffff) as usize;
198            if idx != next_large {
199                return Err(Error::CorruptObject(format!(
200                    "inconsistent 64b offset index at entry {i}"
201                )));
202            }
203            let off = large_offsets.get(next_large).copied().ok_or_else(|| {
204                Error::CorruptObject(format!("missing large offset entry {next_large}"))
205            })?;
206            next_large += 1;
207            off
208        };
209        entries.push(ShowIndexEntry {
210            oid,
211            offset,
212            crc32: Some(crcs[i]),
213        });
214    }
215    Ok(entries)
216}
217
/// Aggregate statistics about the packs found in a repository's own
/// `objects/pack` directory.
///
/// The `Default` value describes "no packs": zero counts and an empty id set.
#[derive(Debug, Clone, Default)]
pub struct LocalPackInfo {
    /// Number of valid local packs.
    pub pack_count: usize,
    /// Total objects across all valid local packs (summed per pack, so an
    /// object stored in two packs is counted twice).
    pub object_count: usize,
    /// Combined on-disk bytes of `.pack` + `.idx`.
    pub size_bytes: u64,
    /// Set of all object IDs present in local packs.
    pub object_ids: HashSet<ObjectId>,
}
230
231/// Read all valid `.idx` files in `objects/pack`.
232///
233/// # Errors
234///
235/// Returns [`Error::Io`] for directory-level failures. Individual invalid pack
236/// pairs are skipped.
237pub fn read_local_pack_indexes(objects_dir: &Path) -> Result<Vec<PackIndex>> {
238    let pack_dir = objects_dir.join("pack");
239    let rd = match fs::read_dir(&pack_dir) {
240        Ok(rd) => rd,
241        Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Vec::new()),
242        Err(err) => return Err(Error::Io(err)),
243    };
244
245    let mut out = Vec::new();
246    for entry in rd {
247        let entry = entry.map_err(Error::Io)?;
248        let path = entry.path();
249        if path.extension().and_then(|s| s.to_str()) != Some("idx") {
250            continue;
251        }
252        if let Ok(idx) = read_pack_index(&path) {
253            out.push(idx);
254        }
255    }
256    Ok(out)
257}
258
259/// Collect aggregate local pack metrics.
260///
261/// # Errors
262///
263/// Returns [`Error::Io`] when reading pack metadata fails.
264pub fn collect_local_pack_info(objects_dir: &Path) -> Result<LocalPackInfo> {
265    let indexes = read_local_pack_indexes(objects_dir)?;
266    let mut info = LocalPackInfo::default();
267    for idx in indexes {
268        let pack_meta = fs::metadata(&idx.pack_path).map_err(Error::Io)?;
269        let idx_meta = fs::metadata(&idx.idx_path).map_err(Error::Io)?;
270        info.pack_count += 1;
271        info.object_count += idx.entries.len();
272        info.size_bytes += pack_meta.len() + idx_meta.len();
273        for entry in idx.entries {
274            info.object_ids.insert(entry.oid);
275        }
276    }
277    Ok(info)
278}
279
280/// Parse a version-2 pack index file.
281///
282/// # Errors
283///
284/// Returns [`Error::CorruptObject`] when format checks fail.
285pub fn read_pack_index(idx_path: &Path) -> Result<PackIndex> {
286    let bytes = fs::read(idx_path).map_err(Error::Io)?;
287    if bytes.len() < 8 + 256 * 4 + 40 {
288        return Err(Error::CorruptObject(format!(
289            "index file {} is too small",
290            idx_path.display()
291        )));
292    }
293
294    let mut pos = 0usize;
295    let magic = &bytes[pos..pos + 4];
296    pos += 4;
297    if magic != [0xff, b't', b'O', b'c'] {
298        return Err(Error::CorruptObject(format!(
299            "unsupported idx signature in {}",
300            idx_path.display()
301        )));
302    }
303    let version = read_u32_be(&bytes, &mut pos)?;
304    if version != 2 {
305        return Err(Error::CorruptObject(format!(
306            "unsupported idx version {} in {}",
307            version,
308            idx_path.display()
309        )));
310    }
311
312    let mut fanout = [0u32; 256];
313    for slot in &mut fanout {
314        *slot = read_u32_be(&bytes, &mut pos)?;
315    }
316    let object_count = fanout[255] as usize;
317
318    let need = pos
319        .saturating_add(object_count * 20)
320        .saturating_add(object_count * 4)
321        .saturating_add(object_count * 4)
322        .saturating_add(40);
323    if bytes.len() < need {
324        return Err(Error::CorruptObject(format!(
325            "truncated idx file {}",
326            idx_path.display()
327        )));
328    }
329
330    let mut oids = Vec::with_capacity(object_count);
331    for _ in 0..object_count {
332        let oid = ObjectId::from_bytes(&bytes[pos..pos + 20])?;
333        pos += 20;
334        oids.push(oid);
335    }
336
337    // Skip CRC table.
338    pos += object_count * 4;
339
340    let mut offsets32 = Vec::with_capacity(object_count);
341    let mut large_count = 0usize;
342    for _ in 0..object_count {
343        let v = read_u32_be(&bytes, &mut pos)?;
344        if (v & 0x8000_0000) != 0 {
345            large_count += 1;
346        }
347        offsets32.push(v);
348    }
349
350    if bytes.len() < pos + large_count * 8 + 40 {
351        return Err(Error::CorruptObject(format!(
352            "truncated large offset table in {}",
353            idx_path.display()
354        )));
355    }
356    let mut large_offsets = Vec::with_capacity(large_count);
357    for _ in 0..large_count {
358        large_offsets.push(read_u64_be(&bytes, &mut pos)?);
359    }
360
361    let mut next_large = 0usize;
362    let mut entries = Vec::with_capacity(object_count);
363    for (i, oid) in oids.into_iter().enumerate() {
364        let raw = offsets32[i];
365        let offset = if (raw & 0x8000_0000) == 0 {
366            raw as u64
367        } else {
368            let off = large_offsets.get(next_large).copied().ok_or_else(|| {
369                Error::CorruptObject(format!("bad large offset index in {}", idx_path.display()))
370            })?;
371            next_large += 1;
372            off
373        };
374        entries.push(PackIndexEntry { oid, offset });
375    }
376
377    let mut pack_path = idx_path.to_path_buf();
378    pack_path.set_extension("pack");
379    Ok(PackIndex {
380        idx_path: idx_path.to_path_buf(),
381        pack_path,
382        entries,
383    })
384}
385
/// A pack object type as encoded in the packed stream header.
///
/// The header parser in this module maps the 3-bit type code as follows:
/// 1 = commit, 2 = tree, 3 = blob, 4 = tag, 6 = offset delta,
/// 7 = reference delta.  Any other code is rejected as unsupported.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackedType {
    /// Commit object.
    Commit,
    /// Tree object.
    Tree,
    /// Blob object.
    Blob,
    /// Tag object.
    Tag,
    /// Offset delta: the base is named by a backwards distance in the pack.
    OfsDelta,
    /// Reference delta: the base is named by its object id.
    RefDelta,
}
402
403impl PackedType {
404    /// Printable name used by `verify-pack -v` output.
405    #[must_use]
406    pub fn as_str(self) -> &'static str {
407        match self {
408            Self::Commit => "commit",
409            Self::Tree => "tree",
410            Self::Blob => "blob",
411            Self::Tag => "tag",
412            Self::OfsDelta => "ofs-delta",
413            Self::RefDelta => "ref-delta",
414        }
415    }
416}
417
/// A decoded object header record used by `verify-pack`.
///
/// One record is produced per distinct object offset listed in the index.
#[derive(Debug, Clone)]
pub struct VerifyObjectRecord {
    /// Object ID from the index.
    pub oid: ObjectId,
    /// Type from the pack stream header.
    pub packed_type: PackedType,
    /// Uncompressed object size from the pack header.
    pub size: u64,
    /// Total bytes in pack occupied by this object slot, measured as the
    /// distance to the next object's offset (or to the pack trailer for the
    /// last object).
    pub size_in_pack: u64,
    /// Offset in pack file.
    pub offset: u64,
    /// Delta chain depth, if deltified; `None` for non-delta objects.
    pub depth: Option<u64>,
    /// Base object for ref-delta objects; `None` for every other type.
    pub base_oid: Option<ObjectId>,
}
436
437/// Verify one pack/index pair and optionally return object records.
438///
439/// # Errors
440///
441/// Returns [`Error::CorruptObject`] when the index or pack are malformed.
442pub fn verify_pack_and_collect(idx_path: &Path) -> Result<Vec<VerifyObjectRecord>> {
443    let idx = read_pack_index(idx_path)?;
444    let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
445    if pack_bytes.len() < 12 + 20 {
446        return Err(Error::CorruptObject(format!(
447            "pack file {} is too small",
448            idx.pack_path.display()
449        )));
450    }
451    if &pack_bytes[0..4] != b"PACK" {
452        return Err(Error::CorruptObject(format!(
453            "pack file {} has invalid signature",
454            idx.pack_path.display()
455        )));
456    }
457    let version = u32::from_be_bytes(pack_bytes[4..8].try_into().unwrap_or([0, 0, 0, 0]));
458    if version != 2 && version != 3 {
459        return Err(Error::CorruptObject(format!(
460            "unsupported pack version {} in {}",
461            version,
462            idx.pack_path.display()
463        )));
464    }
465    let count = u32::from_be_bytes(pack_bytes[8..12].try_into().unwrap_or([0, 0, 0, 0])) as usize;
466    if count != idx.entries.len() {
467        return Err(Error::CorruptObject(format!(
468            "pack/index object count mismatch for {}",
469            idx.pack_path.display()
470        )));
471    }
472
473    let mut by_offset: BTreeMap<u64, ObjectId> = BTreeMap::new();
474    for entry in &idx.entries {
475        by_offset.insert(entry.offset, entry.oid);
476    }
477    let offsets: Vec<u64> = by_offset.keys().copied().collect();
478    if offsets.is_empty() {
479        return Ok(Vec::new());
480    }
481
482    let mut by_oid: HashMap<ObjectId, usize> = HashMap::new();
483    let mut records: Vec<VerifyObjectRecord> = Vec::with_capacity(offsets.len());
484    for (i, offset) in offsets.iter().copied().enumerate() {
485        let oid = by_offset.get(&offset).copied().ok_or_else(|| {
486            Error::CorruptObject(format!("missing object id for offset {}", offset))
487        })?;
488        let next_off = offsets
489            .get(i + 1)
490            .copied()
491            .unwrap_or((pack_bytes.len() - 20) as u64);
492        if next_off <= offset || next_off > (pack_bytes.len() - 20) as u64 {
493            return Err(Error::CorruptObject(format!(
494                "invalid object boundaries at offset {} in {}",
495                offset,
496                idx.pack_path.display()
497            )));
498        }
499        let mut p = offset as usize;
500        let (packed_type, size) = parse_pack_object_header(&pack_bytes, &mut p)?;
501        let mut base_oid = None;
502        let mut depth = None;
503
504        match packed_type {
505            PackedType::RefDelta => {
506                if p + 20 > pack_bytes.len() {
507                    return Err(Error::CorruptObject(format!(
508                        "truncated ref-delta base at offset {}",
509                        offset
510                    )));
511                }
512                base_oid = Some(ObjectId::from_bytes(&pack_bytes[p..p + 20])?);
513            }
514            PackedType::OfsDelta => {
515                let base_offset = parse_ofs_delta_base(&pack_bytes, &mut p, offset)?;
516                let base_depth = records
517                    .iter()
518                    .find(|r| r.offset == base_offset)
519                    .and_then(|r| r.depth)
520                    .unwrap_or(0);
521                depth = Some(base_depth + 1);
522            }
523            PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {}
524        }
525
526        let size_in_pack = next_off - offset;
527        records.push(VerifyObjectRecord {
528            oid,
529            packed_type,
530            size,
531            size_in_pack,
532            offset,
533            depth,
534            base_oid,
535        });
536        by_oid.insert(oid, i);
537    }
538
539    // Fill ref-delta depths in a second pass once all base objects are known.
540    for i in 0..records.len() {
541        if records[i].packed_type != PackedType::RefDelta {
542            continue;
543        }
544        let base = records[i]
545            .base_oid
546            .ok_or_else(|| Error::CorruptObject("ref-delta missing base oid".to_owned()))?;
547        let base_depth = by_oid
548            .get(&base)
549            .and_then(|idx| records.get(*idx))
550            .and_then(|r| r.depth)
551            .unwrap_or(0);
552        records[i].depth = Some(base_depth + 1);
553    }
554
555    Ok(records)
556}
557
558/// Read alternates recursively, deduplicated in discovery order.
559///
560/// # Errors
561///
562/// Returns [`Error::Io`] when alternate files cannot be read.
563pub fn read_alternates_recursive(objects_dir: &Path) -> Result<Vec<PathBuf>> {
564    let mut visited = HashSet::new();
565    let mut out = Vec::new();
566    read_alternates_inner(objects_dir, &mut visited, &mut out, 0)?;
567    Ok(out)
568}
569
/// Maximum alternate chain depth (git uses 5).
///
/// The recursive walk stops descending once its depth counter exceeds this
/// value; the starting directory is depth 0.
const MAX_ALTERNATE_DEPTH: usize = 5;
572
573fn read_alternates_inner(
574    objects_dir: &Path,
575    visited: &mut HashSet<PathBuf>,
576    out: &mut Vec<PathBuf>,
577    depth: usize,
578) -> Result<()> {
579    if depth > MAX_ALTERNATE_DEPTH {
580        return Ok(());
581    }
582    let canonical = canonical_or_self(objects_dir);
583    let alt_file = canonical.join("info").join("alternates");
584    let text = match fs::read_to_string(&alt_file) {
585        Ok(text) => text,
586        Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(()),
587        Err(err) => return Err(Error::Io(err)),
588    };
589
590    for raw in text.lines() {
591        let line = raw.trim();
592        if line.is_empty() {
593            continue;
594        }
595        let candidate = if Path::new(line).is_absolute() {
596            PathBuf::from(line)
597        } else {
598            canonical.join(line)
599        };
600        let candidate = canonical_or_self(&candidate);
601        if visited.insert(candidate.clone()) {
602            out.push(candidate.clone());
603            read_alternates_inner(&candidate, visited, out, depth + 1)?;
604        }
605    }
606    Ok(())
607}
608
/// Canonicalize `path` if the filesystem allows it; otherwise fall back to
/// an owned copy of `path` unchanged.
fn canonical_or_self(path: &Path) -> PathBuf {
    match fs::canonicalize(path) {
        Ok(resolved) => resolved,
        Err(_) => path.to_path_buf(),
    }
}
612
613/// Convert a [`PackedType`] to an [`ObjectKind`] for non-delta types.
614fn packed_type_to_kind(pt: PackedType) -> Result<ObjectKind> {
615    match pt {
616        PackedType::Commit => Ok(ObjectKind::Commit),
617        PackedType::Tree => Ok(ObjectKind::Tree),
618        PackedType::Blob => Ok(ObjectKind::Blob),
619        PackedType::Tag => Ok(ObjectKind::Tag),
620        PackedType::OfsDelta | PackedType::RefDelta => Err(Error::CorruptObject(
621            "cannot convert delta type to object kind directly".to_owned(),
622        )),
623    }
624}
625
626/// Decompress zlib data from a byte slice starting at `pos`.
627///
628/// Returns the decompressed data and advances `pos` past the consumed
629/// compressed bytes.
630fn decompress_pack_data(bytes: &[u8], pos: &mut usize, expected_size: u64) -> Result<Vec<u8>> {
631    let slice = &bytes[*pos..];
632    let mut decoder = ZlibDecoder::new(slice);
633    let mut out = Vec::with_capacity(expected_size as usize);
634    decoder
635        .read_to_end(&mut out)
636        .map_err(|e| Error::Zlib(e.to_string()))?;
637    *pos += decoder.total_in() as usize;
638    Ok(out)
639}
640
641/// Read and fully resolve one object from a pack file given its offset.
642///
643/// Handles OFS_DELTA and REF_DELTA by recursively reading the base object.
644/// The `idx` is used for REF_DELTA resolution (to find a base by OID).
645fn read_pack_object_at(
646    pack_bytes: &[u8],
647    offset: u64,
648    idx: &PackIndex,
649    depth: usize,
650) -> Result<(ObjectKind, Vec<u8>)> {
651    if depth > 50 {
652        return Err(Error::CorruptObject(
653            "delta chain too deep (>50)".to_owned(),
654        ));
655    }
656    let mut pos = offset as usize;
657    let (packed_type, size) = parse_pack_object_header(pack_bytes, &mut pos)?;
658
659    match packed_type {
660        PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {
661            let data = decompress_pack_data(pack_bytes, &mut pos, size)?;
662            let kind = packed_type_to_kind(packed_type)?;
663            Ok((kind, data))
664        }
665        PackedType::OfsDelta => {
666            let base_offset = parse_ofs_delta_base(pack_bytes, &mut pos, offset)?;
667            let delta_data = decompress_pack_data(pack_bytes, &mut pos, size)?;
668            let (base_kind, base_data) =
669                read_pack_object_at(pack_bytes, base_offset, idx, depth + 1)?;
670            let result = apply_delta(&base_data, &delta_data)?;
671            Ok((base_kind, result))
672        }
673        PackedType::RefDelta => {
674            if pos + 20 > pack_bytes.len() {
675                return Err(Error::CorruptObject(
676                    "truncated ref-delta base OID".to_owned(),
677                ));
678            }
679            let base_oid = ObjectId::from_bytes(&pack_bytes[pos..pos + 20])?;
680            pos += 20;
681            let delta_data = decompress_pack_data(pack_bytes, &mut pos, size)?;
682            // Find the base in the same pack index
683            let base_entry = idx
684                .entries
685                .iter()
686                .find(|e| e.oid == base_oid)
687                .ok_or_else(|| {
688                    Error::CorruptObject(format!("ref-delta base {} not found in pack", base_oid))
689                })?;
690            let (base_kind, base_data) =
691                read_pack_object_at(pack_bytes, base_entry.offset, idx, depth + 1)?;
692            let result = apply_delta(&base_data, &delta_data)?;
693            Ok((base_kind, result))
694        }
695    }
696}
697
698/// Read an object from a pack file by its OID.
699///
700/// Searches the given pack index for the OID, then reads and decompresses
701/// the object from the corresponding pack file, resolving delta chains.
702///
703/// # Errors
704///
705/// Returns [`Error::ObjectNotFound`] if the OID is not in this pack.
706pub fn read_object_from_pack(idx: &PackIndex, oid: &ObjectId) -> Result<Object> {
707    let entry = idx
708        .entries
709        .iter()
710        .find(|e| e.oid == *oid)
711        .ok_or_else(|| Error::ObjectNotFound(oid.to_hex()))?;
712
713    let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
714    let (kind, data) = read_pack_object_at(&pack_bytes, entry.offset, idx, 0)?;
715    Ok(Object::new(kind, data))
716}
717
718/// Search all pack indexes in `objects_dir` for the given OID and read it.
719///
720/// # Errors
721///
722/// Returns [`Error::ObjectNotFound`] if no pack contains the OID.
723pub fn read_object_from_packs(objects_dir: &Path, oid: &ObjectId) -> Result<Object> {
724    let indexes = read_local_pack_indexes(objects_dir)?;
725    for idx in &indexes {
726        if idx.entries.iter().any(|e| e.oid == *oid) {
727            return read_object_from_pack(idx, oid);
728        }
729    }
730    Err(Error::ObjectNotFound(oid.to_hex()))
731}
732
733/// When `oid` is stored as a delta in a pack, return its delta base object id.
734/// Returns [`None`] for loose objects and for non-delta packed objects.
735pub fn packed_delta_base_oid(objects_dir: &Path, oid: &ObjectId) -> Result<Option<ObjectId>> {
736    let indexes = read_local_pack_indexes(objects_dir)?;
737    for idx in indexes {
738        let Some(entry) = idx.entries.iter().find(|e| e.oid == *oid) else {
739            continue;
740        };
741        let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
742        let mut p = entry.offset as usize;
743        let (packed_type, _) = parse_pack_object_header(&pack_bytes, &mut p)?;
744        match packed_type {
745            PackedType::RefDelta => {
746                if p + 20 > pack_bytes.len() {
747                    return Err(Error::CorruptObject("truncated ref-delta base".to_owned()));
748                }
749                return Ok(Some(ObjectId::from_bytes(&pack_bytes[p..p + 20])?));
750            }
751            PackedType::OfsDelta => {
752                let base_off = parse_ofs_delta_base(&pack_bytes, &mut p, entry.offset)?;
753                return Ok(idx
754                    .entries
755                    .iter()
756                    .find(|e| e.offset == base_off)
757                    .map(|e| e.oid));
758            }
759            _ => return Ok(None),
760        }
761    }
762    Ok(None)
763}
764
765fn parse_pack_object_header(bytes: &[u8], pos: &mut usize) -> Result<(PackedType, u64)> {
766    let first = *bytes.get(*pos).ok_or_else(|| {
767        Error::CorruptObject("unexpected end of pack header while decoding object".to_owned())
768    })?;
769    *pos += 1;
770
771    let type_code = (first >> 4) & 0x7;
772    let mut size = (first & 0x0f) as u64;
773    let mut shift = 4u32;
774    let mut c = first;
775    while (c & 0x80) != 0 {
776        c = *bytes.get(*pos).ok_or_else(|| {
777            Error::CorruptObject("unexpected end of variable size header".to_owned())
778        })?;
779        *pos += 1;
780        size |= ((c & 0x7f) as u64) << shift;
781        shift += 7;
782    }
783
784    let packed_type = match type_code {
785        1 => PackedType::Commit,
786        2 => PackedType::Tree,
787        3 => PackedType::Blob,
788        4 => PackedType::Tag,
789        6 => PackedType::OfsDelta,
790        7 => PackedType::RefDelta,
791        _ => {
792            return Err(Error::CorruptObject(format!(
793                "unsupported packed object type {}",
794                type_code
795            )))
796        }
797    };
798    Ok((packed_type, size))
799}
800
801fn parse_ofs_delta_base(bytes: &[u8], pos: &mut usize, this_offset: u64) -> Result<u64> {
802    let mut c = *bytes
803        .get(*pos)
804        .ok_or_else(|| Error::CorruptObject("truncated ofs-delta header".to_owned()))?;
805    *pos += 1;
806    let mut value = (c & 0x7f) as u64;
807    while (c & 0x80) != 0 {
808        c = *bytes
809            .get(*pos)
810            .ok_or_else(|| Error::CorruptObject("truncated ofs-delta header".to_owned()))?;
811        *pos += 1;
812        value = ((value + 1) << 7) | (c & 0x7f) as u64;
813    }
814    this_offset
815        .checked_sub(value)
816        .ok_or_else(|| Error::CorruptObject("invalid ofs-delta base offset".to_owned()))
817}
818
819/// Advance `pos` past one packed object (including zlib payload).
820///
821/// `object_start_offset` is the byte offset of this object within the pack file
822/// (used for `OFS_DELTA` base resolution).
823pub fn skip_one_pack_object(bytes: &[u8], pos: &mut usize, object_start_offset: u64) -> Result<()> {
824    let (packed_type, size) = parse_pack_object_header(bytes, pos)?;
825    match packed_type {
826        PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {
827            let mut dec = ZlibDecoder::new(&bytes[*pos..]);
828            let mut tmp = Vec::with_capacity(size as usize);
829            dec.read_to_end(&mut tmp)
830                .map_err(|e| Error::Zlib(e.to_string()))?;
831            *pos += dec.total_in() as usize;
832        }
833        PackedType::RefDelta => {
834            if *pos + 20 > bytes.len() {
835                return Err(Error::CorruptObject("truncated ref-delta base oid".into()));
836            }
837            *pos += 20;
838            let mut dec = ZlibDecoder::new(&bytes[*pos..]);
839            let mut tmp = Vec::with_capacity(size as usize);
840            dec.read_to_end(&mut tmp)
841                .map_err(|e| Error::Zlib(e.to_string()))?;
842            *pos += dec.total_in() as usize;
843        }
844        PackedType::OfsDelta => {
845            let _base_off = parse_ofs_delta_base(bytes, pos, object_start_offset)?;
846            let mut dec = ZlibDecoder::new(&bytes[*pos..]);
847            let mut tmp = Vec::with_capacity(size as usize);
848            dec.read_to_end(&mut tmp)
849                .map_err(|e| Error::Zlib(e.to_string()))?;
850            *pos += dec.total_in() as usize;
851        }
852    }
853    Ok(())
854}
855
856fn read_u32_be(bytes: &[u8], pos: &mut usize) -> Result<u32> {
857    if bytes.len() < *pos + 4 {
858        return Err(Error::CorruptObject(
859            "unexpected end of idx while reading u32".to_owned(),
860        ));
861    }
862    let v = u32::from_be_bytes(
863        bytes[*pos..*pos + 4]
864            .try_into()
865            .map_err(|_| Error::CorruptObject("failed to parse u32".to_owned()))?,
866    );
867    *pos += 4;
868    Ok(v)
869}
870
871fn read_u64_be(bytes: &[u8], pos: &mut usize) -> Result<u64> {
872    if bytes.len() < *pos + 8 {
873        return Err(Error::CorruptObject(
874            "unexpected end of idx while reading u64".to_owned(),
875        ));
876    }
877    let v = u64::from_be_bytes(
878        bytes[*pos..*pos + 8]
879            .try_into()
880            .map_err(|_| Error::CorruptObject("failed to parse u64".to_owned()))?,
881    );
882    *pos += 8;
883    Ok(v)
884}
885
886/// Read all object IDs from a `.idx` file.
887pub fn read_idx_object_ids(idx_path: &Path) -> Result<Vec<ObjectId>> {
888    let index = read_pack_index(idx_path)?;
889    Ok(index.entries.into_iter().map(|e| e.oid).collect())
890}