Skip to main content

grit_lib/
pack.rs

1//! Pack and pack-index helpers for object counting and verification.
2//!
3//! This module implements a focused subset of pack functionality required by
4//! `count-objects`, `verify-pack`, and `show-index`.
5
6use crate::error::{Error, Result};
7use crate::objects::{Object, ObjectId, ObjectKind};
8use crate::unpack_objects::apply_delta;
9use flate2::read::ZlibDecoder;
10use std::collections::{BTreeMap, HashMap, HashSet};
11use std::fs;
12use std::io;
13use std::io::Read;
14use std::path::{Path, PathBuf};
15
/// A parsed entry from an index file.
///
/// Pairs an object ID with the position of that object's data inside the
/// pack. Values of this type are produced by [`read_pack_index`].
#[derive(Debug, Clone)]
pub struct PackIndexEntry {
    /// Object identifier.
    pub oid: ObjectId,
    /// Byte offset of the object in the corresponding `.pack`, taken either
    /// from the 32-bit offset table or from the 64-bit large-offset table.
    pub offset: u64,
}
24
/// Parsed data from a `.idx` file (version 2).
///
/// Built by [`read_pack_index`], which rejects any other index version.
#[derive(Debug, Clone)]
pub struct PackIndex {
    /// Path to the `.idx` file, stored exactly as it was handed to
    /// [`read_pack_index`] (it is not canonicalized, so it is absolute only
    /// when the caller supplied an absolute path).
    pub idx_path: PathBuf,
    /// Path to the companion `.pack` file: `idx_path` with its extension
    /// replaced by `pack`. [`read_pack_index`] does not check that it exists.
    pub pack_path: PathBuf,
    /// Parsed entries in index order.
    pub entries: Vec<PackIndexEntry>,
}
35
/// A single entry produced by `show-index`, with an optional CRC32.
///
/// Returned by [`show_index_entries`].  Version-1 index files do not store
/// CRC32 values; `crc32` is `None` for those entries.  Version-2 index files
/// always carry a CRC32.
#[derive(Debug, Clone)]
pub struct ShowIndexEntry {
    /// Object identifier.
    pub oid: ObjectId,
    /// Byte offset of the object in the corresponding `.pack` file.
    pub offset: u64,
    /// Value read from the index's CRC32 table (v2 only); `None` for v1.
    pub crc32: Option<u32>,
}
49
50/// Parse a pack index from a reader (e.g. stdin) and return all entries in
51/// index order.
52///
53/// Both version-1 (legacy) and version-2 index formats are supported.  Only
54/// SHA-1 (20-byte hash) objects are supported; pass `hash_size = 20`.
55///
56/// # Errors
57///
58/// Returns [`Error::CorruptObject`] when the data cannot be parsed as a valid
59/// pack index.
60pub fn show_index_entries(reader: &mut dyn Read, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
61    let mut buf = Vec::new();
62    reader.read_to_end(&mut buf).map_err(Error::Io)?;
63
64    if buf.len() < 8 {
65        return Err(Error::CorruptObject(
66            "unable to read header: index file too small".to_owned(),
67        ));
68    }
69
70    let mut pos = 0usize;
71    let first_u32 = read_u32_be(&buf, &mut pos)?;
72
73    const PACK_IDX_SIGNATURE: u32 = 0xff74_4f63;
74
75    if first_u32 == PACK_IDX_SIGNATURE {
76        // Version 2 (or higher): read version word, then 256-entry fanout.
77        let version = read_u32_be(&buf, &mut pos)?;
78        if version != 2 {
79            return Err(Error::CorruptObject(format!(
80                "unknown index version: {version}"
81            )));
82        }
83        show_index_v2(&buf, &mut pos, hash_size)
84    } else {
85        // Version 1: the two u32s we already started reading are the first two
86        // fanout entries.  Re-read the whole fanout from the top.
87        pos = 0;
88        show_index_v1(&buf, &mut pos, hash_size)
89    }
90}
91
92/// Parse version-1 pack index entries from `buf`.
93fn show_index_v1(buf: &[u8], pos: &mut usize, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
94    if buf.len() < 256 * 4 {
95        return Err(Error::CorruptObject(
96            "unable to read index: v1 fanout too short".to_owned(),
97        ));
98    }
99    let mut fanout = [0u32; 256];
100    for slot in &mut fanout {
101        *slot = read_u32_be(buf, pos)?;
102    }
103    let object_count = fanout[255] as usize;
104
105    let mut entries = Vec::with_capacity(object_count);
106    for i in 0..object_count {
107        // Each record: 4-byte big-endian offset + hash_size-byte OID.
108        if *pos + 4 + hash_size > buf.len() {
109            return Err(Error::CorruptObject(format!(
110                "unable to read entry {i}/{object_count}: truncated"
111            )));
112        }
113        let offset = read_u32_be(buf, pos)? as u64;
114        let oid = ObjectId::from_bytes(&buf[*pos..*pos + hash_size])?;
115        *pos += hash_size;
116        entries.push(ShowIndexEntry {
117            oid,
118            offset,
119            crc32: None,
120        });
121    }
122    Ok(entries)
123}
124
125/// Parse version-2 pack index entries from `buf` starting after the magic and
126/// version words (fanout table is next).
127fn show_index_v2(buf: &[u8], pos: &mut usize, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
128    if buf.len() < *pos + 256 * 4 {
129        return Err(Error::CorruptObject(
130            "unable to read index: v2 fanout too short".to_owned(),
131        ));
132    }
133    let mut fanout = [0u32; 256];
134    for slot in &mut fanout {
135        *slot = read_u32_be(buf, pos)?;
136    }
137    let object_count = fanout[255] as usize;
138
139    // OID table.
140    let mut oids = Vec::with_capacity(object_count);
141    for i in 0..object_count {
142        if *pos + hash_size > buf.len() {
143            return Err(Error::CorruptObject(format!(
144                "unable to read sha1 {i}/{object_count}: truncated"
145            )));
146        }
147        let oid = ObjectId::from_bytes(&buf[*pos..*pos + hash_size])?;
148        *pos += hash_size;
149        oids.push(oid);
150    }
151
152    // CRC32 table.
153    let mut crcs = Vec::with_capacity(object_count);
154    for i in 0..object_count {
155        if *pos + 4 > buf.len() {
156            return Err(Error::CorruptObject(format!(
157                "unable to read crc {i}/{object_count}: truncated"
158            )));
159        }
160        crcs.push(read_u32_be(buf, pos)?);
161    }
162
163    // 32-bit offset table.
164    let mut offsets32 = Vec::with_capacity(object_count);
165    let mut large_count = 0usize;
166    for i in 0..object_count {
167        if *pos + 4 > buf.len() {
168            return Err(Error::CorruptObject(format!(
169                "unable to read 32b offset {i}/{object_count}: truncated"
170            )));
171        }
172        let v = read_u32_be(buf, pos)?;
173        if (v & 0x8000_0000) != 0 {
174            large_count += 1;
175        }
176        offsets32.push(v);
177    }
178
179    // 64-bit large-offset table.
180    let mut large_offsets = Vec::with_capacity(large_count);
181    for i in 0..large_count {
182        if *pos + 8 > buf.len() {
183            return Err(Error::CorruptObject(format!(
184                "unable to read 64b offset {i}: truncated"
185            )));
186        }
187        large_offsets.push(read_u64_be(buf, pos)?);
188    }
189
190    let mut next_large = 0usize;
191    let mut entries = Vec::with_capacity(object_count);
192    for (i, oid) in oids.into_iter().enumerate() {
193        let raw = offsets32[i];
194        let offset = if (raw & 0x8000_0000) == 0 {
195            raw as u64
196        } else {
197            let idx = (raw & 0x7fff_ffff) as usize;
198            if idx != next_large {
199                return Err(Error::CorruptObject(format!(
200                    "inconsistent 64b offset index at entry {i}"
201                )));
202            }
203            let off = large_offsets.get(next_large).copied().ok_or_else(|| {
204                Error::CorruptObject(format!("missing large offset entry {next_large}"))
205            })?;
206            next_large += 1;
207            off
208        };
209        entries.push(ShowIndexEntry {
210            oid,
211            offset,
212            crc32: Some(crcs[i]),
213        });
214    }
215    Ok(entries)
216}
217
/// Basic information about local packs.
///
/// Aggregated by [`collect_local_pack_info`] over every pack index that
/// [`read_local_pack_indexes`] managed to parse.
#[derive(Debug, Clone, Default)]
pub struct LocalPackInfo {
    /// Number of local packs whose index parsed successfully.
    pub pack_count: usize,
    /// Sum of the entry counts of all those indexes; an object stored in
    /// several packs is counted once per pack.
    pub object_count: usize,
    /// Combined on-disk bytes of `.pack` + `.idx`.
    pub size_bytes: u64,
    /// Set of all object IDs present in local packs (deduplicated).
    pub object_ids: HashSet<ObjectId>,
}
230
231/// Read all valid `.idx` files in `objects/pack`.
232///
233/// # Errors
234///
235/// Returns [`Error::Io`] for directory-level failures. Individual invalid pack
236/// pairs are skipped.
237pub fn read_local_pack_indexes(objects_dir: &Path) -> Result<Vec<PackIndex>> {
238    let pack_dir = objects_dir.join("pack");
239    let rd = match fs::read_dir(&pack_dir) {
240        Ok(rd) => rd,
241        Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Vec::new()),
242        Err(err) => return Err(Error::Io(err)),
243    };
244
245    let mut out = Vec::new();
246    for entry in rd {
247        let entry = entry.map_err(Error::Io)?;
248        let path = entry.path();
249        if path.extension().and_then(|s| s.to_str()) != Some("idx") {
250            continue;
251        }
252        if let Ok(idx) = read_pack_index(&path) {
253            out.push(idx);
254        }
255    }
256    Ok(out)
257}
258
259/// Collect aggregate local pack metrics.
260///
261/// # Errors
262///
263/// Returns [`Error::Io`] when reading pack metadata fails.
264pub fn collect_local_pack_info(objects_dir: &Path) -> Result<LocalPackInfo> {
265    let indexes = read_local_pack_indexes(objects_dir)?;
266    let mut info = LocalPackInfo::default();
267    for idx in indexes {
268        let pack_meta = fs::metadata(&idx.pack_path).map_err(Error::Io)?;
269        let idx_meta = fs::metadata(&idx.idx_path).map_err(Error::Io)?;
270        info.pack_count += 1;
271        info.object_count += idx.entries.len();
272        info.size_bytes += pack_meta.len() + idx_meta.len();
273        for entry in idx.entries {
274            info.object_ids.insert(entry.oid);
275        }
276    }
277    Ok(info)
278}
279
280/// Parse a version-2 pack index file.
281///
282/// # Errors
283///
284/// Returns [`Error::CorruptObject`] when format checks fail.
285pub fn read_pack_index(idx_path: &Path) -> Result<PackIndex> {
286    let bytes = fs::read(idx_path).map_err(Error::Io)?;
287    if bytes.len() < 8 + 256 * 4 + 40 {
288        return Err(Error::CorruptObject(format!(
289            "index file {} is too small",
290            idx_path.display()
291        )));
292    }
293
294    let mut pos = 0usize;
295    let magic = &bytes[pos..pos + 4];
296    pos += 4;
297    if magic != [0xff, b't', b'O', b'c'] {
298        return Err(Error::CorruptObject(format!(
299            "unsupported idx signature in {}",
300            idx_path.display()
301        )));
302    }
303    let version = read_u32_be(&bytes, &mut pos)?;
304    if version != 2 {
305        return Err(Error::CorruptObject(format!(
306            "unsupported idx version {} in {}",
307            version,
308            idx_path.display()
309        )));
310    }
311
312    let mut fanout = [0u32; 256];
313    for slot in &mut fanout {
314        *slot = read_u32_be(&bytes, &mut pos)?;
315    }
316    let object_count = fanout[255] as usize;
317
318    let need = pos
319        .saturating_add(object_count * 20)
320        .saturating_add(object_count * 4)
321        .saturating_add(object_count * 4)
322        .saturating_add(40);
323    if bytes.len() < need {
324        return Err(Error::CorruptObject(format!(
325            "truncated idx file {}",
326            idx_path.display()
327        )));
328    }
329
330    let mut oids = Vec::with_capacity(object_count);
331    for _ in 0..object_count {
332        let oid = ObjectId::from_bytes(&bytes[pos..pos + 20])?;
333        pos += 20;
334        oids.push(oid);
335    }
336
337    // Skip CRC table.
338    pos += object_count * 4;
339
340    let mut offsets32 = Vec::with_capacity(object_count);
341    let mut large_count = 0usize;
342    for _ in 0..object_count {
343        let v = read_u32_be(&bytes, &mut pos)?;
344        if (v & 0x8000_0000) != 0 {
345            large_count += 1;
346        }
347        offsets32.push(v);
348    }
349
350    if bytes.len() < pos + large_count * 8 + 40 {
351        return Err(Error::CorruptObject(format!(
352            "truncated large offset table in {}",
353            idx_path.display()
354        )));
355    }
356    let mut large_offsets = Vec::with_capacity(large_count);
357    for _ in 0..large_count {
358        large_offsets.push(read_u64_be(&bytes, &mut pos)?);
359    }
360
361    let mut next_large = 0usize;
362    let mut entries = Vec::with_capacity(object_count);
363    for (i, oid) in oids.into_iter().enumerate() {
364        let raw = offsets32[i];
365        let offset = if (raw & 0x8000_0000) == 0 {
366            raw as u64
367        } else {
368            let off = large_offsets.get(next_large).copied().ok_or_else(|| {
369                Error::CorruptObject(format!("bad large offset index in {}", idx_path.display()))
370            })?;
371            next_large += 1;
372            off
373        };
374        entries.push(PackIndexEntry { oid, offset });
375    }
376
377    let mut pack_path = idx_path.to_path_buf();
378    pack_path.set_extension("pack");
379    Ok(PackIndex {
380        idx_path: idx_path.to_path_buf(),
381        pack_path,
382        entries,
383    })
384}
385
/// A pack object type as encoded in the packed stream header.
///
/// `parse_pack_object_header` maps the 3-bit type code of an entry header to
/// these variants; the code each variant corresponds to is noted below.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackedType {
    /// Commit object (type code 1).
    Commit,
    /// Tree object (type code 2).
    Tree,
    /// Blob object (type code 3).
    Blob,
    /// Tag object (type code 4).
    Tag,
    /// Offset delta (type code 6): the base is located by a pack offset.
    OfsDelta,
    /// Reference delta (type code 7): the base is named by an object ID.
    RefDelta,
}
402
403impl PackedType {
404    /// Printable name used by `verify-pack -v` output.
405    #[must_use]
406    pub fn as_str(self) -> &'static str {
407        match self {
408            Self::Commit => "commit",
409            Self::Tree => "tree",
410            Self::Blob => "blob",
411            Self::Tag => "tag",
412            Self::OfsDelta => "ofs-delta",
413            Self::RefDelta => "ref-delta",
414        }
415    }
416}
417
/// A decoded object header record used by `verify-pack`.
///
/// One record is produced per pack entry by [`verify_pack_and_collect`].
#[derive(Debug, Clone)]
pub struct VerifyObjectRecord {
    /// Object ID from the index.
    pub oid: ObjectId,
    /// Type from the pack stream header.
    pub packed_type: PackedType,
    /// Size value decoded from the pack entry header.
    // NOTE(review): for delta entries this is whatever the header records,
    // presumably the size of the delta stream rather than of the final
    // object; confirm against the pack format documentation.
    pub size: u64,
    /// Total bytes in pack occupied by this object slot: the distance from
    /// this entry's offset to the next entry's offset (or, for the last
    /// entry, to the 20-byte trailer at the end of the pack).
    pub size_in_pack: u64,
    /// Offset in pack file.
    pub offset: u64,
    /// Delta chain depth, if deltified; `None` for non-delta entries.
    pub depth: Option<u64>,
    /// Base object for ref-delta objects; `None` for every other type.
    pub base_oid: Option<ObjectId>,
}
436
437/// Verify one pack/index pair and optionally return object records.
438///
439/// # Errors
440///
441/// Returns [`Error::CorruptObject`] when the index or pack are malformed.
442pub fn verify_pack_and_collect(idx_path: &Path) -> Result<Vec<VerifyObjectRecord>> {
443    let idx = read_pack_index(idx_path)?;
444    let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
445    if pack_bytes.len() < 12 + 20 {
446        return Err(Error::CorruptObject(format!(
447            "pack file {} is too small",
448            idx.pack_path.display()
449        )));
450    }
451    if &pack_bytes[0..4] != b"PACK" {
452        return Err(Error::CorruptObject(format!(
453            "pack file {} has invalid signature",
454            idx.pack_path.display()
455        )));
456    }
457    let version = u32::from_be_bytes(pack_bytes[4..8].try_into().unwrap_or([0, 0, 0, 0]));
458    if version != 2 && version != 3 {
459        return Err(Error::CorruptObject(format!(
460            "unsupported pack version {} in {}",
461            version,
462            idx.pack_path.display()
463        )));
464    }
465    let count = u32::from_be_bytes(pack_bytes[8..12].try_into().unwrap_or([0, 0, 0, 0])) as usize;
466    if count != idx.entries.len() {
467        return Err(Error::CorruptObject(format!(
468            "pack/index object count mismatch for {}",
469            idx.pack_path.display()
470        )));
471    }
472
473    let mut by_offset: BTreeMap<u64, ObjectId> = BTreeMap::new();
474    for entry in &idx.entries {
475        by_offset.insert(entry.offset, entry.oid);
476    }
477    let offsets: Vec<u64> = by_offset.keys().copied().collect();
478    if offsets.is_empty() {
479        return Ok(Vec::new());
480    }
481
482    let mut by_oid: HashMap<ObjectId, usize> = HashMap::new();
483    let mut records: Vec<VerifyObjectRecord> = Vec::with_capacity(offsets.len());
484    for (i, offset) in offsets.iter().copied().enumerate() {
485        let oid = by_offset.get(&offset).copied().ok_or_else(|| {
486            Error::CorruptObject(format!("missing object id for offset {}", offset))
487        })?;
488        let next_off = offsets
489            .get(i + 1)
490            .copied()
491            .unwrap_or((pack_bytes.len() - 20) as u64);
492        if next_off <= offset || next_off > (pack_bytes.len() - 20) as u64 {
493            return Err(Error::CorruptObject(format!(
494                "invalid object boundaries at offset {} in {}",
495                offset,
496                idx.pack_path.display()
497            )));
498        }
499        let mut p = offset as usize;
500        let (packed_type, size) = parse_pack_object_header(&pack_bytes, &mut p)?;
501        let mut base_oid = None;
502        let mut depth = None;
503
504        match packed_type {
505            PackedType::RefDelta => {
506                if p + 20 > pack_bytes.len() {
507                    return Err(Error::CorruptObject(format!(
508                        "truncated ref-delta base at offset {}",
509                        offset
510                    )));
511                }
512                base_oid = Some(ObjectId::from_bytes(&pack_bytes[p..p + 20])?);
513            }
514            PackedType::OfsDelta => {
515                let base_offset = parse_ofs_delta_base(&pack_bytes, &mut p, offset)?;
516                let base_depth = records
517                    .iter()
518                    .find(|r| r.offset == base_offset)
519                    .and_then(|r| r.depth)
520                    .unwrap_or(0);
521                depth = Some(base_depth + 1);
522            }
523            PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {}
524        }
525
526        let size_in_pack = next_off - offset;
527        records.push(VerifyObjectRecord {
528            oid,
529            packed_type,
530            size,
531            size_in_pack,
532            offset,
533            depth,
534            base_oid,
535        });
536        by_oid.insert(oid, i);
537    }
538
539    // Fill ref-delta depths in a second pass once all base objects are known.
540    for i in 0..records.len() {
541        if records[i].packed_type != PackedType::RefDelta {
542            continue;
543        }
544        let base = records[i]
545            .base_oid
546            .ok_or_else(|| Error::CorruptObject("ref-delta missing base oid".to_owned()))?;
547        let base_depth = by_oid
548            .get(&base)
549            .and_then(|idx| records.get(*idx))
550            .and_then(|r| r.depth)
551            .unwrap_or(0);
552        records[i].depth = Some(base_depth + 1);
553    }
554
555    Ok(records)
556}
557
558/// Read alternates recursively, deduplicated in discovery order.
559///
560/// # Errors
561///
562/// Returns [`Error::Io`] when alternate files cannot be read.
563pub fn read_alternates_recursive(objects_dir: &Path) -> Result<Vec<PathBuf>> {
564    let mut visited = HashSet::new();
565    let mut out = Vec::new();
566    read_alternates_inner(objects_dir, &mut visited, &mut out)?;
567    Ok(out)
568}
569
570fn read_alternates_inner(
571    objects_dir: &Path,
572    visited: &mut HashSet<PathBuf>,
573    out: &mut Vec<PathBuf>,
574) -> Result<()> {
575    let canonical = canonical_or_self(objects_dir);
576    let alt_file = canonical.join("info").join("alternates");
577    let text = match fs::read_to_string(&alt_file) {
578        Ok(text) => text,
579        Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(()),
580        Err(err) => return Err(Error::Io(err)),
581    };
582
583    for raw in text.lines() {
584        let line = raw.trim();
585        if line.is_empty() {
586            continue;
587        }
588        let candidate = if Path::new(line).is_absolute() {
589            PathBuf::from(line)
590        } else {
591            canonical.join(line)
592        };
593        let candidate = canonical_or_self(&candidate);
594        if visited.insert(candidate.clone()) {
595            out.push(candidate.clone());
596            read_alternates_inner(&candidate, visited, out)?;
597        }
598    }
599    Ok(())
600}
601
/// Canonicalize `path`, falling back to an unchanged copy of `path` when
/// canonicalization fails (for example because the path does not exist).
fn canonical_or_self(path: &Path) -> PathBuf {
    match fs::canonicalize(path) {
        Ok(resolved) => resolved,
        Err(_) => path.to_path_buf(),
    }
}
605
606/// Convert a [`PackedType`] to an [`ObjectKind`] for non-delta types.
607fn packed_type_to_kind(pt: PackedType) -> Result<ObjectKind> {
608    match pt {
609        PackedType::Commit => Ok(ObjectKind::Commit),
610        PackedType::Tree => Ok(ObjectKind::Tree),
611        PackedType::Blob => Ok(ObjectKind::Blob),
612        PackedType::Tag => Ok(ObjectKind::Tag),
613        PackedType::OfsDelta | PackedType::RefDelta => Err(Error::CorruptObject(
614            "cannot convert delta type to object kind directly".to_owned(),
615        )),
616    }
617}
618
619/// Decompress zlib data from a byte slice starting at `pos`.
620///
621/// Returns the decompressed data and advances `pos` past the consumed
622/// compressed bytes.
623fn decompress_pack_data(bytes: &[u8], pos: &mut usize, expected_size: u64) -> Result<Vec<u8>> {
624    let slice = &bytes[*pos..];
625    let mut decoder = ZlibDecoder::new(slice);
626    let mut out = Vec::with_capacity(expected_size as usize);
627    decoder
628        .read_to_end(&mut out)
629        .map_err(|e| Error::Zlib(e.to_string()))?;
630    *pos += decoder.total_in() as usize;
631    Ok(out)
632}
633
634/// Read and fully resolve one object from a pack file given its offset.
635///
636/// Handles OFS_DELTA and REF_DELTA by recursively reading the base object.
637/// The `idx` is used for REF_DELTA resolution (to find a base by OID).
638fn read_pack_object_at(
639    pack_bytes: &[u8],
640    offset: u64,
641    idx: &PackIndex,
642    depth: usize,
643) -> Result<(ObjectKind, Vec<u8>)> {
644    if depth > 50 {
645        return Err(Error::CorruptObject(
646            "delta chain too deep (>50)".to_owned(),
647        ));
648    }
649    let mut pos = offset as usize;
650    let (packed_type, size) = parse_pack_object_header(pack_bytes, &mut pos)?;
651
652    match packed_type {
653        PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {
654            let data = decompress_pack_data(pack_bytes, &mut pos, size)?;
655            let kind = packed_type_to_kind(packed_type)?;
656            Ok((kind, data))
657        }
658        PackedType::OfsDelta => {
659            let base_offset = parse_ofs_delta_base(pack_bytes, &mut pos, offset)?;
660            let delta_data = decompress_pack_data(pack_bytes, &mut pos, size)?;
661            let (base_kind, base_data) =
662                read_pack_object_at(pack_bytes, base_offset, idx, depth + 1)?;
663            let result = apply_delta(&base_data, &delta_data)?;
664            Ok((base_kind, result))
665        }
666        PackedType::RefDelta => {
667            if pos + 20 > pack_bytes.len() {
668                return Err(Error::CorruptObject(
669                    "truncated ref-delta base OID".to_owned(),
670                ));
671            }
672            let base_oid = ObjectId::from_bytes(&pack_bytes[pos..pos + 20])?;
673            pos += 20;
674            let delta_data = decompress_pack_data(pack_bytes, &mut pos, size)?;
675            // Find the base in the same pack index
676            let base_entry = idx
677                .entries
678                .iter()
679                .find(|e| e.oid == base_oid)
680                .ok_or_else(|| {
681                    Error::CorruptObject(format!(
682                        "ref-delta base {} not found in pack",
683                        base_oid
684                    ))
685                })?;
686            let (base_kind, base_data) =
687                read_pack_object_at(pack_bytes, base_entry.offset, idx, depth + 1)?;
688            let result = apply_delta(&base_data, &delta_data)?;
689            Ok((base_kind, result))
690        }
691    }
692}
693
694/// Read an object from a pack file by its OID.
695///
696/// Searches the given pack index for the OID, then reads and decompresses
697/// the object from the corresponding pack file, resolving delta chains.
698///
699/// # Errors
700///
701/// Returns [`Error::ObjectNotFound`] if the OID is not in this pack.
702pub fn read_object_from_pack(idx: &PackIndex, oid: &ObjectId) -> Result<Object> {
703    let entry = idx
704        .entries
705        .iter()
706        .find(|e| e.oid == *oid)
707        .ok_or_else(|| Error::ObjectNotFound(oid.to_hex()))?;
708
709    let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
710    let (kind, data) = read_pack_object_at(&pack_bytes, entry.offset, idx, 0)?;
711    Ok(Object::new(kind, data))
712}
713
714/// Search all pack indexes in `objects_dir` for the given OID and read it.
715///
716/// # Errors
717///
718/// Returns [`Error::ObjectNotFound`] if no pack contains the OID.
719pub fn read_object_from_packs(objects_dir: &Path, oid: &ObjectId) -> Result<Object> {
720    let indexes = read_local_pack_indexes(objects_dir)?;
721    for idx in &indexes {
722        if idx.entries.iter().any(|e| e.oid == *oid) {
723            return read_object_from_pack(idx, oid);
724        }
725    }
726    Err(Error::ObjectNotFound(oid.to_hex()))
727}
728
729fn parse_pack_object_header(bytes: &[u8], pos: &mut usize) -> Result<(PackedType, u64)> {
730    let first = *bytes.get(*pos).ok_or_else(|| {
731        Error::CorruptObject("unexpected end of pack header while decoding object".to_owned())
732    })?;
733    *pos += 1;
734
735    let type_code = (first >> 4) & 0x7;
736    let mut size = (first & 0x0f) as u64;
737    let mut shift = 4u32;
738    let mut c = first;
739    while (c & 0x80) != 0 {
740        c = *bytes.get(*pos).ok_or_else(|| {
741            Error::CorruptObject("unexpected end of variable size header".to_owned())
742        })?;
743        *pos += 1;
744        size |= ((c & 0x7f) as u64) << shift;
745        shift += 7;
746    }
747
748    let packed_type = match type_code {
749        1 => PackedType::Commit,
750        2 => PackedType::Tree,
751        3 => PackedType::Blob,
752        4 => PackedType::Tag,
753        6 => PackedType::OfsDelta,
754        7 => PackedType::RefDelta,
755        _ => {
756            return Err(Error::CorruptObject(format!(
757                "unsupported packed object type {}",
758                type_code
759            )))
760        }
761    };
762    Ok((packed_type, size))
763}
764
765fn parse_ofs_delta_base(bytes: &[u8], pos: &mut usize, this_offset: u64) -> Result<u64> {
766    let mut c = *bytes
767        .get(*pos)
768        .ok_or_else(|| Error::CorruptObject("truncated ofs-delta header".to_owned()))?;
769    *pos += 1;
770    let mut value = (c & 0x7f) as u64;
771    while (c & 0x80) != 0 {
772        c = *bytes
773            .get(*pos)
774            .ok_or_else(|| Error::CorruptObject("truncated ofs-delta header".to_owned()))?;
775        *pos += 1;
776        value = ((value + 1) << 7) | (c & 0x7f) as u64;
777    }
778    this_offset
779        .checked_sub(value)
780        .ok_or_else(|| Error::CorruptObject("invalid ofs-delta base offset".to_owned()))
781}
782
783fn read_u32_be(bytes: &[u8], pos: &mut usize) -> Result<u32> {
784    if bytes.len() < *pos + 4 {
785        return Err(Error::CorruptObject(
786            "unexpected end of idx while reading u32".to_owned(),
787        ));
788    }
789    let v = u32::from_be_bytes(
790        bytes[*pos..*pos + 4]
791            .try_into()
792            .map_err(|_| Error::CorruptObject("failed to parse u32".to_owned()))?,
793    );
794    *pos += 4;
795    Ok(v)
796}
797
798fn read_u64_be(bytes: &[u8], pos: &mut usize) -> Result<u64> {
799    if bytes.len() < *pos + 8 {
800        return Err(Error::CorruptObject(
801            "unexpected end of idx while reading u64".to_owned(),
802        ));
803    }
804    let v = u64::from_be_bytes(
805        bytes[*pos..*pos + 8]
806            .try_into()
807            .map_err(|_| Error::CorruptObject("failed to parse u64".to_owned()))?,
808    );
809    *pos += 8;
810    Ok(v)
811}