Skip to main content

git_core/
pack.rs

1//! Packfile reading (pack v2 + index v2), the normal post-`gc`/clone storage.
2//!
3//! A `.idx` (version 2) maps object hashes to byte offsets in the sibling
4//! `.pack`; each pack object is a varint-headed, zlib-compressed blob that may be
5//! whole (commit/tree/blob/tag) or a delta (`OFS_DELTA`/`REF_DELTA`) against
6//! another object. This module resolves deltas recursively and verifies the
7//! SHA-1 of every reconstructed object.
8//!
9//! Reference: git `Documentation/gitformat-pack.txt` (pack + index v2 layouts,
10//! delta instruction encoding).
11
12use std::fs;
13use std::io::Read;
14use std::path::Path;
15
16use flate2::read::ZlibDecoder;
17use sha1::{Digest, Sha1};
18
19use crate::error::{GitError, Result};
20use crate::hash::GitHash;
21use crate::object::{ObjectKind, RawObject};
22
23/// `.idx` version-2 magic: `\377tOc`.
24const IDX_V2_MAGIC: [u8; 4] = [0xff, 0x74, 0x4f, 0x63];
25/// Deflate-bomb guard: reject any object decompressing past 512 MiB.
26const MAX_OBJECT_SIZE: u64 = 512 * 1024 * 1024;
27/// Guard against a pathological delta-base chain (cycles / fuzzed packs).
28const MAX_DELTA_DEPTH: u32 = 64;
29
30/// Read `hash` from any packfile under `objects_dir`. Returns `Ok(None)` when no
31/// pack contains it (so the caller can report a genuine not-found).
32pub fn read_packed(objects_dir: &Path, hash: &GitHash) -> Result<Option<RawObject>> {
33    let pack_dir = objects_dir.join("pack");
34    let Ok(entries) = fs::read_dir(&pack_dir) else {
35        return Ok(None);
36    };
37    let mut idx_paths: Vec<_> = entries
38        .flatten()
39        .map(|e| e.path())
40        .filter(|p| p.extension().is_some_and(|x| x == "idx"))
41        .collect();
42    idx_paths.sort(); // deterministic order across packs
43    for idx_path in idx_paths {
44        let idx = fs::read(&idx_path)?;
45        if let Some(offset) = idx_lookup(&idx, hash)? {
46            let pack = fs::read(idx_path.with_extension("pack"))?;
47            let (kind, data) = read_object_at(&pack, offset, objects_dir, 0)?;
48            let verified = verify(hash, kind, &data);
49            return Ok(Some(RawObject {
50                kind,
51                data,
52                verified,
53            }));
54        }
55    }
56    Ok(None)
57}
58
59/// Enumerate every object SHA across all packfiles under `objects_dir`.
60///
61/// Walks the sorted name table of each `.idx` (version 2). A missing pack
62/// directory yields an empty list; an unsupported (non-v2) index fails loud.
63///
64/// # Errors
65/// Returns [`GitError`] on an I/O failure or a malformed/unsupported index.
66pub fn list_packed(objects_dir: &Path) -> Result<Vec<GitHash>> {
67    let pack_dir = objects_dir.join("pack");
68    let Ok(entries) = fs::read_dir(&pack_dir) else {
69        return Ok(Vec::new());
70    };
71    let mut idx_paths: Vec<_> = entries
72        .flatten()
73        .map(|e| e.path())
74        .filter(|p| p.extension().is_some_and(|x| x == "idx"))
75        .collect();
76    idx_paths.sort();
77
78    let mut out = Vec::new();
79    for idx_path in idx_paths {
80        let idx = fs::read(&idx_path)?;
81        idx_names(&idx, &mut out)?;
82    }
83    Ok(out)
84}
85
86/// Append every SHA in an index-v2 name table to `out`.
87fn idx_names(idx: &[u8], out: &mut Vec<GitHash>) -> Result<()> {
88    if idx.len() < 8 || idx[0..4] != IDX_V2_MAGIC || be_u32(idx, 4)? != 2 {
89        // Mirror `idx_lookup`: a v1 index or anything else is not supported.
90        // The hash is unknown here, so report bounds rather than guess one.
91        return Err(GitError::OutOfBounds);
92    }
93    const FANOUT: usize = 8;
94    let count = be_u32(idx, FANOUT + 255 * 4)? as usize;
95    let names = FANOUT + 256 * 4;
96    for i in 0..count {
97        let name = idx
98            .get(names + i * 20..names + i * 20 + 20)
99            .ok_or(GitError::OutOfBounds)?;
100        out.push(GitHash::from_bytes(name)?);
101    }
102    Ok(())
103}
104
105/// Binary-search an index-v2 fanout/name table for `hash`, returning its pack
106/// byte offset. Non-v2 indexes fail loud rather than silently miss.
107fn idx_lookup(idx: &[u8], hash: &GitHash) -> Result<Option<u64>> {
108    if idx.len() < 8 || idx[0..4] != IDX_V2_MAGIC {
109        // A v1 index (no magic) or anything else: don't guess.
110        return Err(GitError::PackfileUnsupported(*hash));
111    }
112    if be_u32(idx, 4)? != 2 {
113        return Err(GitError::PackfileUnsupported(*hash));
114    }
115    const FANOUT: usize = 8;
116    let want = hash.as_bytes();
117    let first = want[0] as usize;
118    let mut lo = if first == 0 {
119        0
120    } else {
121        be_u32(idx, FANOUT + (first - 1) * 4)?
122    } as usize;
123    let mut hi = be_u32(idx, FANOUT + first * 4)? as usize;
124    let count = be_u32(idx, FANOUT + 255 * 4)? as usize;
125    let names = FANOUT + 256 * 4;
126    while lo < hi {
127        let mid = lo + (hi - lo) / 2;
128        let name = idx
129            .get(names + mid * 20..names + mid * 20 + 20)
130            .ok_or(GitError::OutOfBounds)?;
131        match name.cmp(want.as_slice()) {
132            std::cmp::Ordering::Less => lo = mid + 1,
133            std::cmp::Ordering::Greater => hi = mid,
134            std::cmp::Ordering::Equal => {
135                // names(count*20) then CRCs(count*4) then small offsets(count*4).
136                let offsets = names + count * 20 + count * 4;
137                let raw = be_u32(idx, offsets + mid * 4)?;
138                if raw & 0x8000_0000 == 0 {
139                    return Ok(Some(u64::from(raw)));
140                }
141                // MSB set: index into the 8-byte large-offset table.
142                let large = offsets + count * 4;
143                let slot = (raw & 0x7fff_ffff) as usize;
144                return Ok(Some(be_u64(idx, large + slot * 8)?));
145            }
146        }
147    }
148    Ok(None)
149}
150
151/// Read and fully resolve the object at `offset` in `pack`.
152fn read_object_at(
153    pack: &[u8],
154    offset: u64,
155    objects_dir: &Path,
156    depth: u32,
157) -> Result<(ObjectKind, Vec<u8>)> {
158    if depth > MAX_DELTA_DEPTH {
159        return Err(GitError::InvalidObject("delta chain too deep".into()));
160    }
161    let off = usize::try_from(offset).map_err(|_| GitError::OutOfBounds)?;
162    let (type_id, _size, body) = parse_object_header(pack, off)?;
163    match type_id {
164        1..=4 => Ok((kind_from_type(type_id)?, inflate(pack, body)?)),
165        6 => {
166            // OFS_DELTA: a negative offset (varint) to the base, then the delta.
167            let (back, delta_start) = parse_ofs_delta(pack, body)?;
168            let base_off = off.checked_sub(back).ok_or(GitError::OutOfBounds)?;
169            let (kind, base) = read_object_at(pack, base_off as u64, objects_dir, depth + 1)?;
170            Ok((kind, apply_delta(&base, &inflate(pack, delta_start)?)?))
171        }
172        7 => {
173            // REF_DELTA: a 20-byte base hash, then the delta.
174            let base_hash =
175                GitHash::from_bytes(pack.get(body..body + 20).ok_or(GitError::OutOfBounds)?)?;
176            let delta = inflate(pack, body + 20)?;
177            let (kind, base) = resolve_base(objects_dir, &base_hash, depth + 1)?;
178            Ok((kind, apply_delta(&base, &delta)?))
179        }
180        other => Err(GitError::InvalidObject(format!(
181            "unknown pack object type {other}"
182        ))),
183    }
184}
185
186/// Resolve a `REF_DELTA` base by hash — it may live in a pack or as a loose object.
187fn resolve_base(objects_dir: &Path, hash: &GitHash, depth: u32) -> Result<(ObjectKind, Vec<u8>)> {
188    if depth > MAX_DELTA_DEPTH {
189        return Err(GitError::InvalidObject("delta chain too deep".into()));
190    }
191    if let Some(obj) = read_packed(objects_dir, hash)? {
192        return Ok((obj.kind, obj.data));
193    }
194    let obj = crate::loose::read_loose(objects_dir, hash)?;
195    Ok((obj.kind, obj.data))
196}
197
198/// Parse a pack object header: 3-bit type + variable-length size. Returns
199/// `(type_id, size, body_offset)`.
200fn parse_object_header(pack: &[u8], off: usize) -> Result<(u8, usize, usize)> {
201    let mut pos = off;
202    let b = *pack.get(pos).ok_or(GitError::OutOfBounds)?;
203    pos += 1;
204    let type_id = (b >> 4) & 0x7;
205    let mut size = (b & 0x0f) as usize;
206    let mut shift = 4;
207    let mut more = b & 0x80 != 0;
208    while more {
209        let b = *pack.get(pos).ok_or(GitError::OutOfBounds)?;
210        pos += 1;
211        size |= ((b & 0x7f) as usize)
212            .checked_shl(shift)
213            .ok_or(GitError::OutOfBounds)?;
214        shift += 7;
215        more = b & 0x80 != 0;
216    }
217    Ok((type_id, size, pos))
218}
219
220/// Parse the `OFS_DELTA` negative base offset. Returns `(distance, body_offset)`.
221fn parse_ofs_delta(pack: &[u8], off: usize) -> Result<(usize, usize)> {
222    let mut pos = off;
223    let mut b = *pack.get(pos).ok_or(GitError::OutOfBounds)?;
224    pos += 1;
225    let mut value = (b & 0x7f) as usize;
226    while b & 0x80 != 0 {
227        b = *pack.get(pos).ok_or(GitError::OutOfBounds)?;
228        pos += 1;
229        value = value
230            .checked_add(1)
231            .and_then(|v| v.checked_shl(7))
232            .map(|v| v | (b & 0x7f) as usize)
233            .ok_or(GitError::OutOfBounds)?;
234    }
235    Ok((value, pos))
236}
237
238/// Apply a git delta (`base` + delta instructions → reconstructed object).
239///
240/// Exposed for fuzzing: `delta` is fully attacker-controlled, so this must never
241/// panic or read out of bounds regardless of input.
242///
243/// # Errors
244/// Returns [`GitError`] on a malformed delta (bad opcode, out-of-range copy, or
245/// a size disagreeing with the delta header).
246pub fn apply_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
247    let (_src, mut p) = read_delta_size(delta, 0)?;
248    let (target_size, q) = read_delta_size(delta, p)?;
249    p = q;
250    let mut out = Vec::with_capacity(target_size.min(MAX_OBJECT_SIZE as usize));
251    while p < delta.len() {
252        let cmd = delta[p];
253        p += 1;
254        if cmd & 0x80 != 0 {
255            // COPY <offset,size> from base; bytes present per the cmd bitmask.
256            let mut copy_off = 0usize;
257            for i in 0..4 {
258                if cmd & (1 << i) != 0 {
259                    copy_off |= (*delta.get(p).ok_or(GitError::OutOfBounds)? as usize) << (8 * i);
260                    p += 1;
261                }
262            }
263            let mut copy_size = 0usize;
264            for i in 0..3 {
265                if cmd & (1 << (4 + i)) != 0 {
266                    copy_size |= (*delta.get(p).ok_or(GitError::OutOfBounds)? as usize) << (8 * i);
267                    p += 1;
268                }
269            }
270            if copy_size == 0 {
271                copy_size = 0x1_0000;
272            }
273            let end = copy_off
274                .checked_add(copy_size)
275                .ok_or(GitError::OutOfBounds)?;
276            out.extend_from_slice(base.get(copy_off..end).ok_or(GitError::OutOfBounds)?);
277        } else if cmd != 0 {
278            // INSERT: `cmd` literal bytes follow in the delta stream.
279            let n = cmd as usize;
280            out.extend_from_slice(delta.get(p..p + n).ok_or(GitError::OutOfBounds)?);
281            p += n;
282        } else {
283            return Err(GitError::InvalidObject("delta opcode 0 is reserved".into()));
284        }
285    }
286    if out.len() != target_size {
287        return Err(GitError::InvalidObject(format!(
288            "delta produced {} bytes, header said {target_size}",
289            out.len()
290        )));
291    }
292    Ok(out)
293}
294
295/// Read a little-endian 7-bit varint (delta source/target sizes).
296fn read_delta_size(delta: &[u8], mut p: usize) -> Result<(usize, usize)> {
297    let mut value = 0usize;
298    let mut shift = 0u32;
299    loop {
300        let b = *delta.get(p).ok_or(GitError::OutOfBounds)?;
301        p += 1;
302        value |= ((b & 0x7f) as usize)
303            .checked_shl(shift)
304            .ok_or(GitError::OutOfBounds)?;
305        shift += 7;
306        if b & 0x80 == 0 {
307            break;
308        }
309    }
310    Ok((value, p))
311}
312
313/// Inflate the zlib stream beginning at `start`, with a bomb guard.
314fn inflate(pack: &[u8], start: usize) -> Result<Vec<u8>> {
315    let compressed = pack.get(start..).ok_or(GitError::OutOfBounds)?;
316    let mut out = Vec::new();
317    ZlibDecoder::new(compressed)
318        .take(MAX_OBJECT_SIZE + 1)
319        .read_to_end(&mut out)
320        .map_err(|e| GitError::InvalidObject(format!("pack zlib failed: {e}")))?;
321    if out.len() as u64 > MAX_OBJECT_SIZE {
322        return Err(GitError::DeflateBomb);
323    }
324    Ok(out)
325}
326
327fn kind_from_type(type_id: u8) -> Result<ObjectKind> {
328    match type_id {
329        1 => Ok(ObjectKind::Commit),
330        2 => Ok(ObjectKind::Tree),
331        3 => Ok(ObjectKind::Blob),
332        4 => Ok(ObjectKind::Tag),
333        other => Err(GitError::InvalidObject(format!(
334            "pack object type {other} is not a base type"
335        ))),
336    }
337}
338
339fn kind_str(kind: ObjectKind) -> &'static [u8] {
340    match kind {
341        ObjectKind::Commit => b"commit",
342        ObjectKind::Tree => b"tree",
343        ObjectKind::Blob => b"blob",
344        ObjectKind::Tag => b"tag",
345    }
346}
347
348/// SHA-1 over `"<kind> <len>\0<data>"` must equal the object's name.
349fn verify(hash: &GitHash, kind: ObjectKind, data: &[u8]) -> bool {
350    let mut h = Sha1::new();
351    h.update(kind_str(kind));
352    h.update(b" ");
353    h.update(data.len().to_string().as_bytes());
354    h.update(b"\0");
355    h.update(data);
356    let digest: [u8; 20] = h.finalize().into();
357    &digest == hash.as_bytes()
358}
359
360fn be_u32(b: &[u8], off: usize) -> Result<u32> {
361    let s = b.get(off..off + 4).ok_or(GitError::OutOfBounds)?;
362    Ok(u32::from_be_bytes([s[0], s[1], s[2], s[3]]))
363}
364
365fn be_u64(b: &[u8], off: usize) -> Result<u64> {
366    let s = b.get(off..off + 8).ok_or(GitError::OutOfBounds)?;
367    let mut a = [0u8; 8];
368    a.copy_from_slice(s);
369    Ok(u64::from_be_bytes(a))
370}