// grit_lib/pack.rs
1//! Pack and pack-index helpers for object counting and verification.
2//!
3//! This module implements a focused subset of pack functionality required by
4//! `count-objects`, `verify-pack`, and `show-index`.
5
6use crate::error::{Error, Result};
7use crate::objects::{Object, ObjectId, ObjectKind};
8use crate::unpack_objects::apply_delta;
9use flate2::read::ZlibDecoder;
10use sha1::{Digest, Sha1};
11use sha2::Sha256;
12use std::collections::{BTreeMap, HashMap, HashSet};
13use std::fs;
14use std::io;
15use std::io::Read;
16use std::path::{Path, PathBuf};
17use std::sync::Arc;
18
/// A parsed entry from an index file.
#[derive(Debug, Clone)]
pub struct PackIndexEntry {
    /// Raw object identifier (`20` bytes for SHA-1, `32` for SHA-256).
    /// Within a [`PackIndex`] the entries are kept sorted byte-wise by this field.
    pub oid: Vec<u8>,
    /// Byte offset of the object in the corresponding `.pack`.
    pub offset: u64,
}
27
/// Parsed data from a `.idx` file (version 2).
///
/// Version-1 (legacy) indexes are parsed into the same shape; their fanout is
/// recomputed from the sorted entry list rather than taken from disk.
#[derive(Debug, Clone)]
pub struct PackIndex {
    /// Absolute path to the `.idx` file.
    pub idx_path: PathBuf,
    /// Absolute path to the `.pack` file (same stem as `idx_path`, `.pack` extension).
    pub pack_path: PathBuf,
    /// OID width in bytes (`20` for SHA-1, `32` for SHA-256).
    pub hash_bytes: usize,
    /// Parsed entries in index order (sorted by OID).
    pub entries: Vec<PackIndexEntry>,
    /// 256-entry first-byte fanout table: `fanout[b]` is the count of entries whose
    /// first OID byte is `<= b`. Enables O(log n) lookup via the OID's first byte
    /// (matches Git's `find_pack_entry_one` in `packfile.c`).
    pub fanout: [u32; 256],
}
44
45impl PackIndex {
46    /// Find the offset in the `.pack` file for the given SHA-1 OID via the fanout
47    /// table and binary search; returns `None` when the OID is not present.
48    ///
49    /// Pack indexes containing SHA-256 OIDs are skipped here (callers handling
50    /// SHA-256 should branch on [`PackIndex::hash_bytes`]).
51    #[must_use]
52    pub fn find_offset(&self, oid: &ObjectId) -> Option<u64> {
53        if self.hash_bytes != 20 {
54            return None;
55        }
56        let needle = oid.as_bytes();
57        let first_byte = needle[0] as usize;
58        let lo = if first_byte == 0 {
59            0
60        } else {
61            self.fanout[first_byte - 1] as usize
62        };
63        let hi = self.fanout[first_byte] as usize;
64        if lo >= hi || hi > self.entries.len() {
65            return None;
66        }
67        let slice = &self.entries[lo..hi];
68        slice
69            .binary_search_by(|e| e.oid.as_slice().cmp(needle.as_slice()))
70            .ok()
71            .map(|idx| slice[idx].offset)
72    }
73
74    /// Whether this pack index contains the given SHA-1 OID.
75    #[must_use]
76    pub fn contains(&self, oid: &ObjectId) -> bool {
77        self.find_offset(oid).is_some()
78    }
79}
80
/// A single entry produced by `show-index`, with an optional CRC32.
///
/// Version-1 index files do not store CRC32 values; `crc32` is `None` for
/// those entries.  Version-2 index files always carry a CRC32.
#[derive(Debug, Clone)]
pub struct ShowIndexEntry {
    /// Raw object identifier (20 or 32 bytes).
    pub oid: Vec<u8>,
    /// Byte offset of the object in the corresponding `.pack` file.
    /// (v1 stores offsets as 32 bits on disk, so v1 values always fit in `u32`.)
    pub offset: u64,
    /// CRC32 of the compressed object data (v2 only).
    pub crc32: Option<u32>,
}
94
95/// Parse a pack index from a reader (e.g. stdin) and return all entries in
96/// index order.
97///
98/// Both version-1 (legacy) and version-2 index formats are supported.  Only
99/// SHA-1 (20-byte hash) objects are supported; pass `hash_size = 20`.
100///
101/// # Errors
102///
103/// Returns [`Error::CorruptObject`] when the data cannot be parsed as a valid
104/// pack index.
105pub fn show_index_entries(reader: &mut dyn Read, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
106    let mut buf = Vec::new();
107    reader.read_to_end(&mut buf).map_err(Error::Io)?;
108
109    if buf.len() < 8 {
110        return Err(Error::CorruptObject(
111            "unable to read header: index file too small".to_owned(),
112        ));
113    }
114
115    let mut pos = 0usize;
116    let first_u32 = read_u32_be(&buf, &mut pos)?;
117
118    const PACK_IDX_SIGNATURE: u32 = 0xff74_4f63;
119
120    if first_u32 == PACK_IDX_SIGNATURE {
121        // Version 2 (or higher): read version word, then 256-entry fanout.
122        let version = read_u32_be(&buf, &mut pos)?;
123        if version != 2 {
124            return Err(Error::CorruptObject(format!(
125                "unknown index version: {version}"
126            )));
127        }
128        show_index_v2(&buf, &mut pos, hash_size)
129    } else {
130        // Version 1: the two u32s we already started reading are the first two
131        // fanout entries.  Re-read the whole fanout from the top.
132        pos = 0;
133        show_index_v1(&buf, &mut pos, hash_size)
134    }
135}
136
137/// Parse version-1 pack index entries from `buf`.
138fn show_index_v1(buf: &[u8], pos: &mut usize, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
139    if buf.len() < 256 * 4 {
140        return Err(Error::CorruptObject(
141            "unable to read index: v1 fanout too short".to_owned(),
142        ));
143    }
144    let mut fanout = [0u32; 256];
145    for slot in &mut fanout {
146        *slot = read_u32_be(buf, pos)?;
147    }
148    let object_count = fanout[255] as usize;
149
150    let mut entries = Vec::with_capacity(object_count);
151    for i in 0..object_count {
152        // Each record: 4-byte big-endian offset + hash_size-byte OID.
153        if *pos + 4 + hash_size > buf.len() {
154            return Err(Error::CorruptObject(format!(
155                "unable to read entry {i}/{object_count}: truncated"
156            )));
157        }
158        let offset = read_u32_be(buf, pos)? as u64;
159        let oid = buf[*pos..*pos + hash_size].to_vec();
160        *pos += hash_size;
161        entries.push(ShowIndexEntry {
162            oid,
163            offset,
164            crc32: None,
165        });
166    }
167    Ok(entries)
168}
169
170/// Parse version-2 pack index entries from `buf` starting after the magic and
171/// version words (fanout table is next).
172fn show_index_v2(buf: &[u8], pos: &mut usize, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
173    if buf.len() < *pos + 256 * 4 {
174        return Err(Error::CorruptObject(
175            "unable to read index: v2 fanout too short".to_owned(),
176        ));
177    }
178    let mut fanout = [0u32; 256];
179    for slot in &mut fanout {
180        *slot = read_u32_be(buf, pos)?;
181    }
182    let object_count = fanout[255] as usize;
183
184    // OID table.
185    let mut oids: Vec<Vec<u8>> = Vec::with_capacity(object_count);
186    for i in 0..object_count {
187        if *pos + hash_size > buf.len() {
188            return Err(Error::CorruptObject(format!(
189                "unable to read oid {i}/{object_count}: truncated"
190            )));
191        }
192        let oid = buf[*pos..*pos + hash_size].to_vec();
193        *pos += hash_size;
194        oids.push(oid);
195    }
196
197    // CRC32 table.
198    let mut crcs = Vec::with_capacity(object_count);
199    for i in 0..object_count {
200        if *pos + 4 > buf.len() {
201            return Err(Error::CorruptObject(format!(
202                "unable to read crc {i}/{object_count}: truncated"
203            )));
204        }
205        crcs.push(read_u32_be(buf, pos)?);
206    }
207
208    // 32-bit offset table.
209    let mut offsets32 = Vec::with_capacity(object_count);
210    let mut large_count = 0usize;
211    for i in 0..object_count {
212        if *pos + 4 > buf.len() {
213            return Err(Error::CorruptObject(format!(
214                "unable to read 32b offset {i}/{object_count}: truncated"
215            )));
216        }
217        let v = read_u32_be(buf, pos)?;
218        if (v & 0x8000_0000) != 0 {
219            large_count += 1;
220        }
221        offsets32.push(v);
222    }
223
224    // 64-bit large-offset table.
225    let mut large_offsets = Vec::with_capacity(large_count);
226    for i in 0..large_count {
227        if *pos + 8 > buf.len() {
228            return Err(Error::CorruptObject(format!(
229                "unable to read 64b offset {i}: truncated"
230            )));
231        }
232        large_offsets.push(read_u64_be(buf, pos)?);
233    }
234
235    let mut next_large = 0usize;
236    let mut entries = Vec::with_capacity(object_count);
237    for (i, oid) in oids.iter().enumerate() {
238        let raw = offsets32[i];
239        let offset = if (raw & 0x8000_0000) == 0 {
240            raw as u64
241        } else {
242            let idx = (raw & 0x7fff_ffff) as usize;
243            if idx != next_large {
244                return Err(Error::CorruptObject(format!(
245                    "inconsistent 64b offset index at entry {i}"
246                )));
247            }
248            let off = large_offsets.get(next_large).copied().ok_or_else(|| {
249                Error::CorruptObject(format!("missing large offset entry {next_large}"))
250            })?;
251            next_large += 1;
252            off
253        };
254        entries.push(ShowIndexEntry {
255            oid: oid.clone(),
256            offset,
257            crc32: Some(crcs[i]),
258        });
259    }
260    Ok(entries)
261}
262
/// Basic information about local packs.
#[derive(Debug, Clone, Default)]
pub struct LocalPackInfo {
    /// Number of valid local packs.
    pub pack_count: usize,
    /// Total objects across all valid local packs.
    pub object_count: usize,
    /// Combined on-disk bytes of `.pack` + `.idx`.
    pub size_bytes: u64,
    /// Set of all object IDs present in local packs.
    /// Only SHA-1 (20-byte) entries are collected; wider OIDs are skipped.
    pub object_ids: HashSet<ObjectId>,
}
275
276/// Read all valid `.idx` files in `objects/pack`.
277///
278/// # Errors
279///
280/// Returns [`Error::Io`] for directory-level failures. Individual invalid pack
281/// pairs are skipped.
282pub fn read_local_pack_indexes(objects_dir: &Path) -> Result<Vec<PackIndex>> {
283    let pack_dir = objects_dir.join("pack");
284    let rd = match fs::read_dir(&pack_dir) {
285        Ok(rd) => rd,
286        Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Vec::new()),
287        Err(err) => return Err(Error::Io(err)),
288    };
289
290    let mut out = Vec::new();
291    for entry in rd {
292        let entry = entry.map_err(Error::Io)?;
293        let path = entry.path();
294        if path.extension().and_then(|s| s.to_str()) != Some("idx") {
295            continue;
296        }
297        if let Ok(idx) = read_pack_index(&path) {
298            // Ignore orphan `.idx` files (no `.pack`). They must not make `fsck` think objects
299            // exist (`t7700-repack`); repack also skips them so a stray index does not block work.
300            if !idx.pack_path.is_file() {
301                continue;
302            }
303            out.push(idx);
304        }
305    }
306    Ok(out)
307}
308
/// Process-wide cache of parsed pack indexes and pack file bytes.
///
/// Object lookups in a busy command (`status`, `log`, ancestor walks, packing) re-issue
/// `read_local_pack_indexes` for every single object, which used to mean re-opening,
/// re-reading, re-SHA1-verifying every `.idx` (and re-reading the entire `.pack` for each
/// object). This cache keeps parsed indexes and pack bytes in memory keyed by path with
/// mtime-based invalidation: if a pack/index is rewritten on disk, we re-parse it on the
/// next access. New packs added to a directory invalidate the directory listing via the
/// dir's mtime.
///
/// SHA-1 verification of the index trailer is **not** performed on cached reads: Git only
/// verifies pack indexes during `fsck`/`verify-pack`, not on every object lookup. Use
/// [`read_pack_index`] when verification is required.
mod pack_cache {
    use super::{read_pack_index_no_verify, Error, PackIndex, Result};
    use std::collections::HashMap;
    use std::fs;
    use std::io;
    use std::path::{Path, PathBuf};
    use std::sync::{Arc, Mutex, OnceLock};
    use std::time::SystemTime;

    /// Cached listing of one pack directory, valid while the directory's
    /// mtime is unchanged.
    struct CachedDir {
        dir_mtime: SystemTime,
        indexes: Vec<Arc<PackIndex>>,
    }

    /// Cached parse of one `.idx` file, invalidated by (mtime, size) change.
    struct CachedIdx {
        mtime: SystemTime,
        size: u64,
        idx: Arc<PackIndex>,
    }

    /// Cached raw bytes of one `.pack` file, invalidated by (mtime, size) change.
    struct CachedPack {
        mtime: SystemTime,
        size: u64,
        bytes: Arc<Vec<u8>>,
    }

    /// All three cache maps live behind a single mutex (see `lock`).
    #[derive(Default)]
    struct State {
        by_dir: HashMap<PathBuf, CachedDir>,
        by_idx: HashMap<PathBuf, CachedIdx>,
        by_pack: HashMap<PathBuf, CachedPack>,
    }

    static CACHE: OnceLock<Mutex<State>> = OnceLock::new();

    /// Lock the global cache, recovering the guard even if a previous holder
    /// panicked: entries are only ever replaced wholesale, so a poisoned lock
    /// still guards usable state.
    fn lock() -> std::sync::MutexGuard<'static, State> {
        CACHE
            .get_or_init(|| Mutex::new(State::default()))
            .lock()
            .unwrap_or_else(|p| p.into_inner())
    }

    /// Directory mtime, or `UNIX_EPOCH` when the directory is missing or
    /// unreadable (so a later successful stat registers as a change).
    fn dir_mtime(path: &Path) -> SystemTime {
        fs::metadata(path)
            .and_then(|m| m.modified())
            .unwrap_or(SystemTime::UNIX_EPOCH)
    }

    /// `(mtime, len)` invalidation signature; `None` when the file cannot be
    /// stat'ed at all.
    fn file_signature(path: &Path) -> Option<(SystemTime, u64)> {
        let m = fs::metadata(path).ok()?;
        let mtime = m.modified().unwrap_or(SystemTime::UNIX_EPOCH);
        Some((mtime, m.len()))
    }

    /// Get a parsed pack index from cache, re-parsing from disk only when the file
    /// is missing from the cache or its mtime/size has changed since last parse.
    pub fn get_index(idx_path: &Path) -> Result<Arc<PackIndex>> {
        let sig = file_signature(idx_path);
        if let Some((mtime, size)) = sig {
            {
                // Fast path: serve the cached parse while the on-disk
                // signature is unchanged.
                let g = lock();
                if let Some(c) = g.by_idx.get(idx_path) {
                    if c.mtime == mtime && c.size == size {
                        return Ok(Arc::clone(&c.idx));
                    }
                }
            }
            // Parse outside the lock so disk I/O does not block other threads;
            // a concurrent duplicate parse is harmless (last insert wins).
            let parsed = Arc::new(read_pack_index_no_verify(idx_path)?);
            let mut g = lock();
            g.by_idx.insert(
                idx_path.to_path_buf(),
                CachedIdx {
                    mtime,
                    size,
                    idx: Arc::clone(&parsed),
                },
            );
            Ok(parsed)
        } else {
            Err(Error::Io(io::Error::new(
                io::ErrorKind::NotFound,
                format!("idx not found: {}", idx_path.display()),
            )))
        }
    }

    /// Get all `.idx` files for `objects_dir`, with each parsed index served from cache.
    /// The directory listing itself is cached and invalidated by the directory mtime.
    pub fn get_dir_indexes(objects_dir: &Path) -> Result<Vec<Arc<PackIndex>>> {
        let pack_dir = objects_dir.join("pack");
        // Capture the mtime before listing: if the directory changes while we
        // enumerate it we cache the older mtime and re-list on the next call.
        let dir_mt = dir_mtime(&pack_dir);

        {
            let g = lock();
            if let Some(c) = g.by_dir.get(&pack_dir) {
                if c.dir_mtime == dir_mt {
                    return Ok(c.indexes.clone());
                }
            }
        }

        let rd = match fs::read_dir(&pack_dir) {
            Ok(rd) => rd,
            Err(err) if err.kind() == io::ErrorKind::NotFound => {
                // Cache the "no pack directory" result too, so repeated
                // lookups in pack-less repos stay cheap.
                let mut g = lock();
                g.by_dir.insert(
                    pack_dir.clone(),
                    CachedDir {
                        dir_mtime: dir_mt,
                        indexes: Vec::new(),
                    },
                );
                return Ok(Vec::new());
            }
            Err(err) => return Err(Error::Io(err)),
        };

        let mut out = Vec::new();
        for entry in rd {
            let entry = entry.map_err(Error::Io)?;
            let path = entry.path();
            if path.extension().and_then(|s| s.to_str()) != Some("idx") {
                continue;
            }
            let Ok(idx) = get_index(&path) else { continue };
            // Skip orphan `.idx` files whose `.pack` is gone (mirrors
            // `read_local_pack_indexes`).
            if !idx.pack_path.is_file() {
                continue;
            }
            out.push(idx);
        }

        let mut g = lock();
        g.by_dir.insert(
            pack_dir,
            CachedDir {
                dir_mtime: dir_mt,
                indexes: out.clone(),
            },
        );
        Ok(out)
    }

    /// Get the raw bytes of a pack file from cache, re-reading from disk when the
    /// file's mtime/size changes.
    pub fn get_pack_bytes(pack_path: &Path) -> Result<Arc<Vec<u8>>> {
        let sig = file_signature(pack_path);
        if let Some((mtime, size)) = sig {
            {
                let g = lock();
                if let Some(c) = g.by_pack.get(pack_path) {
                    if c.mtime == mtime && c.size == size {
                        return Ok(Arc::clone(&c.bytes));
                    }
                }
            }
            // Read outside the lock; the whole pack is held in memory and
            // shared via `Arc`.
            let bytes = Arc::new(fs::read(pack_path).map_err(Error::Io)?);
            let mut g = lock();
            g.by_pack.insert(
                pack_path.to_path_buf(),
                CachedPack {
                    mtime,
                    size,
                    bytes: Arc::clone(&bytes),
                },
            );
            Ok(bytes)
        } else {
            Err(Error::Io(io::Error::new(
                io::ErrorKind::NotFound,
                format!("pack not found: {}", pack_path.display()),
            )))
        }
    }

    /// Drop all cached pack indexes and pack bytes. Used by `repack`/`gc` and by tests
    /// that mutate the pack directory in-place without changing its mtime.
    pub fn clear() {
        let mut g = lock();
        g.by_dir.clear();
        g.by_idx.clear();
        g.by_pack.clear();
    }
}
505
/// Read all pack indexes under `<objects_dir>/pack/` from the process-wide cache.
///
/// Thin wrapper over the private `pack_cache` module. Cached reads skip the
/// `.idx` SHA-1 trailer verification that [`read_pack_index`] performs; corruption
/// checks happen during `fsck`/`verify-pack`, not on every object lookup (matches
/// Git). The directory listing itself is cached and invalidated when the pack
/// directory's mtime changes (i.e. when packs are added or removed).
///
/// # Errors
///
/// Returns [`Error::Io`] when the directory cannot be enumerated.
pub fn read_local_pack_indexes_cached(objects_dir: &Path) -> Result<Vec<Arc<PackIndex>>> {
    pack_cache::get_dir_indexes(objects_dir)
}
519
/// Read a single pack index from the process-wide cache (parses from disk on miss
/// or when the file's mtime/size has changed). Skips trailer verification.
///
/// The returned [`Arc`] is shared with the cache, so repeated calls are cheap.
///
/// # Errors
///
/// Returns [`Error::Io`] when the file is missing or [`Error::CorruptObject`] for
/// malformed indexes.
pub fn read_pack_index_cached(idx_path: &Path) -> Result<Arc<PackIndex>> {
    pack_cache::get_index(idx_path)
}
530
/// Read pack file bytes from the process-wide cache.
///
/// The entire `.pack` is held in memory and shared via [`Arc`]; the cache entry
/// is refreshed when the file's mtime/size changes.
///
/// # Errors
///
/// Returns [`Error::Io`] when the pack cannot be read.
pub fn read_pack_bytes_cached(pack_path: &Path) -> Result<Arc<Vec<u8>>> {
    pack_cache::get_pack_bytes(pack_path)
}
539
/// Drop all cached pack indexes and pack bytes (call after `repack`/`gc`).
///
/// Also needed by tests that rewrite packs in-place without changing the pack
/// directory's mtime, since the cache invalidates on mtime alone.
pub fn clear_pack_cache() {
    pack_cache::clear();
}
544
545/// Collect aggregate local pack metrics.
546///
547/// # Errors
548///
549/// Returns [`Error::Io`] when reading pack metadata fails.
550pub fn collect_local_pack_info(objects_dir: &Path) -> Result<LocalPackInfo> {
551    let indexes = read_local_pack_indexes(objects_dir)?;
552    let mut info = LocalPackInfo::default();
553    for idx in indexes {
554        let pack_meta = fs::metadata(&idx.pack_path).map_err(Error::Io)?;
555        let idx_meta = fs::metadata(&idx.idx_path).map_err(Error::Io)?;
556        info.pack_count += 1;
557        info.object_count += idx.entries.len();
558        info.size_bytes += pack_meta.len() + idx_meta.len();
559        for entry in idx.entries {
560            if entry.oid.len() == 20 {
561                if let Ok(oid) = ObjectId::from_bytes(&entry.oid) {
562                    info.object_ids.insert(oid);
563                }
564            }
565        }
566    }
567    Ok(info)
568}
569
570fn verify_idx_trailing_checksum(idx_path: &Path, bytes: &[u8]) -> Result<()> {
571    if bytes.len() < 20 {
572        return Err(Error::CorruptObject(format!(
573            "index file {} missing checksum",
574            idx_path.display()
575        )));
576    }
577    let idx_body_end = bytes.len() - 20;
578    let mut h = Sha1::new();
579    h.update(&bytes[..idx_body_end]);
580    let digest = h.finalize();
581    if digest.as_slice() != &bytes[idx_body_end..] {
582        return Err(Error::CorruptObject(format!(
583            "index checksum mismatch for {}",
584            idx_path.display()
585        )));
586    }
587    Ok(())
588}
589
590fn read_pack_index_v1(idx_path: &Path, bytes: &[u8], verify: bool) -> Result<PackIndex> {
591    let mut pos = 0usize;
592    if bytes.len() < 256 * 4 + 20 {
593        return Err(Error::CorruptObject(format!(
594            "index file {} is too small",
595            idx_path.display()
596        )));
597    }
598    let mut fanout = [0u32; 256];
599    for slot in &mut fanout {
600        *slot = read_u32_be(bytes, &mut pos)?;
601    }
602    let object_count = fanout[255] as usize;
603    let need = pos
604        .saturating_add(object_count.saturating_mul(24))
605        .saturating_add(20);
606    if bytes.len() < need {
607        return Err(Error::CorruptObject(format!(
608            "truncated idx file {}",
609            idx_path.display()
610        )));
611    }
612
613    let mut entries: Vec<PackIndexEntry> = Vec::with_capacity(object_count);
614    for i in 0..object_count {
615        let offset = read_u32_be(bytes, &mut pos)? as u64;
616        let oid = bytes[pos..pos + 20].to_vec();
617        pos += 20;
618        if i > 0 && entries[i - 1].oid.cmp(&oid) != std::cmp::Ordering::Less {
619            return Err(Error::CorruptObject(format!(
620                "oid lookup out of order in {}",
621                idx_path.display()
622            )));
623        }
624        entries.push(PackIndexEntry { oid, offset });
625    }
626
627    if verify {
628        verify_idx_trailing_checksum(idx_path, bytes)?;
629    }
630
631    let mut pack_path = idx_path.to_path_buf();
632    pack_path.set_extension("pack");
633
634    let fanout = compute_fanout_from_entries(&entries);
635    Ok(PackIndex {
636        idx_path: idx_path.to_path_buf(),
637        pack_path,
638        hash_bytes: 20,
639        entries,
640        fanout,
641    })
642}
643
644/// Compute the 256-entry fanout from a sorted entry list (used for v1 indexes
645/// where the fanout is not stored explicitly in a usable form for lookups).
646fn compute_fanout_from_entries(entries: &[PackIndexEntry]) -> [u32; 256] {
647    let mut fanout = [0u32; 256];
648    let mut idx = 0usize;
649    for byte in 0u32..256 {
650        let needle = byte as u8;
651        while idx < entries.len() && entries[idx].oid.first().copied().unwrap_or(0) <= needle {
652            idx += 1;
653        }
654        fanout[byte as usize] = u32::try_from(idx).unwrap_or(u32::MAX);
655    }
656    fanout
657}
658
659fn read_pack_index_v2(idx_path: &Path, bytes: &[u8], verify: bool) -> Result<PackIndex> {
660    if bytes.len() < 8 + 256 * 4 + 40 {
661        return Err(Error::CorruptObject(format!(
662            "index file {} is too small",
663            idx_path.display()
664        )));
665    }
666
667    let mut pos = 0usize;
668    pos += 4;
669    let version = read_u32_be(bytes, &mut pos)?;
670    if version != 2 {
671        return Err(Error::CorruptObject(format!(
672            "unsupported idx version {} in {}",
673            version,
674            idx_path.display()
675        )));
676    }
677
678    let mut fanout = [0u32; 256];
679    for slot in &mut fanout {
680        *slot = read_u32_be(bytes, &mut pos)?;
681    }
682    let object_count = fanout[255] as usize;
683
684    let idx_file_len = bytes.len();
685    let hash_bytes = detect_idx_hash_bytes_v2(idx_file_len, pos, object_count, idx_path)?;
686
687    let need = pos
688        .saturating_add(object_count * hash_bytes)
689        .saturating_add(object_count * 4)
690        .saturating_add(object_count * 4)
691        .saturating_add(40);
692    if bytes.len() < need {
693        return Err(Error::CorruptObject(format!(
694            "truncated idx file {}",
695            idx_path.display()
696        )));
697    }
698
699    let mut oids: Vec<Vec<u8>> = Vec::with_capacity(object_count);
700    for _ in 0..object_count {
701        let slice = &bytes[pos..pos + hash_bytes];
702        pos += hash_bytes;
703        oids.push(slice.to_vec());
704    }
705
706    pos += object_count * 4;
707
708    let mut offsets32 = Vec::with_capacity(object_count);
709    let mut large_count = 0usize;
710    for _ in 0..object_count {
711        let v = read_u32_be(bytes, &mut pos)?;
712        if (v & 0x8000_0000) != 0 {
713            large_count += 1;
714        }
715        offsets32.push(v);
716    }
717
718    if bytes.len() < pos + large_count * 8 + 40 {
719        return Err(Error::CorruptObject(format!(
720            "truncated large offset table in {}",
721            idx_path.display()
722        )));
723    }
724    let mut large_offsets = Vec::with_capacity(large_count);
725    for _ in 0..large_count {
726        large_offsets.push(read_u64_be(bytes, &mut pos)?);
727    }
728
729    let mut next_large = 0usize;
730    let mut entries = Vec::with_capacity(object_count);
731    for (i, oid) in oids.into_iter().enumerate() {
732        let raw = offsets32[i];
733        let offset = if (raw & 0x8000_0000) == 0 {
734            raw as u64
735        } else {
736            let off = large_offsets.get(next_large).copied().ok_or_else(|| {
737                Error::CorruptObject(format!("bad large offset index in {}", idx_path.display()))
738            })?;
739            next_large += 1;
740            off
741        };
742        entries.push(PackIndexEntry { oid, offset });
743    }
744
745    let mut pack_path = idx_path.to_path_buf();
746    pack_path.set_extension("pack");
747
748    if verify {
749        verify_idx_trailing_checksum(idx_path, bytes)?;
750    }
751
752    Ok(PackIndex {
753        idx_path: idx_path.to_path_buf(),
754        pack_path,
755        hash_bytes,
756        entries,
757        fanout,
758    })
759}
760
761/// Infer OID width for a version-2 index using Git's file-size bounds (`packfile.c` `load_idx`).
762///
763/// The first OID byte cannot disambiguate SHA-1 vs SHA-256 (both use the same fanout slot for
764/// small repos), so we require the total `.idx` size to match exactly one `(hashsz, large_offset_count)` pair.
765fn detect_idx_hash_bytes_v2(
766    idx_file_len: usize,
767    fanout_end: usize,
768    object_count: usize,
769    idx_path: &Path,
770) -> Result<usize> {
771    if object_count == 0 {
772        return Ok(20);
773    }
774    if idx_file_len < 20 {
775        return Err(Error::CorruptObject(format!(
776            "index file {} missing checksum",
777            idx_path.display()
778        )));
779    }
780    let body_without_checksum = idx_file_len.saturating_sub(20);
781
782    for &hb in &[20usize, 32] {
783        // Body is everything before the 20-byte SHA-1 index checksum: tables, optional 64-bit
784        // offset extension, then `hb`-byte pack checksum (see `packfile.c` `load_idx`).
785        let min_body = fanout_end
786            .saturating_add(object_count.saturating_mul(hb + 4 + 4))
787            .saturating_add(hb);
788        if body_without_checksum < min_body {
789            continue;
790        }
791        let mut max_body = min_body;
792        if object_count > 0 {
793            max_body = max_body.saturating_add((object_count - 1).saturating_mul(8));
794        }
795        if body_without_checksum > max_body {
796            continue;
797        }
798        let extra = body_without_checksum.saturating_sub(min_body);
799        if extra % 8 != 0 {
800            continue;
801        }
802        return Ok(hb);
803    }
804
805    Err(Error::CorruptObject(format!(
806        "wrong index v2 file size in {}",
807        idx_path.display()
808    )))
809}
810
/// Render a raw OID as a lowercase hex string.
#[must_use]
pub fn oid_bytes_to_hex(oid: &[u8]) -> String {
    oid.iter().map(|b| format!("{b:02x}")).collect()
}
815
816/// True when `entry` stores a SHA-1 OID matching `oid` (SHA-256 pack entries are ignored).
817#[must_use]
818pub fn pack_index_entry_matches_sha1_oid(entry: &PackIndexEntry, oid: &ObjectId) -> bool {
819    entry.oid.len() == 20 && entry.oid.as_slice() == oid.as_bytes().as_slice()
820}
821
822/// Hash canonical loose object bytes (`kind SP size NUL data`) with the repo hash width.
823pub fn hash_object_bytes(kind: ObjectKind, data: &[u8], hash_bytes: usize) -> Result<Vec<u8>> {
824    let header = format!("{} {}\0", kind, data.len());
825    match hash_bytes {
826        20 => {
827            let mut hasher = Sha1::new();
828            hasher.update(header.as_bytes());
829            hasher.update(data);
830            Ok(hasher.finalize().to_vec())
831        }
832        32 => {
833            use sha2::Digest as _;
834            let mut hasher = Sha256::new();
835            hasher.update(header.as_bytes());
836            hasher.update(data);
837            Ok(hasher.finalize().to_vec())
838        }
839        other => Err(Error::CorruptObject(format!(
840            "unsupported object hash width: {other}"
841        ))),
842    }
843}
844
/// Parse a pack index file (version 1 legacy or version 2), verifying the SHA-1
/// trailer checksum.
///
/// Used by `fsck`/`verify-pack` and similar code that wants on-disk validation. Hot
/// object-lookup paths should call [`read_pack_index_cached`] (which skips trailer
/// verification, matching Git's normal read path).
///
/// # Errors
///
/// Returns [`Error::CorruptObject`] when format checks fail.
pub fn read_pack_index(idx_path: &Path) -> Result<PackIndex> {
    // Slurp the whole file: trailer verification hashes every byte anyway.
    let bytes = fs::read(idx_path).map_err(Error::Io)?;
    parse_pack_index_bytes(idx_path, &bytes, true)
}
859
/// Parse a pack index file without verifying the SHA-1 trailer checksum. Used by
/// the cached lookup path (`read_pack_index_cached`); not part of the public API.
fn read_pack_index_no_verify(idx_path: &Path) -> Result<PackIndex> {
    let bytes = fs::read(idx_path).map_err(Error::Io)?;
    // `verify = false`: skip the trailer hash on this hot path.
    parse_pack_index_bytes(idx_path, &bytes, false)
}
866
867fn parse_pack_index_bytes(idx_path: &Path, bytes: &[u8], verify: bool) -> Result<PackIndex> {
868    if bytes.len() < 8 {
869        return Err(Error::CorruptObject(format!(
870            "index file {} is too small",
871            idx_path.display()
872        )));
873    }
874    let magic = &bytes[0..4];
875    if magic == [0xff, b't', b'O', b'c'] {
876        read_pack_index_v2(idx_path, bytes, verify)
877    } else {
878        read_pack_index_v1(idx_path, bytes, verify)
879    }
880}
881
/// A pack object type as encoded in the packed stream header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackedType {
    /// Commit object.
    Commit,
    /// Tree object.
    Tree,
    /// Blob object.
    Blob,
    /// Tag object.
    Tag,
    /// Offset delta.
    OfsDelta,
    /// Reference delta (base identified by OID; see
    /// [`VerifyObjectRecord::base_oid`]).
    RefDelta,
}
898
899impl PackedType {
900    /// Printable name used by `verify-pack -v` output.
901    #[must_use]
902    pub fn as_str(self) -> &'static str {
903        match self {
904            Self::Commit => "commit",
905            Self::Tree => "tree",
906            Self::Blob => "blob",
907            Self::Tag => "tag",
908            Self::OfsDelta => "ofs-delta",
909            Self::RefDelta => "ref-delta",
910        }
911    }
912}
913
/// A decoded object header record used by `verify-pack`.
///
/// One record is produced per index entry, emitted in pack (offset) order.
#[derive(Debug, Clone)]
pub struct VerifyObjectRecord {
    /// Object ID from the index (20 or 32 raw bytes).
    pub oid: Vec<u8>,
    /// Type from the pack stream header.
    pub packed_type: PackedType,
    /// Uncompressed object size from the pack header.
    pub size: u64,
    /// Total bytes in pack occupied by this object slot, measured as the
    /// distance from this object's offset to the next one (or the trailer).
    pub size_in_pack: u64,
    /// Offset in pack file.
    pub offset: u64,
    /// Delta chain depth, if deltified (`None` for non-delta objects).
    pub depth: Option<u64>,
    /// Base object for ref-delta objects (`None` otherwise).
    pub base_oid: Option<Vec<u8>>,
}
932
933/// Verify one pack/index pair and optionally return object records.
934///
935/// # Errors
936///
937/// Returns [`Error::CorruptObject`] when the index or pack are malformed.
938pub fn verify_pack_and_collect(idx_path: &Path) -> Result<Vec<VerifyObjectRecord>> {
939    let idx = read_pack_index(idx_path)?;
940    let idx_file_bytes = fs::read(idx_path).map_err(Error::Io)?;
941    let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
942    let hb = idx.hash_bytes;
943    if pack_bytes.len() < 12 + hb {
944        return Err(Error::CorruptObject(format!(
945            "pack file {} is too small",
946            idx.pack_path.display()
947        )));
948    }
949    let pack_end = pack_bytes.len() - hb;
950    match hb {
951        20 => {
952            let mut h = Sha1::new();
953            h.update(&pack_bytes[..pack_end]);
954            let digest = h.finalize();
955            if digest.as_slice() != &pack_bytes[pack_end..] {
956                return Err(Error::CorruptObject(format!(
957                    "pack trailing checksum mismatch for {}",
958                    idx.pack_path.display()
959                )));
960            }
961        }
962        32 => {
963            use sha2::Digest as _;
964            let mut h = Sha256::new();
965            h.update(&pack_bytes[..pack_end]);
966            let digest = h.finalize();
967            if digest.as_slice() != &pack_bytes[pack_end..] {
968                return Err(Error::CorruptObject(format!(
969                    "pack trailing checksum mismatch for {}",
970                    idx.pack_path.display()
971                )));
972            }
973        }
974        _ => {
975            return Err(Error::CorruptObject(format!(
976                "unsupported OID width {} for pack {}",
977                hb,
978                idx.pack_path.display()
979            )));
980        }
981    }
982    if idx_file_bytes.len() >= hb + 20 {
983        let embedded = &idx_file_bytes[idx_file_bytes.len() - (hb + 20)..idx_file_bytes.len() - 20];
984        if embedded != &pack_bytes[pack_end..] {
985            return Err(Error::CorruptObject(format!(
986                "pack checksum in index does not match {}",
987                idx.pack_path.display()
988            )));
989        }
990    }
991    if &pack_bytes[0..4] != b"PACK" {
992        return Err(Error::CorruptObject(format!(
993            "pack file {} has invalid signature",
994            idx.pack_path.display()
995        )));
996    }
997    let version = u32::from_be_bytes(pack_bytes[4..8].try_into().unwrap_or([0, 0, 0, 0]));
998    if version != 2 && version != 3 {
999        return Err(Error::CorruptObject(format!(
1000            "unsupported pack version {} in {}",
1001            version,
1002            idx.pack_path.display()
1003        )));
1004    }
1005    let count = u32::from_be_bytes(pack_bytes[8..12].try_into().unwrap_or([0, 0, 0, 0])) as usize;
1006    if count != idx.entries.len() {
1007        return Err(Error::CorruptObject(format!(
1008            "pack/index object count mismatch for {}",
1009            idx.pack_path.display()
1010        )));
1011    }
1012
1013    let mut by_offset: BTreeMap<u64, Vec<u8>> = BTreeMap::new();
1014    for entry in &idx.entries {
1015        by_offset.insert(entry.offset, entry.oid.clone());
1016    }
1017    let offsets: Vec<u64> = by_offset.keys().copied().collect();
1018    if offsets.is_empty() {
1019        return Ok(Vec::new());
1020    }
1021
1022    let mut by_oid: HashMap<Vec<u8>, usize> = HashMap::new();
1023    let mut records: Vec<VerifyObjectRecord> = Vec::with_capacity(offsets.len());
1024    for (i, offset) in offsets.iter().copied().enumerate() {
1025        let oid = by_offset.get(&offset).cloned().ok_or_else(|| {
1026            Error::CorruptObject(format!("missing object id for offset {}", offset))
1027        })?;
1028        let next_off = offsets
1029            .get(i + 1)
1030            .copied()
1031            .unwrap_or((pack_bytes.len() - hb) as u64);
1032        if next_off <= offset || next_off > (pack_bytes.len() - hb) as u64 {
1033            return Err(Error::CorruptObject(format!(
1034                "invalid object boundaries at offset {} in {}",
1035                offset,
1036                idx.pack_path.display()
1037            )));
1038        }
1039        let mut p = offset as usize;
1040        let (packed_type, size) = parse_pack_object_header(&pack_bytes, &mut p)?;
1041        let mut base_oid: Option<Vec<u8>> = None;
1042        let mut depth = None;
1043
1044        match packed_type {
1045            PackedType::RefDelta => {
1046                if p + hb > pack_bytes.len() {
1047                    return Err(Error::CorruptObject(format!(
1048                        "truncated ref-delta base at offset {}",
1049                        offset
1050                    )));
1051                }
1052                base_oid = Some(pack_bytes[p..p + hb].to_vec());
1053            }
1054            PackedType::OfsDelta => {
1055                let base_offset = parse_ofs_delta_base(&pack_bytes, &mut p, offset)?;
1056                let base_depth = records
1057                    .iter()
1058                    .find(|r| r.offset == base_offset)
1059                    .and_then(|r| r.depth)
1060                    .unwrap_or(0);
1061                depth = Some(base_depth + 1);
1062            }
1063            PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {}
1064        }
1065
1066        let size_in_pack = next_off - offset;
1067        records.push(VerifyObjectRecord {
1068            oid: oid.clone(),
1069            packed_type,
1070            size,
1071            size_in_pack,
1072            offset,
1073            depth,
1074            base_oid,
1075        });
1076        by_oid.insert(oid, i);
1077    }
1078
1079    for i in 0..records.len() {
1080        if records[i].packed_type != PackedType::RefDelta {
1081            continue;
1082        }
1083        let base = records[i]
1084            .base_oid
1085            .as_ref()
1086            .ok_or_else(|| Error::CorruptObject("ref-delta missing base oid".to_owned()))?;
1087        let base_depth = by_oid
1088            .get(base)
1089            .and_then(|ix| records.get(*ix))
1090            .and_then(|r| r.depth)
1091            .unwrap_or(0);
1092        records[i].depth = Some(base_depth + 1);
1093    }
1094
1095    for entry in &idx.entries {
1096        let obj = read_object_from_pack_bytes(&pack_bytes, &idx, &entry.oid)?;
1097        let computed = hash_object_bytes(obj.kind, &obj.data, hb)?;
1098        if computed.as_slice() != entry.oid.as_slice() {
1099            return Err(Error::CorruptObject(format!(
1100                "pack object hash mismatch at offset {} (index says {})",
1101                entry.offset,
1102                oid_bytes_to_hex(&entry.oid)
1103            )));
1104        }
1105    }
1106
1107    Ok(records)
1108}
1109
1110/// Read alternates recursively, deduplicated in discovery order.
1111///
1112/// # Errors
1113///
1114/// Returns [`Error::Io`] when alternate files cannot be read.
1115pub fn read_alternates_recursive(objects_dir: &Path) -> Result<Vec<PathBuf>> {
1116    let mut visited = HashSet::new();
1117    let mut out = Vec::new();
1118    read_alternates_inner(objects_dir, &mut visited, &mut out, 0)?;
1119    Ok(out)
1120}
1121
1122/// Maximum alternate chain depth (git uses 5).
1123const MAX_ALTERNATE_DEPTH: usize = 5;
1124
1125fn read_alternates_inner(
1126    objects_dir: &Path,
1127    visited: &mut HashSet<PathBuf>,
1128    out: &mut Vec<PathBuf>,
1129    depth: usize,
1130) -> Result<()> {
1131    if depth > MAX_ALTERNATE_DEPTH {
1132        return Ok(());
1133    }
1134    let canonical = canonical_or_self(objects_dir);
1135    let alt_file = canonical.join("info").join("alternates");
1136    let text = match fs::read_to_string(&alt_file) {
1137        Ok(text) => text,
1138        Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(()),
1139        Err(err) => return Err(Error::Io(err)),
1140    };
1141
1142    for raw in text.lines() {
1143        let line = raw.trim();
1144        if line.is_empty() {
1145            continue;
1146        }
1147        let candidate = if Path::new(line).is_absolute() {
1148            PathBuf::from(line)
1149        } else {
1150            canonical.join(line)
1151        };
1152        let candidate = canonical_or_self(&candidate);
1153        if visited.insert(candidate.clone()) {
1154            out.push(candidate.clone());
1155            read_alternates_inner(&candidate, visited, out, depth + 1)?;
1156        }
1157    }
1158    Ok(())
1159}
1160
/// Canonicalize `path`, falling back to the path itself when resolution fails
/// (for example because the path does not exist).
fn canonical_or_self(path: &Path) -> PathBuf {
    match fs::canonicalize(path) {
        Ok(resolved) => resolved,
        Err(_) => path.to_path_buf(),
    }
}
1164
1165/// Convert a [`PackedType`] to an [`ObjectKind`] for non-delta types.
1166fn packed_type_to_kind(pt: PackedType) -> Result<ObjectKind> {
1167    match pt {
1168        PackedType::Commit => Ok(ObjectKind::Commit),
1169        PackedType::Tree => Ok(ObjectKind::Tree),
1170        PackedType::Blob => Ok(ObjectKind::Blob),
1171        PackedType::Tag => Ok(ObjectKind::Tag),
1172        PackedType::OfsDelta | PackedType::RefDelta => Err(Error::CorruptObject(
1173            "cannot convert delta type to object kind directly".to_owned(),
1174        )),
1175    }
1176}
1177
1178/// Decompress zlib data from a byte slice starting at `pos`.
1179///
1180/// Returns the decompressed data and advances `pos` past the consumed
1181/// compressed bytes.
1182fn decompress_pack_data(bytes: &[u8], pos: &mut usize, expected_size: u64) -> Result<Vec<u8>> {
1183    let slice = &bytes[*pos..];
1184    let mut decoder = ZlibDecoder::new(slice);
1185    let mut out = Vec::with_capacity(expected_size as usize);
1186    decoder
1187        .read_to_end(&mut out)
1188        .map_err(|e| Error::Zlib(e.to_string()))?;
1189    *pos += decoder.total_in() as usize;
1190    Ok(out)
1191}
1192
/// Read and fully resolve one object from a pack file given its offset.
///
/// Handles OFS_DELTA and REF_DELTA by recursively reading the base object.
/// The `idx` is used for REF_DELTA resolution (to find a base by OID).
///
/// `depth` is the current delta-chain recursion depth; chains deeper than 50
/// are rejected to bound recursion on corrupt or adversarial packs.
fn read_pack_object_at(
    pack_bytes: &[u8],
    offset: u64,
    idx: &PackIndex,
    depth: usize,
) -> Result<(ObjectKind, Vec<u8>)> {
    if depth > 50 {
        return Err(Error::CorruptObject(
            "delta chain too deep (>50)".to_owned(),
        ));
    }
    let mut pos = offset as usize;
    let (packed_type, size) = parse_pack_object_header(pack_bytes, &mut pos)?;

    match packed_type {
        // Non-delta objects: the payload is simply the zlib-compressed body.
        PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {
            let data = decompress_pack_data(pack_bytes, &mut pos, size)?;
            let kind = packed_type_to_kind(packed_type)?;
            Ok((kind, data))
        }
        // OFS_DELTA: the base is addressed by a backwards offset in this pack;
        // the resolved object inherits the base's kind.
        PackedType::OfsDelta => {
            let base_offset = parse_ofs_delta_base(pack_bytes, &mut pos, offset)?;
            let delta_data = decompress_pack_data(pack_bytes, &mut pos, size)?;
            let (base_kind, base_data) =
                read_pack_object_at(pack_bytes, base_offset, idx, depth + 1)?;
            let result = apply_delta(&base_data, &delta_data)?;
            Ok((base_kind, result))
        }
        // REF_DELTA: the base is addressed by raw OID (`idx.hash_bytes` wide)
        // and must live in the same pack index.
        PackedType::RefDelta => {
            let hb = idx.hash_bytes;
            if pos + hb > pack_bytes.len() {
                return Err(Error::CorruptObject(
                    "truncated ref-delta base OID".to_owned(),
                ));
            }
            let base_raw = pack_bytes[pos..pos + hb].to_vec();
            pos += hb;
            let delta_data = decompress_pack_data(pack_bytes, &mut pos, size)?;
            // Find the base in the same pack index
            let base_entry = idx
                .entries
                .iter()
                .find(|e| e.oid == base_raw)
                .ok_or_else(|| {
                    Error::CorruptObject(format!(
                        "ref-delta base {} not found in pack",
                        oid_bytes_to_hex(&base_raw)
                    ))
                })?;
            let (base_kind, base_data) =
                read_pack_object_at(pack_bytes, base_entry.offset, idx, depth + 1)?;
            let result = apply_delta(&base_data, &delta_data)?;
            Ok((base_kind, result))
        }
    }
}
1253
1254/// Read an object from a pack file by its OID.
1255///
1256/// Searches the given pack index for the OID, then reads and decompresses
1257/// the object from the corresponding pack file, resolving delta chains.
1258///
1259/// # Errors
1260///
1261/// Returns [`Error::ObjectNotFound`] if the OID is not in this pack.
1262pub fn read_object_from_pack(idx: &PackIndex, oid: &ObjectId) -> Result<Object> {
1263    if idx.find_offset(oid).is_none() {
1264        return Err(Error::ObjectNotFound(oid.to_hex()));
1265    }
1266
1267    let pack_bytes = read_pack_bytes_cached(&idx.pack_path)?;
1268    read_object_from_pack_bytes(&pack_bytes, idx, oid.as_bytes().as_slice())
1269}
1270
1271/// Resolve an object from already-loaded pack bytes (used by `verify-pack`).
1272pub fn read_object_from_pack_bytes(
1273    pack_bytes: &[u8],
1274    idx: &PackIndex,
1275    oid: &[u8],
1276) -> Result<Object> {
1277    let entry = idx
1278        .entries
1279        .iter()
1280        .find(|e| e.oid.as_slice() == oid)
1281        .ok_or_else(|| Error::ObjectNotFound(oid_bytes_to_hex(oid)))?;
1282    let (kind, data) = read_pack_object_at(pack_bytes, entry.offset, idx, 0)?;
1283    Ok(Object::new(kind, data))
1284}
1285
1286/// Search all pack indexes in `objects_dir` for the given OID and read it.
1287///
1288/// # Errors
1289///
1290/// Returns [`Error::ObjectNotFound`] if no pack contains the OID.
1291pub fn read_object_from_packs(objects_dir: &Path, oid: &ObjectId) -> Result<Object> {
1292    let indexes = read_local_pack_indexes_cached(objects_dir)?;
1293    for idx in &indexes {
1294        if idx.find_offset(oid).is_some() {
1295            return read_object_from_pack(idx, oid);
1296        }
1297    }
1298    Err(Error::ObjectNotFound(oid.to_hex()))
1299}
1300
1301/// When `oid` is stored as a delta in a pack, return its delta base object id.
1302/// Returns [`None`] for loose objects and for non-delta packed objects.
1303/// If `oid` is stored as `REF_DELTA` or `OFS_DELTA` in a local pack and its base OID is in
1304/// `packed_set`, return the base OID and the **uncompressed** delta payload (Git binary delta).
1305///
1306/// Callers re-zlib when writing a new pack so we do not depend on copying raw deflate streams.
1307///
1308/// # Errors
1309///
1310/// Returns [`Error::CorruptObject`] when the pack stream is malformed.
1311pub fn packed_ref_delta_reuse_slice(
1312    objects_dir: &Path,
1313    oid: &ObjectId,
1314    packed_set: &HashSet<ObjectId>,
1315) -> Result<Option<(ObjectId, Vec<u8>)>> {
1316    let mut indexes = read_local_pack_indexes(objects_dir)?;
1317    sort_pack_indexes_oldest_first(&mut indexes);
1318    for idx in indexes {
1319        let Some(entry) = idx
1320            .entries
1321            .iter()
1322            .find(|e| e.oid.len() == 20 && e.oid.as_slice() == oid.as_bytes().as_slice())
1323        else {
1324            continue;
1325        };
1326        let hb = idx.hash_bytes;
1327        if hb != 20 {
1328            continue;
1329        }
1330        let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
1331        let mut p = entry.offset as usize;
1332        let (packed_type, _size) = parse_pack_object_header(&pack_bytes, &mut p)?;
1333        let base = match packed_type {
1334            PackedType::RefDelta => {
1335                if p + hb > pack_bytes.len() {
1336                    return Err(Error::CorruptObject(
1337                        "truncated ref-delta base oid while scanning for reuse".to_owned(),
1338                    ));
1339                }
1340                let bo = ObjectId::from_bytes(&pack_bytes[p..p + hb])?;
1341                p += hb;
1342                bo
1343            }
1344            PackedType::OfsDelta => {
1345                let base_off = parse_ofs_delta_base(&pack_bytes, &mut p, entry.offset)?;
1346                let Some(base_entry) = idx.entries.iter().find(|e| e.offset == base_off) else {
1347                    continue;
1348                };
1349                if base_entry.oid.len() != 20 {
1350                    continue;
1351                }
1352                ObjectId::from_bytes(base_entry.oid.as_slice())?
1353            }
1354            _ => {
1355                // Same OID may exist as a full object in an older pack and as a delta in a newer
1356                // one; keep scanning packs.
1357                continue;
1358            }
1359        };
1360        if !packed_set.contains(&base) {
1361            continue;
1362        }
1363        let zlib_start = p;
1364        let mut end_pos = zlib_start;
1365        if skip_one_pack_object(&pack_bytes, &mut end_pos, entry.offset, hb).is_err() {
1366            continue;
1367        }
1368        let compressed = &pack_bytes[zlib_start..end_pos];
1369        let mut dec = ZlibDecoder::new(compressed);
1370        let mut delta = Vec::new();
1371        if dec.read_to_end(&mut delta).is_err() {
1372            continue;
1373        }
1374        return Ok(Some((base, delta)));
1375    }
1376    Ok(None)
1377}
1378
1379/// Prefer older packs when the same OID exists as a full object in a fresh repack and as a delta
1380/// in an earlier thin pack (t5316).
1381fn sort_pack_indexes_oldest_first(indexes: &mut [PackIndex]) {
1382    indexes.sort_by(|a, b| {
1383        let ta = fs::metadata(&a.pack_path)
1384            .and_then(|m| m.modified())
1385            .unwrap_or(std::time::SystemTime::UNIX_EPOCH);
1386        let tb = fs::metadata(&b.pack_path)
1387            .and_then(|m| m.modified())
1388            .unwrap_or(std::time::SystemTime::UNIX_EPOCH);
1389        ta.cmp(&tb).then_with(|| a.pack_path.cmp(&b.pack_path))
1390    });
1391}
1392
1393fn sort_pack_indexes_newest_first(indexes: &mut [PackIndex]) {
1394    indexes.sort_by(|a, b| {
1395        let ta = fs::metadata(&a.pack_path)
1396            .and_then(|m| m.modified())
1397            .unwrap_or(std::time::SystemTime::UNIX_EPOCH);
1398        let tb = fs::metadata(&b.pack_path)
1399            .and_then(|m| m.modified())
1400            .unwrap_or(std::time::SystemTime::UNIX_EPOCH);
1401        tb.cmp(&ta).then_with(|| b.pack_path.cmp(&a.pack_path))
1402    });
1403}
1404
/// When `oid` is stored as a delta in a local SHA-1 pack, return its delta
/// base object id; returns `Ok(None)` for objects not found in any pack and
/// for non-delta packed objects.
///
/// Packs are scanned newest-first; the first pack containing `oid` decides
/// the answer. For `OFS_DELTA`, a base offset missing from that pack's index
/// also yields `None`.
///
/// # Errors
///
/// Returns [`Error::Io`] when a pack file cannot be read and
/// [`Error::CorruptObject`] when the pack stream is malformed.
pub fn packed_delta_base_oid(objects_dir: &Path, oid: &ObjectId) -> Result<Option<ObjectId>> {
    let mut indexes = read_local_pack_indexes(objects_dir)?;
    sort_pack_indexes_newest_first(&mut indexes);
    for idx in &indexes {
        // SHA-256 packs are skipped: `ObjectId` here carries 20-byte OIDs.
        if idx.hash_bytes != 20 {
            continue;
        }
        let Some(entry) = idx
            .entries
            .iter()
            .find(|e| e.oid.len() == 20 && e.oid.as_slice() == oid.as_bytes().as_slice())
        else {
            continue;
        };
        let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
        let mut p = entry.offset as usize;
        let (packed_type, _) = parse_pack_object_header(&pack_bytes, &mut p)?;
        match packed_type {
            // Base OID is stored inline right after the object header.
            PackedType::RefDelta => {
                let hb = idx.hash_bytes;
                if p + hb > pack_bytes.len() {
                    return Err(Error::CorruptObject("truncated ref-delta base".to_owned()));
                }
                return Ok(Some(ObjectId::from_bytes(&pack_bytes[p..p + hb])?));
            }
            // Base is addressed by offset; translate it back to an OID via
            // the same pack's index entries.
            PackedType::OfsDelta => {
                let base_off = parse_ofs_delta_base(&pack_bytes, &mut p, entry.offset)?;
                return Ok(idx
                    .entries
                    .iter()
                    .find(|e| e.offset == base_off)
                    .and_then(|e| ObjectId::from_bytes(e.oid.as_slice()).ok()));
            }
            _ => continue,
        }
    }
    Ok(None)
}
1443
1444fn parse_pack_object_header(bytes: &[u8], pos: &mut usize) -> Result<(PackedType, u64)> {
1445    let first = *bytes.get(*pos).ok_or_else(|| {
1446        Error::CorruptObject("unexpected end of pack header while decoding object".to_owned())
1447    })?;
1448    *pos += 1;
1449
1450    let type_code = (first >> 4) & 0x7;
1451    let mut size = (first & 0x0f) as u64;
1452    let mut shift = 4u32;
1453    let mut c = first;
1454    while (c & 0x80) != 0 {
1455        c = *bytes.get(*pos).ok_or_else(|| {
1456            Error::CorruptObject("unexpected end of variable size header".to_owned())
1457        })?;
1458        *pos += 1;
1459        size |= ((c & 0x7f) as u64) << shift;
1460        shift += 7;
1461    }
1462
1463    let packed_type = match type_code {
1464        1 => PackedType::Commit,
1465        2 => PackedType::Tree,
1466        3 => PackedType::Blob,
1467        4 => PackedType::Tag,
1468        6 => PackedType::OfsDelta,
1469        7 => PackedType::RefDelta,
1470        _ => {
1471            return Err(Error::CorruptObject(format!(
1472                "unsupported packed object type {}",
1473                type_code
1474            )))
1475        }
1476    };
1477    Ok((packed_type, size))
1478}
1479
/// Dependency of a packed delta object at `object_offset` within `pack_bytes`.
///
/// Produced by [`read_packed_delta_dependency`]; non-delta objects have no
/// dependency and are reported as `None` there.
#[derive(Debug, Clone, Copy)]
pub enum PackedDeltaDependency {
    /// OFS_DELTA: base object offset within the same pack.
    OfsBase {
        /// Pack offset of the base object.
        base_offset: u64,
    },
    /// REF_DELTA: base object id (may live in another pack).
    RefBase {
        /// OID of the delta base.
        base_oid: ObjectId,
    },
}
1494
1495/// If the object at `object_offset` is a delta, return how it refers to its base.
1496pub fn read_packed_delta_dependency(
1497    pack_bytes: &[u8],
1498    object_offset: u64,
1499) -> Result<Option<PackedDeltaDependency>> {
1500    let mut pos = object_offset as usize;
1501    let (ty, _) = parse_pack_object_header(pack_bytes, &mut pos)?;
1502    match ty {
1503        PackedType::OfsDelta => {
1504            let base = parse_ofs_delta_base(pack_bytes, &mut pos, object_offset)?;
1505            Ok(Some(PackedDeltaDependency::OfsBase { base_offset: base }))
1506        }
1507        PackedType::RefDelta => {
1508            if pos + 20 > pack_bytes.len() {
1509                return Err(Error::CorruptObject("truncated ref-delta base oid".into()));
1510            }
1511            let base_oid = ObjectId::from_bytes(&pack_bytes[pos..pos + 20])?;
1512            Ok(Some(PackedDeltaDependency::RefBase { base_oid }))
1513        }
1514        _ => Ok(None),
1515    }
1516}
1517
1518fn parse_ofs_delta_base(bytes: &[u8], pos: &mut usize, this_offset: u64) -> Result<u64> {
1519    let mut c = *bytes
1520        .get(*pos)
1521        .ok_or_else(|| Error::CorruptObject("truncated ofs-delta header".to_owned()))?;
1522    *pos += 1;
1523    let mut value = (c & 0x7f) as u64;
1524    while (c & 0x80) != 0 {
1525        c = *bytes
1526            .get(*pos)
1527            .ok_or_else(|| Error::CorruptObject("truncated ofs-delta header".to_owned()))?;
1528        *pos += 1;
1529        value = ((value + 1) << 7) | (c & 0x7f) as u64;
1530    }
1531    this_offset
1532        .checked_sub(value)
1533        .ok_or_else(|| Error::CorruptObject("invalid ofs-delta base offset".to_owned()))
1534}
1535
1536/// Advance `pos` past one packed object (including zlib payload).
1537///
1538/// `object_start_offset` is the byte offset of this object within the pack file
1539/// (used for `OFS_DELTA` base resolution).
1540/// Raw bytes of one packed object (header + zlib payload) starting at `object_start_offset`.
1541///
1542/// `hash_bytes` is the ref-delta base OID width in this pack (`20` for SHA-1, `32` for SHA-256).
1543#[must_use]
1544pub fn slice_one_pack_object(
1545    bytes: &[u8],
1546    object_start_offset: u64,
1547    hash_bytes: usize,
1548) -> Result<&[u8]> {
1549    let start = object_start_offset as usize;
1550    let mut pos = start;
1551    skip_one_pack_object(bytes, &mut pos, object_start_offset, hash_bytes)?;
1552    Ok(&bytes[start..pos])
1553}
1554
1555pub fn skip_one_pack_object(
1556    bytes: &[u8],
1557    pos: &mut usize,
1558    object_start_offset: u64,
1559    hash_bytes: usize,
1560) -> Result<()> {
1561    let (packed_type, size) = parse_pack_object_header(bytes, pos)?;
1562    match packed_type {
1563        PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {
1564            let mut dec = ZlibDecoder::new(&bytes[*pos..]);
1565            let mut tmp = Vec::with_capacity(size as usize);
1566            dec.read_to_end(&mut tmp)
1567                .map_err(|e| Error::Zlib(e.to_string()))?;
1568            *pos += dec.total_in() as usize;
1569        }
1570        PackedType::RefDelta => {
1571            if *pos + hash_bytes > bytes.len() {
1572                return Err(Error::CorruptObject("truncated ref-delta base oid".into()));
1573            }
1574            *pos += hash_bytes;
1575            let mut dec = ZlibDecoder::new(&bytes[*pos..]);
1576            let mut tmp = Vec::with_capacity(size as usize);
1577            dec.read_to_end(&mut tmp)
1578                .map_err(|e| Error::Zlib(e.to_string()))?;
1579            *pos += dec.total_in() as usize;
1580        }
1581        PackedType::OfsDelta => {
1582            let _base_off = parse_ofs_delta_base(bytes, pos, object_start_offset)?;
1583            let mut dec = ZlibDecoder::new(&bytes[*pos..]);
1584            let mut tmp = Vec::with_capacity(size as usize);
1585            dec.read_to_end(&mut tmp)
1586                .map_err(|e| Error::Zlib(e.to_string()))?;
1587            *pos += dec.total_in() as usize;
1588        }
1589    }
1590    Ok(())
1591}
1592
1593fn read_u32_be(bytes: &[u8], pos: &mut usize) -> Result<u32> {
1594    if bytes.len() < *pos + 4 {
1595        return Err(Error::CorruptObject(
1596            "unexpected end of idx while reading u32".to_owned(),
1597        ));
1598    }
1599    let v = u32::from_be_bytes(
1600        bytes[*pos..*pos + 4]
1601            .try_into()
1602            .map_err(|_| Error::CorruptObject("failed to parse u32".to_owned()))?,
1603    );
1604    *pos += 4;
1605    Ok(v)
1606}
1607
1608fn read_u64_be(bytes: &[u8], pos: &mut usize) -> Result<u64> {
1609    if bytes.len() < *pos + 8 {
1610        return Err(Error::CorruptObject(
1611            "unexpected end of idx while reading u64".to_owned(),
1612        ));
1613    }
1614    let v = u64::from_be_bytes(
1615        bytes[*pos..*pos + 8]
1616            .try_into()
1617            .map_err(|_| Error::CorruptObject("failed to parse u64".to_owned()))?,
1618    );
1619    *pos += 8;
1620    Ok(v)
1621}
1622
1623/// Read all object IDs from a `.idx` file.
1624pub fn read_idx_object_ids(idx_path: &Path) -> Result<Vec<ObjectId>> {
1625    let index = read_pack_index(idx_path)?;
1626    let mut out = Vec::new();
1627    for e in index.entries {
1628        if e.oid.len() == 20 {
1629            out.push(ObjectId::from_bytes(&e.oid)?);
1630        }
1631    }
1632    Ok(out)
1633}