Skip to main content

grit_lib/
midx.rs

1//! Multi-pack-index (MIDX) file writing and minimal reading.
2//!
//! Writes a Git-compatible `multi-pack-index` file (format version 1 or 2,
//! SHA-1 or SHA-256) covering selected `pack-*.idx` files. Objects that appear
//! in multiple packs keep the preferred pack's copy when `preferred_pack_idx`
//! is set (matching Git's geometric repack tests).
7//!
8//! Incremental writes follow Git's split layout: layers live under
9//! `pack/multi-pack-index.d/multi-pack-index-<sha1>.midx` with ordering in
10//! `multi-pack-index-chain` (oldest hash first, newest last).
11
12use std::collections::{HashMap, HashSet};
13use std::fs;
14use std::io::{BufRead, BufReader};
15use std::path::Path;
16
17use sha1::{Digest, Sha1};
18use sha2::{Digest as Sha256Digest, Sha256};
19
20use crate::error::{Error, Result};
21use crate::objects::ObjectId;
22use crate::pack::{read_pack_index_no_verify, PackIndex};
23
// MIDX header and chunk table-of-contents layout. Four-character tags are
// spelled as ASCII and packed big-endian, which is how they appear on disk.
const MIDX_SIGNATURE: u32 = u32::from_be_bytes(*b"MIDX");
const MIDX_VERSION_V1: u8 = 1;
const MIDX_VERSION_V2: u8 = 2;
const HASH_VERSION_SHA1: u8 = 1;
const HASH_VERSION_SHA256: u8 = 2;
// Signature (4) + version, hash version, chunk count, reserved byte (4) + pack count (4).
const MIDX_HEADER_SIZE: usize = 12;
// Chunk id (4) + chunk offset (8).
const CHUNK_TOC_ENTRY_SIZE: usize = 12;
const MIDX_CHUNKID_PACKNAMES: u32 = u32::from_be_bytes(*b"PNAM");
const MIDX_CHUNKID_OIDFANOUT: u32 = u32::from_be_bytes(*b"OIDF");
const MIDX_CHUNKID_OIDLOOKUP: u32 = u32::from_be_bytes(*b"OIDL");
const MIDX_CHUNKID_OBJECTOFFSETS: u32 = u32::from_be_bytes(*b"OOFF");
const MIDX_CHUNKID_LARGEOFFSETS: u32 = u32::from_be_bytes(*b"LOFF");
const MIDX_CHUNKID_REVINDEX: u32 = u32::from_be_bytes(*b"RIDX");
const MIDX_CHUNKID_BITMAPPED_PACKS: u32 = u32::from_be_bytes(*b"BTMP");

// Git `pack-revindex.h` / `pack-write.c` (standalone `.rev` next to MIDX).
const RIDX_SIGNATURE: u32 = u32::from_be_bytes(*b"RIDX");
const RIDX_VERSION: u32 = 1;
const RIDX_HEADER_SIZE: usize = 12;
const MIDX_CHUNK_ALIGNMENT: usize = 4;

// `git midx.h` (MIDX_LARGE_OFFSET_NEEDED): top bit of a 32-bit offset slot.
const MIDX_LARGE_OFFSET_NEEDED: u32 = 1 << 31;
47
/// One candidate location for an object while a MIDX layer is being built.
struct MidxEntry {
    /// Id of the object this entry locates.
    oid: ObjectId,
    /// Position of the containing pack in the list of packs being indexed.
    pack_id: u32,
    /// Offset of the object as recorded in that pack's index.
    offset: u64,
    /// Modification time of the containing pack file; used to choose between
    /// duplicate copies of the same object.
    pack_mtime: std::time::SystemTime,
}
54
/// Options for writing a multi-pack index (extension of the simple writer).
///
/// `Default` leaves every flag off and every selector unset, which requests a
/// plain single-file MIDX in the default on-disk version.
#[derive(Debug, Clone, Default)]
pub struct WriteMultiPackIndexOptions {
    /// When set, objects also present in other packs are taken from this pack
    /// (`pack_names` index in the sorted name list).
    pub preferred_pack_idx: Option<u32>,
    /// Basename of the preferred pack (e.g. `pack-abc.idx` or `pack-abc.pack`); resolved against
    /// the working pack name list after optional subset filtering.
    pub preferred_pack_name: Option<String>,
    /// If set, only these `pack-*.idx` basenames are included, in this order (Git `--stdin-packs`).
    pub pack_names_subset_ordered: Option<Vec<String>>,
    /// When true, append RIDX + BTMP chunks so `test-tool read-midx --bitmap` succeeds.
    pub write_bitmap_placeholders: bool,
    /// When true, write a new layer in `multi-pack-index.d/` and extend the chain file
    /// instead of replacing `pack/multi-pack-index`.
    pub incremental: bool,
    /// When true with [`Self::write_bitmap_placeholders`], also create an empty `.rev`
    /// sidecar (Git `GIT_TEST_MIDX_WRITE_REV` compatibility).
    pub write_rev_placeholder: bool,
    /// On-disk MIDX format version to write (`1` or `2`). `None` writes the default (v2).
    /// Set from `midx.version`.
    pub version: Option<u8>,
}
78
79fn normalize_pack_idx_basename(raw: &str) -> Result<String> {
80    let t = raw.trim();
81    let t = std::path::Path::new(t)
82        .file_name()
83        .and_then(|s| s.to_str())
84        .unwrap_or(t);
85    let t = t.strip_prefix("./").unwrap_or(t);
86    if t.ends_with(".idx") {
87        Ok(t.to_string())
88    } else if t.ends_with(".pack") {
89        Ok(format!("{}.idx", t.strip_suffix(".pack").unwrap_or(t)))
90    } else {
91        Ok(format!("{t}.idx"))
92    }
93}
94
95/// Read a big-endian `u32` from `data` at byte offset `off`.
96///
97/// Returns [`Error::CorruptObject`] if `data` does not contain 4 bytes at `off`,
98/// replacing the previous fixed-width-slice `.try_into().unwrap()` with real
99/// bounds handling (the success-path value is unchanged).
100fn read_be_u32(data: &[u8], off: usize) -> Result<u32> {
101    let end = off.checked_add(4).filter(|&e| e <= data.len());
102    let Some(end) = end else {
103        return Err(Error::CorruptObject(
104            "truncated MIDX data reading u32".to_owned(),
105        ));
106    };
107    let bytes: [u8; 4] = data[off..end]
108        .try_into()
109        .map_err(|_| Error::CorruptObject("truncated MIDX data reading u32".to_owned()))?;
110    Ok(u32::from_be_bytes(bytes))
111}
112
113/// Read a big-endian `u64` from `data` at byte offset `off`.
114///
115/// Returns [`Error::CorruptObject`] if `data` does not contain 8 bytes at `off`,
116/// replacing the previous fixed-width-slice `.try_into().unwrap()` with real
117/// bounds handling (the success-path value is unchanged).
118fn read_be_u64(data: &[u8], off: usize) -> Result<u64> {
119    let end = off.checked_add(8).filter(|&e| e <= data.len());
120    let Some(end) = end else {
121        return Err(Error::CorruptObject(
122            "truncated MIDX data reading u64".to_owned(),
123        ));
124    };
125    let bytes: [u8; 8] = data[off..end]
126        .try_into()
127        .map_err(|_| Error::CorruptObject("truncated MIDX data reading u64".to_owned()))?;
128    Ok(u64::from_be_bytes(bytes))
129}
130
/// Fields of the fixed MIDX header that the reader keeps after validation.
struct MidxFileHeader {
    /// Chunk count taken from header byte 6.
    num_chunks: u8,
}
134
135fn parse_midx_header(data: &[u8]) -> Result<(MidxFileHeader, usize, u8)> {
136    if data.len() < MIDX_HEADER_SIZE + 20 {
137        return Err(Error::CorruptObject("midx file too small".to_owned()));
138    }
139    let sig = read_be_u32(data, 0)?;
140    if sig != MIDX_SIGNATURE {
141        return Err(Error::CorruptObject("bad MIDX signature".to_owned()));
142    }
143    let version = data[4];
144    if version != MIDX_VERSION_V1 && version != MIDX_VERSION_V2 {
145        return Err(Error::CorruptObject(format!(
146            "multi-pack-index version {version} not recognized"
147        )));
148    }
149    let object_hash_bytes = data[5];
150    let num_chunks = data[6];
151    let _num_packs = read_be_u32(data, 8)?;
152    Ok((
153        MidxFileHeader { num_chunks },
154        MIDX_HEADER_SIZE,
155        object_hash_bytes,
156    ))
157}
158
159fn parse_pack_names_blob(pn: &[u8]) -> Result<Vec<String>> {
160    let mut names = Vec::new();
161    let mut start = 0usize;
162    for (i, &b) in pn.iter().enumerate() {
163        if b == 0 && i >= start {
164            if i > start {
165                let s = std::str::from_utf8(&pn[start..i])
166                    .map_err(|_| Error::CorruptObject("non-utf8 pack name in MIDX".to_owned()))?;
167                names.push(s.to_string());
168            }
169            start = i + 1;
170        }
171    }
172    Ok(names)
173}
174
/// Order a pack basename against an MIDX pack name, treating `.pack` as `.idx`.
///
/// The common prefix of both names is skipped; if what remains is exactly
/// `pack` on the left and `idx` on the right the names are considered equal.
/// Otherwise the remaining bytes are compared lexicographically.
fn cmp_idx_or_pack_name(idx_or_pack_name: &str, idx_name: &str) -> std::cmp::Ordering {
    let lhs = idx_or_pack_name.as_bytes();
    let rhs = idx_name.as_bytes();
    let shared = lhs.iter().zip(rhs).take_while(|(x, y)| x == y).count();
    let (lhs_tail, rhs_tail) = (&lhs[shared..], &rhs[shared..]);

    if lhs_tail == b"pack" && rhs_tail == b"idx" {
        std::cmp::Ordering::Equal
    } else {
        lhs_tail.cmp(rhs_tail)
    }
}
191
192fn preferred_pack_index_by_mtime(pack_dir: &Path, names: &[String]) -> Result<Option<usize>> {
193    let mut best: Option<(usize, std::time::SystemTime)> = None;
194    for (i, n) in names.iter().enumerate() {
195        let meta = fs::metadata(pack_dir.join(n)).map_err(Error::Io)?;
196        let mtime = meta.modified().map_err(Error::Io)?;
197        match best {
198            None => best = Some((i, mtime)),
199            Some((_, t)) if mtime < t => best = Some((i, mtime)),
200            _ => {}
201        }
202    }
203    Ok(best.map(|(i, _)| i))
204}
205
/// Directory holding incremental MIDX layers: `<pack_dir>/multi-pack-index.d`.
fn midx_d_dir(pack_dir: &Path) -> std::path::PathBuf {
    let mut dir = pack_dir.to_path_buf();
    dir.push("multi-pack-index.d");
    dir
}
209
210fn chain_file_path(pack_dir: &Path) -> std::path::PathBuf {
211    midx_d_dir(pack_dir).join("multi-pack-index-chain")
212}
213
214fn read_chain_layer_hashes(pack_dir: &Path) -> Result<Vec<String>> {
215    let path = chain_file_path(pack_dir);
216    let f = fs::File::open(&path).map_err(Error::Io)?;
217    let mut out = Vec::new();
218    for line in BufReader::new(f).lines() {
219        let line = line.map_err(Error::Io)?;
220        let t = line.trim();
221        if t.is_empty() {
222            continue;
223        }
224        if t.len() != 40 || !t.chars().all(|c| c.is_ascii_hexdigit()) {
225            return Err(Error::CorruptObject(format!(
226                "invalid multi-pack-index chain line: {t}"
227            )));
228        }
229        out.push(t.to_ascii_lowercase());
230    }
231    Ok(out)
232}
233
234/// Resolve the path to the newest MIDX layer (root `multi-pack-index` or last chain entry).
235/// Return the MIDX hash-version byte expected for the repository owning `pack_dir`,
236/// mirroring git's `oid_version(r->hash_algo)` (SHA-1 → 1, SHA-256 → 2).
237///
238/// `pack_dir` is `<gitdir>/objects/pack`; the object format lives in the gitdir's
239/// `config` under `extensions.objectformat`. When the config cannot be read or the
240/// extension is absent, the default SHA-1 version (1) is returned.
241fn repo_midx_hash_version(pack_dir: &Path) -> u8 {
242    // pack_dir = <gitdir>/objects/pack -> gitdir = pack_dir/../..
243    let Some(objects_dir) = pack_dir.parent() else {
244        return HASH_VERSION_SHA1;
245    };
246    repo_midx_hash_version_for_objects_dir(objects_dir)
247}
248
249// ── Process-lifetime MIDX read cache ─────────────────────────────────
250//
251// `try_read_object_via_midx` / `midx_oid_listed_in_tip` run once per object
252// lookup, and each used to re-read the entire multi-pack-index file, re-parse
253// the referenced pack `.idx`, and re-scan `[extensions] objectformat` from the
254// repo config. History walks paid for it per object (`log --stat` issued ~90
255// full MIDX reads per commit). Cache the MIDX bytes keyed by path and the
256// sniffed hash version keyed by config path, both revalidated with stat
257// stamps (mtime + size, recorded before the read) on every access. In-process
258// MIDX writers evict their pack dir, closing the same-mtime-tick rewrite
259// window; C git opens the MIDX once per process with no revalidation at all,
260// so serving a stamped copy is strictly more conservative than upstream.
261mod midx_cache {
262    use crate::error::{Error, Result};
263    use std::collections::HashMap;
264    use std::fs;
265    use std::path::{Path, PathBuf};
266    use std::sync::{Arc, Mutex, OnceLock};
267    use std::time::SystemTime;
268
269    type Stamp = (SystemTime, u64);
270
271    #[derive(Default)]
272    struct State {
273        bytes: HashMap<PathBuf, (Stamp, Arc<Vec<u8>>)>,
274        hash_version: HashMap<PathBuf, (Option<Stamp>, u8)>,
275    }
276
277    static CACHE: OnceLock<Mutex<State>> = OnceLock::new();
278
279    fn lock() -> std::sync::MutexGuard<'static, State> {
280        CACHE
281            .get_or_init(|| Mutex::new(State::default()))
282            .lock()
283            .unwrap_or_else(std::sync::PoisonError::into_inner)
284    }
285
286    fn stamp(path: &Path) -> Option<Stamp> {
287        let m = fs::metadata(path).ok()?;
288        Some((m.modified().unwrap_or(SystemTime::UNIX_EPOCH), m.len()))
289    }
290
291    /// MIDX file bytes, re-read from disk only when the file's stamp changes.
292    pub fn get_bytes(path: &Path) -> Result<Arc<Vec<u8>>> {
293        let sig = stamp(path);
294        if let Some(sig) = sig {
295            let g = lock();
296            if let Some((s, b)) = g.bytes.get(path) {
297                if *s == sig {
298                    return Ok(Arc::clone(b));
299                }
300            }
301        }
302        let data = Arc::new(fs::read(path).map_err(Error::Io)?);
303        if let Some(sig) = sig {
304            lock()
305                .bytes
306                .insert(path.to_path_buf(), (sig, Arc::clone(&data)));
307        }
308        Ok(data)
309    }
310
311    /// Cached `[extensions] objectformat` sniff keyed by the config path,
312    /// re-computed only when the config file's stamp changes (an absent
313    /// config is cached too, stamped as `None`).
314    pub fn hash_version(config_path: &Path, compute: impl FnOnce() -> u8) -> u8 {
315        let sig = stamp(config_path);
316        {
317            let g = lock();
318            if let Some((s, v)) = g.hash_version.get(config_path) {
319                if *s == sig {
320                    return *v;
321                }
322            }
323        }
324        let v = compute();
325        lock()
326            .hash_version
327            .insert(config_path.to_path_buf(), (sig, v));
328        v
329    }
330
331    /// Drop cached MIDX bytes under `pack_dir` (called by in-process writers).
332    pub fn evict_pack_dir(pack_dir: &Path) {
333        lock().bytes.retain(|p, _| !p.starts_with(pack_dir));
334    }
335}
336
337/// Like [`repo_midx_hash_version`] but starting from the `objects` directory.
338/// The config sniff is cached per config path with stat-stamp revalidation
339/// (see [`midx_cache`]).
340fn repo_midx_hash_version_for_objects_dir(objects_dir: &Path) -> u8 {
341    let Some(gitdir) = objects_dir.parent() else {
342        return HASH_VERSION_SHA1;
343    };
344    let config_path = gitdir.join("config");
345    midx_cache::hash_version(&config_path, || {
346        sniff_objectformat_hash_version(&config_path)
347    })
348}
349
350/// Uncached `[extensions] objectformat` scan of one config file.
351fn sniff_objectformat_hash_version(config_path: &Path) -> u8 {
352    let Ok(text) = fs::read_to_string(config_path) else {
353        return HASH_VERSION_SHA1;
354    };
355    // Minimal scan for `[extensions]` ... `objectformat = sha256`. Section and key
356    // names are case-insensitive in git config; values are case-sensitive but git
357    // only accepts the literals "sha1"/"sha256".
358    let mut in_extensions = false;
359    for raw in text.lines() {
360        let line = raw.trim();
361        if line.starts_with('[') {
362            let section = line.trim_start_matches('[').trim_end_matches(']');
363            let name = section.split_whitespace().next().unwrap_or("");
364            in_extensions = name.eq_ignore_ascii_case("extensions");
365            continue;
366        }
367        if !in_extensions {
368            continue;
369        }
370        if let Some((key, value)) = line.split_once('=') {
371            if key.trim().eq_ignore_ascii_case("objectformat")
372                && value.trim().eq_ignore_ascii_case("sha256")
373            {
374                return HASH_VERSION_SHA256;
375            }
376        }
377    }
378    HASH_VERSION_SHA1
379}
380
381pub fn resolve_tip_midx_path(pack_dir: &Path) -> Option<std::path::PathBuf> {
382    let root = pack_dir.join("multi-pack-index");
383    if root.exists() {
384        return Some(root);
385    }
386    let hashes = read_chain_layer_hashes(pack_dir).ok()?;
387    let last = hashes.last()?;
388    Some(midx_d_dir(pack_dir).join(format!("multi-pack-index-{last}.midx")))
389}
390
391/// Resolve a specific MIDX layer file by its lowercase hex checksum. Searches the
392/// incremental chain (`multi-pack-index.d/multi-pack-index-<hash>.midx`) and the
393/// single-file root MIDX. Returns `None` when no layer matches that checksum.
394pub fn resolve_midx_layer_path(pack_dir: &Path, checksum: &str) -> Option<std::path::PathBuf> {
395    let checksum = checksum.to_ascii_lowercase();
396    if let Ok(hashes) = read_chain_layer_hashes(pack_dir) {
397        if hashes.contains(&checksum) {
398            return Some(midx_d_dir(pack_dir).join(format!("multi-pack-index-{checksum}.midx")));
399        }
400    }
401    let root = pack_dir.join("multi-pack-index");
402    if root.exists() {
403        if let Ok(hex) = midx_checksum_hex_from_path(&root) {
404            if hex == checksum {
405                return Some(root);
406            }
407        }
408    }
409    None
410}
411
412fn load_midx_file(path: &Path) -> Result<Vec<u8>> {
413    let data = fs::read(path).map_err(Error::Io)?;
414    let _ = parse_midx_header(&data)?;
415    Ok(data)
416}
417
/// OID width implied by a MIDX file's header hash-version byte (`data[5]`).
///
/// A value of 2 means SHA-256 (32 bytes). Any other value, or input too short
/// to contain the byte, means SHA-1 (20 bytes).
fn midx_hash_len(data: &[u8]) -> usize {
    match data.get(5) {
        Some(&2) => 32,
        _ => 20,
    }
}
427
428fn oids_and_packs_from_midx_data(data: &[u8]) -> Result<(HashSet<ObjectId>, Vec<String>)> {
429    let hash_len = midx_hash_len(data);
430    let (_, hdr_end, _) = parse_midx_header(data)?;
431    let (pn_off, pn_len) = find_chunk(data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
432    let pack_names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
433    let (_ooff_off, ooff_len) = find_chunk(data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
434    let (oidl_off, oidl_len) = find_chunk(data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
435    let num_objects = ooff_len / 8;
436    if oidl_len != num_objects * hash_len {
437        return Err(Error::CorruptObject(
438            "MIDX oid-lookup size mismatch".to_owned(),
439        ));
440    }
441    let mut oids = HashSet::with_capacity(num_objects);
442    for i in 0..num_objects {
443        let start = oidl_off + i * hash_len;
444        let oid = ObjectId::from_bytes(&data[start..start + hash_len])?;
445        oids.insert(oid);
446    }
447    Ok((oids, pack_names))
448}
449
450fn collect_incremental_base(pack_dir: &Path) -> Result<(HashSet<ObjectId>, HashSet<String>)> {
451    let mut oids = HashSet::new();
452    let mut packs = HashSet::new();
453    let root = pack_dir.join("multi-pack-index");
454    let chain_path = chain_file_path(pack_dir);
455    if chain_path.exists() {
456        for h in read_chain_layer_hashes(pack_dir)? {
457            let p = midx_d_dir(pack_dir).join(format!("multi-pack-index-{h}.midx"));
458            let data = load_midx_file(&p)?;
459            let (layer_oids, names) = oids_and_packs_from_midx_data(&data)?;
460            oids.extend(layer_oids);
461            for n in names {
462                packs.insert(n);
463            }
464        }
465        return Ok((oids, packs));
466    }
467    if root.exists() {
468        let data = load_midx_file(&root)?;
469        let (o, names) = oids_and_packs_from_midx_data(&data)?;
470        oids = o;
471        for n in names {
472            packs.insert(n);
473        }
474    }
475    Ok((oids, packs))
476}
477
478fn midx_checksum_hex_from_path(path: &Path) -> Result<String> {
479    let data = fs::read(path).map_err(Error::Io)?;
480    if data.len() < 20 {
481        return Err(Error::CorruptObject(
482            "midx too small for checksum".to_owned(),
483        ));
484    }
485    let hash = &data[data.len() - 20..];
486    Ok(hex::encode(hash))
487}
488
489fn hard_link_or_copy(src: &Path, dst: &Path) -> Result<()> {
490    let _ = fs::remove_file(dst);
491    if fs::hard_link(src, dst).is_ok() {
492        return Ok(());
493    }
494    fs::copy(src, dst).map_err(Error::Io)?;
495    Ok(())
496}
497
498fn link_root_midx_into_chain(pack_dir: &Path, root_checksum_hex: &str) -> Result<()> {
499    let midx_d = midx_d_dir(pack_dir);
500    fs::create_dir_all(&midx_d).map_err(Error::Io)?;
501    let dst_midx = midx_d.join(format!("multi-pack-index-{root_checksum_hex}.midx"));
502    hard_link_or_copy(&pack_dir.join("multi-pack-index"), &dst_midx)?;
503    let exts = ["bitmap", "rev"];
504    for ext in exts {
505        let src = pack_dir.join(format!("multi-pack-index-{root_checksum_hex}.{ext}"));
506        if src.exists() {
507            let dst = midx_d.join(format!("multi-pack-index-{root_checksum_hex}.{ext}"));
508            hard_link_or_copy(&src, &dst)?;
509        }
510    }
511    Ok(())
512}
513
514fn clear_stale_split_layers(pack_dir: &Path, keep: &[String]) -> Result<()> {
515    let midx_d = midx_d_dir(pack_dir);
516    if !midx_d.exists() {
517        return Ok(());
518    }
519    let keep: HashSet<&str> = keep.iter().map(|s| s.as_str()).collect();
520    for ent in fs::read_dir(&midx_d).map_err(Error::Io)? {
521        let ent = ent.map_err(Error::Io)?;
522        let name = ent.file_name().to_string_lossy().to_string();
523        let Some(rest) = name.strip_prefix("multi-pack-index-") else {
524            continue;
525        };
526        let Some((hash_part, _ext)) = rest.split_once('.') else {
527            continue;
528        };
529        if hash_part.len() == 40 && !keep.contains(hash_part) {
530            let _ = fs::remove_file(ent.path());
531        }
532    }
533    Ok(())
534}
535
536/// Remove every incremental MIDX layer file (`multi-pack-index-<hash>.midx`,
537/// `.bitmap`, `.rev`) from `multi-pack-index.d/` and unlink the chain file, but
538/// leave the (now empty) directory in place.
539///
540/// This mirrors git's `clear_incremental_midx_files_ext` plus the chain unlink in
541/// `clear_midx_files` for a non-incremental write: git iterates the directory and
542/// `unlink`s the matching files individually and never `rmdir`s the directory, so
543/// a single-file MIDX write leaves an empty `multi-pack-index.d/` behind rather
544/// than removing it (see t5334 "convert incremental to non-incremental").
545fn clear_incremental_midx_files(pack_dir: &Path) -> Result<()> {
546    let midx_d = midx_d_dir(pack_dir);
547    // Unlink the chain file regardless of whether other entries remain.
548    let _ = fs::remove_file(chain_file_path(pack_dir));
549    if !midx_d.exists() {
550        return Ok(());
551    }
552    for ent in fs::read_dir(&midx_d).map_err(Error::Io)? {
553        let ent = ent.map_err(Error::Io)?;
554        let name = ent.file_name().to_string_lossy().to_string();
555        if name.starts_with("multi-pack-index-")
556            && (name.ends_with(".midx") || name.ends_with(".bitmap") || name.ends_with(".rev"))
557        {
558            let _ = fs::remove_file(ent.path());
559        }
560    }
561    Ok(())
562}
563
564fn pack_mtime_for_midx(idx: &PackIndex) -> std::time::SystemTime {
565    fs::metadata(&idx.pack_path)
566        .and_then(|m| m.modified())
567        .unwrap_or(std::time::SystemTime::UNIX_EPOCH)
568}
569
570fn midx_pick_better_entry(
571    cur: &MidxEntry,
572    cand_pack: u32,
573    cand_offset: u64,
574    cand_mtime: std::time::SystemTime,
575    preferred_pack: Option<u32>,
576) -> bool {
577    let cur_pref = preferred_pack == Some(cur.pack_id);
578    let new_pref = preferred_pack == Some(cand_pack);
579    if new_pref && !cur_pref {
580        return true;
581    }
582    if cur_pref && !new_pref {
583        return false;
584    }
585    match cand_mtime.cmp(&cur.pack_mtime) {
586        std::cmp::Ordering::Greater => true,
587        std::cmp::Ordering::Less => false,
588        std::cmp::Ordering::Equal => {
589            if cand_pack != cur.pack_id {
590                cand_pack < cur.pack_id
591            } else {
592                cand_offset < cur.offset
593            }
594        }
595    }
596}
597
/// Build a MIDX layer's bytes, omitting objects whose OID is present in
/// `exclude_oids` (the base chain for incremental layers and compaction, where
/// objects already provided by a lower layer must not be repeated). Pass `None`
/// for a full (non-incremental) MIDX.
///
/// # Parameters
///
/// - `idx_names`: pack names written NUL-terminated into the PNAM chunk.
/// - `indexes`: pack indexes to merge; a pack's position here is its pack id.
/// - `preferred_idx`: position of the preferred pack. Its copy of a duplicated
///   object wins, and its objects come first in the pack-order listing.
/// - `write_bitmap_placeholders`: emit the BTMP chunk, and the RIDX chunk
///   unless `omit_embedded_ridx_chunk` is set.
/// - `omit_embedded_ridx_chunk`: leave the RIDX chunk out of the file.
/// - `version`: `MIDX_VERSION_V1` writes version 1; any other value writes
///   version 2.
/// - `hash_version`: 2 selects 32-byte OIDs and a SHA-256 trailer; any other
///   value selects 20-byte OIDs and a SHA-1 trailer.
/// - `exclude_oids`: objects to leave out, as described above.
///
/// # Returns
///
/// The complete file bytes including the trailing checksum, plus the pack-order
/// listing (indexes into the OID-sorted entries) when both
/// `omit_embedded_ridx_chunk` and `write_bitmap_placeholders` are set, and
/// `None` otherwise.
///
/// # Errors
///
/// Returns [`Error::CorruptObject`] when there are more packs than fit in a
/// `u32`, more large offsets than fit in a `u32`, or more chunks than fit in
/// a `u8`.
#[allow(clippy::too_many_arguments)]
fn build_midx_bytes_filtered(
    idx_names: &[String],
    indexes: &[PackIndex],
    preferred_idx: Option<usize>,
    write_bitmap_placeholders: bool,
    omit_embedded_ridx_chunk: bool,
    version: u8,
    hash_version: u8,
    exclude_oids: Option<&HashSet<ObjectId>>,
) -> Result<(Vec<u8>, Option<Vec<u32>>)> {
    // OID width implied by the MIDX hash version (1 → SHA-1/20, 2 → SHA-256/32).
    let hash_len = if hash_version == 2 { 32 } else { 20 };
    let preferred_pack_idx = preferred_idx.map(|p| p as u32);
    let pack_mtimes: Vec<std::time::SystemTime> = indexes.iter().map(pack_mtime_for_midx).collect();

    // Pick one winning location per object across all packs.
    let mut best: HashMap<ObjectId, MidxEntry> = HashMap::new();
    for (pack_id, idx) in indexes.iter().enumerate() {
        let pack_id = u32::try_from(pack_id).map_err(|_| {
            Error::CorruptObject("too many pack files for multi-pack-index".to_owned())
        })?;
        let mtime = pack_mtimes[pack_id as usize];
        for e in &idx.entries {
            // Index entries whose OID width does not match this MIDX's hash are
            // skipped silently, as are ids that fail to parse.
            if e.oid.len() != hash_len {
                continue;
            }
            let Ok(oid) = ObjectId::from_bytes(&e.oid) else {
                continue;
            };
            if let Some(ex) = exclude_oids {
                if ex.contains(&oid) {
                    continue;
                }
            }
            let cand = MidxEntry {
                oid,
                pack_id,
                offset: e.offset,
                pack_mtime: mtime,
            };
            match best.get(&oid) {
                None => {
                    best.insert(oid, cand);
                }
                Some(cur) => {
                    if midx_pick_better_entry(cur, pack_id, e.offset, mtime, preferred_pack_idx) {
                        best.insert(oid, cand);
                    }
                }
            }
        }
    }

    // The OID lookup, fanout and offsets chunks all follow this OID order.
    let mut entries: Vec<MidxEntry> = best.into_values().collect();
    entries.sort_by_key(|a| a.oid);

    // Decide how object offsets are encoded, mirroring git/midx-write.c.
    // `large_offsets_needed` becomes true only when some offset cannot fit in a
    // 32-bit field (> 0xffffffff); in that mode every offset that does not fit in
    // 31 bits (> 0x7fffffff) is stored in the 64-bit large-offset (LOFF) chunk and
    // its 32-bit slot is `MIDX_LARGE_OFFSET_NEEDED | slot`. When no offset exceeds
    // 32 bits, offsets in [2^31, 2^32) are written directly as raw 32-bit values
    // and no LOFF chunk is emitted.
    let large_offsets_needed = entries.iter().any(|e| e.offset > u64::from(u32::MAX));

    let num_packs = indexes.len() as u32;

    // PNAM: NUL-terminated names, zero-padded to the chunk alignment.
    let mut pack_names_blob = Vec::new();
    for name in idx_names {
        pack_names_blob.extend_from_slice(name.as_bytes());
        pack_names_blob.push(0);
    }
    let pad = (MIDX_CHUNK_ALIGNMENT - (pack_names_blob.len() % MIDX_CHUNK_ALIGNMENT))
        % MIDX_CHUNK_ALIGNMENT;
    pack_names_blob.extend(std::iter::repeat_n(0u8, pad));
    let chunk_pnam = pack_names_blob;

    // OIDF: slot `i` holds the number of entries whose first OID byte is <= `i`.
    let mut chunk_oidf = vec![0u8; 256 * 4];
    let mut j = 0usize;
    for i in 0..256 {
        while j < entries.len() && entries[j].oid.as_bytes()[0] <= i as u8 {
            j += 1;
        }
        chunk_oidf[i * 4..(i + 1) * 4].copy_from_slice(&(j as u32).to_be_bytes());
    }

    // NOTE(review): the capacity hint assumes 20-byte OIDs, so it under-reserves
    // for SHA-256. That only affects allocation, not the bytes written.
    let mut chunk_oidl = Vec::with_capacity(entries.len() * 20);
    for e in &entries {
        chunk_oidl.extend_from_slice(e.oid.as_bytes());
    }

    // OOFF: per object, a 4-byte pack id followed by a 4-byte encoded offset.
    let mut large_offsets: Vec<u64> = Vec::new();
    let mut chunk_ooff = Vec::with_capacity(entries.len() * 8);
    for e in &entries {
        chunk_ooff.extend_from_slice(&e.pack_id.to_be_bytes());
        let encoded = if large_offsets_needed && e.offset >> 31 != 0 {
            let slot = u32::try_from(large_offsets.len()).map_err(|_| {
                Error::CorruptObject("too many large offsets in multi-pack-index".to_owned())
            })?;
            large_offsets.push(e.offset);
            MIDX_LARGE_OFFSET_NEEDED | slot
        } else {
            // When large offsets are not needed, an offset in [2^31, 2^32) is
            // written verbatim (truncation via `as u32` is exact here because the
            // value fits in 32 bits).
            e.offset as u32
        };
        chunk_ooff.extend_from_slice(&encoded.to_be_bytes());
    }

    let chunk_loff: Vec<u8> = if large_offsets.is_empty() {
        Vec::new()
    } else {
        let mut v = Vec::with_capacity(large_offsets.len() * 8);
        for off in &large_offsets {
            v.extend_from_slice(&off.to_be_bytes());
        }
        v
    };

    // Pack order: preferred pack first, then by pack id, then by offset. Each
    // element of `order` is an index into the OID-sorted `entries`.
    let pref = preferred_pack_idx;
    let mut order: Vec<u32> = (0..entries.len() as u32).collect();
    order.sort_by(|&ai, &bi| {
        let a = &entries[ai as usize];
        let b = &entries[bi as usize];
        let a_pref = pref == Some(a.pack_id);
        let b_pref = pref == Some(b.pack_id);
        b_pref
            .cmp(&a_pref)
            .then_with(|| a.pack_id.cmp(&b.pack_id))
            .then_with(|| a.offset.cmp(&b.offset))
            .then_with(|| ai.cmp(&bi))
    });

    let mut chunk_ridx = Vec::with_capacity(entries.len() * 4);
    for oid_idx in &order {
        chunk_ridx.extend_from_slice(&oid_idx.to_be_bytes());
    }

    // BTMP: per-pack (bitmap_pos, bitmap_nr) in the pseudo-bitmap namespace, matching Git's
    // `write_midx_bitmapped_packs` (cumulative start + object count per pack).
    let rev_sidecar_order = if omit_embedded_ridx_chunk && write_bitmap_placeholders {
        Some(order.clone())
    } else {
        None
    };
    let chunk_btmp: Vec<u8> = if write_bitmap_placeholders {
        // Per-pack `(bitmap_pos, bitmap_nr)`: position of the pack's first object in
        // the MIDX pack-order traversal and the number of (deduplicated) MIDX objects
        // selected from that pack — matching `write_midx_bitmapped_packs` in
        // git/midx-write.c (counts MIDX entries per pack, not raw idx entry counts).
        let num_packs_usize = indexes.len();
        // `u32::MAX` marks a pack with no selected objects yet.
        let mut bitmap_pos = vec![u32::MAX; num_packs_usize];
        let mut bitmap_nr = vec![0u32; num_packs_usize];
        for (rank, &oid_idx) in order.iter().enumerate() {
            let pack = entries[oid_idx as usize].pack_id as usize;
            if let Some(p) = bitmap_pos.get_mut(pack) {
                if *p == u32::MAX {
                    *p = rank as u32;
                }
            }
            if let Some(n) = bitmap_nr.get_mut(pack) {
                *n += 1;
            }
        }
        let mut v = Vec::new();
        for pack in 0..num_packs_usize {
            // A pack that contributed no objects is written with position 0.
            let pos = if bitmap_pos[pack] == u32::MAX {
                0
            } else {
                bitmap_pos[pack]
            };
            v.extend_from_slice(&pos.to_be_bytes());
            v.extend_from_slice(&bitmap_nr[pack].to_be_bytes());
        }
        let pad = (MIDX_CHUNK_ALIGNMENT - (v.len() % MIDX_CHUNK_ALIGNMENT)) % MIDX_CHUNK_ALIGNMENT;
        v.extend(std::iter::repeat_n(0u8, pad));
        v
    } else {
        Vec::new()
    };

    // Chunks are written in this order; optional ones are appended below.
    let mut chunks: Vec<(u32, Vec<u8>)> = vec![
        (MIDX_CHUNKID_PACKNAMES, chunk_pnam),
        (MIDX_CHUNKID_OIDFANOUT, chunk_oidf),
        (MIDX_CHUNKID_OIDLOOKUP, chunk_oidl),
        (MIDX_CHUNKID_OBJECTOFFSETS, chunk_ooff),
    ];
    if !chunk_loff.is_empty() {
        chunks.push((MIDX_CHUNKID_LARGEOFFSETS, chunk_loff));
    }
    if (pref.is_some() || write_bitmap_placeholders) && !omit_embedded_ridx_chunk {
        chunks.push((MIDX_CHUNKID_REVINDEX, chunk_ridx));
    }
    if write_bitmap_placeholders {
        chunks.push((MIDX_CHUNKID_BITMAPPED_PACKS, chunk_btmp));
    }

    let num_chunks: u8 = chunks
        .len()
        .try_into()
        .map_err(|_| Error::CorruptObject("too many MIDX chunks".to_owned()))?;

    // Table of contents: one (id, absolute offset) entry per chunk plus a
    // terminating entry with id 0 whose offset marks the end of the last chunk.
    let mut body = Vec::new();
    let mut cur_offset =
        MIDX_HEADER_SIZE as u64 + ((chunks.len() + 1) * CHUNK_TOC_ENTRY_SIZE) as u64;

    for (id, data) in &chunks {
        body.extend_from_slice(&id.to_be_bytes());
        body.extend_from_slice(&cur_offset.to_be_bytes());
        cur_offset += data.len() as u64;
    }
    body.extend_from_slice(&0u32.to_be_bytes());
    body.extend_from_slice(&cur_offset.to_be_bytes());

    for (_, data) in &chunks {
        body.extend_from_slice(data);
    }

    // Header: signature, version, hash version, chunk count, one zero byte,
    // then the pack count.
    let mut out = Vec::with_capacity(MIDX_HEADER_SIZE + body.len() + 20);
    out.extend_from_slice(&MIDX_SIGNATURE.to_be_bytes());
    out.push(if version == MIDX_VERSION_V1 {
        MIDX_VERSION_V1
    } else {
        MIDX_VERSION_V2
    });
    out.push(hash_version);
    out.push(num_chunks);
    out.push(0);
    out.extend_from_slice(&num_packs.to_be_bytes());
    out.extend_from_slice(&body);

    // Trailing checksum matches the MIDX hash version (SHA-1 for 1, SHA-256 for 2).
    if hash_version == 2 {
        let mut hasher = Sha256::new();
        Sha256Digest::update(&mut hasher, &out);
        out.extend_from_slice(&hasher.finalize());
    } else {
        let mut hasher = Sha1::new();
        hasher.update(&out);
        out.extend_from_slice(&hasher.finalize());
    }

    Ok((out, rev_sidecar_order))
}
847
848/// Standalone MIDX `.rev` file (Git `write_rev_file_order` / `RIDX_SIGNATURE`).
849///
850/// `midx_file_hash` is the MIDX's own trailing checksum (20 bytes for SHA-1, 32
851/// for SHA-256); its width selects the RIDX hash-id (1 or 2).
852fn write_midx_rev_sidecar(path: &Path, pack_order: &[u32], midx_file_hash: &[u8]) -> Result<()> {
853    let hash_id: u32 = if midx_file_hash.len() == 32 { 2 } else { 1 };
854    let mut body =
855        Vec::with_capacity(RIDX_HEADER_SIZE + pack_order.len() * 4 + midx_file_hash.len());
856    body.extend_from_slice(&RIDX_SIGNATURE.to_be_bytes());
857    body.extend_from_slice(&RIDX_VERSION.to_be_bytes());
858    body.extend_from_slice(&hash_id.to_be_bytes());
859    for idx in pack_order {
860        body.extend_from_slice(&idx.to_be_bytes());
861    }
862    body.extend_from_slice(midx_file_hash);
863    fs::write(path, body).map_err(Error::Io)
864}
865
866fn find_chunk(data: &[u8], header_end: usize, chunk_id: u32) -> Result<(usize, usize)> {
867    let (hdr, _, _) = parse_midx_header(data)?;
868    let n = hdr.num_chunks as usize;
869    let pos = header_end;
870    let toc_end = pos + (n + 1) * CHUNK_TOC_ENTRY_SIZE;
871    if data.len() < toc_end + 20 {
872        return Err(Error::CorruptObject(
873            "truncated MIDX chunk table".to_owned(),
874        ));
875    }
876    for i in 0..n {
877        let base = pos + i * CHUNK_TOC_ENTRY_SIZE;
878        let id = read_be_u32(data, base)?;
879        let off = read_be_u64(data, base + 4)? as usize;
880        if id == chunk_id {
881            let next_off = if i + 1 < n {
882                let nb = pos + (i + 1) * CHUNK_TOC_ENTRY_SIZE;
883                read_be_u64(data, nb + 4)? as usize
884            } else {
885                let term = pos + n * CHUNK_TOC_ENTRY_SIZE;
886                read_be_u64(data, term + 4)? as usize
887            };
888            return Ok((off, next_off.saturating_sub(off)));
889        }
890    }
891    Err(Error::CorruptObject(format!(
892        "MIDX chunk {chunk_id:08x} not found"
893    )))
894}
895
/// Fatal failure while loading a MIDX (the cases where Git `die()`s in
/// `load_multi_pack_index`).
///
/// The wrapped string is the message text exactly as Git prints it, minus the
/// `error:`/`fatal:` prefix.
#[derive(Debug, Clone)]
pub struct MidxLoadError(pub String);

impl std::fmt::Display for MidxLoadError {
    /// Emit the wrapped message verbatim; width/fill flags of the caller's
    /// format spec are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.0)
    }
}
907
/// One row of the parsed MIDX chunk table of contents.
struct TocEntry {
    /// Four-byte chunk identifier. `parse_midx_toc` appends a final row with
    /// id `0` that stands for the table terminator.
    id: u32,
    /// Offset recorded for this chunk in the table; callers use it to index
    /// directly into the in-memory MIDX image.
    offset: usize,
}
913
914/// Walk the MIDX chunk table of contents, mirroring `read_table_of_contents`
915/// in `git/chunk-format.c`. Returns the chunk list plus any reported errors,
916/// or a fatal `MidxLoadError` for the conditions Git treats as `die()`-worthy.
917fn parse_midx_toc(
918    data: &[u8],
919    hash_len: usize,
920    errors: &mut Vec<String>,
921) -> std::result::Result<Vec<TocEntry>, MidxLoadError> {
922    if data.len() < MIDX_HEADER_SIZE + hash_len {
923        return Err(MidxLoadError("multi-pack-index file too small".to_owned()));
924    }
925    let num_chunks = data[6] as usize;
926    let toc_off = MIDX_HEADER_SIZE;
927    let needed = toc_off + (num_chunks + 1) * CHUNK_TOC_ENTRY_SIZE;
928    if data.len() < needed {
929        return Err(MidxLoadError(
930            "multi-pack-index chunk table is truncated".to_owned(),
931        ));
932    }
933    let file_size = data.len();
934    let mut chunks: Vec<TocEntry> = Vec::with_capacity(num_chunks);
935
936    let read_be64 = |off: usize| -> u64 {
937        let mut b = [0u8; 8];
938        b.copy_from_slice(&data[off..off + 8]);
939        u64::from_be_bytes(b)
940    };
941    let read_be32 = |off: usize| -> u32 {
942        let mut b = [0u8; 4];
943        b.copy_from_slice(&data[off..off + 4]);
944        u32::from_be_bytes(b)
945    };
946
947    for i in 0..num_chunks {
948        let entry = toc_off + i * CHUNK_TOC_ENTRY_SIZE;
949        let chunk_id = read_be32(entry);
950        let chunk_offset = read_be64(entry + 4);
951
952        if chunk_id == 0 {
953            errors.push("terminating chunk id appears earlier than expected".to_owned());
954            return Err(MidxLoadError(
955                "multi-pack-index required pack-name chunk missing or corrupted".to_owned(),
956            ));
957        }
958        if !(chunk_offset as usize).is_multiple_of(MIDX_CHUNK_ALIGNMENT) {
959            errors.push(format!(
960                "chunk id {chunk_id:x} not {MIDX_CHUNK_ALIGNMENT}-byte aligned"
961            ));
962            return Err(MidxLoadError(
963                "multi-pack-index required pack-name chunk missing or corrupted".to_owned(),
964            ));
965        }
966
967        let next_entry = toc_off + (i + 1) * CHUNK_TOC_ENTRY_SIZE;
968        let next_chunk_offset = read_be64(next_entry + 4);
969
970        if next_chunk_offset < chunk_offset
971            || next_chunk_offset > (file_size as u64).saturating_sub(hash_len as u64)
972        {
973            errors.push(format!(
974                "improper chunk offset(s) {chunk_offset:x} and {next_chunk_offset:x}"
975            ));
976            return Err(MidxLoadError(
977                "multi-pack-index required pack-name chunk missing or corrupted".to_owned(),
978            ));
979        }
980
981        if chunks.iter().any(|c| c.id == chunk_id) {
982            errors.push(format!("duplicate chunk ID {chunk_id:x} found"));
983            return Err(MidxLoadError(
984                "multi-pack-index required pack-name chunk missing or corrupted".to_owned(),
985            ));
986        }
987
988        chunks.push(TocEntry {
989            id: chunk_id,
990            offset: chunk_offset as usize,
991        });
992    }
993
994    // Terminating TOC entry must have chunk id 0.
995    let term_entry = toc_off + num_chunks * CHUNK_TOC_ENTRY_SIZE;
996    let final_id = read_be32(term_entry);
997    if final_id != 0 {
998        errors.push(format!("final chunk has non-zero id {final_id:x}"));
999        return Err(MidxLoadError(
1000            "multi-pack-index required pack-name chunk missing or corrupted".to_owned(),
1001        ));
1002    }
1003
1004    // Record the terminator offset as a sentinel (id 0) so the final real chunk's
1005    // length is taken from the table — not a hash-width-dependent file-size guess.
1006    let term_offset = read_be64(term_entry + 4) as usize;
1007    chunks.push(TocEntry {
1008        id: 0,
1009        offset: term_offset,
1010    });
1011
1012    Ok(chunks)
1013}
1014
1015/// Look up `(start, len)` of a chunk in a parsed TOC.
1016fn toc_chunk_range(chunks: &[TocEntry], data_len: usize, id: u32) -> Option<(usize, usize)> {
1017    for (i, c) in chunks.iter().enumerate() {
1018        if c.id == id {
1019            let next = if i + 1 < chunks.len() {
1020                chunks[i + 1].offset
1021            } else {
1022                data_len.saturating_sub(20)
1023            };
1024            return Some((c.offset, next.saturating_sub(c.offset)));
1025        }
1026    }
1027    None
1028}
1029
1030/// Full multi-pack-index verification, mirroring `verify_midx_file` in `git/midx.c`
1031/// plus the `die()`/`error()` conditions in `load_multi_pack_index`. On any problem
1032/// returns the list of error lines (without `error:`/`fatal:` prefixes) in the order
1033/// Git emits them; an empty list means the MIDX is valid.
1034///
1035/// `objects_dir` is the object database (e.g. `.git/objects`).
1036pub fn verify_midx(objects_dir: &Path) -> std::result::Result<(), Vec<String>> {
1037    let pack_dir = objects_dir.join("pack");
1038    let path = match resolve_tip_midx_path(&pack_dir) {
1039        Some(p) => p,
1040        None => return Ok(()),
1041    };
1042    let data = match fs::read(&path) {
1043        Ok(d) => d,
1044        Err(_) => return Ok(()),
1045    };
1046
1047    let mut fatal: Vec<String> = Vec::new();
1048    let mut errors: Vec<String> = Vec::new();
1049
1050    // --- header checks (load_multi_pack_index) ---
1051    if data.len() < MIDX_HEADER_SIZE + 20 {
1052        return Err(vec!["multi-pack-index file is too small".to_owned()]);
1053    }
1054    let sig = u32::from_be_bytes([data[0], data[1], data[2], data[3]]);
1055    if sig != MIDX_SIGNATURE {
1056        return Err(vec![format!(
1057            "multi-pack-index signature 0x{sig:08x} does not match signature 0x{MIDX_SIGNATURE:08x}"
1058        )]);
1059    }
1060    let version = data[4];
1061    if version != MIDX_VERSION_V1 && version != MIDX_VERSION_V2 {
1062        return Err(vec![format!(
1063            "multi-pack-index version {version} not recognized"
1064        )]);
1065    }
1066    let hash_version = data[5];
1067    let expected_hash_version = repo_midx_hash_version_for_objects_dir(objects_dir);
1068    if hash_version != expected_hash_version {
1069        return Err(vec![format!(
1070            "multi-pack-index hash version {hash_version} does not match version {expected_hash_version}"
1071        )]);
1072    }
1073    let hash_len = if hash_version == 2 { 32usize } else { 20usize };
1074    let num_packs = u32::from_be_bytes([data[8], data[9], data[10], data[11]]) as usize;
1075
1076    // --- table of contents ---
1077    let chunks = match parse_midx_toc(&data, hash_len, &mut errors) {
1078        Ok(c) => c,
1079        Err(e) => {
1080            errors.push(e.0);
1081            return Err(errors);
1082        }
1083    };
1084
1085    // required pack-names chunk
1086    let Some((pn_off, pn_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_PACKNAMES)
1087    else {
1088        errors.push("multi-pack-index required pack-name chunk missing or corrupted".to_owned());
1089        return Err(errors);
1090    };
1091
1092    // oid-fanout chunk + ordering check
1093    let Some((fan_off, fan_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OIDFANOUT)
1094    else {
1095        errors.push("multi-pack-index required OID fanout chunk missing or corrupted".to_owned());
1096        return Err(errors);
1097    };
1098    if fan_len != 256 * 4 {
1099        errors.push("multi-pack-index OID fanout is of the wrong size".to_owned());
1100        errors.push("multi-pack-index required OID fanout chunk missing or corrupted".to_owned());
1101        return Err(errors);
1102    }
1103    let fanout = |i: usize| -> u32 {
1104        let b = fan_off + i * 4;
1105        u32::from_be_bytes([data[b], data[b + 1], data[b + 2], data[b + 3]])
1106    };
1107    for i in 0..255 {
1108        let f1 = fanout(i);
1109        let f2 = fanout(i + 1);
1110        if f1 > f2 {
1111            errors.push(format!(
1112                "oid fanout out of order: fanout[{i}] = {f1:x} > {f2:x} = fanout[{}]",
1113                i + 1
1114            ));
1115            errors
1116                .push("multi-pack-index required OID fanout chunk missing or corrupted".to_owned());
1117            return Err(errors);
1118        }
1119    }
1120    let num_objects = fanout(255) as usize;
1121
1122    // oid-lookup chunk (size depends on num_objects)
1123    let Some((oidl_off, oidl_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OIDLOOKUP)
1124    else {
1125        errors.push("multi-pack-index required OID lookup chunk missing or corrupted".to_owned());
1126        return Err(errors);
1127    };
1128    if oidl_len != hash_len * num_objects {
1129        errors.push("multi-pack-index OID lookup chunk is the wrong size".to_owned());
1130        errors.push("multi-pack-index required OID lookup chunk missing or corrupted".to_owned());
1131        return Err(errors);
1132    }
1133
1134    // object-offsets chunk
1135    let Some((ooff_off, ooff_len)) =
1136        toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OBJECTOFFSETS)
1137    else {
1138        errors
1139            .push("multi-pack-index required object offsets chunk missing or corrupted".to_owned());
1140        return Err(errors);
1141    };
1142    if ooff_len != num_objects * 8 {
1143        errors.push("multi-pack-index object offset chunk is the wrong size".to_owned());
1144        errors
1145            .push("multi-pack-index required object offsets chunk missing or corrupted".to_owned());
1146        return Err(errors);
1147    }
1148
1149    let large_off = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_LARGEOFFSETS);
1150
1151    // pack names: parse and (for V1) verify ordering.
1152    let names = match parse_pack_names_blob(&data[pn_off..pn_off + pn_len]) {
1153        Ok(n) => n,
1154        Err(_) => {
1155            errors.push("multi-pack-index pack-name chunk is too short".to_owned());
1156            return Err(errors);
1157        }
1158    };
1159    if version == MIDX_VERSION_V1 {
1160        for i in 1..names.len() {
1161            if names[i] <= names[i - 1] {
1162                fatal.push(format!(
1163                    "multi-pack-index pack names out of order: '{}' before '{}'",
1164                    names[i - 1],
1165                    names[i]
1166                ));
1167                // Git die()s here while loading; surface immediately.
1168                errors.extend(fatal);
1169                return Err(errors);
1170            }
1171        }
1172    }
1173
1174    // --- checksum ---
1175    if !midx_checksum_is_valid(&data) {
1176        errors.push("incorrect checksum".to_owned());
1177    }
1178
1179    // --- load each referenced pack (failed to load pack) ---
1180    let mut pack_indexes: Vec<Option<PackIndex>> = Vec::with_capacity(num_packs);
1181    for i in 0..num_packs {
1182        // Load the pack idx without verifying its trailing checksum: `git
1183        // multi-pack-index verify` uses `open_pack_index`, which only parses the
1184        // index header/tables. The 64-bit-offset tests deliberately corrupt a
1185        // pack `.idx` (invalidating its checksum) and still expect the MIDX
1186        // verify to read recorded offsets out of that idx for comparison.
1187        let loaded = match names.get(i) {
1188            Some(name) => read_pack_index_no_verify(&pack_dir.join(name)).ok(),
1189            None => None,
1190        };
1191        if loaded.is_none() {
1192            errors.push(format!("failed to load pack in position {i}"));
1193        }
1194        pack_indexes.push(loaded);
1195    }
1196
1197    if num_objects == 0 {
1198        errors.push("the midx contains no oid".to_owned());
1199        if errors.is_empty() {
1200            return Ok(());
1201        }
1202        return Err(errors);
1203    }
1204
1205    // --- OID lookup order ---
1206    let oid_at =
1207        |i: usize| -> &[u8] { &data[oidl_off + i * hash_len..oidl_off + (i + 1) * hash_len] };
1208    for i in 0..num_objects.saturating_sub(1) {
1209        let a = oid_at(i);
1210        let b = oid_at(i + 1);
1211        if a >= b {
1212            errors.push(format!(
1213                "oid lookup out of order: oid[{i}] = {} >= {} = oid[{}]",
1214                hex::encode(a),
1215                hex::encode(b),
1216                i + 1
1217            ));
1218        }
1219    }
1220
1221    // --- object offsets vs pack index ---
1222    for i in 0..num_objects {
1223        let ob = ooff_off + i * 8;
1224        let pack_int_id = u32::from_be_bytes([data[ob], data[ob + 1], data[ob + 2], data[ob + 3]]);
1225        let off_raw = u32::from_be_bytes([data[ob + 4], data[ob + 5], data[ob + 6], data[ob + 7]]);
1226        let oid_hex = hex::encode(oid_at(i));
1227
1228        if pack_int_id as usize >= num_packs {
1229            errors.push(format!(
1230                "bad pack-int-id: {pack_int_id} ({num_packs} total packs)"
1231            ));
1232            errors.push(format!(
1233                "failed to load pack entry for oid[{i}] = {oid_hex}"
1234            ));
1235            continue;
1236        }
1237
1238        // resolve MIDX-recorded offset (handle large offsets)
1239        let m_offset: u64 = if off_raw & MIDX_LARGE_OFFSET_NEEDED != 0 {
1240            let slot = (off_raw & !MIDX_LARGE_OFFSET_NEEDED) as usize;
1241            match large_off {
1242                Some((lo_off, lo_len)) if (slot + 1) * 8 <= lo_len => {
1243                    let b = lo_off + slot * 8;
1244                    let mut arr = [0u8; 8];
1245                    arr.copy_from_slice(&data[b..b + 8]);
1246                    u64::from_be_bytes(arr)
1247                }
1248                _ => {
1249                    errors.push("multi-pack-index large offset out of bounds".to_owned());
1250                    continue;
1251                }
1252            }
1253        } else {
1254            u64::from(off_raw)
1255        };
1256
1257        let Some(Some(idx)) = pack_indexes.get(pack_int_id as usize) else {
1258            errors.push(format!(
1259                "failed to load pack entry for oid[{i}] = {oid_hex}"
1260            ));
1261            continue;
1262        };
1263        let Ok(oid) = ObjectId::from_bytes(oid_at(i)) else {
1264            errors.push(format!(
1265                "failed to load pack entry for oid[{i}] = {oid_hex}"
1266            ));
1267            continue;
1268        };
1269        match idx.find_offset(&oid) {
1270            Some(p_offset) => {
1271                if m_offset != p_offset {
1272                    errors.push(format!(
1273                        "incorrect object offset for oid[{i}] = {oid_hex}: {m_offset:x} != {p_offset:x}"
1274                    ));
1275                }
1276            }
1277            None => {
1278                errors.push(format!(
1279                    "failed to load pack entry for oid[{i}] = {oid_hex}"
1280                ));
1281            }
1282        }
1283    }
1284
1285    if errors.is_empty() {
1286        Ok(())
1287    } else {
1288        Err(errors)
1289    }
1290}
1291
1292/// Validate the trailing checksum of an in-memory MIDX image, using the
1293/// algorithm implied by the header hash version (SHA-1 or SHA-256).
1294fn midx_checksum_is_valid(data: &[u8]) -> bool {
1295    let hash_len = midx_hash_len(data);
1296    if data.len() < hash_len {
1297        return false;
1298    }
1299    let body = &data[..data.len() - hash_len];
1300    let stored = &data[data.len() - hash_len..];
1301    if hash_len == 32 {
1302        let mut hasher = Sha256::new();
1303        Sha256Digest::update(&mut hasher, body);
1304        hasher.finalize().as_slice() == stored
1305    } else {
1306        let mut hasher = Sha1::new();
1307        hasher.update(body);
1308        hasher.finalize().as_slice() == stored
1309    }
1310}
1311
1312/// Return the `pack-*.idx` basename for the MIDX preferred pack (RIDX position 0).
1313///
1314/// `objects_dir` is the repository object database (e.g. `.git/objects`), not `objects/pack`.
1315///
1316/// Used by `test-tool read-midx --preferred-pack` compatibility.
1317/// Pack index basenames (`pack-*.idx`) stored in the MIDX pack-names chunk.
1318pub fn read_midx_pack_idx_names(objects_dir: &Path) -> Result<Vec<String>> {
1319    let pack_dir = objects_dir.join("pack");
1320    let path = resolve_tip_midx_path(&pack_dir)
1321        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
1322    let data = fs::read(&path).map_err(Error::Io)?;
1323    let (_, hdr_end, _) = parse_midx_header(&data)?;
1324    let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1325    parse_pack_names_blob(&data[pn_off..pn_off + pn_len])
1326}
1327
1328/// A single MIDX-referenced object together with the pack it is attributed to.
1329pub struct MidxObjectRef {
1330    pub oid: ObjectId,
1331    /// Index into the pack-names list returned alongside this.
1332    pub pack_int_id: usize,
1333}
1334
1335/// Read the tip MIDX and return `(pack_names, objects)`, where each object names
1336/// the pack it is attributed to (`pack_int_id`). Mirrors the per-object
1337/// `nth_midxed_pack_int_id` iteration in Git used by expire/repack.
1338pub fn read_midx_objects(objects_dir: &Path) -> Result<(Vec<String>, Vec<MidxObjectRef>)> {
1339    let pack_dir = objects_dir.join("pack");
1340    let path = resolve_tip_midx_path(&pack_dir)
1341        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
1342    let data = fs::read(&path).map_err(Error::Io)?;
1343    let (_, hdr_end, _) = parse_midx_header(&data)?;
1344    let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1345    let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
1346    let hash_len = midx_hash_len(&data);
1347    let (oidl_off, oidl_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
1348    let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1349    if oidl_len % hash_len != 0 || ooff_len % 8 != 0 {
1350        return Err(Error::CorruptObject(
1351            "bad MIDX oid-lookup / object-offsets size".to_owned(),
1352        ));
1353    }
1354    let num = oidl_len / hash_len;
1355    if num * 8 != ooff_len {
1356        return Err(Error::CorruptObject(
1357            "MIDX oid count does not match object-offsets".to_owned(),
1358        ));
1359    }
1360    let mut objects = Vec::with_capacity(num);
1361    for i in 0..num {
1362        let oid = ObjectId::from_bytes(&data[oidl_off + i * hash_len..oidl_off + (i + 1) * hash_len])
1363            .map_err(|e| Error::CorruptObject(e.to_string()))?;
1364        let base = ooff_off + i * 8;
1365        let pack_id = read_be_u32(&data, base)? as usize;
1366        objects.push(MidxObjectRef {
1367            oid,
1368            pack_int_id: pack_id,
1369        });
1370    }
1371    Ok((names, objects))
1372}
1373
1374/// Trailing 40-character SHA-1 hex of the active MIDX (root or chain tip).
1375pub fn midx_checksum_hex(objects_dir: &Path) -> Result<String> {
1376    let pack_dir = objects_dir.join("pack");
1377    let path = resolve_tip_midx_path(&pack_dir)
1378        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
1379    midx_checksum_hex_from_path(&path)
1380}
1381
1382/// Resolve the MIDX file to read for `test-tool read-midx`: a specific layer when
1383/// `checksum` is `Some`, otherwise the chain tip / root MIDX. A checksum that does
1384/// not name any layer yields a `could not find MIDX with checksum` error matching
1385/// git's `test-read-midx.c`.
1386fn resolve_read_midx_path(pack_dir: &Path, checksum: Option<&str>) -> Result<std::path::PathBuf> {
1387    match checksum {
1388        Some(cs) => resolve_midx_layer_path(pack_dir, cs)
1389            .ok_or_else(|| Error::CorruptObject(format!("could not find MIDX with checksum {cs}"))),
1390        None => resolve_tip_midx_path(pack_dir)
1391            .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned())),
1392    }
1393}
1394
/// Dump the tip MIDX with its objects, in the style of Git's `test-read-midx.c`.
///
/// Delegates to [`format_midx_show_objects_layer`] with no checksum, so the
/// result is the header dump followed by one line per MIDX object of the form
/// `{oid} {offset}\t{object-dir}/pack/{pack-stem}.pack`.
pub fn format_midx_show_objects(objects_dir: &Path) -> Result<String> {
    format_midx_show_objects_layer(objects_dir, None)
}
1400
1401/// Like [`format_midx_show_objects`] but reads a specific layer by checksum.
1402pub fn format_midx_show_objects_layer(
1403    objects_dir: &Path,
1404    checksum: Option<&str>,
1405) -> Result<String> {
1406    let mut out = format_midx_dump_layer(objects_dir, checksum)?;
1407    let pack_dir = objects_dir.join("pack");
1408    let path = resolve_read_midx_path(&pack_dir, checksum)?;
1409    let data = fs::read(&path).map_err(Error::Io)?;
1410    let (_, hdr_end, _) = parse_midx_header(&data)?;
1411    let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1412    let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
1413    let hash_len = midx_hash_len(&data);
1414    let (oidl_off, oidl_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
1415    let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1416    if oidl_len % hash_len != 0 || ooff_len % 8 != 0 {
1417        return Err(Error::CorruptObject(
1418            "bad MIDX oid-lookup / object-offsets size".to_owned(),
1419        ));
1420    }
1421    let num = oidl_len / hash_len;
1422    if num * 8 != ooff_len {
1423        return Err(Error::CorruptObject(
1424            "MIDX oid count does not match object-offsets".to_owned(),
1425        ));
1426    }
1427    for i in 0..num {
1428        let oid = ObjectId::from_bytes(&data[oidl_off + i * hash_len..oidl_off + (i + 1) * hash_len])
1429            .map_err(|e| Error::CorruptObject(e.to_string()))?;
1430        let base = ooff_off + i * 8;
1431        let pack_id = read_be_u32(&data, base)? as usize;
1432        let offset = u64::from(read_be_u32(&data, base + 4)?);
1433        let idx_name = names
1434            .get(pack_id)
1435            .ok_or_else(|| Error::CorruptObject("pack id out of range in MIDX".to_owned()))?;
1436        // Match `test-read-midx.c`, which prints `e.p->pack_name`: the full pack
1437        // path `<object-dir>/pack/<stem>.pack`. A relative object dir gets a `./`
1438        // prefix (Git `relative_path`).
1439        let stem = idx_name.strip_suffix(".idx").unwrap_or(idx_name);
1440        let dir_disp = objects_dir.display().to_string();
1441        let dir_disp = if objects_dir.is_absolute() || dir_disp.starts_with("./") {
1442            dir_disp
1443        } else {
1444            format!("./{dir_disp}")
1445        };
1446        out.push_str(&format!(
1447            "{} {}\t{}/pack/{}.pack\n",
1448            oid.to_hex(),
1449            offset,
1450            dir_disp,
1451            stem
1452        ));
1453    }
1454    Ok(out)
1455}
1456
/// Human-readable dump of the tip MIDX (matches `test-tool read-midx` layout
/// closely enough for grep-based tests).
///
/// Delegates to [`format_midx_dump_layer`] with no checksum.
pub fn format_midx_dump(objects_dir: &Path) -> Result<String> {
    format_midx_dump_layer(objects_dir, None)
}
1460
1461/// Like [`format_midx_dump`] but reads a specific layer by checksum (chain layer or
1462/// root MIDX). Used by `test-tool read-midx <object-dir> <checksum>`.
1463pub fn format_midx_dump_layer(objects_dir: &Path, checksum: Option<&str>) -> Result<String> {
1464    let pack_dir = objects_dir.join("pack");
1465    let path = resolve_read_midx_path(&pack_dir, checksum)?;
1466    let data = fs::read(&path).map_err(Error::Io)?;
1467    let (hdr, hdr_end, _) = parse_midx_header(&data)?;
1468    let sig = read_be_u32(&data, 0)?;
1469    let version = data[4];
1470    // The C `read-midx` test tool prints `m->hash_len`, the raw hash length
1471    // (20 for SHA-1, 32 for SHA-256), not the on-disk hash-version byte.
1472    let hash_len: u8 = match data[5] {
1473        1 => 20,
1474        2 => 32,
1475        other => other,
1476    };
1477    let num_chunks = hdr.num_chunks;
1478    let num_packs = read_be_u32(&data, 8)?;
1479
1480    let mut chunk_tags: Vec<&'static str> = Vec::new();
1481    let n = num_chunks as usize;
1482    let pos = hdr_end;
1483    let toc_end = pos + (n + 1) * CHUNK_TOC_ENTRY_SIZE;
1484    if data.len() < toc_end + 20 {
1485        return Err(Error::CorruptObject(
1486            "truncated MIDX chunk table".to_owned(),
1487        ));
1488    }
1489    for i in 0..n {
1490        let base = pos + i * CHUNK_TOC_ENTRY_SIZE;
1491        let id = read_be_u32(&data, base)?;
1492        let tag = match id {
1493            x if x == MIDX_CHUNKID_PACKNAMES => "pack-names",
1494            x if x == MIDX_CHUNKID_OIDFANOUT => "oid-fanout",
1495            x if x == MIDX_CHUNKID_OIDLOOKUP => "oid-lookup",
1496            x if x == MIDX_CHUNKID_OBJECTOFFSETS => "object-offsets",
1497            x if x == MIDX_CHUNKID_LARGEOFFSETS => "large-offsets",
1498            x if x == MIDX_CHUNKID_REVINDEX => "revindex",
1499            x if x == 0x4254_4d50 => "bitmapped-packs",
1500            _ => "unknown",
1501        };
1502        chunk_tags.push(tag);
1503    }
1504
1505    let (_ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1506    let num_objects = ooff_len / 8;
1507
1508    let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1509    let pack_names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
1510
1511    let mut out = String::new();
1512    out.push_str(&format!(
1513        "header: {:08x} {} {} {} {}\n",
1514        sig, version, hash_len, num_chunks, num_packs
1515    ));
1516    out.push_str("chunks:");
1517    for t in &chunk_tags {
1518        out.push(' ');
1519        out.push_str(t);
1520    }
1521    out.push('\n');
1522    out.push_str(&format!("num_objects: {num_objects}\n"));
1523    out.push_str("packs:\n");
1524    for n in &pack_names {
1525        out.push_str(n);
1526        out.push('\n');
1527    }
1528    out.push_str(&format!("object-dir: {}\n", objects_dir.display()));
1529    Ok(out)
1530}
1531
/// OID rows from the active multi-pack-index, plus reverse-index order for pack-reuse bitmap bits.
///
/// Git assigns each object a **global bitmap bit** equal to its position in the MIDX reverse index
/// (`RIDX` chunk) traversal order — not its position in the pack `.idx` file. Helpers on this struct
/// map [`ObjectId`] → global bit the same way as `midx-write.c` (`midx_pack_order`).
///
/// All four vectors are indexed per object; `oids` and `pack_and_offset` share the OID-table
/// index, while `rid_order` is indexed by bitmap rank.
#[derive(Debug, Clone)]
pub struct MidxReuseTables {
    /// OIDs in MIDX lexicographic order (same order as the OID lookup chunk).
    pub oids: Vec<ObjectId>,
    /// `(pack_int_id, in-pack offset)` parallel to `oids`.
    ///
    /// NOTE(review): the loader below appears to store the 32-bit offset field widened to
    /// `u64` without consulting the large-offsets chunk — confirm for packs over 2 GiB.
    pub pack_and_offset: Vec<(u32, u64)>,
    /// `rid_order[rank]` is the OID-table index of the object at global bitmap rank `rank`.
    pub rid_order: Vec<u32>,
    /// Inverse map: global bitmap rank for each OID-table index
    /// (presumably `oid_idx_to_rank[rid_order[rank]] == rank`; verify against the loader).
    pub oid_idx_to_rank: Vec<u32>,
}
1548
1549/// Load OID / object-offset / reverse-index tables from the tip MIDX (root or chain tip).
1550///
1551/// Returns [`None`] when there is no MIDX or no `RIDX` chunk (no pseudo-bitmap ordering).
1552pub fn load_midx_reuse_tables(objects_dir: &Path) -> Result<Option<MidxReuseTables>> {
1553    let pack_dir = objects_dir.join("pack");
1554    let Some(path) = resolve_tip_midx_path(&pack_dir) else {
1555        return Ok(None);
1556    };
1557    let data = fs::read(&path).map_err(Error::Io)?;
1558    let hash_len = midx_hash_len(&data);
1559    let (_, hdr_end, _) = parse_midx_header(&data)?;
1560    let (oidl_off, oid_l_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
1561    let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1562    let Ok((ridx_off, ridx_len)) = find_chunk(&data, hdr_end, MIDX_CHUNKID_REVINDEX) else {
1563        return Ok(None);
1564    };
1565    if oid_l_len % hash_len != 0 || ooff_len != oid_l_len / hash_len * 8 {
1566        return Err(Error::CorruptObject(
1567            "MIDX OID / offset chunk size mismatch".to_owned(),
1568        ));
1569    }
1570    let num_objects = oid_l_len / hash_len;
1571    if ridx_len != num_objects.saturating_mul(4) {
1572        return Err(Error::CorruptObject(
1573            "MIDX reverse index length does not match object count".to_owned(),
1574        ));
1575    }
1576    if num_objects == 0 {
1577        return Ok(None);
1578    }
1579
1580    let mut oids = Vec::with_capacity(num_objects);
1581    for i in 0..num_objects {
1582        let base = oidl_off + i * hash_len;
1583        oids.push(ObjectId::from_bytes(&data[base..base + hash_len])?);
1584    }
1585
1586    let mut pack_and_offset = Vec::with_capacity(num_objects);
1587    for i in 0..num_objects {
1588        let ob = ooff_off + i * 8;
1589        let pack_id = read_be_u32(&data, ob)?;
1590        let off32 = read_be_u32(&data, ob + 4)?;
1591        pack_and_offset.push((pack_id, u64::from(off32)));
1592    }
1593
1594    let mut rid_order = Vec::with_capacity(num_objects);
1595    for i in 0..num_objects {
1596        let base = ridx_off + i * 4;
1597        rid_order.push(read_be_u32(&data, base)?);
1598    }
1599
1600    let mut oid_idx_to_rank = vec![0u32; num_objects];
1601    for (rank, &oid_idx) in rid_order.iter().enumerate() {
1602        let idx = usize::try_from(oid_idx)
1603            .map_err(|_| Error::CorruptObject("bad MIDX reverse index entry".to_owned()))?;
1604        if idx >= num_objects {
1605            return Err(Error::CorruptObject(
1606                "MIDX reverse index out of range".to_owned(),
1607            ));
1608        }
1609        oid_idx_to_rank[idx] = u32::try_from(rank)
1610            .map_err(|_| Error::CorruptObject("too many MIDX objects".to_owned()))?;
1611    }
1612
1613    Ok(Some(MidxReuseTables {
1614        oids,
1615        pack_and_offset,
1616        rid_order,
1617        oid_idx_to_rank,
1618    }))
1619}
1620
1621impl MidxReuseTables {
1622    /// Global pseudo-bitmap index for `oid`, or [`None`] if the object is not in this MIDX.
1623    #[must_use]
1624    pub fn global_bitmap_bit(&self, oid: &ObjectId) -> Option<u32> {
1625        let oid_idx = self.oids.binary_search(oid).ok()?;
1626        Some(self.oid_idx_to_rank[oid_idx])
1627    }
1628
1629    /// MIDX-canonical pack id for `oid` (the single copy the MIDX selected after deduplication),
1630    /// or [`None`] if the object is not in this MIDX. Used to reject cross-pack delta reuse: a
1631    /// delta is only reusable verbatim when its base resolves to the *same* pack the delta lives
1632    /// in, mirroring Git's `midx_pair_to_pack_pos` check in `try_partial_reuse`.
1633    #[must_use]
1634    pub fn canonical_pack(&self, oid: &ObjectId) -> Option<u32> {
1635        let oid_idx = self.oids.binary_search(oid).ok()?;
1636        Some(self.pack_and_offset[oid_idx].0)
1637    }
1638}
1639
/// The slice of the MIDX pseudo-bitmap namespace that belongs to one pack (`BTMP` chunk entry).
#[derive(Clone, Copy, Debug)]
pub struct MidxBtmpPackRange {
    /// Position of the pack in the MIDX pack-names list.
    pub pack_id: u32,
    /// First bitmap bit assigned to this pack (cumulative object order).
    pub bitmap_pos: u32,
    /// Number of bitmap bits (objects) recorded for this pack.
    pub bitmap_nr: u32,
}
1650
1651/// Read per-pack `(bitmap_pos, bitmap_nr)` from the active MIDX `BTMP` chunk.
1652///
1653/// Returns an empty vector when the MIDX has no bitmapped-packs chunk.
1654pub fn read_midx_btmp_ranges(objects_dir: &Path) -> Result<Vec<MidxBtmpPackRange>> {
1655    let pack_dir = objects_dir.join("pack");
1656    let Some(path) = resolve_tip_midx_path(&pack_dir) else {
1657        return Ok(Vec::new());
1658    };
1659    let data = fs::read(&path).map_err(Error::Io)?;
1660    let (_, hdr_end, _) = parse_midx_header(&data)?;
1661    let Ok((btmp_off, btmp_len)) = find_chunk(&data, hdr_end, MIDX_CHUNKID_BITMAPPED_PACKS) else {
1662        return Ok(Vec::new());
1663    };
1664    if btmp_len == 0 || btmp_len % 8 != 0 {
1665        return Err(Error::CorruptObject(
1666            "invalid MIDX BTMP chunk length".to_owned(),
1667        ));
1668    }
1669    let num_packs = read_be_u32(&data, 8)?;
1670    let n_entries = btmp_len / 8;
1671    if u32::try_from(n_entries).ok() != Some(num_packs) {
1672        return Err(Error::CorruptObject(
1673            "MIDX BTMP entry count does not match num_packs".to_owned(),
1674        ));
1675    }
1676    let mut out = Vec::with_capacity(n_entries);
1677    for i in 0..n_entries {
1678        let base = btmp_off + i * 8;
1679        let bitmap_pos = read_be_u32(&data, base)?;
1680        let bitmap_nr = read_be_u32(&data, base + 4)?;
1681        out.push(MidxBtmpPackRange {
1682            pack_id: u32::try_from(i)
1683                .map_err(|_| Error::CorruptObject("too many packs in MIDX BTMP".to_owned()))?,
1684            bitmap_pos,
1685            bitmap_nr,
1686        });
1687    }
1688    Ok(out)
1689}
1690
1691/// Format `test-tool read-midx --bitmap` output for the active MIDX: per pack, a
1692/// line with `<pack>.pack`, then `  bitmap_pos:` and `  bitmap_nr:`. Returns an
1693/// error whose message is `MIDX does not contain the BTMP chunk` when the MIDX has
1694/// no `BTMP` chunk (mirrors `nth_bitmapped_pack` in git/midx.c).
1695pub fn format_midx_bitmapped_packs(objects_dir: &Path) -> Result<String> {
1696    let pack_dir = objects_dir.join("pack");
1697    let path = resolve_tip_midx_path(&pack_dir)
1698        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
1699    let data = fs::read(&path).map_err(Error::Io)?;
1700    let (_, hdr_end, _) = parse_midx_header(&data)?;
1701    let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1702    let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
1703    let Ok((btmp_off, btmp_len)) = find_chunk(&data, hdr_end, MIDX_CHUNKID_BITMAPPED_PACKS) else {
1704        return Err(Error::CorruptObject(
1705            "MIDX does not contain the BTMP chunk".to_owned(),
1706        ));
1707    };
1708    let n_entries = btmp_len / 8;
1709    let mut out = String::new();
1710    for i in 0..n_entries {
1711        let base = btmp_off + i * 8;
1712        let bitmap_pos = read_be_u32(&data, base)?;
1713        let bitmap_nr = read_be_u32(&data, base + 4)?;
1714        let idx_name = names.get(i).ok_or_else(|| {
1715            Error::CorruptObject("BTMP entry has no corresponding pack name".to_owned())
1716        })?;
1717        let stem = idx_name.strip_suffix(".idx").unwrap_or(idx_name);
1718        out.push_str(&format!("{stem}.pack\n"));
1719        out.push_str(&format!("  bitmap_pos: {bitmap_pos}\n"));
1720        out.push_str(&format!("  bitmap_nr: {bitmap_nr}\n"));
1721    }
1722    Ok(out)
1723}
1724
1725/// Look up which pack and in-pack offset holds `oid` according to the active MIDX.
1726pub fn midx_lookup_pack_and_offset(objects_dir: &Path, oid: &ObjectId) -> Result<(u32, u64)> {
1727    let pack_dir = objects_dir.join("pack");
1728    let path = resolve_tip_midx_path(&pack_dir)
1729        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
1730    let data = fs::read(&path).map_err(Error::Io)?;
1731    let hash_len = midx_hash_len(&data);
1732    let (_, hdr_end, _) = parse_midx_header(&data)?;
1733    let (fanout_off, fanout_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDFANOUT)?;
1734    let (oidl_off, oid_l_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
1735    let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1736    if fanout_len != 256 * 4 || oid_l_len % hash_len != 0 || ooff_len != oid_l_len / hash_len * 8 {
1737        return Err(Error::CorruptObject("truncated MIDX OID chunks".to_owned()));
1738    }
1739    let num_objects = oid_l_len / hash_len;
1740    let first = oid.as_bytes()[0] as usize;
1741    let j0 = if first == 0 {
1742        0usize
1743    } else {
1744        read_be_u32(&data, fanout_off + (first - 1) * 4)? as usize
1745    };
1746    let j1 = read_be_u32(&data, fanout_off + first * 4)? as usize;
1747    let mut lo = j0;
1748    let mut hi = j1;
1749    while lo < hi {
1750        let mid = (lo + hi) / 2;
1751        let base = oidl_off + mid * hash_len;
1752        let cmp = data[base..base + hash_len].cmp(oid.as_bytes());
1753        if cmp == std::cmp::Ordering::Less {
1754            lo = mid + 1;
1755        } else {
1756            hi = mid;
1757        }
1758    }
1759    if lo >= num_objects {
1760        return Err(Error::CorruptObject(format!(
1761            "object {} not in multi-pack-index",
1762            oid.to_hex()
1763        )));
1764    }
1765    let base = oidl_off + lo * hash_len;
1766    if data[base..base + hash_len] != *oid.as_bytes() {
1767        return Err(Error::CorruptObject(format!(
1768            "object {} not in multi-pack-index",
1769            oid.to_hex()
1770        )));
1771    }
1772    let ob = ooff_off + lo * 8;
1773    let pack_id = read_be_u32(&data, ob)?;
1774    let off32 = read_be_u32(&data, ob + 4)?;
1775    Ok((pack_id, u64::from(off32)))
1776}
1777
1778/// Returns whether `oid` appears in the active MIDX OID table for `objects_dir`.
1779///
1780/// [`None`] means there is no MIDX at the pack tip. [`Some`] is the lookup result when a MIDX exists.
1781pub fn midx_oid_listed_in_tip(objects_dir: &Path, oid: &ObjectId) -> Result<Option<bool>> {
1782    let pack_dir = objects_dir.join("pack");
1783    let Some(midx_path) = resolve_tip_midx_path(&pack_dir) else {
1784        return Ok(None);
1785    };
1786    let data = midx_cache::get_bytes(&midx_path)?;
1787    let hash_len = midx_hash_len(&data);
1788    let MidxReadView {
1789        oidf_off,
1790        oidl_off,
1791        num_objects,
1792        ..
1793    } = match midx_load_for_read(&data, repo_midx_hash_version_for_objects_dir(objects_dir)) {
1794        MidxLoadResult::Ok(v) => v,
1795        MidxLoadResult::Skip => return Ok(None),
1796    };
1797
1798    let first = oid.as_bytes()[0] as usize;
1799    let lo = if first == 0 {
1800        0u32
1801    } else {
1802        read_be_u32(&data, oidf_off + (first - 1) * 4)?
1803    };
1804    let hi = read_be_u32(&data, oidf_off + first * 4)?;
1805
1806    let mut i = lo as usize;
1807    while i < hi as usize && i < num_objects {
1808        let o = ObjectId::from_bytes(&data[oidl_off + i * hash_len..oidl_off + (i + 1) * hash_len])?;
1809        match o.cmp(oid) {
1810            std::cmp::Ordering::Equal => return Ok(Some(true)),
1811            std::cmp::Ordering::Greater => return Ok(Some(false)),
1812            std::cmp::Ordering::Less => i += 1,
1813        }
1814    }
1815    Ok(Some(false))
1816}
1817
/// Locations of the chunks needed for object reads, produced by a successful MIDX load.
///
/// All offsets are byte positions inside the MIDX image that was handed to the loader.
struct MidxReadView {
    /// Start of the OID fanout chunk.
    oidf_off: usize,
    /// Start of the OID lookup chunk.
    oidl_off: usize,
    /// Start of the object-offsets chunk.
    ooff_off: usize,
    /// `(start, length)` of the large-offsets chunk, when the MIDX has one.
    loff: Option<(usize, usize)>,
    /// Object count, taken from the last fanout entry.
    num_objects: usize,
    /// Pack names in the order they appear in the pack-names chunk.
    pack_names: Vec<String>,
}
1827
1828enum MidxLoadResult {
1829    Ok(MidxReadView),
1830    /// The MIDX is unusable but not fatal (Git returns NULL and falls back to packs);
1831    /// an `error:`/`warning:` line has already been printed.
1832    Skip,
1833}
1834
/// Emit a recoverable MIDX `error:`/`warning:` line on stderr, at most once per process.
///
/// Git loads the MIDX a single time and caches it, so it reports a recoverable corruption once.
/// grit re-reads the MIDX for every object lookup; remembering the lines already printed keeps
/// the output identical to Git's single report, which the tests rely on.
fn midx_warn_once(line: &str) {
    use std::sync::{Mutex, OnceLock};

    static SEEN: OnceLock<Mutex<HashSet<String>>> = OnceLock::new();

    let registry = SEEN.get_or_init(|| Mutex::new(HashSet::new()));
    match registry.lock() {
        Ok(mut printed) => {
            // `insert` reports whether the line is new to the set.
            if printed.insert(line.to_owned()) {
                eprintln!("{line}");
            }
        }
        // A poisoned lock means deduplication is unavailable; print rather than lose the line.
        Err(_) => eprintln!("{line}"),
    }
}
1853
/// Report a fatal MIDX problem the way Git's `error()` + `die()` sequence does, then exit.
///
/// Every entry of `lines` except the last is written to stderr as `error: <line>`; the last
/// one is written as `fatal: <line>`. The process then exits with status 128. Write failures
/// are ignored because the process is terminating anyway.
fn midx_die(lines: &[&str]) -> ! {
    use std::io::Write;

    let mut stderr = std::io::stderr().lock();
    if let Some((fatal, errors)) = lines.split_last() {
        for msg in errors {
            let _ = writeln!(stderr, "error: {msg}");
        }
        let _ = writeln!(stderr, "fatal: {fatal}");
    }
    let _ = stderr.flush();
    std::process::exit(128);
}
1870
1871/// Validate and load a MIDX image for object reads, mirroring `load_multi_pack_index`
1872/// in git/midx.c. Fatal corruptions print `error:`/`fatal:` and exit (Git `die()`);
1873/// recoverable corruptions print an `error:`/`warning:` and return [`MidxLoadResult::Skip`].
1874fn midx_load_for_read(data: &[u8], expected_hash_version: u8) -> MidxLoadResult {
1875    if data.len() < MIDX_HEADER_SIZE + 20 {
1876        return MidxLoadResult::Skip;
1877    }
1878    let sig = u32::from_be_bytes([data[0], data[1], data[2], data[3]]);
1879    if sig != MIDX_SIGNATURE {
1880        midx_die(&[&format!(
1881            "multi-pack-index signature 0x{sig:08x} does not match signature 0x{MIDX_SIGNATURE:08x}"
1882        )]);
1883    }
1884    let version = data[4];
1885    if version != MIDX_VERSION_V1 && version != MIDX_VERSION_V2 {
1886        midx_die(&[&format!(
1887            "multi-pack-index version {version} not recognized"
1888        )]);
1889    }
1890    let hash_version = data[5];
1891    if hash_version != expected_hash_version {
1892        // `load_multi_pack_index` error()s then `goto cleanup_fail` (returns NULL),
1893        // so this is recoverable, not fatal. The expected version is the repository's
1894        // own `oid_version(hash_algo)` (SHA-1 → 1, SHA-256 → 2).
1895        midx_warn_once(&format!(
1896            "error: multi-pack-index hash version {hash_version} does not match version {expected_hash_version}"
1897        ));
1898        return MidxLoadResult::Skip;
1899    }
1900    let hash_len = if hash_version == 2 { 32usize } else { 20usize };
1901    let num_packs = u32::from_be_bytes([data[8], data[9], data[10], data[11]]) as usize;
1902
1903    // Table of contents (chunk-format.c read_table_of_contents). Recoverable failures
1904    // (unaligned / improper offset / duplicate / non-zero terminator) print error() and
1905    // return NULL.
1906    let mut toc_errors: Vec<String> = Vec::new();
1907    let chunks = match parse_midx_toc(data, hash_len, &mut toc_errors) {
1908        Ok(c) => c,
1909        Err(_) => {
1910            for e in &toc_errors {
1911                midx_warn_once(&format!("error: {e}"));
1912            }
1913            return MidxLoadResult::Skip;
1914        }
1915    };
1916
1917    // Required pack-names chunk.
1918    let Some((pn_off, pn_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_PACKNAMES)
1919    else {
1920        midx_die(&["multi-pack-index required pack-name chunk missing or corrupted"]);
1921    };
1922
1923    // Required oid-fanout chunk + size + ordering (midx_read_oid_fanout).
1924    let Some((oidf_off, oidf_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OIDFANOUT)
1925    else {
1926        midx_die(&["multi-pack-index required OID fanout chunk missing or corrupted"]);
1927    };
1928    if oidf_len != 256 * 4 {
1929        midx_die(&[
1930            "multi-pack-index OID fanout is of the wrong size",
1931            "multi-pack-index required OID fanout chunk missing or corrupted",
1932        ]);
1933    }
1934    let fanout = |i: usize| -> u32 {
1935        let b = oidf_off + i * 4;
1936        u32::from_be_bytes([data[b], data[b + 1], data[b + 2], data[b + 3]])
1937    };
1938    for i in 0..255 {
1939        let f1 = fanout(i);
1940        let f2 = fanout(i + 1);
1941        if f1 > f2 {
1942            midx_die(&[
1943                &format!(
1944                    "oid fanout out of order: fanout[{i}] = {f1:x} > {f2:x} = fanout[{}]",
1945                    i + 1
1946                ),
1947                "multi-pack-index required OID fanout chunk missing or corrupted",
1948            ]);
1949        }
1950    }
1951    let num_objects = fanout(255) as usize;
1952
1953    // Required oid-lookup chunk + size (midx_read_oid_lookup).
1954    let Some((oidl_off, oidl_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OIDLOOKUP)
1955    else {
1956        midx_die(&["multi-pack-index required OID lookup chunk missing or corrupted"]);
1957    };
1958    if oidl_len != hash_len * num_objects {
1959        midx_die(&[
1960            "multi-pack-index OID lookup chunk is the wrong size",
1961            "multi-pack-index required OID lookup chunk missing or corrupted",
1962        ]);
1963    }
1964
1965    // Required object-offsets chunk + size (midx_read_object_offsets).
1966    let Some((ooff_off, ooff_len)) =
1967        toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OBJECTOFFSETS)
1968    else {
1969        midx_die(&["multi-pack-index required object offsets chunk missing or corrupted"]);
1970    };
1971    if ooff_len != num_objects * 8 {
1972        midx_die(&[
1973            "multi-pack-index object offset chunk is the wrong size",
1974            "multi-pack-index required object offsets chunk missing or corrupted",
1975        ]);
1976    }
1977
1978    let loff = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_LARGEOFFSETS);
1979
1980    // Optional revindex chunk — wrong size warns but does not fail the load.
1981    if let Some((_, rlen)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_REVINDEX) {
1982        if rlen != num_objects * 4 {
1983            midx_warn_once("error: multi-pack-index reverse-index chunk is the wrong size");
1984            midx_warn_once("warning: multi-pack bitmap is missing required reverse index");
1985        }
1986    }
1987
1988    // Pack-name parsing (die if a name is unterminated).
1989    let mut pack_names: Vec<String> = Vec::with_capacity(num_packs);
1990    let blob = &data[pn_off..pn_off + pn_len];
1991    let mut start = 0usize;
1992    for _ in 0..num_packs {
1993        let Some(rel) = blob[start..].iter().position(|&b| b == 0) else {
1994            midx_die(&["multi-pack-index pack-name chunk is too short"]);
1995        };
1996        let name = match std::str::from_utf8(&blob[start..start + rel]) {
1997            Ok(s) => s.to_string(),
1998            Err(_) => midx_die(&["multi-pack-index pack-name chunk is too short"]),
1999        };
2000        if version == MIDX_VERSION_V1
2001            && !pack_names.is_empty()
2002            && name.as_str() <= pack_names.last().map(|s| s.as_str()).unwrap_or("")
2003        {
2004            midx_die(&[&format!(
2005                "multi-pack-index pack names out of order: '{}' before '{name}'",
2006                pack_names.last().cloned().unwrap_or_default()
2007            )]);
2008        }
2009        pack_names.push(name);
2010        start += rel + 1;
2011    }
2012
2013    MidxLoadResult::Ok(MidxReadView {
2014        oidf_off,
2015        oidl_off,
2016        ooff_off,
2017        loff,
2018        num_objects,
2019        pack_names,
2020    })
2021}
2022
2023/// Eagerly validate that every pack named by the active MIDX has a readable `.idx`.
2024///
2025/// Mirrors git/packfile.c `open_pack_index`: when `prepare_packed_git` registers the
2026/// packs the MIDX references, a pack whose `.idx` cannot be opened (truncated/corrupt)
2027/// triggers `error: packfile <pack> index unavailable`. Git reports this once because the
2028/// MIDX/pack store is prepared a single time; this routine reproduces that even when the
2029/// object that triggered the read is found loose (so it never reaches the per-object MIDX
2030/// lookup). Runs at most once per process per `objects_dir`.
2031pub fn validate_midx_referenced_packs(objects_dir: &Path) {
2032    use std::sync::Mutex;
2033    use std::sync::OnceLock;
2034    static DONE: OnceLock<Mutex<HashSet<std::path::PathBuf>>> = OnceLock::new();
2035    let done = DONE.get_or_init(|| Mutex::new(HashSet::new()));
2036    if let Ok(mut set) = done.lock() {
2037        if !set.insert(objects_dir.to_path_buf()) {
2038            return;
2039        }
2040    }
2041
2042    let pack_dir = objects_dir.join("pack");
2043    let Some(midx_path) = resolve_tip_midx_path(&pack_dir) else {
2044        return;
2045    };
2046    let Ok(data) = fs::read(&midx_path) else {
2047        return;
2048    };
2049    let MidxReadView { pack_names, .. } =
2050        match midx_load_for_read(&data, repo_midx_hash_version_for_objects_dir(objects_dir)) {
2051            MidxLoadResult::Ok(v) => v,
2052            MidxLoadResult::Skip => return,
2053        };
2054    for idx_name in &pack_names {
2055        let idx_path = pack_dir.join(idx_name);
2056        // A MIDX may name a pack whose files were later deleted; Git skips the missing
2057        // pack silently (it is not "unavailable", just gone). Only a present-but-corrupt
2058        // idx produces the "index unavailable" error.
2059        if !idx_path.exists() {
2060            continue;
2061        }
2062        // Match Git's `open_pack_index`, which parses the idx header/tables but does
2063        // not verify the trailing checksum: a structurally valid idx with a stale
2064        // checksum (the 64-bit-offset tests corrupt one offset byte in place) loads
2065        // fine and must NOT be reported "unavailable". Only an unparseable idx
2066        // (e.g. truncated, as in `corrupt idx reports errors`) is unavailable.
2067        if crate::pack::read_pack_index_no_verify(&idx_path).is_err() {
2068            let mut pack_path = idx_path.clone();
2069            pack_path.set_extension("pack");
2070            midx_warn_once(&format!(
2071                "error: packfile {} index unavailable",
2072                pack_path.display()
2073            ));
2074        }
2075    }
2076}
2077
2078/// When `core.multiPackIndex` is enabled, try to read `oid` from the active MIDX in `objects_dir`.
2079///
2080/// Returns [`None`] when no MIDX exists or `oid` is not listed. Returns [`Some(Err(..))`] when the
2081/// MIDX is present but malformed (callers surface Git-style `error:` / `fatal:` messages).
2082pub fn try_read_object_via_midx(
2083    objects_dir: &Path,
2084    oid: &ObjectId,
2085) -> Result<Option<crate::objects::Object>> {
2086    let pack_dir = objects_dir.join("pack");
2087    let Some(midx_path) = resolve_tip_midx_path(&pack_dir) else {
2088        return Ok(None);
2089    };
2090    let data = midx_cache::get_bytes(&midx_path)?;
2091
2092    // Load-time validation, mirroring `load_multi_pack_index` in git/midx.c.
2093    // Fatal corruptions `die()` (print error + fatal, exit 128); recoverable
2094    // ones (e.g. an unaligned chunk table) skip the MIDX entirely.
2095    let MidxReadView {
2096        oidf_off,
2097        oidl_off,
2098        ooff_off,
2099        loff,
2100        num_objects,
2101        pack_names,
2102    } = match midx_load_for_read(&data, repo_midx_hash_version_for_objects_dir(objects_dir)) {
2103        MidxLoadResult::Ok(v) => v,
2104        MidxLoadResult::Skip => return Ok(None),
2105    };
2106
2107    let first = oid.as_bytes()[0] as usize;
2108    let lo = if first == 0 {
2109        0u32
2110    } else {
2111        read_be_u32(&data, oidf_off + (first - 1) * 4)?
2112    };
2113    let hi = read_be_u32(&data, oidf_off + first * 4)?;
2114
2115    let hash_len = midx_hash_len(&data);
2116    let mut pos = None;
2117    let mut i = lo as usize;
2118    while i < hi as usize && i < num_objects {
2119        let o = ObjectId::from_bytes(&data[oidl_off + i * hash_len..oidl_off + (i + 1) * hash_len])?;
2120        let c = o.cmp(oid);
2121        if c == std::cmp::Ordering::Equal {
2122            pos = Some(i);
2123            break;
2124        }
2125        if c == std::cmp::Ordering::Greater {
2126            break;
2127        }
2128        i += 1;
2129    }
2130    let Some(pos) = pos else {
2131        return Ok(None);
2132    };
2133
2134    let obase = ooff_off + pos * 8;
2135    let pack_id = read_be_u32(&data, obase)?;
2136    let raw_off = read_be_u32(&data, obase + 4)?;
2137    let _offset = if (raw_off & MIDX_LARGE_OFFSET_NEEDED) != 0 {
2138        let idx = (raw_off & !MIDX_LARGE_OFFSET_NEEDED) as usize;
2139        let need = (idx + 1) * 8;
2140        match loff {
2141            Some((loff_off, loff_len)) if loff_len >= need => {
2142                read_be_u64(&data, loff_off + idx * 8)?
2143            }
2144            _ => {
2145                // git/midx.c `nth_midxed_offset`: die on out-of-bounds large offset.
2146                midx_die(&["multi-pack-index large offset out of bounds"]);
2147            }
2148        }
2149    } else {
2150        u64::from(raw_off)
2151    };
2152
2153    let idx_name = pack_names
2154        .get(pack_id as usize)
2155        .ok_or_else(|| Error::CorruptObject("bad pack-int-id".to_owned()))?;
2156    let idx_path = pack_dir.join(idx_name);
2157    // A multi-pack-index can outlive packs it names (e.g. a `repack -d` deleted a
2158    // pack but did not rewrite the MIDX). Git tolerates such stale entries by
2159    // skipping the missing pack; mirror that by falling through to other object
2160    // sources instead of surfacing the open error.
2161    if !idx_path.exists() {
2162        return Ok(None);
2163    }
2164    // Mirror git/packfile.c `open_pack_index`: when a pack's idx cannot be read
2165    // (e.g. truncated/corrupt), Git emits `error: packfile <pack> index unavailable`,
2166    // marks the pack invalid, and continues to other object sources. The object
2167    // may still be found loose or in another pack, so fall through rather than
2168    // surfacing the parse error as fatal. Use the non-verifying parse to match
2169    // `open_pack_index`, which does not validate the trailing checksum (a pack
2170    // `.idx` with a stale checksum but valid structure must still be usable).
2171    let idx = match crate::pack::read_pack_index_cached(&idx_path) {
2172        Ok(idx) => idx,
2173        Err(_) => {
2174            let mut pack_path = idx_path.clone();
2175            pack_path.set_extension("pack");
2176            midx_warn_once(&format!(
2177                "error: packfile {} index unavailable",
2178                pack_path.display()
2179            ));
2180            return Ok(None);
2181        }
2182    };
2183    crate::pack::read_object_from_pack(&idx, oid).map(Some)
2184}
2185
2186pub fn read_midx_preferred_idx_name(objects_dir: &Path) -> Result<String> {
2187    let pack_dir = objects_dir.join("pack");
2188    let path = resolve_tip_midx_path(&pack_dir)
2189        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
2190    let data = fs::read(&path).map_err(Error::Io)?;
2191    let (_, hdr_end, _) = parse_midx_header(&data)?;
2192    let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
2193    let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
2194    let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
2195    // The preferred pack is recorded in the MIDX reverse index, which is only
2196    // present when the MIDX has a bitmap. Without it, the preferred pack is
2197    // unknowable (git/midx.c `midx_preferred_pack` returns -1). Prefer the
2198    // embedded RIDX chunk; otherwise fall back to a `multi-pack-index*.rev`
2199    // sidecar, matching `load_midx_revindex`.
2200    let (ridx_off, ridx_len) = match find_chunk(&data, hdr_end, MIDX_CHUNKID_REVINDEX) {
2201        Ok(r) => r,
2202        Err(_) => {
2203            return Err(Error::CorruptObject(
2204                "could not determine MIDX preferred pack".to_owned(),
2205            ));
2206        }
2207    };
2208
2209    if ridx_len < 4 || ooff_len < 8 {
2210        return Err(Error::CorruptObject("truncated MIDX RIDX/OOFF".to_owned()));
2211    }
2212    let first_oid_idx = read_be_u32(&data, ridx_off)? as usize;
2213    let entry_base = ooff_off + first_oid_idx * 8;
2214    if entry_base + 8 > data.len() || entry_base + 8 > ooff_off + ooff_len {
2215        return Err(Error::CorruptObject(
2216            "bad MIDX object-offsets index".to_owned(),
2217        ));
2218    }
2219    let pack_id = read_be_u32(&data, entry_base)?;
2220    let idx = usize::try_from(pack_id)
2221        .map_err(|_| Error::CorruptObject("pack id overflow in multi-pack-index".to_owned()))?;
2222    names
2223        .get(idx)
2224        .cloned()
2225        .ok_or_else(|| Error::CorruptObject("preferred pack id out of range".to_owned()))
2226}
2227
2228/// Build `objects/pack/multi-pack-index` for all pack indexes in `pack_dir`.
2229///
2230/// Returns an error if there are no `.idx` files, if an object offset does not
2231/// fit in 31 bits (no `LOFF` chunk yet), or if I/O fails.
2232/// Remove every multi-pack-index file under `pack_dir` (root file, sidecars, and
2233/// `multi-pack-index.d/`). Used by full `repack -a` so stale incremental chains do not survive.
2234pub fn clear_pack_midx_state(pack_dir: &Path) -> Result<()> {
2235    let _ = fs::remove_file(pack_dir.join("multi-pack-index"));
2236    scrub_root_midx_sidecars_except(pack_dir, None)?;
2237    let midx_d = midx_d_dir(pack_dir);
2238    if midx_d.exists() {
2239        let _ = fs::remove_dir_all(&midx_d);
2240    }
2241    Ok(())
2242}
2243
2244pub fn write_multi_pack_index(pack_dir: &Path) -> Result<()> {
2245    write_multi_pack_index_with_options(pack_dir, &WriteMultiPackIndexOptions::default())
2246}
2247
/// Write `multi-pack-index` with optional preferred pack, placeholders, and incremental chain.
///
/// The function proceeds in this order:
///
/// 1. Unless `opts.pack_names_subset_ordered` is set, the MIDX found by
///    `resolve_tip_midx_path` is inspected. A checksum mismatch only prints a
///    warning; a listed pack whose `.pack` file is missing aborts the write.
/// 2. Every `*.idx` in `pack_dir` that has a companion `.pack` is collected and
///    sorted by name, then optionally narrowed to the requested subset (kept in
///    the order the subset lists them, duplicates dropped).
/// 3. The preferred pack is resolved, the pack indexes are loaded and the MIDX
///    bytes are produced by `build_midx_bytes_filtered`.
/// 4. With `opts.incremental`, the bytes become a new layer under the split
///    directory and the chain file is rewritten; otherwise the root
///    `multi-pack-index` is replaced (or left untouched when byte-identical).
///    `.bitmap` / `.rev` sidecars are written or scrubbed to match.
///
/// An incremental write that finds no pack outside the existing base returns
/// `Ok(())` without touching the disk.
///
/// # Errors
///
/// Fails when no pack is left to index, when the preferred pack index is out of
/// range or selects a pack without objects, when an existing MIDX references a
/// pack that is gone, or when reading or writing any file fails.
pub fn write_multi_pack_index_with_options(
    pack_dir: &Path,
    opts: &WriteMultiPackIndexOptions,
) -> Result<()> {
    // Git warns and ignores an existing MIDX whose checksum does not validate when
    // writing a fresh (non-stdin-packs) MIDX (git/midx-write.c `write_midx_internal`).
    if opts.pack_names_subset_ordered.is_none() {
        if let Some(existing) = resolve_tip_midx_path(pack_dir) {
            if let Ok(bytes) = fs::read(&existing) {
                if midx_checksum_is_valid(&bytes) {
                    // A fresh write copies the existing MIDX's packs. Loading a pack
                    // it references whose `.pack` is gone fails with `could not load
                    // pack N` (git/midx-write.c `fill_pack_from_midx`).
                    if let Ok((_, existing_names)) = oids_and_packs_from_midx_data(&bytes) {
                        for (i, name) in existing_names.iter().enumerate() {
                            let stem = name.strip_suffix(".idx").unwrap_or(name);
                            if !pack_dir.join(format!("{stem}.pack")).exists() {
                                eprintln!("error: could not load pack {i}");
                                return Err(Error::CorruptObject(format!(
                                    "could not load pack {i}"
                                )));
                            }
                        }
                    }
                } else {
                    eprintln!("warning: ignoring existing multi-pack-index; checksum mismatch");
                }
            }
        }
    }

    // Git's MIDX covers every pack index in the directory regardless of its
    // basename (the `.git/objects/pack/test-*.idx` packs created by t7900's
    // incremental-repack test, for instance), so include any `*.idx` whose
    // companion `.pack` exists.
    // An unreadable `pack_dir` yields an empty list here and is reported further
    // down as "no pack files to index".
    let mut idx_names: Vec<String> = fs::read_dir(pack_dir)
        .map(|rd| {
            rd.filter_map(|e| e.ok())
                .filter_map(|e| {
                    let name = e.file_name().to_string_lossy().to_string();
                    let stem = name.strip_suffix(".idx")?;
                    if pack_dir.join(format!("{stem}.pack")).exists() {
                        Some(name)
                    } else {
                        None
                    }
                })
                .collect()
        })
        .unwrap_or_default();
    idx_names.sort();

    // When a subset is requested, keep only those packs, in the subset's order,
    // and drop repeated names.
    let idx_names: Vec<String> = if let Some(sub) = &opts.pack_names_subset_ordered {
        let mut out = Vec::new();
        for line in sub {
            let want = normalize_pack_idx_basename(line)?;
            if let Some(found) = idx_names.iter().find(|n| **n == want).cloned() {
                if !out.contains(&found) {
                    out.push(found);
                }
            }
            // Unknown names on stdin are silently ignored (Git skips packs it
            // cannot find rather than failing the whole write).
        }
        out
    } else {
        idx_names
    };

    // Resolve / validate the preferred pack against the working pack set. Git emits a
    // (non-fatal) `warning: unknown preferred pack: '<name>'` when it cannot be matched.
    let mut preferred_warned = false;
    if let Some(raw) = opts.preferred_pack_name.as_deref() {
        if opts.preferred_pack_idx.is_none()
            && !idx_names
                .iter()
                .any(|n| cmp_idx_or_pack_name(raw, n).is_eq())
        {
            eprintln!("warning: unknown preferred pack: '{raw}'");
            preferred_warned = true;
        }
    }

    if idx_names.is_empty() {
        // Git `write_midx_internal`: `error("no pack files to index.")` then fail.
        eprintln!("error: no pack files to index.");
        return Err(Error::CorruptObject("no pack files to index.".to_owned()));
    }

    // For an incremental write, gather what the existing chain already provides.
    let (base_oids, base_pack_names) = if opts.incremental {
        collect_incremental_base(pack_dir)?
    } else {
        (HashSet::new(), HashSet::new())
    };

    // The new layer only lists packs that no base layer already covers.
    let layer_idx_names: Vec<String> = if opts.incremental {
        idx_names
            .iter()
            .filter(|n| {
                !base_pack_names
                    .iter()
                    .any(|bp| pack_names_match_layer(bp, n))
            })
            .cloned()
            .collect()
    } else {
        idx_names.clone()
    };

    // Nothing new to add: leave the chain exactly as it is.
    if opts.incremental && layer_idx_names.is_empty() {
        return Ok(());
    }

    let work_names = if opts.incremental {
        &layer_idx_names[..]
    } else {
        &idx_names[..]
    };

    // Preferred pack precedence: explicit index, then explicit name (unless it was
    // already reported as unknown), then an mtime-based choice when bitmaps are requested.
    let mut preferred_idx = opts.preferred_pack_idx.map(|p| p as usize);
    if preferred_idx.is_none() && !preferred_warned {
        if let Some(raw) = opts.preferred_pack_name.as_deref() {
            // Already validated against `idx_names`; resolve against the working set.
            preferred_idx = work_names
                .iter()
                .position(|n| cmp_idx_or_pack_name(raw, n).is_eq());
        }
    }
    if preferred_idx.is_none() && opts.write_bitmap_placeholders && !work_names.is_empty() {
        preferred_idx = preferred_pack_index_by_mtime(pack_dir, work_names)?;
    }
    if let Some(p) = preferred_idx {
        if p >= work_names.len() {
            return Err(Error::CorruptObject(
                "preferred pack index out of range".to_owned(),
            ));
        }
    }

    let mut indexes: Vec<PackIndex> = Vec::with_capacity(work_names.len());
    for name in work_names {
        let path = pack_dir.join(name);
        // Do not re-verify the idx trailer here; Git reads the offset table
        // directly (t5319 forces a deliberately corrupt-but-valid 64-bit idx).
        indexes.push(crate::pack::read_pack_index_no_verify(&path)?);
    }

    // Git refuses an explicitly preferred pack that has no objects.
    if let Some(p) = preferred_idx {
        if indexes.get(p).map(|i| i.entries.len()).unwrap_or(0) == 0 {
            let name = work_names.get(p).cloned().unwrap_or_default();
            let pack_name = name.strip_suffix(".idx").unwrap_or(&name);
            eprintln!("error: cannot select preferred pack {pack_name}.pack with no objects");
            return Err(Error::CorruptObject(
                "cannot select preferred pack with no objects".to_owned(),
            ));
        }
    }

    let pack_mtimes_layer: Vec<std::time::SystemTime> =
        indexes.iter().map(pack_mtime_for_midx).collect();
    let preferred_u32 = preferred_idx.map(|p| p as u32);
    let select_hash_len = if repo_midx_hash_version(pack_dir) == 2 { 32 } else { 20 };

    // Pick one entry per object across the layer's packs, skipping objects whose id
    // length does not match the repository hash and objects the base already has.
    // NOTE(review): within this function `best` is only read through `is_empty()`
    // below; the bytes themselves come from `build_midx_bytes_filtered`.
    let mut best: HashMap<ObjectId, MidxEntry> = HashMap::new();
    for (pack_id, idx) in indexes.iter().enumerate() {
        let pack_id = u32::try_from(pack_id).map_err(|_| {
            Error::CorruptObject("too many pack files for multi-pack-index".to_owned())
        })?;
        let mtime = pack_mtimes_layer[pack_id as usize];
        for e in &idx.entries {
            if e.oid.len() != select_hash_len {
                continue;
            }
            let Ok(oid) = ObjectId::from_bytes(&e.oid) else {
                continue;
            };
            if opts.incremental && base_oids.contains(&oid) {
                continue;
            }
            let cand = MidxEntry {
                oid,
                pack_id,
                offset: e.offset,
                pack_mtime: mtime,
            };
            match best.get(&oid) {
                None => {
                    best.insert(oid, cand);
                }
                Some(cur) => {
                    if midx_pick_better_entry(cur, pack_id, e.offset, mtime, preferred_u32) {
                        best.insert(oid, cand);
                    }
                }
            }
        }
    }

    // An incremental layer that contributes no object gets no bitmap placeholder.
    let bitmap_placeholders =
        opts.write_bitmap_placeholders && (!opts.incremental || !best.is_empty());

    let omit_embedded_ridx = opts.write_rev_placeholder;
    // An incremental layer must not repeat objects already provided by the base
    // chain even when the layer's own pack physically contains them (a fresh pack
    // built with `--revs` from a tag range, for instance). Filter by base OID.
    let exclude = if opts.incremental && !base_oids.is_empty() {
        Some(&base_oids)
    } else {
        None
    };
    let (out, rev_sidecar_order) = build_midx_bytes_filtered(
        work_names,
        &indexes,
        preferred_idx,
        bitmap_placeholders,
        omit_embedded_ridx,
        opts.version.unwrap_or(MIDX_VERSION_V2),
        repo_midx_hash_version(pack_dir),
        exclude,
    )?;

    // The trailing `hash_len` bytes of the serialized MIDX are its checksum; the
    // hex form names the layer file and the sidecars.
    let hash_len = if repo_midx_hash_version(pack_dir) == 2 { 32 } else { 20 };
    let hash = &out[out.len() - hash_len..];
    let hash_hex = hex::encode(hash);
    let hash_arr: Vec<u8> = hash.to_vec();

    if opts.incremental {
        let root_midx = pack_dir.join("multi-pack-index");
        let chain_path = chain_file_path(pack_dir);
        let chain_existed = chain_path.exists();

        // A root MIDX without a chain becomes the first layer of a new chain;
        // otherwise continue from the chain on disk (empty if it cannot be read).
        let mut chain = if root_midx.exists() && !chain_existed {
            let root_hex = midx_checksum_hex_from_path(&root_midx)?;
            link_root_midx_into_chain(pack_dir, &root_hex)?;
            vec![root_hex]
        } else {
            read_chain_layer_hashes(pack_dir).unwrap_or_default()
        };

        chain.push(hash_hex.clone());

        let midx_d = midx_d_dir(pack_dir);
        fs::create_dir_all(&midx_d).map_err(Error::Io)?;

        let layer_path = midx_d.join(format!("multi-pack-index-{hash_hex}.midx"));
        fs::write(&layer_path, &out).map_err(Error::Io)?;

        // One checksum per line, in chain order.
        let mut chain_data = String::new();
        for h in &chain {
            chain_data.push_str(h);
            chain_data.push('\n');
        }
        fs::write(chain_file_path(pack_dir), chain_data.as_bytes()).map_err(Error::Io)?;

        clear_stale_split_layers(pack_dir, &chain)?;

        // The split layout supersedes the root file and its sidecars.
        let _ = fs::remove_file(pack_dir.join("multi-pack-index"));
        scrub_root_midx_sidecars(pack_dir)?;
        if bitmap_placeholders {
            let full = hex::encode(hash);
            fs::write(midx_d.join(format!("multi-pack-index-{full}.bitmap")), [])
                .map_err(Error::Io)?;
            if opts.write_rev_placeholder {
                let rev_path = midx_d.join(format!("multi-pack-index-{full}.rev"));
                if let Some(order) = rev_sidecar_order.as_ref() {
                    write_midx_rev_sidecar(&rev_path, order, &hash_arr)?;
                } else {
                    fs::write(rev_path, []).map_err(Error::Io)?;
                }
            }
        }
    } else {
        // A non-incremental write replaces any prior split layout. Git removes the
        // individual incremental layer files inside `multi-pack-index.d/` and
        // unlinks the chain file, but never `rmdir`s the directory itself, so an
        // empty `multi-pack-index.d/` is left behind (t5334 expects
        // `test_dir_is_empty $midxdir` after the conversion).
        let dest = pack_dir.join("multi-pack-index");

        // Git's `midx_needs_update`: if the new MIDX is byte-identical to the one
        // already on disk and we are not (re)writing a bitmap, leave the file
        // untouched so its mtime is preserved (t5319 `test_midx_is_retained`).
        let bitmap_path = pack_dir.join(format!("multi-pack-index-{hash_hex}.bitmap"));
        let bitmap_ok = !opts.write_bitmap_placeholders || bitmap_path.exists();
        // Only short-circuit when there is no active incremental chain to collapse;
        // an empty leftover `multi-pack-index.d/` (from a prior conversion) must not
        // defeat the retention optimization, so key off the chain file, not the dir.
        if bitmap_ok && !chain_file_path(pack_dir).exists() {
            if let Ok(existing) = fs::read(&dest) {
                if existing == out {
                    return Ok(());
                }
            }
        }

        clear_incremental_midx_files(pack_dir)?;

        fs::write(&dest, &out).map_err(Error::Io)?;

        // Keep only sidecars named after the checksum that was just written.
        scrub_root_midx_sidecars_except(pack_dir, Some(&hash_hex))?;

        if opts.write_bitmap_placeholders {
            fs::write(
                pack_dir.join(format!("multi-pack-index-{hash_hex}.bitmap")),
                [],
            )
            .map_err(Error::Io)?;
            if opts.write_rev_placeholder {
                let rev_path = pack_dir.join(format!("multi-pack-index-{hash_hex}.rev"));
                if let Some(order) = rev_sidecar_order.as_ref() {
                    write_midx_rev_sidecar(&rev_path, order, &hash_arr)?;
                } else {
                    fs::write(rev_path, []).map_err(Error::Io)?;
                }
            }
        }
    }

    midx_cache::evict_pack_dir(pack_dir);
    Ok(())
}
2571
2572fn pack_names_match_layer(base_name: &str, disk_idx: &str) -> bool {
2573    if base_name == disk_idx {
2574        return true;
2575    }
2576    cmp_idx_or_pack_name(disk_idx, base_name).is_eq()
2577}
2578
/// Everything that can go wrong in [`compact_multi_pack_index`]. Each variant
/// corresponds to one user-facing diagnostic of git's
/// `cmd_multi_pack_index_compact`; the text itself lives in the `Display` impl.
#[derive(Debug)]
pub enum CompactError {
    /// There is no incremental chain to work on (it could not be read, or it
    /// lists no layers).
    NoChain,
    /// An endpoint checksum is not one of the chain's layers. Holds the argument
    /// exactly as given so the message reads `could not find MIDX: <arg>`.
    MissingEndpoint(String),
    /// `from` and `to` name the very same layer.
    IdenticalEndpoints,
    /// `from` (argv[0]) sits after `to` (argv[1]) in the chain, whereas git needs
    /// `from` to be an ancestor of `to`. Holds the `(from, to)` argument text.
    NotAncestor(String, String),
    /// The caller asked for compaction while selecting the v1 MIDX format.
    V1Format,
    /// Catch-all for I/O and parse failures, carrying the rendered message.
    Other(String),
}
2598
2599impl std::fmt::Display for CompactError {
2600    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
2601        match self {
2602            CompactError::NoChain => write!(f, "no multi-pack-index chain to compact"),
2603            CompactError::MissingEndpoint(s) => write!(f, "could not find MIDX: {s}"),
2604            CompactError::IdenticalEndpoints => {
2605                write!(f, "MIDX compaction endpoints must be unique")
2606            }
2607            CompactError::NotAncestor(from, to) => {
2608                write!(f, "MIDX {from} must be an ancestor of {to}")
2609            }
2610            CompactError::V1Format => write!(f, "cannot perform MIDX compaction with v1 format"),
2611            CompactError::Other(s) => write!(f, "{s}"),
2612        }
2613    }
2614}
2615
2616impl From<Error> for CompactError {
2617    fn from(e: Error) -> Self {
2618        CompactError::Other(e.to_string())
2619    }
2620}
2621
2622/// Collect every OID provided by the chain layers in `hashes` (each layer file is
2623/// self-contained: it lists only its own incremental objects).
2624fn collect_layer_oids(pack_dir: &Path, hashes: &[String]) -> Result<HashSet<ObjectId>> {
2625    let mut oids = HashSet::new();
2626    for h in hashes {
2627        let p = midx_d_dir(pack_dir).join(format!("multi-pack-index-{h}.midx"));
2628        let data = load_midx_file(&p)?;
2629        let (layer_oids, _) = oids_and_packs_from_midx_data(&data)?;
2630        oids.extend(layer_oids);
2631    }
2632    Ok(oids)
2633}
2634
2635/// Pack idx basenames listed by a single chain layer, in the layer's stored order.
2636fn layer_pack_names(pack_dir: &Path, hash: &str) -> Result<Vec<String>> {
2637    let p = midx_d_dir(pack_dir).join(format!("multi-pack-index-{hash}.midx"));
2638    let data = load_midx_file(&p)?;
2639    let (_, hdr_end, _) = parse_midx_header(&data)?;
2640    let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
2641    parse_pack_names_blob(&data[pn_off..pn_off + pn_len])
2642}
2643
2644/// `git multi-pack-index compact <from> <to>`: merge the inclusive chain range
2645/// `[from..to]` (oldest→newest, matching git's `from`=argv[0] / `to`=argv[1]) into a
2646/// single new incremental layer, preserving pack order, and rewrite the chain as
2647/// `[layers before from] + [compacted layer] + [layers after to]`.
2648///
2649/// Mirrors `write_midx_file_compact` (git/midx-write.c). Because grit's chain layers
2650/// are self-contained (each lists only its own packs/objects), layers outside the
2651/// compacted range keep their existing files and checksums untouched.
2652pub fn compact_multi_pack_index(
2653    pack_dir: &Path,
2654    from_arg: &str,
2655    to_arg: &str,
2656    write_bitmaps: bool,
2657    write_rev: bool,
2658    version: Option<u8>,
2659) -> std::result::Result<(), CompactError> {
2660    if version == Some(MIDX_VERSION_V1) {
2661        return Err(CompactError::V1Format);
2662    }
2663
2664    let chain = read_chain_layer_hashes(pack_dir).map_err(|_| CompactError::NoChain)?;
2665    if chain.is_empty() {
2666        return Err(CompactError::NoChain);
2667    }
2668
2669    let from_hex = from_arg.to_ascii_lowercase();
2670    let to_hex = to_arg.to_ascii_lowercase();
2671
2672    let from_pos = chain.iter().position(|h| *h == from_hex);
2673    let to_pos = chain.iter().position(|h| *h == to_hex);
2674
2675    // Match git: report `from` first, then `to`, when an endpoint is missing.
2676    let Some(from_pos) = from_pos else {
2677        return Err(CompactError::MissingEndpoint(from_arg.to_string()));
2678    };
2679    let Some(to_pos) = to_pos else {
2680        return Err(CompactError::MissingEndpoint(to_arg.to_string()));
2681    };
2682
2683    if from_pos == to_pos {
2684        return Err(CompactError::IdenticalEndpoints);
2685    }
2686    // git walks `base_midx` from `from`; reaching `to` means `from` is an ancestor of
2687    // `to`, i.e. `from` is newer (higher chain index) than `to`. That is the reverse
2688    // of what compaction expects, so report the "must be an ancestor" error.
2689    if from_pos > to_pos {
2690        return Err(CompactError::NotAncestor(
2691            from_arg.to_string(),
2692            to_arg.to_string(),
2693        ));
2694    }
2695
2696    // Layers strictly before `from` form the base; their objects are excluded from
2697    // the compacted layer.
2698    let base_hashes = &chain[..from_pos];
2699    let merged_hashes = &chain[from_pos..=to_pos];
2700    let upper_hashes = &chain[to_pos + 1..];
2701
2702    let base_oids = collect_layer_oids(pack_dir, base_hashes)?;
2703
2704    // Gather the merged layers' pack idx names in chain order (oldest layer first),
2705    // preserving each layer's internal order (git's `fill_packs_from_midx_range`).
2706    let mut ordered_idx_names: Vec<String> = Vec::new();
2707    for h in merged_hashes {
2708        for name in layer_pack_names(pack_dir, h)? {
2709            if !ordered_idx_names.contains(&name) {
2710                ordered_idx_names.push(name);
2711            }
2712        }
2713    }
2714
2715    if ordered_idx_names.is_empty() {
2716        return Err(CompactError::Other(
2717            "no packs found in compaction range".to_owned(),
2718        ));
2719    }
2720
2721    // Load the pack indexes in the resolved order.
2722    let mut indexes: Vec<PackIndex> = Vec::with_capacity(ordered_idx_names.len());
2723    for name in &ordered_idx_names {
2724        let path = pack_dir.join(name);
2725        indexes.push(crate::pack::read_pack_index_no_verify(&path)?);
2726    }
2727
2728    // When writing a bitmap, git sets the preferred pack to the first (oldest) pack
2729    // of the compacted range so its objects win duplicate selection.
2730    let preferred_idx = if write_bitmaps { Some(0usize) } else { None };
2731
2732    let exclude = if base_oids.is_empty() {
2733        None
2734    } else {
2735        Some(&base_oids)
2736    };
2737
2738    let (out, rev_sidecar_order) = build_midx_bytes_filtered(
2739        &ordered_idx_names,
2740        &indexes,
2741        preferred_idx,
2742        write_bitmaps,
2743        write_rev,
2744        version.unwrap_or(MIDX_VERSION_V2),
2745        repo_midx_hash_version(pack_dir),
2746        exclude,
2747    )?;
2748
2749    let hash_len = if repo_midx_hash_version(pack_dir) == 2 { 32 } else { 20 };
2750    let hash = &out[out.len() - hash_len..];
2751    let hash_hex = hex::encode(hash);
2752    let hash_arr: Vec<u8> = hash.to_vec();
2753
2754    let midx_d = midx_d_dir(pack_dir);
2755    fs::create_dir_all(&midx_d).map_err(Error::Io)?;
2756
2757    let layer_path = midx_d.join(format!("multi-pack-index-{hash_hex}.midx"));
2758    fs::write(&layer_path, &out).map_err(Error::Io)?;
2759
2760    // New chain: base layers, the compacted layer, then the untouched upper layers.
2761    let mut new_chain: Vec<String> = Vec::new();
2762    new_chain.extend(base_hashes.iter().cloned());
2763    new_chain.push(hash_hex.clone());
2764    new_chain.extend(upper_hashes.iter().cloned());
2765
2766    let mut chain_data = String::new();
2767    for h in &new_chain {
2768        chain_data.push_str(h);
2769        chain_data.push('\n');
2770    }
2771    fs::write(chain_file_path(pack_dir), chain_data.as_bytes()).map_err(Error::Io)?;
2772
2773    if write_bitmaps {
2774        fs::write(
2775            midx_d.join(format!("multi-pack-index-{hash_hex}.bitmap")),
2776            [],
2777        )
2778        .map_err(Error::Io)?;
2779        let rev_path = midx_d.join(format!("multi-pack-index-{hash_hex}.rev"));
2780        if write_rev {
2781            if let Some(order) = rev_sidecar_order.as_ref() {
2782                write_midx_rev_sidecar(&rev_path, order, &hash_arr)?;
2783            } else {
2784                fs::write(rev_path, []).map_err(Error::Io)?;
2785            }
2786        }
2787    }
2788
2789    // Drop the now-removed range layers and their sidecars.
2790    clear_stale_split_layers(pack_dir, &new_chain)?;
2791
2792    midx_cache::evict_pack_dir(pack_dir);
2793    Ok(())
2794}
2795
2796fn scrub_root_midx_sidecars(pack_dir: &Path) -> Result<()> {
2797    scrub_root_midx_sidecars_except(pack_dir, None)
2798}
2799
2800fn scrub_root_midx_sidecars_except(pack_dir: &Path, keep_hex: Option<&str>) -> Result<()> {
2801    let Ok(rd) = fs::read_dir(pack_dir) else {
2802        return Ok(());
2803    };
2804    for ent in rd {
2805        let ent = ent.map_err(Error::Io)?;
2806        let name = ent.file_name().to_string_lossy().to_string();
2807        let Some(rest) = name.strip_prefix("multi-pack-index-") else {
2808            continue;
2809        };
2810        if !(rest.ends_with(".bitmap") || rest.ends_with(".rev")) {
2811            continue;
2812        }
2813        let hash_part = rest
2814            .strip_suffix(".bitmap")
2815            .or_else(|| rest.strip_suffix(".rev"))
2816            .unwrap_or(rest);
2817        // Git's `clear_midx_files_ext` removes any `multi-pack-index-<hash>.<ext>`
2818        // sidecar that does not belong to the current MIDX, regardless of the
2819        // hash's textual length (t5319 plants a `multi-pack-index-abc.rev`).
2820        if keep_hex.is_some_and(|k| k == hash_part) {
2821            continue;
2822        }
2823        let _ = fs::remove_file(ent.path());
2824    }
2825    Ok(())
2826}