Skip to main content

grit_lib/
midx.rs

1//! Multi-pack-index (MIDX) file writing and minimal reading.
2//!
3//! Writes a Git-compatible `multi-pack-index` file (version 1, SHA-1) covering
4//! selected `pack-*.idx` files. Objects that appear in multiple packs keep the
5//! preferred pack's copy when `preferred_pack_idx` is set (matching Git's
6//! geometric repack tests).
7//!
8//! Incremental writes follow Git's split layout: layers live under
9//! `pack/multi-pack-index.d/multi-pack-index-<sha1>.midx` with ordering in
10//! `multi-pack-index-chain` (oldest hash first, newest last).
11
12use std::collections::{HashMap, HashSet};
13use std::fs;
14use std::io::{BufRead, BufReader};
15use std::path::Path;
16
17use sha1::{Digest, Sha1};
18
19use crate::error::{Error, Result};
20use crate::objects::ObjectId;
21use crate::pack::{read_pack_index_no_verify, PackIndex};
22
// On-disk magic numbers are four ASCII bytes read as a big-endian `u32`;
// spelling them as byte strings keeps the tag readable next to its name.
const MIDX_SIGNATURE: u32 = u32::from_be_bytes(*b"MIDX");
const MIDX_VERSION_V1: u8 = 1;
const MIDX_VERSION_V2: u8 = 2;
const HASH_VERSION_SHA1: u8 = 1;
const HASH_VERSION_SHA256: u8 = 2;
/// Signature (4) + version (1) + hash version (1) + chunk count (1) +
/// one further byte + pack count (4).
const MIDX_HEADER_SIZE: usize = 12;
/// Chunk id (4) + 64-bit file offset (8).
const CHUNK_TOC_ENTRY_SIZE: usize = 12;
const MIDX_CHUNKID_PACKNAMES: u32 = u32::from_be_bytes(*b"PNAM");
const MIDX_CHUNKID_OIDFANOUT: u32 = u32::from_be_bytes(*b"OIDF");
const MIDX_CHUNKID_OIDLOOKUP: u32 = u32::from_be_bytes(*b"OIDL");
const MIDX_CHUNKID_OBJECTOFFSETS: u32 = u32::from_be_bytes(*b"OOFF");
const MIDX_CHUNKID_LARGEOFFSETS: u32 = u32::from_be_bytes(*b"LOFF");
const MIDX_CHUNKID_REVINDEX: u32 = u32::from_be_bytes(*b"RIDX");
const MIDX_CHUNKID_BITMAPPED_PACKS: u32 = u32::from_be_bytes(*b"BTMP");

// Git `pack-revindex.h` / `pack-write.c` (standalone `.rev` next to MIDX).
const RIDX_SIGNATURE: u32 = u32::from_be_bytes(*b"RIDX");
const RIDX_VERSION: u32 = 1;
const RIDX_HEADER_SIZE: usize = 12;
const MIDX_CHUNK_ALIGNMENT: usize = 4;

// `git midx.h` (MIDX_LARGE_OFFSET_NEEDED): top bit of a 32-bit offset slot.
const MIDX_LARGE_OFFSET_NEEDED: u32 = 1 << 31;
46
/// One candidate object location considered while building a MIDX: which pack
/// holds the object and where.
struct MidxEntry {
    /// Object id of the indexed object.
    oid: ObjectId,
    /// Position of the owning pack in the pack list handed to the writer.
    pack_id: u32,
    /// Byte offset of the object as recorded in that pack's index.
    offset: u64,
    /// Modification time of the owning pack; used to break ties between
    /// duplicate copies of the same object.
    pack_mtime: std::time::SystemTime,
}
53
/// Options for writing a multi-pack index (extension of the simple writer).
///
/// Every field has a neutral default (`None` / `false`) through the derived
/// [`Default`] impl, so callers can set only what they need with struct-update
/// syntax.
#[derive(Debug, Clone, Default)]
pub struct WriteMultiPackIndexOptions {
    /// When set, objects also present in other packs are taken from this pack
    /// (`pack_names` index in the sorted name list).
    pub preferred_pack_idx: Option<u32>,
    /// Basename of the preferred pack (e.g. `pack-abc.idx` or `pack-abc.pack`); resolved against
    /// the working pack name list after optional subset filtering.
    pub preferred_pack_name: Option<String>,
    /// If set, only these `pack-*.idx` basenames are included, in this order (Git `--stdin-packs`).
    pub pack_names_subset_ordered: Option<Vec<String>>,
    /// When true, append RIDX + empty BTMP chunks so `test-tool read-midx --bitmap` succeeds.
    pub write_bitmap_placeholders: bool,
    /// When true, write a new layer in `multi-pack-index.d/` and extend the chain file
    /// instead of replacing `pack/multi-pack-index`.
    pub incremental: bool,
    /// When true with [`Self::write_bitmap_placeholders`], also create an empty `.rev`
    /// sidecar (Git `GIT_TEST_MIDX_WRITE_REV` compatibility).
    pub write_rev_placeholder: bool,
    /// On-disk MIDX format version to write (`1` or `2`). `None` writes the default (v2).
    /// Set from `midx.version`.
    pub version: Option<u8>,
}
77
78fn normalize_pack_idx_basename(raw: &str) -> Result<String> {
79    let t = raw.trim();
80    let t = std::path::Path::new(t)
81        .file_name()
82        .and_then(|s| s.to_str())
83        .unwrap_or(t);
84    let t = t.strip_prefix("./").unwrap_or(t);
85    if t.ends_with(".idx") {
86        Ok(t.to_string())
87    } else if t.ends_with(".pack") {
88        Ok(format!("{}.idx", t.strip_suffix(".pack").unwrap_or(t)))
89    } else {
90        Ok(format!("{t}.idx"))
91    }
92}
93
94/// Read a big-endian `u32` from `data` at byte offset `off`.
95///
96/// Returns [`Error::CorruptObject`] if `data` does not contain 4 bytes at `off`,
97/// replacing the previous fixed-width-slice `.try_into().unwrap()` with real
98/// bounds handling (the success-path value is unchanged).
99fn read_be_u32(data: &[u8], off: usize) -> Result<u32> {
100    let end = off.checked_add(4).filter(|&e| e <= data.len());
101    let Some(end) = end else {
102        return Err(Error::CorruptObject(
103            "truncated MIDX data reading u32".to_owned(),
104        ));
105    };
106    let bytes: [u8; 4] = data[off..end]
107        .try_into()
108        .map_err(|_| Error::CorruptObject("truncated MIDX data reading u32".to_owned()))?;
109    Ok(u32::from_be_bytes(bytes))
110}
111
112/// Read a big-endian `u64` from `data` at byte offset `off`.
113///
114/// Returns [`Error::CorruptObject`] if `data` does not contain 8 bytes at `off`,
115/// replacing the previous fixed-width-slice `.try_into().unwrap()` with real
116/// bounds handling (the success-path value is unchanged).
117fn read_be_u64(data: &[u8], off: usize) -> Result<u64> {
118    let end = off.checked_add(8).filter(|&e| e <= data.len());
119    let Some(end) = end else {
120        return Err(Error::CorruptObject(
121            "truncated MIDX data reading u64".to_owned(),
122        ));
123    };
124    let bytes: [u8; 8] = data[off..end]
125        .try_into()
126        .map_err(|_| Error::CorruptObject("truncated MIDX data reading u64".to_owned()))?;
127    Ok(u64::from_be_bytes(bytes))
128}
129
/// The subset of the fixed MIDX header that later parsing steps need.
struct MidxFileHeader {
    /// Number of chunks listed in the table of contents (header byte 6).
    num_chunks: u8,
}
133
134fn parse_midx_header(data: &[u8]) -> Result<(MidxFileHeader, usize, u8)> {
135    if data.len() < MIDX_HEADER_SIZE + 20 {
136        return Err(Error::CorruptObject("midx file too small".to_owned()));
137    }
138    let sig = read_be_u32(data, 0)?;
139    if sig != MIDX_SIGNATURE {
140        return Err(Error::CorruptObject("bad MIDX signature".to_owned()));
141    }
142    let version = data[4];
143    if version != MIDX_VERSION_V1 && version != MIDX_VERSION_V2 {
144        return Err(Error::CorruptObject(format!(
145            "multi-pack-index version {version} not recognized"
146        )));
147    }
148    let object_hash_bytes = data[5];
149    let num_chunks = data[6];
150    let _num_packs = read_be_u32(data, 8)?;
151    Ok((
152        MidxFileHeader { num_chunks },
153        MIDX_HEADER_SIZE,
154        object_hash_bytes,
155    ))
156}
157
158fn parse_pack_names_blob(pn: &[u8]) -> Result<Vec<String>> {
159    let mut names = Vec::new();
160    let mut start = 0usize;
161    for (i, &b) in pn.iter().enumerate() {
162        if b == 0 && i >= start {
163            if i > start {
164                let s = std::str::from_utf8(&pn[start..i])
165                    .map_err(|_| Error::CorruptObject("non-utf8 pack name in MIDX".to_owned()))?;
166                names.push(s.to_string());
167            }
168            start = i + 1;
169        }
170    }
171    Ok(names)
172}
173
/// Order a pack basename ending in either `.pack` or `.idx` against an MIDX
/// pack name (which ends in `.idx`).
///
/// The two compare equal when they are identical, or when they differ only in
/// that the first ends in `pack` where the second ends in `idx`. Otherwise the
/// result is the plain byte-wise ordering of the two names.
fn cmp_idx_or_pack_name(idx_or_pack_name: &str, idx_name: &str) -> std::cmp::Ordering {
    let lhs = idx_or_pack_name.as_bytes();
    let rhs = idx_name.as_bytes();
    // Length of the common prefix.
    let shared = lhs.iter().zip(rhs).take_while(|(x, y)| x == y).count();
    match (&lhs[shared..], &rhs[shared..]) {
        (b"pack", b"idx") => std::cmp::Ordering::Equal,
        (rest_lhs, rest_rhs) => rest_lhs.cmp(rest_rhs),
    }
}
190
191fn preferred_pack_index_by_mtime(pack_dir: &Path, names: &[String]) -> Result<Option<usize>> {
192    let mut best: Option<(usize, std::time::SystemTime)> = None;
193    for (i, n) in names.iter().enumerate() {
194        let meta = fs::metadata(pack_dir.join(n)).map_err(Error::Io)?;
195        let mtime = meta.modified().map_err(Error::Io)?;
196        match best {
197            None => best = Some((i, mtime)),
198            Some((_, t)) if mtime < t => best = Some((i, mtime)),
199            _ => {}
200        }
201    }
202    Ok(best.map(|(i, _)| i))
203}
204
/// Directory under `pack_dir` that holds incremental MIDX layers and the chain file.
fn midx_d_dir(pack_dir: &Path) -> std::path::PathBuf {
    let mut dir = pack_dir.to_path_buf();
    dir.push("multi-pack-index.d");
    dir
}
208
209fn chain_file_path(pack_dir: &Path) -> std::path::PathBuf {
210    midx_d_dir(pack_dir).join("multi-pack-index-chain")
211}
212
213fn read_chain_layer_hashes(pack_dir: &Path) -> Result<Vec<String>> {
214    let path = chain_file_path(pack_dir);
215    let f = fs::File::open(&path).map_err(Error::Io)?;
216    let mut out = Vec::new();
217    for line in BufReader::new(f).lines() {
218        let line = line.map_err(Error::Io)?;
219        let t = line.trim();
220        if t.is_empty() {
221            continue;
222        }
223        if t.len() != 40 || !t.chars().all(|c| c.is_ascii_hexdigit()) {
224            return Err(Error::CorruptObject(format!(
225                "invalid multi-pack-index chain line: {t}"
226            )));
227        }
228        out.push(t.to_ascii_lowercase());
229    }
230    Ok(out)
231}
232
233/// Resolve the path to the newest MIDX layer (root `multi-pack-index` or last chain entry).
234/// Return the MIDX hash-version byte expected for the repository owning `pack_dir`,
235/// mirroring git's `oid_version(r->hash_algo)` (SHA-1 → 1, SHA-256 → 2).
236///
237/// `pack_dir` is `<gitdir>/objects/pack`; the object format lives in the gitdir's
238/// `config` under `extensions.objectformat`. When the config cannot be read or the
239/// extension is absent, the default SHA-1 version (1) is returned.
240fn repo_midx_hash_version(pack_dir: &Path) -> u8 {
241    // pack_dir = <gitdir>/objects/pack -> gitdir = pack_dir/../..
242    let Some(objects_dir) = pack_dir.parent() else {
243        return HASH_VERSION_SHA1;
244    };
245    repo_midx_hash_version_for_objects_dir(objects_dir)
246}
247
248/// Like [`repo_midx_hash_version`] but starting from the `objects` directory.
249fn repo_midx_hash_version_for_objects_dir(objects_dir: &Path) -> u8 {
250    let Some(gitdir) = objects_dir.parent() else {
251        return HASH_VERSION_SHA1;
252    };
253    let config_path = gitdir.join("config");
254    let Ok(text) = fs::read_to_string(&config_path) else {
255        return HASH_VERSION_SHA1;
256    };
257    // Minimal scan for `[extensions]` ... `objectformat = sha256`. Section and key
258    // names are case-insensitive in git config; values are case-sensitive but git
259    // only accepts the literals "sha1"/"sha256".
260    let mut in_extensions = false;
261    for raw in text.lines() {
262        let line = raw.trim();
263        if line.starts_with('[') {
264            let section = line.trim_start_matches('[').trim_end_matches(']');
265            let name = section.split_whitespace().next().unwrap_or("");
266            in_extensions = name.eq_ignore_ascii_case("extensions");
267            continue;
268        }
269        if !in_extensions {
270            continue;
271        }
272        if let Some((key, value)) = line.split_once('=') {
273            if key.trim().eq_ignore_ascii_case("objectformat")
274                && value.trim().eq_ignore_ascii_case("sha256")
275            {
276                return HASH_VERSION_SHA256;
277            }
278        }
279    }
280    HASH_VERSION_SHA1
281}
282
283pub fn resolve_tip_midx_path(pack_dir: &Path) -> Option<std::path::PathBuf> {
284    let root = pack_dir.join("multi-pack-index");
285    if root.exists() {
286        return Some(root);
287    }
288    let hashes = read_chain_layer_hashes(pack_dir).ok()?;
289    let last = hashes.last()?;
290    Some(midx_d_dir(pack_dir).join(format!("multi-pack-index-{last}.midx")))
291}
292
293/// Resolve a specific MIDX layer file by its lowercase hex checksum. Searches the
294/// incremental chain (`multi-pack-index.d/multi-pack-index-<hash>.midx`) and the
295/// single-file root MIDX. Returns `None` when no layer matches that checksum.
296pub fn resolve_midx_layer_path(pack_dir: &Path, checksum: &str) -> Option<std::path::PathBuf> {
297    let checksum = checksum.to_ascii_lowercase();
298    if let Ok(hashes) = read_chain_layer_hashes(pack_dir) {
299        if hashes.contains(&checksum) {
300            return Some(midx_d_dir(pack_dir).join(format!("multi-pack-index-{checksum}.midx")));
301        }
302    }
303    let root = pack_dir.join("multi-pack-index");
304    if root.exists() {
305        if let Ok(hex) = midx_checksum_hex_from_path(&root) {
306            if hex == checksum {
307                return Some(root);
308            }
309        }
310    }
311    None
312}
313
314fn load_midx_file(path: &Path) -> Result<Vec<u8>> {
315    let data = fs::read(path).map_err(Error::Io)?;
316    let _ = parse_midx_header(&data)?;
317    Ok(data)
318}
319
320fn oids_and_packs_from_midx_data(data: &[u8]) -> Result<(HashSet<ObjectId>, Vec<String>)> {
321    let (_, hdr_end, _) = parse_midx_header(data)?;
322    let (pn_off, pn_len) = find_chunk(data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
323    let pack_names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
324    let (_ooff_off, ooff_len) = find_chunk(data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
325    let (oidl_off, oidl_len) = find_chunk(data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
326    let num_objects = ooff_len / 8;
327    if oidl_len != num_objects * 20 {
328        return Err(Error::CorruptObject(
329            "MIDX oid-lookup size mismatch".to_owned(),
330        ));
331    }
332    let mut oids = HashSet::with_capacity(num_objects);
333    for i in 0..num_objects {
334        let start = oidl_off + i * 20;
335        let oid = ObjectId::from_bytes(&data[start..start + 20])?;
336        oids.insert(oid);
337    }
338    Ok((oids, pack_names))
339}
340
341fn collect_incremental_base(pack_dir: &Path) -> Result<(HashSet<ObjectId>, HashSet<String>)> {
342    let mut oids = HashSet::new();
343    let mut packs = HashSet::new();
344    let root = pack_dir.join("multi-pack-index");
345    let chain_path = chain_file_path(pack_dir);
346    if chain_path.exists() {
347        for h in read_chain_layer_hashes(pack_dir)? {
348            let p = midx_d_dir(pack_dir).join(format!("multi-pack-index-{h}.midx"));
349            let data = load_midx_file(&p)?;
350            let (layer_oids, names) = oids_and_packs_from_midx_data(&data)?;
351            oids.extend(layer_oids);
352            for n in names {
353                packs.insert(n);
354            }
355        }
356        return Ok((oids, packs));
357    }
358    if root.exists() {
359        let data = load_midx_file(&root)?;
360        let (o, names) = oids_and_packs_from_midx_data(&data)?;
361        oids = o;
362        for n in names {
363            packs.insert(n);
364        }
365    }
366    Ok((oids, packs))
367}
368
369fn midx_checksum_hex_from_path(path: &Path) -> Result<String> {
370    let data = fs::read(path).map_err(Error::Io)?;
371    if data.len() < 20 {
372        return Err(Error::CorruptObject(
373            "midx too small for checksum".to_owned(),
374        ));
375    }
376    let hash = &data[data.len() - 20..];
377    Ok(hex::encode(hash))
378}
379
380fn hard_link_or_copy(src: &Path, dst: &Path) -> Result<()> {
381    let _ = fs::remove_file(dst);
382    if fs::hard_link(src, dst).is_ok() {
383        return Ok(());
384    }
385    fs::copy(src, dst).map_err(Error::Io)?;
386    Ok(())
387}
388
389fn link_root_midx_into_chain(pack_dir: &Path, root_checksum_hex: &str) -> Result<()> {
390    let midx_d = midx_d_dir(pack_dir);
391    fs::create_dir_all(&midx_d).map_err(Error::Io)?;
392    let dst_midx = midx_d.join(format!("multi-pack-index-{root_checksum_hex}.midx"));
393    hard_link_or_copy(&pack_dir.join("multi-pack-index"), &dst_midx)?;
394    let exts = ["bitmap", "rev"];
395    for ext in exts {
396        let src = pack_dir.join(format!("multi-pack-index-{root_checksum_hex}.{ext}"));
397        if src.exists() {
398            let dst = midx_d.join(format!("multi-pack-index-{root_checksum_hex}.{ext}"));
399            hard_link_or_copy(&src, &dst)?;
400        }
401    }
402    Ok(())
403}
404
405fn clear_stale_split_layers(pack_dir: &Path, keep: &[String]) -> Result<()> {
406    let midx_d = midx_d_dir(pack_dir);
407    if !midx_d.exists() {
408        return Ok(());
409    }
410    let keep: HashSet<&str> = keep.iter().map(|s| s.as_str()).collect();
411    for ent in fs::read_dir(&midx_d).map_err(Error::Io)? {
412        let ent = ent.map_err(Error::Io)?;
413        let name = ent.file_name().to_string_lossy().to_string();
414        let Some(rest) = name.strip_prefix("multi-pack-index-") else {
415            continue;
416        };
417        let Some((hash_part, _ext)) = rest.split_once('.') else {
418            continue;
419        };
420        if hash_part.len() == 40 && !keep.contains(hash_part) {
421            let _ = fs::remove_file(ent.path());
422        }
423    }
424    Ok(())
425}
426
427/// Remove every incremental MIDX layer file (`multi-pack-index-<hash>.midx`,
428/// `.bitmap`, `.rev`) from `multi-pack-index.d/` and unlink the chain file, but
429/// leave the (now empty) directory in place.
430///
431/// This mirrors git's `clear_incremental_midx_files_ext` plus the chain unlink in
432/// `clear_midx_files` for a non-incremental write: git iterates the directory and
433/// `unlink`s the matching files individually and never `rmdir`s the directory, so
434/// a single-file MIDX write leaves an empty `multi-pack-index.d/` behind rather
435/// than removing it (see t5334 "convert incremental to non-incremental").
436fn clear_incremental_midx_files(pack_dir: &Path) -> Result<()> {
437    let midx_d = midx_d_dir(pack_dir);
438    // Unlink the chain file regardless of whether other entries remain.
439    let _ = fs::remove_file(chain_file_path(pack_dir));
440    if !midx_d.exists() {
441        return Ok(());
442    }
443    for ent in fs::read_dir(&midx_d).map_err(Error::Io)? {
444        let ent = ent.map_err(Error::Io)?;
445        let name = ent.file_name().to_string_lossy().to_string();
446        if name.starts_with("multi-pack-index-")
447            && (name.ends_with(".midx") || name.ends_with(".bitmap") || name.ends_with(".rev"))
448        {
449            let _ = fs::remove_file(ent.path());
450        }
451    }
452    Ok(())
453}
454
455fn pack_mtime_for_midx(idx: &PackIndex) -> std::time::SystemTime {
456    fs::metadata(&idx.pack_path)
457        .and_then(|m| m.modified())
458        .unwrap_or(std::time::SystemTime::UNIX_EPOCH)
459}
460
461fn midx_pick_better_entry(
462    cur: &MidxEntry,
463    cand_pack: u32,
464    cand_offset: u64,
465    cand_mtime: std::time::SystemTime,
466    preferred_pack: Option<u32>,
467) -> bool {
468    let cur_pref = preferred_pack == Some(cur.pack_id);
469    let new_pref = preferred_pack == Some(cand_pack);
470    if new_pref && !cur_pref {
471        return true;
472    }
473    if cur_pref && !new_pref {
474        return false;
475    }
476    match cand_mtime.cmp(&cur.pack_mtime) {
477        std::cmp::Ordering::Greater => true,
478        std::cmp::Ordering::Less => false,
479        std::cmp::Ordering::Equal => {
480            if cand_pack != cur.pack_id {
481                cand_pack < cur.pack_id
482            } else {
483                cand_offset < cur.offset
484            }
485        }
486    }
487}
488
/// Build a MIDX layer's bytes, omitting objects whose OID is present in
/// `exclude_oids` (the base chain for incremental layers and compaction, where
/// objects already provided by a lower layer must not be repeated). Pass `None`
/// for a full (non-incremental) MIDX.
///
/// # Parameters
///
/// * `idx_names` - pack index names written verbatim into the pack-names chunk.
/// * `indexes` - the parsed pack indexes; an entry's position is its pack id.
/// * `preferred_idx` - pack id whose copy of a duplicated object always wins.
/// * `write_bitmap_placeholders` - emit the BTMP chunk (and RIDX unless omitted).
/// * `omit_embedded_ridx_chunk` - leave the RIDX chunk out of the file.
/// * `version` - header version; any value other than `MIDX_VERSION_V1` is
///   written as `MIDX_VERSION_V2`.
/// * `hash_version` - header hash-version byte, written as given.
/// * `exclude_oids` - objects to leave out of this layer.
///
/// # Returns
///
/// The complete file image (header, chunk table, chunks, trailing checksum),
/// plus the pack-order permutation when both `omit_embedded_ridx_chunk` and
/// `write_bitmap_placeholders` are set, so the caller can write it elsewhere.
///
/// # Errors
///
/// Returns [`Error::CorruptObject`] when there are more packs, large offsets
/// or chunks than the format's fixed-width fields can count.
// NOTE(review): the trailing checksum is always SHA-1 and only 20-byte OIDs
// are indexed, independent of `hash_version` — confirm SHA-256 repositories
// are handled by the caller.
#[allow(clippy::too_many_arguments)]
fn build_midx_bytes_filtered(
    idx_names: &[String],
    indexes: &[PackIndex],
    preferred_idx: Option<usize>,
    write_bitmap_placeholders: bool,
    omit_embedded_ridx_chunk: bool,
    version: u8,
    hash_version: u8,
    exclude_oids: Option<&HashSet<ObjectId>>,
) -> Result<(Vec<u8>, Option<Vec<u32>>)> {
    let preferred_pack_idx = preferred_idx.map(|p| p as u32);
    let pack_mtimes: Vec<std::time::SystemTime> = indexes.iter().map(pack_mtime_for_midx).collect();

    // Deduplicate: keep one entry per OID, chosen by `midx_pick_better_entry`.
    let mut best: HashMap<ObjectId, MidxEntry> = HashMap::new();
    for (pack_id, idx) in indexes.iter().enumerate() {
        let pack_id = u32::try_from(pack_id).map_err(|_| {
            Error::CorruptObject("too many pack files for multi-pack-index".to_owned())
        })?;
        let mtime = pack_mtimes[pack_id as usize];
        for e in &idx.entries {
            // Entries whose OID is not 20 bytes, or does not parse, are skipped silently.
            if e.oid.len() != 20 {
                continue;
            }
            let Ok(oid) = ObjectId::from_bytes(&e.oid) else {
                continue;
            };
            if let Some(ex) = exclude_oids {
                if ex.contains(&oid) {
                    continue;
                }
            }
            let cand = MidxEntry {
                oid,
                pack_id,
                offset: e.offset,
                pack_mtime: mtime,
            };
            match best.get(&oid) {
                None => {
                    best.insert(oid, cand);
                }
                Some(cur) => {
                    if midx_pick_better_entry(cur, pack_id, e.offset, mtime, preferred_pack_idx) {
                        best.insert(oid, cand);
                    }
                }
            }
        }
    }

    // The lookup chunks require entries in ascending OID order.
    let mut entries: Vec<MidxEntry> = best.into_values().collect();
    entries.sort_by_key(|a| a.oid);

    // Decide how object offsets are encoded, mirroring git/midx-write.c.
    // `large_offsets_needed` becomes true only when some offset cannot fit in a
    // 32-bit field (> 0xffffffff); in that mode every offset that does not fit in
    // 31 bits (> 0x7fffffff) is stored in the 64-bit large-offset (LOFF) chunk and
    // its 32-bit slot is `MIDX_LARGE_OFFSET_NEEDED | slot`. When no offset exceeds
    // 32 bits, offsets in [2^31, 2^32) are written directly as raw 32-bit values
    // and no LOFF chunk is emitted.
    let large_offsets_needed = entries.iter().any(|e| e.offset > u64::from(u32::MAX));

    let num_packs = indexes.len() as u32;

    // PNAM: NUL-terminated pack names, zero-padded to the chunk alignment.
    let mut pack_names_blob = Vec::new();
    for name in idx_names {
        pack_names_blob.extend_from_slice(name.as_bytes());
        pack_names_blob.push(0);
    }
    let pad = (MIDX_CHUNK_ALIGNMENT - (pack_names_blob.len() % MIDX_CHUNK_ALIGNMENT))
        % MIDX_CHUNK_ALIGNMENT;
    pack_names_blob.extend(std::iter::repeat_n(0u8, pad));
    let chunk_pnam = pack_names_blob;

    // OIDF: 256 cumulative counts; slot `i` is the number of entries whose
    // first OID byte is <= `i`.
    let mut chunk_oidf = vec![0u8; 256 * 4];
    let mut j = 0usize;
    for i in 0..256 {
        while j < entries.len() && entries[j].oid.as_bytes()[0] <= i as u8 {
            j += 1;
        }
        chunk_oidf[i * 4..(i + 1) * 4].copy_from_slice(&(j as u32).to_be_bytes());
    }

    // OIDL: the sorted OIDs back to back.
    let mut chunk_oidl = Vec::with_capacity(entries.len() * 20);
    for e in &entries {
        chunk_oidl.extend_from_slice(e.oid.as_bytes());
    }

    // OOFF: per entry, the pack id followed by the (possibly indirect) offset.
    let mut large_offsets: Vec<u64> = Vec::new();
    let mut chunk_ooff = Vec::with_capacity(entries.len() * 8);
    for e in &entries {
        chunk_ooff.extend_from_slice(&e.pack_id.to_be_bytes());
        let encoded = if large_offsets_needed && e.offset >> 31 != 0 {
            let slot = u32::try_from(large_offsets.len()).map_err(|_| {
                Error::CorruptObject("too many large offsets in multi-pack-index".to_owned())
            })?;
            large_offsets.push(e.offset);
            MIDX_LARGE_OFFSET_NEEDED | slot
        } else {
            // When large offsets are not needed, an offset in [2^31, 2^32) is
            // written verbatim (truncation via `as u32` is exact here because the
            // value fits in 32 bits).
            e.offset as u32
        };
        chunk_ooff.extend_from_slice(&encoded.to_be_bytes());
    }

    // LOFF: the 64-bit offsets referenced by slot number from OOFF.
    let chunk_loff: Vec<u8> = if large_offsets.is_empty() {
        Vec::new()
    } else {
        let mut v = Vec::with_capacity(large_offsets.len() * 8);
        for off in &large_offsets {
            v.extend_from_slice(&off.to_be_bytes());
        }
        v
    };

    // Pack order: preferred pack first, then by pack id, then by offset. Each
    // element of `order` is an index into the OID-sorted `entries`.
    let pref = preferred_pack_idx;
    let mut order: Vec<u32> = (0..entries.len() as u32).collect();
    order.sort_by(|&ai, &bi| {
        let a = &entries[ai as usize];
        let b = &entries[bi as usize];
        let a_pref = pref == Some(a.pack_id);
        let b_pref = pref == Some(b.pack_id);
        b_pref
            .cmp(&a_pref)
            .then_with(|| a.pack_id.cmp(&b.pack_id))
            .then_with(|| a.offset.cmp(&b.offset))
            .then_with(|| ai.cmp(&bi))
    });

    // RIDX: the pack order as big-endian OID-table positions.
    let mut chunk_ridx = Vec::with_capacity(entries.len() * 4);
    for oid_idx in &order {
        chunk_ridx.extend_from_slice(&oid_idx.to_be_bytes());
    }

    // BTMP: per-pack (bitmap_pos, bitmap_nr) in the pseudo-bitmap namespace, matching Git's
    // `write_midx_bitmapped_packs` (cumulative start + object count per pack).
    let rev_sidecar_order = if omit_embedded_ridx_chunk && write_bitmap_placeholders {
        Some(order.clone())
    } else {
        None
    };
    let chunk_btmp: Vec<u8> = if write_bitmap_placeholders {
        // Per-pack `(bitmap_pos, bitmap_nr)`: position of the pack's first object in
        // the MIDX pack-order traversal and the number of (deduplicated) MIDX objects
        // selected from that pack — matching `write_midx_bitmapped_packs` in
        // git/midx-write.c (counts MIDX entries per pack, not raw idx entry counts).
        let num_packs_usize = indexes.len();
        // `u32::MAX` marks "no object from this pack seen yet".
        let mut bitmap_pos = vec![u32::MAX; num_packs_usize];
        let mut bitmap_nr = vec![0u32; num_packs_usize];
        for (rank, &oid_idx) in order.iter().enumerate() {
            let pack = entries[oid_idx as usize].pack_id as usize;
            if let Some(p) = bitmap_pos.get_mut(pack) {
                if *p == u32::MAX {
                    *p = rank as u32;
                }
            }
            if let Some(n) = bitmap_nr.get_mut(pack) {
                *n += 1;
            }
        }
        let mut v = Vec::new();
        for pack in 0..num_packs_usize {
            // A pack that contributed no objects is recorded at position 0.
            let pos = if bitmap_pos[pack] == u32::MAX {
                0
            } else {
                bitmap_pos[pack]
            };
            v.extend_from_slice(&pos.to_be_bytes());
            v.extend_from_slice(&bitmap_nr[pack].to_be_bytes());
        }
        let pad = (MIDX_CHUNK_ALIGNMENT - (v.len() % MIDX_CHUNK_ALIGNMENT)) % MIDX_CHUNK_ALIGNMENT;
        v.extend(std::iter::repeat_n(0u8, pad));
        v
    } else {
        Vec::new()
    };

    // Chunks appear in the file in exactly this order.
    let mut chunks: Vec<(u32, Vec<u8>)> = vec![
        (MIDX_CHUNKID_PACKNAMES, chunk_pnam),
        (MIDX_CHUNKID_OIDFANOUT, chunk_oidf),
        (MIDX_CHUNKID_OIDLOOKUP, chunk_oidl),
        (MIDX_CHUNKID_OBJECTOFFSETS, chunk_ooff),
    ];
    if !chunk_loff.is_empty() {
        chunks.push((MIDX_CHUNKID_LARGEOFFSETS, chunk_loff));
    }
    if (pref.is_some() || write_bitmap_placeholders) && !omit_embedded_ridx_chunk {
        chunks.push((MIDX_CHUNKID_REVINDEX, chunk_ridx));
    }
    if write_bitmap_placeholders {
        chunks.push((MIDX_CHUNKID_BITMAPPED_PACKS, chunk_btmp));
    }

    let num_chunks: u8 = chunks
        .len()
        .try_into()
        .map_err(|_| Error::CorruptObject("too many MIDX chunks".to_owned()))?;

    // Table of contents: one (id, absolute offset) entry per chunk plus a
    // terminating entry with id 0 whose offset marks the end of the last chunk.
    let mut body = Vec::new();
    let mut cur_offset =
        MIDX_HEADER_SIZE as u64 + ((chunks.len() + 1) * CHUNK_TOC_ENTRY_SIZE) as u64;

    for (id, data) in &chunks {
        body.extend_from_slice(&id.to_be_bytes());
        body.extend_from_slice(&cur_offset.to_be_bytes());
        cur_offset += data.len() as u64;
    }
    body.extend_from_slice(&0u32.to_be_bytes());
    body.extend_from_slice(&cur_offset.to_be_bytes());

    for (_, data) in &chunks {
        body.extend_from_slice(data);
    }

    // Header: signature, version, hash version, chunk count, a zero byte, pack count.
    let mut out = Vec::with_capacity(MIDX_HEADER_SIZE + body.len() + 20);
    out.extend_from_slice(&MIDX_SIGNATURE.to_be_bytes());
    out.push(if version == MIDX_VERSION_V1 {
        MIDX_VERSION_V1
    } else {
        MIDX_VERSION_V2
    });
    out.push(hash_version);
    out.push(num_chunks);
    out.push(0);
    out.extend_from_slice(&num_packs.to_be_bytes());
    out.extend_from_slice(&body);

    // Trailer: SHA-1 over everything written so far.
    let mut hasher = Sha1::new();
    hasher.update(&out);
    let hash = hasher.finalize();
    out.extend_from_slice(&hash);

    Ok((out, rev_sidecar_order))
}
730
731/// Standalone MIDX `.rev` file (Git `write_rev_file_order` / `RIDX_SIGNATURE`).
732fn write_midx_rev_sidecar(
733    path: &Path,
734    pack_order: &[u32],
735    midx_file_hash: &[u8; 20],
736) -> Result<()> {
737    let mut body = Vec::with_capacity(RIDX_HEADER_SIZE + pack_order.len() * 4 + 20);
738    body.extend_from_slice(&RIDX_SIGNATURE.to_be_bytes());
739    body.extend_from_slice(&RIDX_VERSION.to_be_bytes());
740    body.extend_from_slice(&1u32.to_be_bytes());
741    for idx in pack_order {
742        body.extend_from_slice(&idx.to_be_bytes());
743    }
744    body.extend_from_slice(midx_file_hash);
745    fs::write(path, body).map_err(Error::Io)
746}
747
748fn find_chunk(data: &[u8], header_end: usize, chunk_id: u32) -> Result<(usize, usize)> {
749    let (hdr, _, _) = parse_midx_header(data)?;
750    let n = hdr.num_chunks as usize;
751    let pos = header_end;
752    let toc_end = pos + (n + 1) * CHUNK_TOC_ENTRY_SIZE;
753    if data.len() < toc_end + 20 {
754        return Err(Error::CorruptObject(
755            "truncated MIDX chunk table".to_owned(),
756        ));
757    }
758    for i in 0..n {
759        let base = pos + i * CHUNK_TOC_ENTRY_SIZE;
760        let id = read_be_u32(data, base)?;
761        let off = read_be_u64(data, base + 4)? as usize;
762        if id == chunk_id {
763            let next_off = if i + 1 < n {
764                let nb = pos + (i + 1) * CHUNK_TOC_ENTRY_SIZE;
765                read_be_u64(data, nb + 4)? as usize
766            } else {
767                let term = pos + n * CHUNK_TOC_ENTRY_SIZE;
768                read_be_u64(data, term + 4)? as usize
769            };
770            return Ok((off, next_off.saturating_sub(off)));
771        }
772    }
773    Err(Error::CorruptObject(format!(
774        "MIDX chunk {chunk_id:08x} not found"
775    )))
776}
777
/// Fatal failure while loading a MIDX — the cases where Git calls `die()` in
/// `load_multi_pack_index`.
///
/// The wrapped string is the message exactly as Git prints it, minus the
/// `error:`/`fatal:` prefix. [`Display`](std::fmt::Display) emits it verbatim.
#[derive(Debug, Clone)]
pub struct MidxLoadError(pub String);

impl std::fmt::Display for MidxLoadError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.0)
    }
}
789
/// Parsed table-of-contents entry: `(chunk_id, file_offset)`.
struct TocEntry {
    /// Four-byte chunk identifier.
    id: u32,
    /// Where the chunk starts — presumably an absolute byte offset into the
    /// MIDX file; confirm against the code that fills this in.
    offset: usize,
}
795
796/// Walk the MIDX chunk table of contents, mirroring `read_table_of_contents`
797/// in `git/chunk-format.c`. Returns the chunk list plus any reported errors,
798/// or a fatal `MidxLoadError` for the conditions Git treats as `die()`-worthy.
799fn parse_midx_toc(
800    data: &[u8],
801    hash_len: usize,
802    errors: &mut Vec<String>,
803) -> std::result::Result<Vec<TocEntry>, MidxLoadError> {
804    if data.len() < MIDX_HEADER_SIZE + hash_len {
805        return Err(MidxLoadError("multi-pack-index file too small".to_owned()));
806    }
807    let num_chunks = data[6] as usize;
808    let toc_off = MIDX_HEADER_SIZE;
809    let needed = toc_off + (num_chunks + 1) * CHUNK_TOC_ENTRY_SIZE;
810    if data.len() < needed {
811        return Err(MidxLoadError(
812            "multi-pack-index chunk table is truncated".to_owned(),
813        ));
814    }
815    let file_size = data.len();
816    let mut chunks: Vec<TocEntry> = Vec::with_capacity(num_chunks);
817
818    let read_be64 = |off: usize| -> u64 {
819        let mut b = [0u8; 8];
820        b.copy_from_slice(&data[off..off + 8]);
821        u64::from_be_bytes(b)
822    };
823    let read_be32 = |off: usize| -> u32 {
824        let mut b = [0u8; 4];
825        b.copy_from_slice(&data[off..off + 4]);
826        u32::from_be_bytes(b)
827    };
828
829    for i in 0..num_chunks {
830        let entry = toc_off + i * CHUNK_TOC_ENTRY_SIZE;
831        let chunk_id = read_be32(entry);
832        let chunk_offset = read_be64(entry + 4);
833
834        if chunk_id == 0 {
835            errors.push("terminating chunk id appears earlier than expected".to_owned());
836            return Err(MidxLoadError(
837                "multi-pack-index required pack-name chunk missing or corrupted".to_owned(),
838            ));
839        }
840        if !(chunk_offset as usize).is_multiple_of(MIDX_CHUNK_ALIGNMENT) {
841            errors.push(format!(
842                "chunk id {chunk_id:x} not {MIDX_CHUNK_ALIGNMENT}-byte aligned"
843            ));
844            return Err(MidxLoadError(
845                "multi-pack-index required pack-name chunk missing or corrupted".to_owned(),
846            ));
847        }
848
849        let next_entry = toc_off + (i + 1) * CHUNK_TOC_ENTRY_SIZE;
850        let next_chunk_offset = read_be64(next_entry + 4);
851
852        if next_chunk_offset < chunk_offset
853            || next_chunk_offset > (file_size as u64).saturating_sub(hash_len as u64)
854        {
855            errors.push(format!(
856                "improper chunk offset(s) {chunk_offset:x} and {next_chunk_offset:x}"
857            ));
858            return Err(MidxLoadError(
859                "multi-pack-index required pack-name chunk missing or corrupted".to_owned(),
860            ));
861        }
862
863        if chunks.iter().any(|c| c.id == chunk_id) {
864            errors.push(format!("duplicate chunk ID {chunk_id:x} found"));
865            return Err(MidxLoadError(
866                "multi-pack-index required pack-name chunk missing or corrupted".to_owned(),
867            ));
868        }
869
870        chunks.push(TocEntry {
871            id: chunk_id,
872            offset: chunk_offset as usize,
873        });
874    }
875
876    // Terminating TOC entry must have chunk id 0.
877    let term_entry = toc_off + num_chunks * CHUNK_TOC_ENTRY_SIZE;
878    let final_id = read_be32(term_entry);
879    if final_id != 0 {
880        errors.push(format!("final chunk has non-zero id {final_id:x}"));
881        return Err(MidxLoadError(
882            "multi-pack-index required pack-name chunk missing or corrupted".to_owned(),
883        ));
884    }
885
886    Ok(chunks)
887}
888
889/// Look up `(start, len)` of a chunk in a parsed TOC.
890fn toc_chunk_range(chunks: &[TocEntry], data_len: usize, id: u32) -> Option<(usize, usize)> {
891    for (i, c) in chunks.iter().enumerate() {
892        if c.id == id {
893            let next = if i + 1 < chunks.len() {
894                chunks[i + 1].offset
895            } else {
896                data_len.saturating_sub(20)
897            };
898            return Some((c.offset, next.saturating_sub(c.offset)));
899        }
900    }
901    None
902}
903
904/// Full multi-pack-index verification, mirroring `verify_midx_file` in `git/midx.c`
905/// plus the `die()`/`error()` conditions in `load_multi_pack_index`. On any problem
906/// returns the list of error lines (without `error:`/`fatal:` prefixes) in the order
907/// Git emits them; an empty list means the MIDX is valid.
908///
909/// `objects_dir` is the object database (e.g. `.git/objects`).
910pub fn verify_midx(objects_dir: &Path) -> std::result::Result<(), Vec<String>> {
911    let pack_dir = objects_dir.join("pack");
912    let path = match resolve_tip_midx_path(&pack_dir) {
913        Some(p) => p,
914        None => return Ok(()),
915    };
916    let data = match fs::read(&path) {
917        Ok(d) => d,
918        Err(_) => return Ok(()),
919    };
920
921    let mut fatal: Vec<String> = Vec::new();
922    let mut errors: Vec<String> = Vec::new();
923
924    // --- header checks (load_multi_pack_index) ---
925    if data.len() < MIDX_HEADER_SIZE + 20 {
926        return Err(vec!["multi-pack-index file is too small".to_owned()]);
927    }
928    let sig = u32::from_be_bytes([data[0], data[1], data[2], data[3]]);
929    if sig != MIDX_SIGNATURE {
930        return Err(vec![format!(
931            "multi-pack-index signature 0x{sig:08x} does not match signature 0x{MIDX_SIGNATURE:08x}"
932        )]);
933    }
934    let version = data[4];
935    if version != MIDX_VERSION_V1 && version != MIDX_VERSION_V2 {
936        return Err(vec![format!(
937            "multi-pack-index version {version} not recognized"
938        )]);
939    }
940    let hash_version = data[5];
941    let expected_hash_version = repo_midx_hash_version_for_objects_dir(objects_dir);
942    if hash_version != expected_hash_version {
943        return Err(vec![format!(
944            "multi-pack-index hash version {hash_version} does not match version {expected_hash_version}"
945        )]);
946    }
947    let hash_len = 20usize;
948    let num_packs = u32::from_be_bytes([data[8], data[9], data[10], data[11]]) as usize;
949
950    // --- table of contents ---
951    let chunks = match parse_midx_toc(&data, hash_len, &mut errors) {
952        Ok(c) => c,
953        Err(e) => {
954            errors.push(e.0);
955            return Err(errors);
956        }
957    };
958
959    // required pack-names chunk
960    let Some((pn_off, pn_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_PACKNAMES)
961    else {
962        errors.push("multi-pack-index required pack-name chunk missing or corrupted".to_owned());
963        return Err(errors);
964    };
965
966    // oid-fanout chunk + ordering check
967    let Some((fan_off, fan_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OIDFANOUT)
968    else {
969        errors.push("multi-pack-index required OID fanout chunk missing or corrupted".to_owned());
970        return Err(errors);
971    };
972    if fan_len != 256 * 4 {
973        errors.push("multi-pack-index OID fanout is of the wrong size".to_owned());
974        errors.push("multi-pack-index required OID fanout chunk missing or corrupted".to_owned());
975        return Err(errors);
976    }
977    let fanout = |i: usize| -> u32 {
978        let b = fan_off + i * 4;
979        u32::from_be_bytes([data[b], data[b + 1], data[b + 2], data[b + 3]])
980    };
981    for i in 0..255 {
982        let f1 = fanout(i);
983        let f2 = fanout(i + 1);
984        if f1 > f2 {
985            errors.push(format!(
986                "oid fanout out of order: fanout[{i}] = {f1:x} > {f2:x} = fanout[{}]",
987                i + 1
988            ));
989            errors
990                .push("multi-pack-index required OID fanout chunk missing or corrupted".to_owned());
991            return Err(errors);
992        }
993    }
994    let num_objects = fanout(255) as usize;
995
996    // oid-lookup chunk (size depends on num_objects)
997    let Some((oidl_off, oidl_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OIDLOOKUP)
998    else {
999        errors.push("multi-pack-index required OID lookup chunk missing or corrupted".to_owned());
1000        return Err(errors);
1001    };
1002    if oidl_len != hash_len * num_objects {
1003        errors.push("multi-pack-index OID lookup chunk is the wrong size".to_owned());
1004        errors.push("multi-pack-index required OID lookup chunk missing or corrupted".to_owned());
1005        return Err(errors);
1006    }
1007
1008    // object-offsets chunk
1009    let Some((ooff_off, ooff_len)) =
1010        toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OBJECTOFFSETS)
1011    else {
1012        errors
1013            .push("multi-pack-index required object offsets chunk missing or corrupted".to_owned());
1014        return Err(errors);
1015    };
1016    if ooff_len != num_objects * 8 {
1017        errors.push("multi-pack-index object offset chunk is the wrong size".to_owned());
1018        errors
1019            .push("multi-pack-index required object offsets chunk missing or corrupted".to_owned());
1020        return Err(errors);
1021    }
1022
1023    let large_off = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_LARGEOFFSETS);
1024
1025    // pack names: parse and (for V1) verify ordering.
1026    let names = match parse_pack_names_blob(&data[pn_off..pn_off + pn_len]) {
1027        Ok(n) => n,
1028        Err(_) => {
1029            errors.push("multi-pack-index pack-name chunk is too short".to_owned());
1030            return Err(errors);
1031        }
1032    };
1033    if version == MIDX_VERSION_V1 {
1034        for i in 1..names.len() {
1035            if names[i] <= names[i - 1] {
1036                fatal.push(format!(
1037                    "multi-pack-index pack names out of order: '{}' before '{}'",
1038                    names[i - 1],
1039                    names[i]
1040                ));
1041                // Git die()s here while loading; surface immediately.
1042                errors.extend(fatal);
1043                return Err(errors);
1044            }
1045        }
1046    }
1047
1048    // --- checksum ---
1049    if !midx_checksum_is_valid(&data) {
1050        errors.push("incorrect checksum".to_owned());
1051    }
1052
1053    // --- load each referenced pack (failed to load pack) ---
1054    let mut pack_indexes: Vec<Option<PackIndex>> = Vec::with_capacity(num_packs);
1055    for i in 0..num_packs {
1056        // Load the pack idx without verifying its trailing checksum: `git
1057        // multi-pack-index verify` uses `open_pack_index`, which only parses the
1058        // index header/tables. The 64-bit-offset tests deliberately corrupt a
1059        // pack `.idx` (invalidating its checksum) and still expect the MIDX
1060        // verify to read recorded offsets out of that idx for comparison.
1061        let loaded = match names.get(i) {
1062            Some(name) => read_pack_index_no_verify(&pack_dir.join(name)).ok(),
1063            None => None,
1064        };
1065        if loaded.is_none() {
1066            errors.push(format!("failed to load pack in position {i}"));
1067        }
1068        pack_indexes.push(loaded);
1069    }
1070
1071    if num_objects == 0 {
1072        errors.push("the midx contains no oid".to_owned());
1073        if errors.is_empty() {
1074            return Ok(());
1075        }
1076        return Err(errors);
1077    }
1078
1079    // --- OID lookup order ---
1080    let oid_at =
1081        |i: usize| -> &[u8] { &data[oidl_off + i * hash_len..oidl_off + (i + 1) * hash_len] };
1082    for i in 0..num_objects.saturating_sub(1) {
1083        let a = oid_at(i);
1084        let b = oid_at(i + 1);
1085        if a >= b {
1086            errors.push(format!(
1087                "oid lookup out of order: oid[{i}] = {} >= {} = oid[{}]",
1088                hex::encode(a),
1089                hex::encode(b),
1090                i + 1
1091            ));
1092        }
1093    }
1094
1095    // --- object offsets vs pack index ---
1096    for i in 0..num_objects {
1097        let ob = ooff_off + i * 8;
1098        let pack_int_id = u32::from_be_bytes([data[ob], data[ob + 1], data[ob + 2], data[ob + 3]]);
1099        let off_raw = u32::from_be_bytes([data[ob + 4], data[ob + 5], data[ob + 6], data[ob + 7]]);
1100        let oid_hex = hex::encode(oid_at(i));
1101
1102        if pack_int_id as usize >= num_packs {
1103            errors.push(format!(
1104                "bad pack-int-id: {pack_int_id} ({num_packs} total packs)"
1105            ));
1106            errors.push(format!(
1107                "failed to load pack entry for oid[{i}] = {oid_hex}"
1108            ));
1109            continue;
1110        }
1111
1112        // resolve MIDX-recorded offset (handle large offsets)
1113        let m_offset: u64 = if off_raw & MIDX_LARGE_OFFSET_NEEDED != 0 {
1114            let slot = (off_raw & !MIDX_LARGE_OFFSET_NEEDED) as usize;
1115            match large_off {
1116                Some((lo_off, lo_len)) if (slot + 1) * 8 <= lo_len => {
1117                    let b = lo_off + slot * 8;
1118                    let mut arr = [0u8; 8];
1119                    arr.copy_from_slice(&data[b..b + 8]);
1120                    u64::from_be_bytes(arr)
1121                }
1122                _ => {
1123                    errors.push("multi-pack-index large offset out of bounds".to_owned());
1124                    continue;
1125                }
1126            }
1127        } else {
1128            u64::from(off_raw)
1129        };
1130
1131        let Some(Some(idx)) = pack_indexes.get(pack_int_id as usize) else {
1132            errors.push(format!(
1133                "failed to load pack entry for oid[{i}] = {oid_hex}"
1134            ));
1135            continue;
1136        };
1137        let Ok(oid) = ObjectId::from_bytes(oid_at(i)) else {
1138            errors.push(format!(
1139                "failed to load pack entry for oid[{i}] = {oid_hex}"
1140            ));
1141            continue;
1142        };
1143        match idx.find_offset(&oid) {
1144            Some(p_offset) => {
1145                if m_offset != p_offset {
1146                    errors.push(format!(
1147                        "incorrect object offset for oid[{i}] = {oid_hex}: {m_offset:x} != {p_offset:x}"
1148                    ));
1149                }
1150            }
1151            None => {
1152                errors.push(format!(
1153                    "failed to load pack entry for oid[{i}] = {oid_hex}"
1154                ));
1155            }
1156        }
1157    }
1158
1159    if errors.is_empty() {
1160        Ok(())
1161    } else {
1162        Err(errors)
1163    }
1164}
1165
1166/// Validate the trailing SHA-1 of an in-memory MIDX image.
1167fn midx_checksum_is_valid(data: &[u8]) -> bool {
1168    if data.len() < 20 {
1169        return false;
1170    }
1171    let body = &data[..data.len() - 20];
1172    let stored = &data[data.len() - 20..];
1173    let mut hasher = Sha1::new();
1174    hasher.update(body);
1175    let got = hasher.finalize();
1176    got.as_slice() == stored
1177}
1178
1179/// Return the `pack-*.idx` basename for the MIDX preferred pack (RIDX position 0).
1180///
1181/// `objects_dir` is the repository object database (e.g. `.git/objects`), not `objects/pack`.
1182///
1183/// Used by `test-tool read-midx --preferred-pack` compatibility.
1184/// Pack index basenames (`pack-*.idx`) stored in the MIDX pack-names chunk.
1185pub fn read_midx_pack_idx_names(objects_dir: &Path) -> Result<Vec<String>> {
1186    let pack_dir = objects_dir.join("pack");
1187    let path = resolve_tip_midx_path(&pack_dir)
1188        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
1189    let data = fs::read(&path).map_err(Error::Io)?;
1190    let (_, hdr_end, _) = parse_midx_header(&data)?;
1191    let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1192    parse_pack_names_blob(&data[pn_off..pn_off + pn_len])
1193}
1194
/// A single MIDX-referenced object together with the pack it is attributed to.
pub struct MidxObjectRef {
    /// Object id, as stored in the MIDX OID-lookup chunk.
    pub oid: ObjectId,
    /// Index into the pack-names list returned alongside this.
    pub pack_int_id: usize,
}
1201
1202/// Read the tip MIDX and return `(pack_names, objects)`, where each object names
1203/// the pack it is attributed to (`pack_int_id`). Mirrors the per-object
1204/// `nth_midxed_pack_int_id` iteration in Git used by expire/repack.
1205pub fn read_midx_objects(objects_dir: &Path) -> Result<(Vec<String>, Vec<MidxObjectRef>)> {
1206    let pack_dir = objects_dir.join("pack");
1207    let path = resolve_tip_midx_path(&pack_dir)
1208        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
1209    let data = fs::read(&path).map_err(Error::Io)?;
1210    let (_, hdr_end, _) = parse_midx_header(&data)?;
1211    let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1212    let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
1213    let (oidl_off, oidl_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
1214    let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1215    if oidl_len % 20 != 0 || ooff_len % 8 != 0 {
1216        return Err(Error::CorruptObject(
1217            "bad MIDX oid-lookup / object-offsets size".to_owned(),
1218        ));
1219    }
1220    let num = oidl_len / 20;
1221    if num * 8 != ooff_len {
1222        return Err(Error::CorruptObject(
1223            "MIDX oid count does not match object-offsets".to_owned(),
1224        ));
1225    }
1226    let mut objects = Vec::with_capacity(num);
1227    for i in 0..num {
1228        let oid = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])
1229            .map_err(|e| Error::CorruptObject(e.to_string()))?;
1230        let base = ooff_off + i * 8;
1231        let pack_id = read_be_u32(&data, base)? as usize;
1232        objects.push(MidxObjectRef {
1233            oid,
1234            pack_int_id: pack_id,
1235        });
1236    }
1237    Ok((names, objects))
1238}
1239
1240/// Trailing 40-character SHA-1 hex of the active MIDX (root or chain tip).
1241pub fn midx_checksum_hex(objects_dir: &Path) -> Result<String> {
1242    let pack_dir = objects_dir.join("pack");
1243    let path = resolve_tip_midx_path(&pack_dir)
1244        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
1245    midx_checksum_hex_from_path(&path)
1246}
1247
1248/// Resolve the MIDX file to read for `test-tool read-midx`: a specific layer when
1249/// `checksum` is `Some`, otherwise the chain tip / root MIDX. A checksum that does
1250/// not name any layer yields a `could not find MIDX with checksum` error matching
1251/// git's `test-read-midx.c`.
1252fn resolve_read_midx_path(pack_dir: &Path, checksum: Option<&str>) -> Result<std::path::PathBuf> {
1253    match checksum {
1254        Some(cs) => resolve_midx_layer_path(pack_dir, cs)
1255            .ok_or_else(|| Error::CorruptObject(format!("could not find MIDX with checksum {cs}"))),
1256        None => resolve_tip_midx_path(pack_dir)
1257            .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned())),
1258    }
1259}
1260
/// MIDX dump followed by one line per MIDX object, for the chain tip / root MIDX.
///
/// Thin wrapper over [`format_midx_show_objects_layer`] with no layer checksum.
pub fn format_midx_show_objects(objects_dir: &Path) -> Result<String> {
    format_midx_show_objects_layer(objects_dir, None)
}
1266
1267/// Like [`format_midx_show_objects`] but reads a specific layer by checksum.
1268pub fn format_midx_show_objects_layer(
1269    objects_dir: &Path,
1270    checksum: Option<&str>,
1271) -> Result<String> {
1272    let mut out = format_midx_dump_layer(objects_dir, checksum)?;
1273    let pack_dir = objects_dir.join("pack");
1274    let path = resolve_read_midx_path(&pack_dir, checksum)?;
1275    let data = fs::read(&path).map_err(Error::Io)?;
1276    let (_, hdr_end, _) = parse_midx_header(&data)?;
1277    let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1278    let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
1279    let (oidl_off, oidl_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
1280    let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1281    if oidl_len % 20 != 0 || ooff_len % 8 != 0 {
1282        return Err(Error::CorruptObject(
1283            "bad MIDX oid-lookup / object-offsets size".to_owned(),
1284        ));
1285    }
1286    let num = oidl_len / 20;
1287    if num * 8 != ooff_len {
1288        return Err(Error::CorruptObject(
1289            "MIDX oid count does not match object-offsets".to_owned(),
1290        ));
1291    }
1292    for i in 0..num {
1293        let oid = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])
1294            .map_err(|e| Error::CorruptObject(e.to_string()))?;
1295        let base = ooff_off + i * 8;
1296        let pack_id = read_be_u32(&data, base)? as usize;
1297        let offset = u64::from(read_be_u32(&data, base + 4)?);
1298        let idx_name = names
1299            .get(pack_id)
1300            .ok_or_else(|| Error::CorruptObject("pack id out of range in MIDX".to_owned()))?;
1301        // Match `test-read-midx.c`, which prints `e.p->pack_name`: the full pack
1302        // path `<object-dir>/pack/<stem>.pack`. A relative object dir gets a `./`
1303        // prefix (Git `relative_path`).
1304        let stem = idx_name.strip_suffix(".idx").unwrap_or(idx_name);
1305        let dir_disp = objects_dir.display().to_string();
1306        let dir_disp = if objects_dir.is_absolute() || dir_disp.starts_with("./") {
1307            dir_disp
1308        } else {
1309            format!("./{dir_disp}")
1310        };
1311        out.push_str(&format!(
1312            "{} {}\t{}/pack/{}.pack\n",
1313            oid.to_hex(),
1314            offset,
1315            dir_disp,
1316            stem
1317        ));
1318    }
1319    Ok(out)
1320}
1321
/// Human-readable dump of the chain tip / root MIDX (header, chunk list, object
/// count, pack names, object dir), in the layout of `test-tool read-midx`.
///
/// Thin wrapper over [`format_midx_dump_layer`] with no layer checksum.
pub fn format_midx_dump(objects_dir: &Path) -> Result<String> {
    format_midx_dump_layer(objects_dir, None)
}
1325
1326/// Like [`format_midx_dump`] but reads a specific layer by checksum (chain layer or
1327/// root MIDX). Used by `test-tool read-midx <object-dir> <checksum>`.
1328pub fn format_midx_dump_layer(objects_dir: &Path, checksum: Option<&str>) -> Result<String> {
1329    let pack_dir = objects_dir.join("pack");
1330    let path = resolve_read_midx_path(&pack_dir, checksum)?;
1331    let data = fs::read(&path).map_err(Error::Io)?;
1332    let (hdr, hdr_end, _) = parse_midx_header(&data)?;
1333    let sig = read_be_u32(&data, 0)?;
1334    let version = data[4];
1335    // The C `read-midx` test tool prints `m->hash_len`, the raw hash length
1336    // (20 for SHA-1, 32 for SHA-256), not the on-disk hash-version byte.
1337    let hash_len: u8 = match data[5] {
1338        1 => 20,
1339        2 => 32,
1340        other => other,
1341    };
1342    let num_chunks = hdr.num_chunks;
1343    let num_packs = read_be_u32(&data, 8)?;
1344
1345    let mut chunk_tags: Vec<&'static str> = Vec::new();
1346    let n = num_chunks as usize;
1347    let pos = hdr_end;
1348    let toc_end = pos + (n + 1) * CHUNK_TOC_ENTRY_SIZE;
1349    if data.len() < toc_end + 20 {
1350        return Err(Error::CorruptObject(
1351            "truncated MIDX chunk table".to_owned(),
1352        ));
1353    }
1354    for i in 0..n {
1355        let base = pos + i * CHUNK_TOC_ENTRY_SIZE;
1356        let id = read_be_u32(&data, base)?;
1357        let tag = match id {
1358            x if x == MIDX_CHUNKID_PACKNAMES => "pack-names",
1359            x if x == MIDX_CHUNKID_OIDFANOUT => "oid-fanout",
1360            x if x == MIDX_CHUNKID_OIDLOOKUP => "oid-lookup",
1361            x if x == MIDX_CHUNKID_OBJECTOFFSETS => "object-offsets",
1362            x if x == MIDX_CHUNKID_LARGEOFFSETS => "large-offsets",
1363            x if x == MIDX_CHUNKID_REVINDEX => "revindex",
1364            x if x == 0x4254_4d50 => "bitmapped-packs",
1365            _ => "unknown",
1366        };
1367        chunk_tags.push(tag);
1368    }
1369
1370    let (_ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1371    let num_objects = ooff_len / 8;
1372
1373    let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1374    let pack_names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
1375
1376    let mut out = String::new();
1377    out.push_str(&format!(
1378        "header: {:08x} {} {} {} {}\n",
1379        sig, version, hash_len, num_chunks, num_packs
1380    ));
1381    out.push_str("chunks:");
1382    for t in &chunk_tags {
1383        out.push(' ');
1384        out.push_str(t);
1385    }
1386    out.push('\n');
1387    out.push_str(&format!("num_objects: {num_objects}\n"));
1388    out.push_str("packs:\n");
1389    for n in &pack_names {
1390        out.push_str(n);
1391        out.push('\n');
1392    }
1393    out.push_str(&format!("object-dir: {}\n", objects_dir.display()));
1394    Ok(out)
1395}
1396
/// OID rows from the active multi-pack-index, plus reverse-index order for pack-reuse bitmap bits.
///
/// Git assigns each object a **global bitmap bit** equal to its position in the MIDX reverse index
/// (`RIDX` chunk) traversal order — not its position in the pack `.idx` file. Helpers on this struct
/// map [`ObjectId`] → global bit the same way as `midx-write.c` (`midx_pack_order`).
///
/// All four vectors have one entry per MIDX object; the lookup helpers rely on
/// `oids` being sorted, since they binary-search it.
#[derive(Debug, Clone)]
pub struct MidxReuseTables {
    /// OIDs in MIDX lexicographic order (same order as the OID lookup chunk).
    pub oids: Vec<ObjectId>,
    /// `(pack_int_id, in-pack offset)` parallel to `oids`.
    pub pack_and_offset: Vec<(u32, u64)>,
    /// `rid_order[rank]` is the OID-table index of the object at global bitmap rank `rank`.
    pub rid_order: Vec<u32>,
    /// Inverse map: global bitmap rank for each OID-table index.
    pub oid_idx_to_rank: Vec<u32>,
}
1413
1414/// Load OID / object-offset / reverse-index tables from the tip MIDX (root or chain tip).
1415///
1416/// Returns [`None`] when there is no MIDX or no `RIDX` chunk (no pseudo-bitmap ordering).
1417pub fn load_midx_reuse_tables(objects_dir: &Path) -> Result<Option<MidxReuseTables>> {
1418    let pack_dir = objects_dir.join("pack");
1419    let Some(path) = resolve_tip_midx_path(&pack_dir) else {
1420        return Ok(None);
1421    };
1422    let data = fs::read(&path).map_err(Error::Io)?;
1423    let (_, hdr_end, _) = parse_midx_header(&data)?;
1424    let (oidl_off, oid_l_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
1425    let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1426    let Ok((ridx_off, ridx_len)) = find_chunk(&data, hdr_end, MIDX_CHUNKID_REVINDEX) else {
1427        return Ok(None);
1428    };
1429    if oid_l_len % 20 != 0 || ooff_len != oid_l_len / 20 * 8 {
1430        return Err(Error::CorruptObject(
1431            "MIDX OID / offset chunk size mismatch".to_owned(),
1432        ));
1433    }
1434    let num_objects = oid_l_len / 20;
1435    if ridx_len != num_objects.saturating_mul(4) {
1436        return Err(Error::CorruptObject(
1437            "MIDX reverse index length does not match object count".to_owned(),
1438        ));
1439    }
1440    if num_objects == 0 {
1441        return Ok(None);
1442    }
1443
1444    let mut oids = Vec::with_capacity(num_objects);
1445    for i in 0..num_objects {
1446        let base = oidl_off + i * 20;
1447        oids.push(ObjectId::from_bytes(&data[base..base + 20])?);
1448    }
1449
1450    let mut pack_and_offset = Vec::with_capacity(num_objects);
1451    for i in 0..num_objects {
1452        let ob = ooff_off + i * 8;
1453        let pack_id = read_be_u32(&data, ob)?;
1454        let off32 = read_be_u32(&data, ob + 4)?;
1455        pack_and_offset.push((pack_id, u64::from(off32)));
1456    }
1457
1458    let mut rid_order = Vec::with_capacity(num_objects);
1459    for i in 0..num_objects {
1460        let base = ridx_off + i * 4;
1461        rid_order.push(read_be_u32(&data, base)?);
1462    }
1463
1464    let mut oid_idx_to_rank = vec![0u32; num_objects];
1465    for (rank, &oid_idx) in rid_order.iter().enumerate() {
1466        let idx = usize::try_from(oid_idx)
1467            .map_err(|_| Error::CorruptObject("bad MIDX reverse index entry".to_owned()))?;
1468        if idx >= num_objects {
1469            return Err(Error::CorruptObject(
1470                "MIDX reverse index out of range".to_owned(),
1471            ));
1472        }
1473        oid_idx_to_rank[idx] = u32::try_from(rank)
1474            .map_err(|_| Error::CorruptObject("too many MIDX objects".to_owned()))?;
1475    }
1476
1477    Ok(Some(MidxReuseTables {
1478        oids,
1479        pack_and_offset,
1480        rid_order,
1481        oid_idx_to_rank,
1482    }))
1483}
1484
1485impl MidxReuseTables {
1486    /// Global pseudo-bitmap index for `oid`, or [`None`] if the object is not in this MIDX.
1487    #[must_use]
1488    pub fn global_bitmap_bit(&self, oid: &ObjectId) -> Option<u32> {
1489        let oid_idx = self.oids.binary_search(oid).ok()?;
1490        Some(self.oid_idx_to_rank[oid_idx])
1491    }
1492
1493    /// MIDX-canonical pack id for `oid` (the single copy the MIDX selected after deduplication),
1494    /// or [`None`] if the object is not in this MIDX. Used to reject cross-pack delta reuse: a
1495    /// delta is only reusable verbatim when its base resolves to the *same* pack the delta lives
1496    /// in, mirroring Git's `midx_pair_to_pack_pos` check in `try_partial_reuse`.
1497    #[must_use]
1498    pub fn canonical_pack(&self, oid: &ObjectId) -> Option<u32> {
1499        let oid_idx = self.oids.binary_search(oid).ok()?;
1500        Some(self.pack_and_offset[oid_idx].0)
1501    }
1502}
1503
/// One pack's slice of the MIDX pseudo-bitmap namespace (`BTMP` chunk).
///
/// Each `BTMP` row is two big-endian `u32`s: `bitmap_pos` then `bitmap_nr`.
#[derive(Debug, Clone, Copy)]
pub struct MidxBtmpPackRange {
    /// Pack index in the MIDX pack-names list.
    pub pack_id: u32,
    /// First bit index assigned to this pack (cumulative object order).
    pub bitmap_pos: u32,
    /// Number of objects in this pack (same as `.idx` entry count).
    pub bitmap_nr: u32,
}
1514
1515/// Read per-pack `(bitmap_pos, bitmap_nr)` from the active MIDX `BTMP` chunk.
1516///
1517/// Returns an empty vector when the MIDX has no bitmapped-packs chunk.
1518pub fn read_midx_btmp_ranges(objects_dir: &Path) -> Result<Vec<MidxBtmpPackRange>> {
1519    let pack_dir = objects_dir.join("pack");
1520    let Some(path) = resolve_tip_midx_path(&pack_dir) else {
1521        return Ok(Vec::new());
1522    };
1523    let data = fs::read(&path).map_err(Error::Io)?;
1524    let (_, hdr_end, _) = parse_midx_header(&data)?;
1525    let Ok((btmp_off, btmp_len)) = find_chunk(&data, hdr_end, MIDX_CHUNKID_BITMAPPED_PACKS) else {
1526        return Ok(Vec::new());
1527    };
1528    if btmp_len == 0 || btmp_len % 8 != 0 {
1529        return Err(Error::CorruptObject(
1530            "invalid MIDX BTMP chunk length".to_owned(),
1531        ));
1532    }
1533    let num_packs = read_be_u32(&data, 8)?;
1534    let n_entries = btmp_len / 8;
1535    if u32::try_from(n_entries).ok() != Some(num_packs) {
1536        return Err(Error::CorruptObject(
1537            "MIDX BTMP entry count does not match num_packs".to_owned(),
1538        ));
1539    }
1540    let mut out = Vec::with_capacity(n_entries);
1541    for i in 0..n_entries {
1542        let base = btmp_off + i * 8;
1543        let bitmap_pos = read_be_u32(&data, base)?;
1544        let bitmap_nr = read_be_u32(&data, base + 4)?;
1545        out.push(MidxBtmpPackRange {
1546            pack_id: u32::try_from(i)
1547                .map_err(|_| Error::CorruptObject("too many packs in MIDX BTMP".to_owned()))?,
1548            bitmap_pos,
1549            bitmap_nr,
1550        });
1551    }
1552    Ok(out)
1553}
1554
1555/// Format `test-tool read-midx --bitmap` output for the active MIDX: per pack, a
1556/// line with `<pack>.pack`, then `  bitmap_pos:` and `  bitmap_nr:`. Returns an
1557/// error whose message is `MIDX does not contain the BTMP chunk` when the MIDX has
1558/// no `BTMP` chunk (mirrors `nth_bitmapped_pack` in git/midx.c).
1559pub fn format_midx_bitmapped_packs(objects_dir: &Path) -> Result<String> {
1560    let pack_dir = objects_dir.join("pack");
1561    let path = resolve_tip_midx_path(&pack_dir)
1562        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
1563    let data = fs::read(&path).map_err(Error::Io)?;
1564    let (_, hdr_end, _) = parse_midx_header(&data)?;
1565    let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1566    let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
1567    let Ok((btmp_off, btmp_len)) = find_chunk(&data, hdr_end, MIDX_CHUNKID_BITMAPPED_PACKS) else {
1568        return Err(Error::CorruptObject(
1569            "MIDX does not contain the BTMP chunk".to_owned(),
1570        ));
1571    };
1572    let n_entries = btmp_len / 8;
1573    let mut out = String::new();
1574    for i in 0..n_entries {
1575        let base = btmp_off + i * 8;
1576        let bitmap_pos = read_be_u32(&data, base)?;
1577        let bitmap_nr = read_be_u32(&data, base + 4)?;
1578        let idx_name = names.get(i).ok_or_else(|| {
1579            Error::CorruptObject("BTMP entry has no corresponding pack name".to_owned())
1580        })?;
1581        let stem = idx_name.strip_suffix(".idx").unwrap_or(idx_name);
1582        out.push_str(&format!("{stem}.pack\n"));
1583        out.push_str(&format!("  bitmap_pos: {bitmap_pos}\n"));
1584        out.push_str(&format!("  bitmap_nr: {bitmap_nr}\n"));
1585    }
1586    Ok(out)
1587}
1588
1589/// Look up which pack and in-pack offset holds `oid` according to the active MIDX.
1590pub fn midx_lookup_pack_and_offset(objects_dir: &Path, oid: &ObjectId) -> Result<(u32, u64)> {
1591    let pack_dir = objects_dir.join("pack");
1592    let path = resolve_tip_midx_path(&pack_dir)
1593        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
1594    let data = fs::read(&path).map_err(Error::Io)?;
1595    let (_, hdr_end, _) = parse_midx_header(&data)?;
1596    let (fanout_off, fanout_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDFANOUT)?;
1597    let (oidl_off, oid_l_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
1598    let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1599    if fanout_len != 256 * 4 || oid_l_len % 20 != 0 || ooff_len != oid_l_len / 20 * 8 {
1600        return Err(Error::CorruptObject("truncated MIDX OID chunks".to_owned()));
1601    }
1602    let num_objects = oid_l_len / 20;
1603    let first = oid.as_bytes()[0] as usize;
1604    let j0 = if first == 0 {
1605        0usize
1606    } else {
1607        read_be_u32(&data, fanout_off + (first - 1) * 4)? as usize
1608    };
1609    let j1 = read_be_u32(&data, fanout_off + first * 4)? as usize;
1610    let mut lo = j0;
1611    let mut hi = j1;
1612    while lo < hi {
1613        let mid = (lo + hi) / 2;
1614        let base = oidl_off + mid * 20;
1615        let cmp = data[base..base + 20].cmp(oid.as_bytes());
1616        if cmp == std::cmp::Ordering::Less {
1617            lo = mid + 1;
1618        } else {
1619            hi = mid;
1620        }
1621    }
1622    if lo >= num_objects {
1623        return Err(Error::CorruptObject(format!(
1624            "object {} not in multi-pack-index",
1625            oid.to_hex()
1626        )));
1627    }
1628    let base = oidl_off + lo * 20;
1629    if data[base..base + 20] != *oid.as_bytes() {
1630        return Err(Error::CorruptObject(format!(
1631            "object {} not in multi-pack-index",
1632            oid.to_hex()
1633        )));
1634    }
1635    let ob = ooff_off + lo * 8;
1636    let pack_id = read_be_u32(&data, ob)?;
1637    let off32 = read_be_u32(&data, ob + 4)?;
1638    Ok((pack_id, u64::from(off32)))
1639}
1640
1641/// Returns whether `oid` appears in the active MIDX OID table for `objects_dir`.
1642///
1643/// [`None`] means there is no MIDX at the pack tip. [`Some`] is the lookup result when a MIDX exists.
1644pub fn midx_oid_listed_in_tip(objects_dir: &Path, oid: &ObjectId) -> Result<Option<bool>> {
1645    let pack_dir = objects_dir.join("pack");
1646    let Some(midx_path) = resolve_tip_midx_path(&pack_dir) else {
1647        return Ok(None);
1648    };
1649    let data = fs::read(&midx_path).map_err(Error::Io)?;
1650    let MidxReadView {
1651        oidf_off,
1652        oidl_off,
1653        num_objects,
1654        ..
1655    } = match midx_load_for_read(&data, repo_midx_hash_version_for_objects_dir(objects_dir)) {
1656        MidxLoadResult::Ok(v) => v,
1657        MidxLoadResult::Skip => return Ok(None),
1658    };
1659
1660    let first = oid.as_bytes()[0] as usize;
1661    let lo = if first == 0 {
1662        0u32
1663    } else {
1664        read_be_u32(&data, oidf_off + (first - 1) * 4)?
1665    };
1666    let hi = read_be_u32(&data, oidf_off + first * 4)?;
1667
1668    let mut i = lo as usize;
1669    while i < hi as usize && i < num_objects {
1670        let o = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])?;
1671        match o.cmp(oid) {
1672            std::cmp::Ordering::Equal => return Ok(Some(true)),
1673            std::cmp::Ordering::Greater => return Ok(Some(false)),
1674            std::cmp::Ordering::Less => i += 1,
1675        }
1676    }
1677    Ok(Some(false))
1678}
1679
/// Chunk offsets and metadata of a successfully loaded MIDX, ready for object reads.
///
/// All offsets are byte positions into the MIDX image that was handed to
/// `midx_load_for_read`.
struct MidxReadView {
    /// Start of the OID fanout chunk (256 big-endian `u32` values).
    oidf_off: usize,
    /// Start of the OID lookup chunk; readers index it in 20-byte entries.
    oidl_off: usize,
    /// Start of the object-offsets chunk; readers index it in 8-byte entries
    /// (pack id, then 32-bit offset).
    ooff_off: usize,
    /// `(offset, length)` of the large-offsets chunk, or `None` when the lookup for it
    /// returned nothing.
    loff: Option<(usize, usize)>,
    /// Object count, taken from the last fanout entry.
    num_objects: usize,
    /// Pack names parsed from the pack-names chunk, in stored order; readers index
    /// this by pack id.
    pack_names: Vec<String>,
}
1689
/// Outcome of `midx_load_for_read` for corruptions that do not terminate the process.
enum MidxLoadResult {
    /// The MIDX passed validation; the view holds the chunk offsets needed for lookups.
    Ok(MidxReadView),
    /// The MIDX is unusable but not fatal (Git returns NULL and falls back to packs);
    /// an `error:`/`warning:` line has already been printed.
    // NOTE(review): the too-short-file path returns `Skip` without printing anything —
    // confirm whether the "already been printed" wording should be relaxed.
    Skip,
}
1696
/// Emit a recoverable MIDX `error:`/`warning:` line on stderr, suppressing repeats of the
/// exact same text within this process.
///
/// Git loads the MIDX once and caches it, so a recoverable corruption is reported a
/// single time. grit re-reads the MIDX per object lookup, so the lines already printed
/// are remembered in a process-wide set. If that set's mutex is poisoned the line is
/// printed unconditionally.
fn midx_warn_once(line: &str) {
    use std::sync::{Mutex, OnceLock};

    static SEEN: OnceLock<Mutex<HashSet<String>>> = OnceLock::new();

    let registry = SEEN.get_or_init(|| Mutex::new(HashSet::new()));
    match registry.lock() {
        Ok(mut printed) => {
            // `insert` reports whether the line was new to the set.
            let is_new = printed.insert(line.to_string());
            if is_new {
                eprintln!("{line}");
            }
        }
        Err(_) => eprintln!("{line}"),
    }
}
1715
/// Write Git-style diagnostics to stderr and terminate the process with exit code 128,
/// mirroring `die()` after preceding `error()` calls.
///
/// Every element of `lines` except the last is printed with an `error: ` prefix; the
/// last one is printed with `fatal: `. Write failures are ignored. Never returns.
fn midx_die(lines: &[&str]) -> ! {
    use std::io::Write;

    let mut stderr = std::io::stderr().lock();
    if let Some((fatal, errors)) = lines.split_last() {
        for msg in errors {
            let _ = writeln!(stderr, "error: {msg}");
        }
        let _ = writeln!(stderr, "fatal: {fatal}");
    }
    let _ = stderr.flush();
    std::process::exit(128);
}
1732
1733/// Validate and load a MIDX image for object reads, mirroring `load_multi_pack_index`
1734/// in git/midx.c. Fatal corruptions print `error:`/`fatal:` and exit (Git `die()`);
1735/// recoverable corruptions print an `error:`/`warning:` and return [`MidxLoadResult::Skip`].
1736fn midx_load_for_read(data: &[u8], expected_hash_version: u8) -> MidxLoadResult {
1737    if data.len() < MIDX_HEADER_SIZE + 20 {
1738        return MidxLoadResult::Skip;
1739    }
1740    let sig = u32::from_be_bytes([data[0], data[1], data[2], data[3]]);
1741    if sig != MIDX_SIGNATURE {
1742        midx_die(&[&format!(
1743            "multi-pack-index signature 0x{sig:08x} does not match signature 0x{MIDX_SIGNATURE:08x}"
1744        )]);
1745    }
1746    let version = data[4];
1747    if version != MIDX_VERSION_V1 && version != MIDX_VERSION_V2 {
1748        midx_die(&[&format!(
1749            "multi-pack-index version {version} not recognized"
1750        )]);
1751    }
1752    let hash_version = data[5];
1753    if hash_version != expected_hash_version {
1754        // `load_multi_pack_index` error()s then `goto cleanup_fail` (returns NULL),
1755        // so this is recoverable, not fatal. The expected version is the repository's
1756        // own `oid_version(hash_algo)` (SHA-1 → 1, SHA-256 → 2).
1757        midx_warn_once(&format!(
1758            "error: multi-pack-index hash version {hash_version} does not match version {expected_hash_version}"
1759        ));
1760        return MidxLoadResult::Skip;
1761    }
1762    let hash_len = 20usize;
1763    let num_packs = u32::from_be_bytes([data[8], data[9], data[10], data[11]]) as usize;
1764
1765    // Table of contents (chunk-format.c read_table_of_contents). Recoverable failures
1766    // (unaligned / improper offset / duplicate / non-zero terminator) print error() and
1767    // return NULL.
1768    let mut toc_errors: Vec<String> = Vec::new();
1769    let chunks = match parse_midx_toc(data, hash_len, &mut toc_errors) {
1770        Ok(c) => c,
1771        Err(_) => {
1772            for e in &toc_errors {
1773                midx_warn_once(&format!("error: {e}"));
1774            }
1775            return MidxLoadResult::Skip;
1776        }
1777    };
1778
1779    // Required pack-names chunk.
1780    let Some((pn_off, pn_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_PACKNAMES)
1781    else {
1782        midx_die(&["multi-pack-index required pack-name chunk missing or corrupted"]);
1783    };
1784
1785    // Required oid-fanout chunk + size + ordering (midx_read_oid_fanout).
1786    let Some((oidf_off, oidf_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OIDFANOUT)
1787    else {
1788        midx_die(&["multi-pack-index required OID fanout chunk missing or corrupted"]);
1789    };
1790    if oidf_len != 256 * 4 {
1791        midx_die(&[
1792            "multi-pack-index OID fanout is of the wrong size",
1793            "multi-pack-index required OID fanout chunk missing or corrupted",
1794        ]);
1795    }
1796    let fanout = |i: usize| -> u32 {
1797        let b = oidf_off + i * 4;
1798        u32::from_be_bytes([data[b], data[b + 1], data[b + 2], data[b + 3]])
1799    };
1800    for i in 0..255 {
1801        let f1 = fanout(i);
1802        let f2 = fanout(i + 1);
1803        if f1 > f2 {
1804            midx_die(&[
1805                &format!(
1806                    "oid fanout out of order: fanout[{i}] = {f1:x} > {f2:x} = fanout[{}]",
1807                    i + 1
1808                ),
1809                "multi-pack-index required OID fanout chunk missing or corrupted",
1810            ]);
1811        }
1812    }
1813    let num_objects = fanout(255) as usize;
1814
1815    // Required oid-lookup chunk + size (midx_read_oid_lookup).
1816    let Some((oidl_off, oidl_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OIDLOOKUP)
1817    else {
1818        midx_die(&["multi-pack-index required OID lookup chunk missing or corrupted"]);
1819    };
1820    if oidl_len != hash_len * num_objects {
1821        midx_die(&[
1822            "multi-pack-index OID lookup chunk is the wrong size",
1823            "multi-pack-index required OID lookup chunk missing or corrupted",
1824        ]);
1825    }
1826
1827    // Required object-offsets chunk + size (midx_read_object_offsets).
1828    let Some((ooff_off, ooff_len)) =
1829        toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OBJECTOFFSETS)
1830    else {
1831        midx_die(&["multi-pack-index required object offsets chunk missing or corrupted"]);
1832    };
1833    if ooff_len != num_objects * 8 {
1834        midx_die(&[
1835            "multi-pack-index object offset chunk is the wrong size",
1836            "multi-pack-index required object offsets chunk missing or corrupted",
1837        ]);
1838    }
1839
1840    let loff = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_LARGEOFFSETS);
1841
1842    // Optional revindex chunk — wrong size warns but does not fail the load.
1843    if let Some((_, rlen)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_REVINDEX) {
1844        if rlen != num_objects * 4 {
1845            midx_warn_once("error: multi-pack-index reverse-index chunk is the wrong size");
1846            midx_warn_once("warning: multi-pack bitmap is missing required reverse index");
1847        }
1848    }
1849
1850    // Pack-name parsing (die if a name is unterminated).
1851    let mut pack_names: Vec<String> = Vec::with_capacity(num_packs);
1852    let blob = &data[pn_off..pn_off + pn_len];
1853    let mut start = 0usize;
1854    for _ in 0..num_packs {
1855        let Some(rel) = blob[start..].iter().position(|&b| b == 0) else {
1856            midx_die(&["multi-pack-index pack-name chunk is too short"]);
1857        };
1858        let name = match std::str::from_utf8(&blob[start..start + rel]) {
1859            Ok(s) => s.to_string(),
1860            Err(_) => midx_die(&["multi-pack-index pack-name chunk is too short"]),
1861        };
1862        if version == MIDX_VERSION_V1
1863            && !pack_names.is_empty()
1864            && name.as_str() <= pack_names.last().map(|s| s.as_str()).unwrap_or("")
1865        {
1866            midx_die(&[&format!(
1867                "multi-pack-index pack names out of order: '{}' before '{name}'",
1868                pack_names.last().cloned().unwrap_or_default()
1869            )]);
1870        }
1871        pack_names.push(name);
1872        start += rel + 1;
1873    }
1874
1875    MidxLoadResult::Ok(MidxReadView {
1876        oidf_off,
1877        oidl_off,
1878        ooff_off,
1879        loff,
1880        num_objects,
1881        pack_names,
1882    })
1883}
1884
1885/// Eagerly validate that every pack named by the active MIDX has a readable `.idx`.
1886///
1887/// Mirrors git/packfile.c `open_pack_index`: when `prepare_packed_git` registers the
1888/// packs the MIDX references, a pack whose `.idx` cannot be opened (truncated/corrupt)
1889/// triggers `error: packfile <pack> index unavailable`. Git reports this once because the
1890/// MIDX/pack store is prepared a single time; this routine reproduces that even when the
1891/// object that triggered the read is found loose (so it never reaches the per-object MIDX
1892/// lookup). Runs at most once per process per `objects_dir`.
1893pub fn validate_midx_referenced_packs(objects_dir: &Path) {
1894    use std::sync::Mutex;
1895    use std::sync::OnceLock;
1896    static DONE: OnceLock<Mutex<HashSet<std::path::PathBuf>>> = OnceLock::new();
1897    let done = DONE.get_or_init(|| Mutex::new(HashSet::new()));
1898    if let Ok(mut set) = done.lock() {
1899        if !set.insert(objects_dir.to_path_buf()) {
1900            return;
1901        }
1902    }
1903
1904    let pack_dir = objects_dir.join("pack");
1905    let Some(midx_path) = resolve_tip_midx_path(&pack_dir) else {
1906        return;
1907    };
1908    let Ok(data) = fs::read(&midx_path) else {
1909        return;
1910    };
1911    let MidxReadView { pack_names, .. } =
1912        match midx_load_for_read(&data, repo_midx_hash_version_for_objects_dir(objects_dir)) {
1913            MidxLoadResult::Ok(v) => v,
1914            MidxLoadResult::Skip => return,
1915        };
1916    for idx_name in &pack_names {
1917        let idx_path = pack_dir.join(idx_name);
1918        // A MIDX may name a pack whose files were later deleted; Git skips the missing
1919        // pack silently (it is not "unavailable", just gone). Only a present-but-corrupt
1920        // idx produces the "index unavailable" error.
1921        if !idx_path.exists() {
1922            continue;
1923        }
1924        // Match Git's `open_pack_index`, which parses the idx header/tables but does
1925        // not verify the trailing checksum: a structurally valid idx with a stale
1926        // checksum (the 64-bit-offset tests corrupt one offset byte in place) loads
1927        // fine and must NOT be reported "unavailable". Only an unparseable idx
1928        // (e.g. truncated, as in `corrupt idx reports errors`) is unavailable.
1929        if crate::pack::read_pack_index_no_verify(&idx_path).is_err() {
1930            let mut pack_path = idx_path.clone();
1931            pack_path.set_extension("pack");
1932            midx_warn_once(&format!(
1933                "error: packfile {} index unavailable",
1934                pack_path.display()
1935            ));
1936        }
1937    }
1938}
1939
1940/// When `core.multiPackIndex` is enabled, try to read `oid` from the active MIDX in `objects_dir`.
1941///
1942/// Returns [`None`] when no MIDX exists or `oid` is not listed. Returns [`Some(Err(..))`] when the
1943/// MIDX is present but malformed (callers surface Git-style `error:` / `fatal:` messages).
1944pub fn try_read_object_via_midx(
1945    objects_dir: &Path,
1946    oid: &ObjectId,
1947) -> Result<Option<crate::objects::Object>> {
1948    let pack_dir = objects_dir.join("pack");
1949    let Some(midx_path) = resolve_tip_midx_path(&pack_dir) else {
1950        return Ok(None);
1951    };
1952    let data = fs::read(&midx_path).map_err(Error::Io)?;
1953
1954    // Load-time validation, mirroring `load_multi_pack_index` in git/midx.c.
1955    // Fatal corruptions `die()` (print error + fatal, exit 128); recoverable
1956    // ones (e.g. an unaligned chunk table) skip the MIDX entirely.
1957    let MidxReadView {
1958        oidf_off,
1959        oidl_off,
1960        ooff_off,
1961        loff,
1962        num_objects,
1963        pack_names,
1964    } = match midx_load_for_read(&data, repo_midx_hash_version_for_objects_dir(objects_dir)) {
1965        MidxLoadResult::Ok(v) => v,
1966        MidxLoadResult::Skip => return Ok(None),
1967    };
1968
1969    let first = oid.as_bytes()[0] as usize;
1970    let lo = if first == 0 {
1971        0u32
1972    } else {
1973        read_be_u32(&data, oidf_off + (first - 1) * 4)?
1974    };
1975    let hi = read_be_u32(&data, oidf_off + first * 4)?;
1976
1977    let mut pos = None;
1978    let mut i = lo as usize;
1979    while i < hi as usize && i < num_objects {
1980        let o = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])?;
1981        let c = o.cmp(oid);
1982        if c == std::cmp::Ordering::Equal {
1983            pos = Some(i);
1984            break;
1985        }
1986        if c == std::cmp::Ordering::Greater {
1987            break;
1988        }
1989        i += 1;
1990    }
1991    let Some(pos) = pos else {
1992        return Ok(None);
1993    };
1994
1995    let obase = ooff_off + pos * 8;
1996    let pack_id = read_be_u32(&data, obase)?;
1997    let raw_off = read_be_u32(&data, obase + 4)?;
1998    let _offset = if (raw_off & MIDX_LARGE_OFFSET_NEEDED) != 0 {
1999        let idx = (raw_off & !MIDX_LARGE_OFFSET_NEEDED) as usize;
2000        let need = (idx + 1) * 8;
2001        match loff {
2002            Some((loff_off, loff_len)) if loff_len >= need => {
2003                read_be_u64(&data, loff_off + idx * 8)?
2004            }
2005            _ => {
2006                // git/midx.c `nth_midxed_offset`: die on out-of-bounds large offset.
2007                midx_die(&["multi-pack-index large offset out of bounds"]);
2008            }
2009        }
2010    } else {
2011        u64::from(raw_off)
2012    };
2013
2014    let idx_name = pack_names
2015        .get(pack_id as usize)
2016        .ok_or_else(|| Error::CorruptObject("bad pack-int-id".to_owned()))?;
2017    let idx_path = pack_dir.join(idx_name);
2018    // A multi-pack-index can outlive packs it names (e.g. a `repack -d` deleted a
2019    // pack but did not rewrite the MIDX). Git tolerates such stale entries by
2020    // skipping the missing pack; mirror that by falling through to other object
2021    // sources instead of surfacing the open error.
2022    if !idx_path.exists() {
2023        return Ok(None);
2024    }
2025    // Mirror git/packfile.c `open_pack_index`: when a pack's idx cannot be read
2026    // (e.g. truncated/corrupt), Git emits `error: packfile <pack> index unavailable`,
2027    // marks the pack invalid, and continues to other object sources. The object
2028    // may still be found loose or in another pack, so fall through rather than
2029    // surfacing the parse error as fatal. Use the non-verifying parse to match
2030    // `open_pack_index`, which does not validate the trailing checksum (a pack
2031    // `.idx` with a stale checksum but valid structure must still be usable).
2032    let idx = match crate::pack::read_pack_index_no_verify(&idx_path) {
2033        Ok(idx) => idx,
2034        Err(_) => {
2035            let mut pack_path = idx_path.clone();
2036            pack_path.set_extension("pack");
2037            midx_warn_once(&format!(
2038                "error: packfile {} index unavailable",
2039                pack_path.display()
2040            ));
2041            return Ok(None);
2042        }
2043    };
2044    crate::pack::read_object_from_pack(&idx, oid).map(Some)
2045}
2046
2047pub fn read_midx_preferred_idx_name(objects_dir: &Path) -> Result<String> {
2048    let pack_dir = objects_dir.join("pack");
2049    let path = resolve_tip_midx_path(&pack_dir)
2050        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
2051    let data = fs::read(&path).map_err(Error::Io)?;
2052    let (_, hdr_end, _) = parse_midx_header(&data)?;
2053    let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
2054    let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
2055    let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
2056    // The preferred pack is recorded in the MIDX reverse index, which is only
2057    // present when the MIDX has a bitmap. Without it, the preferred pack is
2058    // unknowable (git/midx.c `midx_preferred_pack` returns -1). Prefer the
2059    // embedded RIDX chunk; otherwise fall back to a `multi-pack-index*.rev`
2060    // sidecar, matching `load_midx_revindex`.
2061    let (ridx_off, ridx_len) = match find_chunk(&data, hdr_end, MIDX_CHUNKID_REVINDEX) {
2062        Ok(r) => r,
2063        Err(_) => {
2064            return Err(Error::CorruptObject(
2065                "could not determine MIDX preferred pack".to_owned(),
2066            ));
2067        }
2068    };
2069
2070    if ridx_len < 4 || ooff_len < 8 {
2071        return Err(Error::CorruptObject("truncated MIDX RIDX/OOFF".to_owned()));
2072    }
2073    let first_oid_idx = read_be_u32(&data, ridx_off)? as usize;
2074    let entry_base = ooff_off + first_oid_idx * 8;
2075    if entry_base + 8 > data.len() || entry_base + 8 > ooff_off + ooff_len {
2076        return Err(Error::CorruptObject(
2077            "bad MIDX object-offsets index".to_owned(),
2078        ));
2079    }
2080    let pack_id = read_be_u32(&data, entry_base)?;
2081    let idx = usize::try_from(pack_id)
2082        .map_err(|_| Error::CorruptObject("pack id overflow in multi-pack-index".to_owned()))?;
2083    names
2084        .get(idx)
2085        .cloned()
2086        .ok_or_else(|| Error::CorruptObject("preferred pack id out of range".to_owned()))
2087}
2088
/// Remove every multi-pack-index file under `pack_dir` (root file, sidecars, and
/// `multi-pack-index.d/`). Used by full `repack -a` so stale incremental chains do not survive.
///
/// Removing the root `multi-pack-index` file and the directory returned by
/// `midx_d_dir` is best-effort: their errors are discarded. Only a failure from
/// `scrub_root_midx_sidecars_except` is returned to the caller.
pub fn clear_pack_midx_state(pack_dir: &Path) -> Result<()> {
    // Best-effort: the root file may legitimately not exist.
    let _ = fs::remove_file(pack_dir.join("multi-pack-index"));
    // `None` = keep nothing.
    // NOTE(review): inferred from the helper's name and argument — confirm.
    scrub_root_midx_sidecars_except(pack_dir, None)?;
    let midx_d = midx_d_dir(pack_dir);
    if midx_d.exists() {
        let _ = fs::remove_dir_all(&midx_d);
    }
    Ok(())
}

/// Build `objects/pack/multi-pack-index` for all pack indexes in `pack_dir`.
///
/// Thin wrapper: calls [`write_multi_pack_index_with_options`] with
/// `WriteMultiPackIndexOptions::default()`.
///
/// Returns an error if there are no `.idx` files or if I/O fails.
/// NOTE(review): this comment previously also listed "an object offset does not fit in
/// 31 bits (no `LOFF` chunk yet)" as an error case; that is not verifiable from this
/// wrapper — confirm against the writer before restoring it.
pub fn write_multi_pack_index(pack_dir: &Path) -> Result<()> {
    write_multi_pack_index_with_options(pack_dir, &WriteMultiPackIndexOptions::default())
}
2108
2109/// Write `multi-pack-index` with optional preferred pack, placeholders, and incremental chain.
2110pub fn write_multi_pack_index_with_options(
2111    pack_dir: &Path,
2112    opts: &WriteMultiPackIndexOptions,
2113) -> Result<()> {
2114    // Git warns and ignores an existing MIDX whose checksum does not validate when
2115    // writing a fresh (non-stdin-packs) MIDX (git/midx-write.c `write_midx_internal`).
2116    if opts.pack_names_subset_ordered.is_none() {
2117        if let Some(existing) = resolve_tip_midx_path(pack_dir) {
2118            if let Ok(bytes) = fs::read(&existing) {
2119                if midx_checksum_is_valid(&bytes) {
2120                    // A fresh write copies the existing MIDX's packs. Loading a pack
2121                    // it references whose `.pack` is gone fails with `could not load
2122                    // pack N` (git/midx-write.c `fill_pack_from_midx`).
2123                    if let Ok((_, existing_names)) = oids_and_packs_from_midx_data(&bytes) {
2124                        for (i, name) in existing_names.iter().enumerate() {
2125                            let stem = name.strip_suffix(".idx").unwrap_or(name);
2126                            if !pack_dir.join(format!("{stem}.pack")).exists() {
2127                                eprintln!("error: could not load pack {i}");
2128                                return Err(Error::CorruptObject(format!(
2129                                    "could not load pack {i}"
2130                                )));
2131                            }
2132                        }
2133                    }
2134                } else {
2135                    eprintln!("warning: ignoring existing multi-pack-index; checksum mismatch");
2136                }
2137            }
2138        }
2139    }
2140
2141    // Git's MIDX covers every pack index in the directory regardless of its
2142    // basename (the `.git/objects/pack/test-*.idx` packs created by t7900's
2143    // incremental-repack test, for instance), so include any `*.idx` whose
2144    // companion `.pack` exists.
2145    let mut idx_names: Vec<String> = fs::read_dir(pack_dir)
2146        .map(|rd| {
2147            rd.filter_map(|e| e.ok())
2148                .filter_map(|e| {
2149                    let name = e.file_name().to_string_lossy().to_string();
2150                    let stem = name.strip_suffix(".idx")?;
2151                    if pack_dir.join(format!("{stem}.pack")).exists() {
2152                        Some(name)
2153                    } else {
2154                        None
2155                    }
2156                })
2157                .collect()
2158        })
2159        .unwrap_or_default();
2160    idx_names.sort();
2161
2162    let idx_names: Vec<String> = if let Some(sub) = &opts.pack_names_subset_ordered {
2163        let mut out = Vec::new();
2164        for line in sub {
2165            let want = normalize_pack_idx_basename(line)?;
2166            if let Some(found) = idx_names.iter().find(|n| **n == want).cloned() {
2167                if !out.contains(&found) {
2168                    out.push(found);
2169                }
2170            }
2171            // Unknown names on stdin are silently ignored (Git skips packs it
2172            // cannot find rather than failing the whole write).
2173        }
2174        out
2175    } else {
2176        idx_names
2177    };
2178
2179    // Resolve / validate the preferred pack against the working pack set. Git emits a
2180    // (non-fatal) `warning: unknown preferred pack: '<name>'` when it cannot be matched.
2181    let mut preferred_warned = false;
2182    if let Some(raw) = opts.preferred_pack_name.as_deref() {
2183        if opts.preferred_pack_idx.is_none()
2184            && !idx_names
2185                .iter()
2186                .any(|n| cmp_idx_or_pack_name(raw, n).is_eq())
2187        {
2188            eprintln!("warning: unknown preferred pack: '{raw}'");
2189            preferred_warned = true;
2190        }
2191    }
2192
2193    if idx_names.is_empty() {
2194        // Git `write_midx_internal`: `error("no pack files to index.")` then fail.
2195        eprintln!("error: no pack files to index.");
2196        return Err(Error::CorruptObject("no pack files to index.".to_owned()));
2197    }
2198
2199    let (base_oids, base_pack_names) = if opts.incremental {
2200        collect_incremental_base(pack_dir)?
2201    } else {
2202        (HashSet::new(), HashSet::new())
2203    };
2204
2205    let layer_idx_names: Vec<String> = if opts.incremental {
2206        idx_names
2207            .iter()
2208            .filter(|n| {
2209                !base_pack_names
2210                    .iter()
2211                    .any(|bp| pack_names_match_layer(bp, n))
2212            })
2213            .cloned()
2214            .collect()
2215    } else {
2216        idx_names.clone()
2217    };
2218
2219    if opts.incremental && layer_idx_names.is_empty() {
2220        return Ok(());
2221    }
2222
2223    let work_names = if opts.incremental {
2224        &layer_idx_names[..]
2225    } else {
2226        &idx_names[..]
2227    };
2228
2229    let mut preferred_idx = opts.preferred_pack_idx.map(|p| p as usize);
2230    if preferred_idx.is_none() && !preferred_warned {
2231        if let Some(raw) = opts.preferred_pack_name.as_deref() {
2232            // Already validated against `idx_names`; resolve against the working set.
2233            preferred_idx = work_names
2234                .iter()
2235                .position(|n| cmp_idx_or_pack_name(raw, n).is_eq());
2236        }
2237    }
2238    if preferred_idx.is_none() && opts.write_bitmap_placeholders && !work_names.is_empty() {
2239        preferred_idx = preferred_pack_index_by_mtime(pack_dir, work_names)?;
2240    }
2241    if let Some(p) = preferred_idx {
2242        if p >= work_names.len() {
2243            return Err(Error::CorruptObject(
2244                "preferred pack index out of range".to_owned(),
2245            ));
2246        }
2247    }
2248
2249    let mut indexes: Vec<PackIndex> = Vec::with_capacity(work_names.len());
2250    for name in work_names {
2251        let path = pack_dir.join(name);
2252        // Do not re-verify the idx trailer here; Git reads the offset table
2253        // directly (t5319 forces a deliberately corrupt-but-valid 64-bit idx).
2254        indexes.push(crate::pack::read_pack_index_no_verify(&path)?);
2255    }
2256
2257    // Git refuses an explicitly preferred pack that has no objects.
2258    if let Some(p) = preferred_idx {
2259        if indexes.get(p).map(|i| i.entries.len()).unwrap_or(0) == 0 {
2260            let name = work_names.get(p).cloned().unwrap_or_default();
2261            let pack_name = name.strip_suffix(".idx").unwrap_or(&name);
2262            eprintln!("error: cannot select preferred pack {pack_name}.pack with no objects");
2263            return Err(Error::CorruptObject(
2264                "cannot select preferred pack with no objects".to_owned(),
2265            ));
2266        }
2267    }
2268
2269    let pack_mtimes_layer: Vec<std::time::SystemTime> =
2270        indexes.iter().map(pack_mtime_for_midx).collect();
2271    let preferred_u32 = preferred_idx.map(|p| p as u32);
2272
2273    let mut best: HashMap<ObjectId, MidxEntry> = HashMap::new();
2274    for (pack_id, idx) in indexes.iter().enumerate() {
2275        let pack_id = u32::try_from(pack_id).map_err(|_| {
2276            Error::CorruptObject("too many pack files for multi-pack-index".to_owned())
2277        })?;
2278        let mtime = pack_mtimes_layer[pack_id as usize];
2279        for e in &idx.entries {
2280            if e.oid.len() != 20 {
2281                continue;
2282            }
2283            let Ok(oid) = ObjectId::from_bytes(&e.oid) else {
2284                continue;
2285            };
2286            if opts.incremental && base_oids.contains(&oid) {
2287                continue;
2288            }
2289            let cand = MidxEntry {
2290                oid,
2291                pack_id,
2292                offset: e.offset,
2293                pack_mtime: mtime,
2294            };
2295            match best.get(&oid) {
2296                None => {
2297                    best.insert(oid, cand);
2298                }
2299                Some(cur) => {
2300                    if midx_pick_better_entry(cur, pack_id, e.offset, mtime, preferred_u32) {
2301                        best.insert(oid, cand);
2302                    }
2303                }
2304            }
2305        }
2306    }
2307
2308    let bitmap_placeholders =
2309        opts.write_bitmap_placeholders && (!opts.incremental || !best.is_empty());
2310
2311    let omit_embedded_ridx = opts.write_rev_placeholder;
2312    // An incremental layer must not repeat objects already provided by the base
2313    // chain even when the layer's own pack physically contains them (a fresh pack
2314    // built with `--revs` from a tag range, for instance). Filter by base OID.
2315    let exclude = if opts.incremental && !base_oids.is_empty() {
2316        Some(&base_oids)
2317    } else {
2318        None
2319    };
2320    let (out, rev_sidecar_order) = build_midx_bytes_filtered(
2321        work_names,
2322        &indexes,
2323        preferred_idx,
2324        bitmap_placeholders,
2325        omit_embedded_ridx,
2326        opts.version.unwrap_or(MIDX_VERSION_V2),
2327        repo_midx_hash_version(pack_dir),
2328        exclude,
2329    )?;
2330
2331    let hash = &out[out.len() - 20..];
2332    let hash_hex = hex::encode(hash);
2333    let hash_arr: [u8; 20] = hash
2334        .try_into()
2335        .map_err(|_| Error::CorruptObject("midx hash length mismatch".to_owned()))?;
2336
2337    if opts.incremental {
2338        let root_midx = pack_dir.join("multi-pack-index");
2339        let chain_path = chain_file_path(pack_dir);
2340        let chain_existed = chain_path.exists();
2341
2342        let mut chain = if root_midx.exists() && !chain_existed {
2343            let root_hex = midx_checksum_hex_from_path(&root_midx)?;
2344            link_root_midx_into_chain(pack_dir, &root_hex)?;
2345            vec![root_hex]
2346        } else {
2347            read_chain_layer_hashes(pack_dir).unwrap_or_default()
2348        };
2349
2350        chain.push(hash_hex.clone());
2351
2352        let midx_d = midx_d_dir(pack_dir);
2353        fs::create_dir_all(&midx_d).map_err(Error::Io)?;
2354
2355        let layer_path = midx_d.join(format!("multi-pack-index-{hash_hex}.midx"));
2356        fs::write(&layer_path, &out).map_err(Error::Io)?;
2357
2358        let mut chain_data = String::new();
2359        for h in &chain {
2360            chain_data.push_str(h);
2361            chain_data.push('\n');
2362        }
2363        fs::write(chain_file_path(pack_dir), chain_data.as_bytes()).map_err(Error::Io)?;
2364
2365        clear_stale_split_layers(pack_dir, &chain)?;
2366
2367        let _ = fs::remove_file(pack_dir.join("multi-pack-index"));
2368        scrub_root_midx_sidecars(pack_dir)?;
2369        if bitmap_placeholders {
2370            let full = hex::encode(hash);
2371            fs::write(midx_d.join(format!("multi-pack-index-{full}.bitmap")), [])
2372                .map_err(Error::Io)?;
2373            if opts.write_rev_placeholder {
2374                let rev_path = midx_d.join(format!("multi-pack-index-{full}.rev"));
2375                if let Some(order) = rev_sidecar_order.as_ref() {
2376                    write_midx_rev_sidecar(&rev_path, order, &hash_arr)?;
2377                } else {
2378                    fs::write(rev_path, []).map_err(Error::Io)?;
2379                }
2380            }
2381        }
2382    } else {
2383        // A non-incremental write replaces any prior split layout. Git removes the
2384        // individual incremental layer files inside `multi-pack-index.d/` and
2385        // unlinks the chain file, but never `rmdir`s the directory itself, so an
2386        // empty `multi-pack-index.d/` is left behind (t5334 expects
2387        // `test_dir_is_empty $midxdir` after the conversion).
2388        let dest = pack_dir.join("multi-pack-index");
2389
2390        // Git's `midx_needs_update`: if the new MIDX is byte-identical to the one
2391        // already on disk and we are not (re)writing a bitmap, leave the file
2392        // untouched so its mtime is preserved (t5319 `test_midx_is_retained`).
2393        let bitmap_path = pack_dir.join(format!("multi-pack-index-{hash_hex}.bitmap"));
2394        let bitmap_ok = !opts.write_bitmap_placeholders || bitmap_path.exists();
2395        // Only short-circuit when there is no active incremental chain to collapse;
2396        // an empty leftover `multi-pack-index.d/` (from a prior conversion) must not
2397        // defeat the retention optimization, so key off the chain file, not the dir.
2398        if bitmap_ok && !chain_file_path(pack_dir).exists() {
2399            if let Ok(existing) = fs::read(&dest) {
2400                if existing == out {
2401                    return Ok(());
2402                }
2403            }
2404        }
2405
2406        clear_incremental_midx_files(pack_dir)?;
2407
2408        fs::write(&dest, &out).map_err(Error::Io)?;
2409
2410        scrub_root_midx_sidecars_except(pack_dir, Some(&hash_hex))?;
2411
2412        if opts.write_bitmap_placeholders {
2413            fs::write(
2414                pack_dir.join(format!("multi-pack-index-{hash_hex}.bitmap")),
2415                [],
2416            )
2417            .map_err(Error::Io)?;
2418            if opts.write_rev_placeholder {
2419                let rev_path = pack_dir.join(format!("multi-pack-index-{hash_hex}.rev"));
2420                if let Some(order) = rev_sidecar_order.as_ref() {
2421                    write_midx_rev_sidecar(&rev_path, order, &hash_arr)?;
2422                } else {
2423                    fs::write(rev_path, []).map_err(Error::Io)?;
2424                }
2425            }
2426        }
2427    }
2428
2429    Ok(())
2430}
2431
2432fn pack_names_match_layer(base_name: &str, disk_idx: &str) -> bool {
2433    if base_name == disk_idx {
2434        return true;
2435    }
2436    cmp_idx_or_pack_name(disk_idx, base_name).is_eq()
2437}
2438
/// Failure modes of [`compact_multi_pack_index`], each mapping to one of git's
/// user-facing diagnostics in `cmd_multi_pack_index_compact`.
///
/// The [`std::fmt::Display`] output is the diagnostic text itself. The type also
/// implements `Clone`, `PartialEq` and `Eq` so callers and tests can compare or
/// store errors directly instead of matching on formatted strings.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CompactError {
    /// `--incremental` was requested but no chain exists yet.
    NoChain,
    /// One of the endpoint checksums does not name a layer in the chain. Carries the
    /// raw argument text so the message matches `could not find MIDX: <arg>`.
    MissingEndpoint(String),
    /// Both endpoints resolve to the same layer.
    IdenticalEndpoints,
    /// `from` (argv[0]) is newer than `to` (argv[1]); git requires `from` to be an
    /// ancestor of `to`. Carries `(from, to)` arg text for the diagnostic.
    NotAncestor(String, String),
    /// Compaction was requested with the v1 on-disk MIDX format.
    V1Format,
    /// Any underlying I/O or parse failure, carried as its rendered message.
    Other(String),
}

impl std::fmt::Display for CompactError {
    /// Renders the diagnostic text for this failure (no trailing newline).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::NoChain => f.write_str("no multi-pack-index chain to compact"),
            Self::MissingEndpoint(arg) => write!(f, "could not find MIDX: {arg}"),
            Self::IdenticalEndpoints => f.write_str("MIDX compaction endpoints must be unique"),
            Self::NotAncestor(from, to) => {
                write!(f, "MIDX {from} must be an ancestor of {to}")
            }
            Self::V1Format => f.write_str("cannot perform MIDX compaction with v1 format"),
            // `Other` already holds a fully formatted message; pass it through as-is.
            Self::Other(message) => f.write_str(message),
        }
    }
}
2475
2476impl From<Error> for CompactError {
2477    fn from(e: Error) -> Self {
2478        CompactError::Other(e.to_string())
2479    }
2480}
2481
2482/// Collect every OID provided by the chain layers in `hashes` (each layer file is
2483/// self-contained: it lists only its own incremental objects).
2484fn collect_layer_oids(pack_dir: &Path, hashes: &[String]) -> Result<HashSet<ObjectId>> {
2485    let mut oids = HashSet::new();
2486    for h in hashes {
2487        let p = midx_d_dir(pack_dir).join(format!("multi-pack-index-{h}.midx"));
2488        let data = load_midx_file(&p)?;
2489        let (layer_oids, _) = oids_and_packs_from_midx_data(&data)?;
2490        oids.extend(layer_oids);
2491    }
2492    Ok(oids)
2493}
2494
2495/// Pack idx basenames listed by a single chain layer, in the layer's stored order.
2496fn layer_pack_names(pack_dir: &Path, hash: &str) -> Result<Vec<String>> {
2497    let p = midx_d_dir(pack_dir).join(format!("multi-pack-index-{hash}.midx"));
2498    let data = load_midx_file(&p)?;
2499    let (_, hdr_end, _) = parse_midx_header(&data)?;
2500    let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
2501    parse_pack_names_blob(&data[pn_off..pn_off + pn_len])
2502}
2503
2504/// `git multi-pack-index compact <from> <to>`: merge the inclusive chain range
2505/// `[from..to]` (oldest→newest, matching git's `from`=argv[0] / `to`=argv[1]) into a
2506/// single new incremental layer, preserving pack order, and rewrite the chain as
2507/// `[layers before from] + [compacted layer] + [layers after to]`.
2508///
2509/// Mirrors `write_midx_file_compact` (git/midx-write.c). Because grit's chain layers
2510/// are self-contained (each lists only its own packs/objects), layers outside the
2511/// compacted range keep their existing files and checksums untouched.
2512pub fn compact_multi_pack_index(
2513    pack_dir: &Path,
2514    from_arg: &str,
2515    to_arg: &str,
2516    write_bitmaps: bool,
2517    write_rev: bool,
2518    version: Option<u8>,
2519) -> std::result::Result<(), CompactError> {
2520    if version == Some(MIDX_VERSION_V1) {
2521        return Err(CompactError::V1Format);
2522    }
2523
2524    let chain = read_chain_layer_hashes(pack_dir).map_err(|_| CompactError::NoChain)?;
2525    if chain.is_empty() {
2526        return Err(CompactError::NoChain);
2527    }
2528
2529    let from_hex = from_arg.to_ascii_lowercase();
2530    let to_hex = to_arg.to_ascii_lowercase();
2531
2532    let from_pos = chain.iter().position(|h| *h == from_hex);
2533    let to_pos = chain.iter().position(|h| *h == to_hex);
2534
2535    // Match git: report `from` first, then `to`, when an endpoint is missing.
2536    let Some(from_pos) = from_pos else {
2537        return Err(CompactError::MissingEndpoint(from_arg.to_string()));
2538    };
2539    let Some(to_pos) = to_pos else {
2540        return Err(CompactError::MissingEndpoint(to_arg.to_string()));
2541    };
2542
2543    if from_pos == to_pos {
2544        return Err(CompactError::IdenticalEndpoints);
2545    }
2546    // git walks `base_midx` from `from`; reaching `to` means `from` is an ancestor of
2547    // `to`, i.e. `from` is newer (higher chain index) than `to`. That is the reverse
2548    // of what compaction expects, so report the "must be an ancestor" error.
2549    if from_pos > to_pos {
2550        return Err(CompactError::NotAncestor(
2551            from_arg.to_string(),
2552            to_arg.to_string(),
2553        ));
2554    }
2555
2556    // Layers strictly before `from` form the base; their objects are excluded from
2557    // the compacted layer.
2558    let base_hashes = &chain[..from_pos];
2559    let merged_hashes = &chain[from_pos..=to_pos];
2560    let upper_hashes = &chain[to_pos + 1..];
2561
2562    let base_oids = collect_layer_oids(pack_dir, base_hashes)?;
2563
2564    // Gather the merged layers' pack idx names in chain order (oldest layer first),
2565    // preserving each layer's internal order (git's `fill_packs_from_midx_range`).
2566    let mut ordered_idx_names: Vec<String> = Vec::new();
2567    for h in merged_hashes {
2568        for name in layer_pack_names(pack_dir, h)? {
2569            if !ordered_idx_names.contains(&name) {
2570                ordered_idx_names.push(name);
2571            }
2572        }
2573    }
2574
2575    if ordered_idx_names.is_empty() {
2576        return Err(CompactError::Other(
2577            "no packs found in compaction range".to_owned(),
2578        ));
2579    }
2580
2581    // Load the pack indexes in the resolved order.
2582    let mut indexes: Vec<PackIndex> = Vec::with_capacity(ordered_idx_names.len());
2583    for name in &ordered_idx_names {
2584        let path = pack_dir.join(name);
2585        indexes.push(crate::pack::read_pack_index_no_verify(&path)?);
2586    }
2587
2588    // When writing a bitmap, git sets the preferred pack to the first (oldest) pack
2589    // of the compacted range so its objects win duplicate selection.
2590    let preferred_idx = if write_bitmaps { Some(0usize) } else { None };
2591
2592    let exclude = if base_oids.is_empty() {
2593        None
2594    } else {
2595        Some(&base_oids)
2596    };
2597
2598    let (out, rev_sidecar_order) = build_midx_bytes_filtered(
2599        &ordered_idx_names,
2600        &indexes,
2601        preferred_idx,
2602        write_bitmaps,
2603        write_rev,
2604        version.unwrap_or(MIDX_VERSION_V2),
2605        repo_midx_hash_version(pack_dir),
2606        exclude,
2607    )?;
2608
2609    let hash = &out[out.len() - 20..];
2610    let hash_hex = hex::encode(hash);
2611    let hash_arr: [u8; 20] = hash
2612        .try_into()
2613        .map_err(|_| CompactError::Other("midx hash length mismatch".to_owned()))?;
2614
2615    let midx_d = midx_d_dir(pack_dir);
2616    fs::create_dir_all(&midx_d).map_err(Error::Io)?;
2617
2618    let layer_path = midx_d.join(format!("multi-pack-index-{hash_hex}.midx"));
2619    fs::write(&layer_path, &out).map_err(Error::Io)?;
2620
2621    // New chain: base layers, the compacted layer, then the untouched upper layers.
2622    let mut new_chain: Vec<String> = Vec::new();
2623    new_chain.extend(base_hashes.iter().cloned());
2624    new_chain.push(hash_hex.clone());
2625    new_chain.extend(upper_hashes.iter().cloned());
2626
2627    let mut chain_data = String::new();
2628    for h in &new_chain {
2629        chain_data.push_str(h);
2630        chain_data.push('\n');
2631    }
2632    fs::write(chain_file_path(pack_dir), chain_data.as_bytes()).map_err(Error::Io)?;
2633
2634    if write_bitmaps {
2635        fs::write(
2636            midx_d.join(format!("multi-pack-index-{hash_hex}.bitmap")),
2637            [],
2638        )
2639        .map_err(Error::Io)?;
2640        let rev_path = midx_d.join(format!("multi-pack-index-{hash_hex}.rev"));
2641        if write_rev {
2642            if let Some(order) = rev_sidecar_order.as_ref() {
2643                write_midx_rev_sidecar(&rev_path, order, &hash_arr)?;
2644            } else {
2645                fs::write(rev_path, []).map_err(Error::Io)?;
2646            }
2647        }
2648    }
2649
2650    // Drop the now-removed range layers and their sidecars.
2651    clear_stale_split_layers(pack_dir, &new_chain)?;
2652
2653    Ok(())
2654}
2655
2656fn scrub_root_midx_sidecars(pack_dir: &Path) -> Result<()> {
2657    scrub_root_midx_sidecars_except(pack_dir, None)
2658}
2659
2660fn scrub_root_midx_sidecars_except(pack_dir: &Path, keep_hex: Option<&str>) -> Result<()> {
2661    let Ok(rd) = fs::read_dir(pack_dir) else {
2662        return Ok(());
2663    };
2664    for ent in rd {
2665        let ent = ent.map_err(Error::Io)?;
2666        let name = ent.file_name().to_string_lossy().to_string();
2667        let Some(rest) = name.strip_prefix("multi-pack-index-") else {
2668            continue;
2669        };
2670        if !(rest.ends_with(".bitmap") || rest.ends_with(".rev")) {
2671            continue;
2672        }
2673        let hash_part = rest
2674            .strip_suffix(".bitmap")
2675            .or_else(|| rest.strip_suffix(".rev"))
2676            .unwrap_or(rest);
2677        // Git's `clear_midx_files_ext` removes any `multi-pack-index-<hash>.<ext>`
2678        // sidecar that does not belong to the current MIDX, regardless of the
2679        // hash's textual length (t5319 plants a `multi-pack-index-abc.rev`).
2680        if keep_hex.is_some_and(|k| k == hash_part) {
2681            continue;
2682        }
2683        let _ = fs::remove_file(ent.path());
2684    }
2685    Ok(())
2686}