Skip to main content

grit_lib/
midx.rs

1//! Multi-pack-index (MIDX) file writing and minimal reading.
2//!
3//! Writes a Git-compatible `multi-pack-index` file (version 1, SHA-1) covering
4//! selected `pack-*.idx` files. Objects that appear in multiple packs keep the
5//! preferred pack's copy when `preferred_pack_idx` is set (matching Git's
6//! geometric repack tests).
7//!
8//! Incremental writes follow Git's split layout: layers live under
9//! `pack/multi-pack-index.d/multi-pack-index-<sha1>.midx` with ordering in
10//! `multi-pack-index-chain` (oldest hash first, newest last).
11
12use std::collections::{HashMap, HashSet};
13use std::fs;
14use std::io::{BufRead, BufReader};
15use std::path::Path;
16
17use sha1::{Digest, Sha1};
18
19use crate::error::{Error, Result};
20use crate::objects::ObjectId;
21use crate::pack::{read_pack_index, PackIndex};
22
/// Magic number at the start of a multi-pack-index file (ASCII `MIDX`).
const MIDX_SIGNATURE: u32 = 0x4d49_4458;
/// The only MIDX format version the header parser accepts.
const MIDX_VERSION_V1: u8 = 1;
/// Hash-version byte written into the header by the writer (SHA-1).
const HASH_VERSION_SHA1: u8 = 1;
/// Header size in bytes: signature (4), version, hash version, chunk count,
/// one zero byte, then the pack count (4).
const MIDX_HEADER_SIZE: usize = 12;
/// Size of one chunk table-of-contents entry: 4-byte id plus 8-byte offset.
const CHUNK_TOC_ENTRY_SIZE: usize = 12;
/// Chunk id `PNAM`: NUL-terminated pack index names.
const MIDX_CHUNKID_PACKNAMES: u32 = 0x504e_414d;
/// Chunk id `OIDF`: 256-entry fanout table over the first object-id byte.
const MIDX_CHUNKID_OIDFANOUT: u32 = 0x4f49_4446;
/// Chunk id `OIDL`: sorted 20-byte object ids.
const MIDX_CHUNKID_OIDLOOKUP: u32 = 0x4f49_444c;
/// Chunk id `OOFF`: per-object pack id and 32-bit offset word.
const MIDX_CHUNKID_OBJECTOFFSETS: u32 = 0x4f4f_4646;
/// Chunk id `LOFF`: 64-bit offsets referenced from `OOFF`.
const MIDX_CHUNKID_LARGEOFFSETS: u32 = 0x4c4f_4646;
/// Chunk id `RIDX`: embedded reverse index (same value as [`RIDX_SIGNATURE`]).
const MIDX_CHUNKID_REVINDEX: u32 = 0x5249_4458;
/// Chunk id `BTMP`: per-pack bitmap position / count pairs.
const MIDX_CHUNKID_BITMAPPED_PACKS: u32 = 0x4254_4d50;

// Git `pack-revindex.h` / `pack-write.c` (standalone `.rev` next to MIDX).
/// Magic number at the start of a standalone `.rev` file (ASCII `RIDX`).
const RIDX_SIGNATURE: u32 = 0x5249_4458;
/// Version word written into the `.rev` header.
const RIDX_VERSION: u32 = 1;
/// `.rev` header size in bytes: three 4-byte words.
const RIDX_HEADER_SIZE: usize = 12;
/// Byte alignment to which the pack-names and bitmapped-packs chunks are padded.
const MIDX_CHUNK_ALIGNMENT: usize = 4;

// `git midx.h` (MIDX_LARGE_OFFSET_NEEDED).
/// High bit of an `OOFF` offset word; when set, the low 31 bits are a slot
/// number in the large-offsets chunk rather than an offset.
const MIDX_LARGE_OFFSET_NEEDED: u32 = 0x8000_0000;
44
/// One object selected for the multi-pack index, remembering which pack copy won.
struct MidxEntry {
    /// Object id of the entry.
    oid: ObjectId,
    /// Position of the owning pack in the list of packs being indexed.
    pack_id: u32,
    /// Byte offset of the object inside that pack.
    offset: u64,
    /// Modification time of the owning pack, used to break ties between copies.
    pack_mtime: std::time::SystemTime,
}
51
/// Options for writing a multi-pack index (extension of the simple writer).
///
/// Every field defaults to `None` / `false` through the derived [`Default`].
#[derive(Debug, Clone, Default)]
pub struct WriteMultiPackIndexOptions {
    /// When set, objects also present in other packs are taken from this pack
    /// (`pack_names` index in the sorted name list).
    pub preferred_pack_idx: Option<u32>,
    /// Basename of the preferred pack (e.g. `pack-abc.idx` or `pack-abc.pack`); resolved against
    /// the working pack name list after optional subset filtering.
    pub preferred_pack_name: Option<String>,
    /// If set, only these `pack-*.idx` basenames are included, in this order (Git `--stdin-packs`).
    pub pack_names_subset_ordered: Option<Vec<String>>,
    /// When true, append RIDX + BTMP placeholder chunks so `test-tool read-midx --bitmap` succeeds.
    pub write_bitmap_placeholders: bool,
    /// When true, write a new layer in `multi-pack-index.d/` and extend the chain file
    /// instead of replacing `pack/multi-pack-index`.
    pub incremental: bool,
    /// When true with [`Self::write_bitmap_placeholders`], also create a placeholder `.rev`
    /// sidecar (Git `GIT_TEST_MIDX_WRITE_REV` compatibility).
    pub write_rev_placeholder: bool,
}
72
73fn normalize_pack_idx_basename(raw: &str) -> Result<String> {
74    let t = raw.trim();
75    let t = std::path::Path::new(t)
76        .file_name()
77        .and_then(|s| s.to_str())
78        .unwrap_or(t);
79    let t = t.strip_prefix("./").unwrap_or(t);
80    if t.ends_with(".idx") {
81        Ok(t.to_string())
82    } else if t.ends_with(".pack") {
83        Ok(format!("{}.idx", t.strip_suffix(".pack").unwrap_or(t)))
84    } else {
85        Ok(format!("{t}.idx"))
86    }
87}
88
/// The part of the fixed MIDX header that later parsing steps need.
struct MidxFileHeader {
    /// Number of chunk entries in the table of contents (header byte 6).
    num_chunks: u8,
}
92
93fn parse_midx_header(data: &[u8]) -> Result<(MidxFileHeader, usize, u8)> {
94    if data.len() < MIDX_HEADER_SIZE + 20 {
95        return Err(Error::CorruptObject("midx file too small".to_owned()));
96    }
97    let sig = u32::from_be_bytes(data[0..4].try_into().unwrap());
98    if sig != MIDX_SIGNATURE {
99        return Err(Error::CorruptObject("bad MIDX signature".to_owned()));
100    }
101    let version = data[4];
102    if version != MIDX_VERSION_V1 {
103        return Err(Error::CorruptObject(format!(
104            "unsupported MIDX version {version}"
105        )));
106    }
107    let object_hash_bytes = data[5];
108    let num_chunks = data[6];
109    let _num_packs = u32::from_be_bytes(data[8..12].try_into().unwrap());
110    Ok((
111        MidxFileHeader { num_chunks },
112        MIDX_HEADER_SIZE,
113        object_hash_bytes,
114    ))
115}
116
117fn parse_pack_names_blob(pn: &[u8]) -> Result<Vec<String>> {
118    let mut names = Vec::new();
119    let mut start = 0usize;
120    for (i, &b) in pn.iter().enumerate() {
121        if b == 0 && i >= start {
122            if i > start {
123                let s = std::str::from_utf8(&pn[start..i])
124                    .map_err(|_| Error::CorruptObject("non-utf8 pack name in MIDX".to_owned()))?;
125                names.push(s.to_string());
126            }
127            start = i + 1;
128        }
129    }
130    Ok(names)
131}
132
/// Order a pack basename (ending in `.pack` or `.idx`) against an MIDX pack name (`.idx`).
///
/// After the shared prefix, a leftover of exactly `pack` versus `idx` counts
/// as equal; otherwise the remaining bytes are compared lexicographically.
fn cmp_idx_or_pack_name(idx_or_pack_name: &str, idx_name: &str) -> std::cmp::Ordering {
    let lhs = idx_or_pack_name.as_bytes();
    let rhs = idx_name.as_bytes();
    // Length of the common leading run of bytes.
    let shared = lhs.iter().zip(rhs).take_while(|(x, y)| x == y).count();
    let (lhs_tail, rhs_tail) = (&lhs[shared..], &rhs[shared..]);
    if lhs_tail == b"pack" && rhs_tail == b"idx" {
        std::cmp::Ordering::Equal
    } else {
        lhs_tail.cmp(rhs_tail)
    }
}
149
150fn preferred_pack_index_by_mtime(pack_dir: &Path, names: &[String]) -> Result<Option<usize>> {
151    let mut best: Option<(usize, std::time::SystemTime)> = None;
152    for (i, n) in names.iter().enumerate() {
153        let meta = fs::metadata(pack_dir.join(n)).map_err(Error::Io)?;
154        let mtime = meta.modified().map_err(Error::Io)?;
155        match best {
156            None => best = Some((i, mtime)),
157            Some((_, t)) if mtime < t => best = Some((i, mtime)),
158            _ => {}
159        }
160    }
161    Ok(best.map(|(i, _)| i))
162}
163
/// Directory under `pack_dir` that holds the split (incremental) MIDX layers.
fn midx_d_dir(pack_dir: &Path) -> std::path::PathBuf {
    let mut dir = pack_dir.to_path_buf();
    dir.push("multi-pack-index.d");
    dir
}
167
168fn chain_file_path(pack_dir: &Path) -> std::path::PathBuf {
169    midx_d_dir(pack_dir).join("multi-pack-index-chain")
170}
171
172fn read_chain_layer_hashes(pack_dir: &Path) -> Result<Vec<String>> {
173    let path = chain_file_path(pack_dir);
174    let f = fs::File::open(&path).map_err(Error::Io)?;
175    let mut out = Vec::new();
176    for line in BufReader::new(f).lines() {
177        let line = line.map_err(Error::Io)?;
178        let t = line.trim();
179        if t.is_empty() {
180            continue;
181        }
182        if t.len() != 40 || !t.chars().all(|c| c.is_ascii_hexdigit()) {
183            return Err(Error::CorruptObject(format!(
184                "invalid multi-pack-index chain line: {t}"
185            )));
186        }
187        out.push(t.to_ascii_lowercase());
188    }
189    Ok(out)
190}
191
192/// Resolve the path to the newest MIDX layer (root `multi-pack-index` or last chain entry).
193pub fn resolve_tip_midx_path(pack_dir: &Path) -> Option<std::path::PathBuf> {
194    let root = pack_dir.join("multi-pack-index");
195    if root.exists() {
196        return Some(root);
197    }
198    let hashes = read_chain_layer_hashes(pack_dir).ok()?;
199    let last = hashes.last()?;
200    Some(midx_d_dir(pack_dir).join(format!("multi-pack-index-{last}.midx")))
201}
202
203fn load_midx_file(path: &Path) -> Result<Vec<u8>> {
204    let data = fs::read(path).map_err(Error::Io)?;
205    let _ = parse_midx_header(&data)?;
206    Ok(data)
207}
208
209fn oids_and_packs_from_midx_data(data: &[u8]) -> Result<(HashSet<ObjectId>, Vec<String>)> {
210    let (_, hdr_end, _) = parse_midx_header(data)?;
211    let (pn_off, pn_len) = find_chunk(data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
212    let pack_names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
213    let (_ooff_off, ooff_len) = find_chunk(data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
214    let (oidl_off, oidl_len) = find_chunk(data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
215    let num_objects = ooff_len / 8;
216    if oidl_len != num_objects * 20 {
217        return Err(Error::CorruptObject(
218            "MIDX oid-lookup size mismatch".to_owned(),
219        ));
220    }
221    let mut oids = HashSet::with_capacity(num_objects);
222    for i in 0..num_objects {
223        let start = oidl_off + i * 20;
224        let oid = ObjectId::from_bytes(&data[start..start + 20])?;
225        oids.insert(oid);
226    }
227    Ok((oids, pack_names))
228}
229
230fn collect_incremental_base(pack_dir: &Path) -> Result<(HashSet<ObjectId>, HashSet<String>)> {
231    let mut oids = HashSet::new();
232    let mut packs = HashSet::new();
233    let root = pack_dir.join("multi-pack-index");
234    let chain_path = chain_file_path(pack_dir);
235    if chain_path.exists() {
236        for h in read_chain_layer_hashes(pack_dir)? {
237            let p = midx_d_dir(pack_dir).join(format!("multi-pack-index-{h}.midx"));
238            let data = load_midx_file(&p)?;
239            let (layer_oids, names) = oids_and_packs_from_midx_data(&data)?;
240            oids.extend(layer_oids);
241            for n in names {
242                packs.insert(n);
243            }
244        }
245        return Ok((oids, packs));
246    }
247    if root.exists() {
248        let data = load_midx_file(&root)?;
249        let (o, names) = oids_and_packs_from_midx_data(&data)?;
250        oids = o;
251        for n in names {
252            packs.insert(n);
253        }
254    }
255    Ok((oids, packs))
256}
257
258fn midx_checksum_hex_from_path(path: &Path) -> Result<String> {
259    let data = fs::read(path).map_err(Error::Io)?;
260    if data.len() < 20 {
261        return Err(Error::CorruptObject(
262            "midx too small for checksum".to_owned(),
263        ));
264    }
265    let hash = &data[data.len() - 20..];
266    Ok(hex::encode(hash))
267}
268
269fn hard_link_or_copy(src: &Path, dst: &Path) -> Result<()> {
270    let _ = fs::remove_file(dst);
271    if fs::hard_link(src, dst).is_ok() {
272        return Ok(());
273    }
274    fs::copy(src, dst).map_err(Error::Io)?;
275    Ok(())
276}
277
278fn link_root_midx_into_chain(pack_dir: &Path, root_checksum_hex: &str) -> Result<()> {
279    let midx_d = midx_d_dir(pack_dir);
280    fs::create_dir_all(&midx_d).map_err(Error::Io)?;
281    let dst_midx = midx_d.join(format!("multi-pack-index-{root_checksum_hex}.midx"));
282    hard_link_or_copy(&pack_dir.join("multi-pack-index"), &dst_midx)?;
283    let exts = ["bitmap", "rev"];
284    for ext in exts {
285        let src = pack_dir.join(format!("multi-pack-index-{root_checksum_hex}.{ext}"));
286        if src.exists() {
287            let dst = midx_d.join(format!("multi-pack-index-{root_checksum_hex}.{ext}"));
288            hard_link_or_copy(&src, &dst)?;
289        }
290    }
291    Ok(())
292}
293
294fn clear_stale_split_layers(pack_dir: &Path, keep: &[String]) -> Result<()> {
295    let midx_d = midx_d_dir(pack_dir);
296    if !midx_d.exists() {
297        return Ok(());
298    }
299    let keep: HashSet<&str> = keep.iter().map(|s| s.as_str()).collect();
300    for ent in fs::read_dir(&midx_d).map_err(Error::Io)? {
301        let ent = ent.map_err(Error::Io)?;
302        let name = ent.file_name().to_string_lossy().to_string();
303        let Some(rest) = name.strip_prefix("multi-pack-index-") else {
304            continue;
305        };
306        let Some((hash_part, _ext)) = rest.split_once('.') else {
307            continue;
308        };
309        if hash_part.len() == 40 && !keep.contains(hash_part) {
310            let _ = fs::remove_file(ent.path());
311        }
312    }
313    Ok(())
314}
315
316fn pack_mtime_for_midx(idx: &PackIndex) -> std::time::SystemTime {
317    fs::metadata(&idx.pack_path)
318        .and_then(|m| m.modified())
319        .unwrap_or(std::time::SystemTime::UNIX_EPOCH)
320}
321
322fn midx_pick_better_entry(
323    cur: &MidxEntry,
324    cand_pack: u32,
325    cand_offset: u64,
326    cand_mtime: std::time::SystemTime,
327    preferred_pack: Option<u32>,
328) -> bool {
329    let cur_pref = preferred_pack == Some(cur.pack_id);
330    let new_pref = preferred_pack == Some(cand_pack);
331    if new_pref && !cur_pref {
332        return true;
333    }
334    if cur_pref && !new_pref {
335        return false;
336    }
337    match cand_mtime.cmp(&cur.pack_mtime) {
338        std::cmp::Ordering::Greater => true,
339        std::cmp::Ordering::Less => false,
340        std::cmp::Ordering::Equal => {
341            if cand_pack != cur.pack_id {
342                cand_pack < cur.pack_id
343            } else {
344                cand_offset < cur.offset
345            }
346        }
347    }
348}
349
/// Serialize a complete multi-pack-index file image for the given packs.
///
/// `idx_names` supplies the names written to the pack-names chunk; `indexes`
/// supplies the objects, and a pack's id is its position in `indexes`.
/// NOTE(review): the two slices are presumably parallel (same length and
/// order) — nothing here checks that; confirm at the call site.
///
/// `preferred_idx` names the pack whose copy of a duplicated object wins and
/// whose objects sort first in the reverse index. `write_bitmap_placeholders`
/// adds a `BTMP` chunk, and `omit_embedded_ridx_chunk` suppresses the embedded
/// `RIDX` chunk.
///
/// Returns the file bytes (header, chunk table, chunks, trailing SHA-1) and,
/// only when both `omit_embedded_ridx_chunk` and `write_bitmap_placeholders`
/// are set, the reverse-index order for a standalone `.rev` sidecar.
///
/// # Errors
///
/// `Error::CorruptObject` when a count or offset does not fit the format
/// limits enforced below.
fn build_midx_bytes(
    idx_names: &[String],
    indexes: &[PackIndex],
    preferred_idx: Option<usize>,
    write_bitmap_placeholders: bool,
    omit_embedded_ridx_chunk: bool,
) -> Result<(Vec<u8>, Option<Vec<u32>>)> {
    let preferred_pack_idx = preferred_idx.map(|p| p as u32);
    let pack_mtimes: Vec<std::time::SystemTime> = indexes.iter().map(pack_mtime_for_midx).collect();

    // Keep exactly one entry per object id; `midx_pick_better_entry` arbitrates
    // between copies found in several packs.
    let mut best: HashMap<ObjectId, MidxEntry> = HashMap::new();
    for (pack_id, idx) in indexes.iter().enumerate() {
        let pack_id = u32::try_from(pack_id).map_err(|_| {
            Error::CorruptObject("too many pack files for multi-pack-index".to_owned())
        })?;
        let mtime = pack_mtimes[pack_id as usize];
        for e in &idx.entries {
            // Only 20-byte ids are indexed; anything else is skipped silently.
            if e.oid.len() != 20 {
                continue;
            }
            let Ok(oid) = ObjectId::from_bytes(&e.oid) else {
                continue;
            };
            let cand = MidxEntry {
                oid,
                pack_id,
                offset: e.offset,
                pack_mtime: mtime,
            };
            match best.get(&oid) {
                None => {
                    best.insert(oid, cand);
                }
                Some(cur) => {
                    if midx_pick_better_entry(cur, pack_id, e.offset, mtime, preferred_pack_idx) {
                        best.insert(oid, cand);
                    }
                }
            }
        }
    }

    // The lookup, offset, and fanout chunks all rely on object-id order.
    let mut entries: Vec<MidxEntry> = best.into_values().collect();
    entries.sort_by(|a, b| a.oid.cmp(&b.oid));

    let mut large_offsets: Vec<u64> = Vec::new();
    // NOTE(review): this guard rejects every offset above `u32::MAX`, so the
    // large-offset path below only ever sees offsets in 2^31..=2^32-1 even
    // though the LOFF chunk stores full 64-bit values — confirm the limit is
    // intended.
    for e in &entries {
        if e.offset > u64::from(u32::MAX) {
            return Err(Error::CorruptObject(
                "object offset does not fit in multi-pack-index".to_owned(),
            ));
        }
    }

    let num_packs = indexes.len() as u32;

    // PNAM: NUL-terminated names, zero-padded to the chunk alignment.
    let mut pack_names_blob = Vec::new();
    for name in idx_names {
        pack_names_blob.extend_from_slice(name.as_bytes());
        pack_names_blob.push(0);
    }
    let pad = (MIDX_CHUNK_ALIGNMENT - (pack_names_blob.len() % MIDX_CHUNK_ALIGNMENT))
        % MIDX_CHUNK_ALIGNMENT;
    pack_names_blob.extend(std::iter::repeat_n(0u8, pad));
    let chunk_pnam = pack_names_blob;

    // OIDF: slot `i` holds the number of objects whose first id byte is <= i.
    let mut chunk_oidf = vec![0u8; 256 * 4];
    let mut j = 0usize;
    for i in 0..256 {
        while j < entries.len() && entries[j].oid.as_bytes()[0] <= i as u8 {
            j += 1;
        }
        chunk_oidf[i * 4..(i + 1) * 4].copy_from_slice(&(j as u32).to_be_bytes());
    }

    // OIDL: the sorted object ids back to back.
    let mut chunk_oidl = Vec::with_capacity(entries.len() * 20);
    for e in &entries {
        chunk_oidl.extend_from_slice(e.oid.as_bytes());
    }

    // OOFF: per object, the pack id followed by a 32-bit offset word. Offsets
    // with the top bit set are moved to `large_offsets` and replaced by the
    // flag bit plus their slot number.
    let mut chunk_ooff = Vec::with_capacity(entries.len() * 8);
    for e in &entries {
        chunk_ooff.extend_from_slice(&e.pack_id.to_be_bytes());
        let needs_large = e.offset >= u64::from(MIDX_LARGE_OFFSET_NEEDED);
        let encoded = if needs_large {
            let slot = u32::try_from(large_offsets.len()).map_err(|_| {
                Error::CorruptObject("too many large offsets in multi-pack-index".to_owned())
            })?;
            large_offsets.push(e.offset);
            MIDX_LARGE_OFFSET_NEEDED | slot
        } else {
            u32::try_from(e.offset).map_err(|_| {
                Error::CorruptObject("object offset overflow in multi-pack-index".to_owned())
            })?
        };
        chunk_ooff.extend_from_slice(&encoded.to_be_bytes());
    }

    // LOFF: the collected 64-bit offsets; the chunk is left out when empty.
    let chunk_loff: Vec<u8> = if large_offsets.is_empty() {
        Vec::new()
    } else {
        let mut v = Vec::with_capacity(large_offsets.len() * 8);
        for off in &large_offsets {
            v.extend_from_slice(&off.to_be_bytes());
        }
        v
    };

    // Reverse-index order: preferred-pack objects first, then by pack id and
    // in-pack offset; the values are indexes into the sorted `entries`.
    let pref = preferred_pack_idx;
    let mut order: Vec<u32> = (0..entries.len() as u32).collect();
    order.sort_by(|&ai, &bi| {
        let a = &entries[ai as usize];
        let b = &entries[bi as usize];
        let a_pref = pref == Some(a.pack_id);
        let b_pref = pref == Some(b.pack_id);
        b_pref
            .cmp(&a_pref)
            .then_with(|| a.pack_id.cmp(&b.pack_id))
            .then_with(|| a.offset.cmp(&b.offset))
            .then_with(|| ai.cmp(&bi))
    });

    let mut chunk_ridx = Vec::with_capacity(entries.len() * 4);
    for oid_idx in &order {
        chunk_ridx.extend_from_slice(&oid_idx.to_be_bytes());
    }

    // BTMP: per-pack (bitmap_pos, bitmap_nr) in the pseudo-bitmap namespace, matching Git's
    // `write_midx_bitmapped_packs` (cumulative start + object count per pack).
    let rev_sidecar_order = if omit_embedded_ridx_chunk && write_bitmap_placeholders {
        Some(order.clone())
    } else {
        None
    };
    // NOTE(review): the per-pack count below is the pack's `.idx` entry count,
    // not the number of MIDX entries selected from that pack; the two differ
    // when objects are duplicated across packs — confirm this is acceptable.
    let chunk_btmp: Vec<u8> = if write_bitmap_placeholders {
        let mut v = Vec::new();
        let mut cumulative = 0u32;
        for idx in indexes {
            let n = u32::try_from(idx.entries.len()).map_err(|_| {
                Error::CorruptObject("too many objects in pack for MIDX BTMP".to_owned())
            })?;
            v.extend_from_slice(&cumulative.to_be_bytes());
            v.extend_from_slice(&n.to_be_bytes());
            cumulative = cumulative.saturating_add(n);
        }
        let pad = (MIDX_CHUNK_ALIGNMENT - (v.len() % MIDX_CHUNK_ALIGNMENT)) % MIDX_CHUNK_ALIGNMENT;
        v.extend(std::iter::repeat_n(0u8, pad));
        v
    } else {
        Vec::new()
    };

    // Chunks in file order; the optional ones are appended only when needed.
    let mut chunks: Vec<(u32, Vec<u8>)> = vec![
        (MIDX_CHUNKID_PACKNAMES, chunk_pnam),
        (MIDX_CHUNKID_OIDFANOUT, chunk_oidf),
        (MIDX_CHUNKID_OIDLOOKUP, chunk_oidl),
        (MIDX_CHUNKID_OBJECTOFFSETS, chunk_ooff),
    ];
    if !chunk_loff.is_empty() {
        chunks.push((MIDX_CHUNKID_LARGEOFFSETS, chunk_loff));
    }
    if (pref.is_some() || write_bitmap_placeholders) && !omit_embedded_ridx_chunk {
        chunks.push((MIDX_CHUNKID_REVINDEX, chunk_ridx));
    }
    if write_bitmap_placeholders {
        chunks.push((MIDX_CHUNKID_BITMAPPED_PACKS, chunk_btmp));
    }

    let num_chunks: u8 = chunks
        .len()
        .try_into()
        .map_err(|_| Error::CorruptObject("too many MIDX chunks".to_owned()))?;

    // Table of contents: one (id, absolute offset) entry per chunk plus a
    // terminating entry with id 0 whose offset marks the end of the last chunk.
    let mut body = Vec::new();
    let mut cur_offset =
        MIDX_HEADER_SIZE as u64 + ((chunks.len() + 1) * CHUNK_TOC_ENTRY_SIZE) as u64;

    for (id, data) in &chunks {
        body.extend_from_slice(&id.to_be_bytes());
        body.extend_from_slice(&cur_offset.to_be_bytes());
        cur_offset += data.len() as u64;
    }
    body.extend_from_slice(&0u32.to_be_bytes());
    body.extend_from_slice(&cur_offset.to_be_bytes());

    for (_, data) in &chunks {
        body.extend_from_slice(data);
    }

    // Fixed header: signature, version, hash version, chunk count, a zero
    // byte, and the pack count.
    let mut out = Vec::with_capacity(MIDX_HEADER_SIZE + body.len() + 20);
    out.extend_from_slice(&MIDX_SIGNATURE.to_be_bytes());
    out.push(MIDX_VERSION_V1);
    out.push(HASH_VERSION_SHA1);
    out.push(num_chunks);
    out.push(0);
    out.extend_from_slice(&num_packs.to_be_bytes());
    out.extend_from_slice(&body);

    // Trailer: SHA-1 over everything written so far.
    let mut hasher = Sha1::new();
    hasher.update(&out);
    let hash = hasher.finalize();
    out.extend_from_slice(&hash);

    Ok((out, rev_sidecar_order))
}
555
556/// Standalone MIDX `.rev` file (Git `write_rev_file_order` / `RIDX_SIGNATURE`).
557fn write_midx_rev_sidecar(
558    path: &Path,
559    pack_order: &[u32],
560    midx_file_hash: &[u8; 20],
561) -> Result<()> {
562    let mut body = Vec::with_capacity(RIDX_HEADER_SIZE + pack_order.len() * 4 + 20);
563    body.extend_from_slice(&RIDX_SIGNATURE.to_be_bytes());
564    body.extend_from_slice(&RIDX_VERSION.to_be_bytes());
565    body.extend_from_slice(&1u32.to_be_bytes());
566    for idx in pack_order {
567        body.extend_from_slice(&idx.to_be_bytes());
568    }
569    body.extend_from_slice(midx_file_hash);
570    fs::write(path, body).map_err(Error::Io)
571}
572
573fn find_chunk(data: &[u8], header_end: usize, chunk_id: u32) -> Result<(usize, usize)> {
574    let (hdr, _, _) = parse_midx_header(data)?;
575    let n = hdr.num_chunks as usize;
576    let pos = header_end;
577    let toc_end = pos + (n + 1) * CHUNK_TOC_ENTRY_SIZE;
578    if data.len() < toc_end + 20 {
579        return Err(Error::CorruptObject(
580            "truncated MIDX chunk table".to_owned(),
581        ));
582    }
583    for i in 0..n {
584        let base = pos + i * CHUNK_TOC_ENTRY_SIZE;
585        let id = u32::from_be_bytes(data[base..base + 4].try_into().unwrap());
586        let off = u64::from_be_bytes(data[base + 4..base + 12].try_into().unwrap()) as usize;
587        if id == chunk_id {
588            let next_off = if i + 1 < n {
589                let nb = pos + (i + 1) * CHUNK_TOC_ENTRY_SIZE;
590                u64::from_be_bytes(data[nb + 4..nb + 12].try_into().unwrap()) as usize
591            } else {
592                let term = pos + n * CHUNK_TOC_ENTRY_SIZE;
593                u64::from_be_bytes(data[term + 4..term + 12].try_into().unwrap()) as usize
594            };
595            return Ok((off, next_off.saturating_sub(off)));
596        }
597    }
598    Err(Error::CorruptObject(format!(
599        "MIDX chunk {chunk_id:08x} not found"
600    )))
601}
602
603/// Return the `pack-*.idx` basename for the MIDX preferred pack (RIDX position 0).
604///
605/// `objects_dir` is the repository object database (e.g. `.git/objects`), not `objects/pack`.
606///
607/// Used by `test-tool read-midx --preferred-pack` compatibility.
608/// Pack index basenames (`pack-*.idx`) stored in the MIDX pack-names chunk.
609pub fn read_midx_pack_idx_names(objects_dir: &Path) -> Result<Vec<String>> {
610    let pack_dir = objects_dir.join("pack");
611    let path = resolve_tip_midx_path(&pack_dir)
612        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
613    let data = fs::read(&path).map_err(Error::Io)?;
614    let (_, hdr_end, _) = parse_midx_header(&data)?;
615    let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
616    parse_pack_names_blob(&data[pn_off..pn_off + pn_len])
617}
618
619/// Trailing 40-character SHA-1 hex of the active MIDX (root or chain tip).
620pub fn midx_checksum_hex(objects_dir: &Path) -> Result<String> {
621    let pack_dir = objects_dir.join("pack");
622    let path = resolve_tip_midx_path(&pack_dir)
623        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
624    midx_checksum_hex_from_path(&path)
625}
626
627/// Human-readable dump of the MIDX (matches `test-tool read-midx` layout closely enough for grep-based tests).
628/// Emit one line per MIDX object: `{oid} {offset}\t{pack-idx-name}` (matches Git `test-read-midx.c`).
629pub fn format_midx_show_objects(objects_dir: &Path) -> Result<String> {
630    let mut out = format_midx_dump(objects_dir)?;
631    let pack_dir = objects_dir.join("pack");
632    let path = resolve_tip_midx_path(&pack_dir)
633        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
634    let data = fs::read(&path).map_err(Error::Io)?;
635    let (_, hdr_end, _) = parse_midx_header(&data)?;
636    let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
637    let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
638    let (oidl_off, oidl_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
639    let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
640    if oidl_len % 20 != 0 || ooff_len % 8 != 0 {
641        return Err(Error::CorruptObject(
642            "bad MIDX oid-lookup / object-offsets size".to_owned(),
643        ));
644    }
645    let num = oidl_len / 20;
646    if num * 8 != ooff_len {
647        return Err(Error::CorruptObject(
648            "MIDX oid count does not match object-offsets".to_owned(),
649        ));
650    }
651    for i in 0..num {
652        let oid = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])
653            .map_err(|e| Error::CorruptObject(e.to_string()))?;
654        let base = ooff_off + i * 8;
655        let pack_id = u32::from_be_bytes(data[base..base + 4].try_into().unwrap()) as usize;
656        let offset = u32::from_be_bytes(data[base + 4..base + 8].try_into().unwrap()) as u64;
657        let pack_name = names
658            .get(pack_id)
659            .ok_or_else(|| Error::CorruptObject("pack id out of range in MIDX".to_owned()))?;
660        out.push_str(&format!("{} {}\t{}\n", oid.to_hex(), offset, pack_name));
661    }
662    Ok(out)
663}
664
665pub fn format_midx_dump(objects_dir: &Path) -> Result<String> {
666    let pack_dir = objects_dir.join("pack");
667    let path = resolve_tip_midx_path(&pack_dir)
668        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
669    let data = fs::read(&path).map_err(Error::Io)?;
670    let (hdr, hdr_end, _) = parse_midx_header(&data)?;
671    let sig = u32::from_be_bytes(data[0..4].try_into().unwrap());
672    let version = data[4];
673    let hash_len = data[5];
674    let num_chunks = hdr.num_chunks;
675    let num_packs = u32::from_be_bytes(data[8..12].try_into().unwrap());
676
677    let mut chunk_tags: Vec<&'static str> = Vec::new();
678    let n = num_chunks as usize;
679    let pos = hdr_end;
680    let toc_end = pos + (n + 1) * CHUNK_TOC_ENTRY_SIZE;
681    if data.len() < toc_end + 20 {
682        return Err(Error::CorruptObject(
683            "truncated MIDX chunk table".to_owned(),
684        ));
685    }
686    for i in 0..n {
687        let base = pos + i * CHUNK_TOC_ENTRY_SIZE;
688        let id = u32::from_be_bytes(data[base..base + 4].try_into().unwrap());
689        let tag = match id {
690            x if x == MIDX_CHUNKID_PACKNAMES => "pack-names",
691            x if x == MIDX_CHUNKID_OIDFANOUT => "oid-fanout",
692            x if x == MIDX_CHUNKID_OIDLOOKUP => "oid-lookup",
693            x if x == MIDX_CHUNKID_OBJECTOFFSETS => "object-offsets",
694            x if x == MIDX_CHUNKID_REVINDEX => "revindex",
695            x if x == 0x4254_4d50 => "bitmapped-packs",
696            _ => "unknown",
697        };
698        chunk_tags.push(tag);
699    }
700
701    let (_ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
702    let num_objects = ooff_len / 8;
703
704    let pack_names = read_midx_pack_idx_names(objects_dir)?;
705
706    let mut out = String::new();
707    out.push_str(&format!(
708        "header: {:08x} {} {} {} {}\n",
709        sig, version, hash_len, num_chunks, num_packs
710    ));
711    out.push_str("chunks:");
712    for t in &chunk_tags {
713        out.push(' ');
714        out.push_str(t);
715    }
716    out.push('\n');
717    out.push_str(&format!("num_objects: {num_objects}\n"));
718    out.push_str("packs:\n");
719    for n in &pack_names {
720        out.push_str(n);
721        out.push('\n');
722    }
723    out.push_str(&format!("object-dir: {}\n", objects_dir.display()));
724    Ok(out)
725}
726
/// OID rows from the active multi-pack-index, plus reverse-index order for pack-reuse bitmap bits.
///
/// Git assigns each object a **global bitmap bit** equal to its position in the MIDX reverse index
/// (`RIDX` chunk) traversal order — not its position in the pack `.idx` file. Helpers on this struct
/// map [`ObjectId`] → global bit the same way as `midx-write.c` (`midx_pack_order`).
///
/// All four vectors hold one element per MIDX object.
#[derive(Debug, Clone)]
pub struct MidxReuseTables {
    /// OIDs in MIDX lexicographic order (same order as the OID lookup chunk).
    pub oids: Vec<ObjectId>,
    /// `(pack_int_id, in-pack offset)` parallel to `oids`.
    pub pack_and_offset: Vec<(u32, u64)>,
    /// `rid_order[rank]` is the OID-table index of the object at global bitmap rank `rank`.
    pub rid_order: Vec<u32>,
    /// Inverse map: global bitmap rank for each OID-table index
    /// (`oid_idx_to_rank[rid_order[rank]] == rank`).
    pub oid_idx_to_rank: Vec<u32>,
}
743
744/// Load OID / object-offset / reverse-index tables from the tip MIDX (root or chain tip).
745///
746/// Returns [`None`] when there is no MIDX or no `RIDX` chunk (no pseudo-bitmap ordering).
747pub fn load_midx_reuse_tables(objects_dir: &Path) -> Result<Option<MidxReuseTables>> {
748    let pack_dir = objects_dir.join("pack");
749    let Some(path) = resolve_tip_midx_path(&pack_dir) else {
750        return Ok(None);
751    };
752    let data = fs::read(&path).map_err(Error::Io)?;
753    let (_, hdr_end, _) = parse_midx_header(&data)?;
754    let (oidl_off, oid_l_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
755    let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
756    let Ok((ridx_off, ridx_len)) = find_chunk(&data, hdr_end, MIDX_CHUNKID_REVINDEX) else {
757        return Ok(None);
758    };
759    if oid_l_len % 20 != 0 || ooff_len != oid_l_len / 20 * 8 {
760        return Err(Error::CorruptObject(
761            "MIDX OID / offset chunk size mismatch".to_owned(),
762        ));
763    }
764    let num_objects = oid_l_len / 20;
765    if ridx_len != num_objects.saturating_mul(4) {
766        return Err(Error::CorruptObject(
767            "MIDX reverse index length does not match object count".to_owned(),
768        ));
769    }
770    if num_objects == 0 {
771        return Ok(None);
772    }
773
774    let mut oids = Vec::with_capacity(num_objects);
775    for i in 0..num_objects {
776        let base = oidl_off + i * 20;
777        oids.push(ObjectId::from_bytes(&data[base..base + 20])?);
778    }
779
780    let mut pack_and_offset = Vec::with_capacity(num_objects);
781    for i in 0..num_objects {
782        let ob = ooff_off + i * 8;
783        let pack_id = u32::from_be_bytes(data[ob..ob + 4].try_into().unwrap());
784        let off32 = u32::from_be_bytes(data[ob + 4..ob + 8].try_into().unwrap());
785        pack_and_offset.push((pack_id, u64::from(off32)));
786    }
787
788    let mut rid_order = Vec::with_capacity(num_objects);
789    for i in 0..num_objects {
790        let base = ridx_off + i * 4;
791        rid_order.push(u32::from_be_bytes(data[base..base + 4].try_into().unwrap()));
792    }
793
794    let mut oid_idx_to_rank = vec![0u32; num_objects];
795    for (rank, &oid_idx) in rid_order.iter().enumerate() {
796        let idx = usize::try_from(oid_idx)
797            .map_err(|_| Error::CorruptObject("bad MIDX reverse index entry".to_owned()))?;
798        if idx >= num_objects {
799            return Err(Error::CorruptObject(
800                "MIDX reverse index out of range".to_owned(),
801            ));
802        }
803        oid_idx_to_rank[idx] = u32::try_from(rank)
804            .map_err(|_| Error::CorruptObject("too many MIDX objects".to_owned()))?;
805    }
806
807    Ok(Some(MidxReuseTables {
808        oids,
809        pack_and_offset,
810        rid_order,
811        oid_idx_to_rank,
812    }))
813}
814
815impl MidxReuseTables {
816    /// Global pseudo-bitmap index for `oid`, or [`None`] if the object is not in this MIDX.
817    #[must_use]
818    pub fn global_bitmap_bit(&self, oid: &ObjectId) -> Option<u32> {
819        let oid_idx = self.oids.binary_search(oid).ok()?;
820        Some(self.oid_idx_to_rank[oid_idx])
821    }
822}
823
/// One pack's slice of the MIDX pseudo-bitmap namespace (`BTMP` chunk).
///
/// Values are plain integers decoded from the chunk, so ranges can be copied and compared
/// freely (`PartialEq` / `Eq` are derived for use in comparisons and test assertions).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MidxBtmpPackRange {
    /// Pack index in the MIDX pack-names list.
    pub pack_id: u32,
    /// First bit index assigned to this pack (cumulative object order).
    pub bitmap_pos: u32,
    /// Number of bits (objects) in this pack's range.
    ///
    /// NOTE(review): the original note equates this with the pack's `.idx` entry count; confirm
    /// that this also holds when several packs contain the same object.
    pub bitmap_nr: u32,
}
834
835/// Read per-pack `(bitmap_pos, bitmap_nr)` from the active MIDX `BTMP` chunk.
836///
837/// Returns an empty vector when the MIDX has no bitmapped-packs chunk.
838pub fn read_midx_btmp_ranges(objects_dir: &Path) -> Result<Vec<MidxBtmpPackRange>> {
839    let pack_dir = objects_dir.join("pack");
840    let Some(path) = resolve_tip_midx_path(&pack_dir) else {
841        return Ok(Vec::new());
842    };
843    let data = fs::read(&path).map_err(Error::Io)?;
844    let (_, hdr_end, _) = parse_midx_header(&data)?;
845    let Ok((btmp_off, btmp_len)) = find_chunk(&data, hdr_end, MIDX_CHUNKID_BITMAPPED_PACKS) else {
846        return Ok(Vec::new());
847    };
848    if btmp_len == 0 || btmp_len % 8 != 0 {
849        return Err(Error::CorruptObject(
850            "invalid MIDX BTMP chunk length".to_owned(),
851        ));
852    }
853    let num_packs = u32::from_be_bytes(data[8..12].try_into().unwrap());
854    let n_entries = btmp_len / 8;
855    if u32::try_from(n_entries).ok() != Some(num_packs) {
856        return Err(Error::CorruptObject(
857            "MIDX BTMP entry count does not match num_packs".to_owned(),
858        ));
859    }
860    let mut out = Vec::with_capacity(n_entries);
861    for i in 0..n_entries {
862        let base = btmp_off + i * 8;
863        let bitmap_pos = u32::from_be_bytes(data[base..base + 4].try_into().unwrap());
864        let bitmap_nr = u32::from_be_bytes(data[base + 4..base + 8].try_into().unwrap());
865        out.push(MidxBtmpPackRange {
866            pack_id: u32::try_from(i)
867                .map_err(|_| Error::CorruptObject("too many packs in MIDX BTMP".to_owned()))?,
868            bitmap_pos,
869            bitmap_nr,
870        });
871    }
872    Ok(out)
873}
874
875/// Look up which pack and in-pack offset holds `oid` according to the active MIDX.
876pub fn midx_lookup_pack_and_offset(objects_dir: &Path, oid: &ObjectId) -> Result<(u32, u64)> {
877    let pack_dir = objects_dir.join("pack");
878    let path = resolve_tip_midx_path(&pack_dir)
879        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
880    let data = fs::read(&path).map_err(Error::Io)?;
881    let (_, hdr_end, _) = parse_midx_header(&data)?;
882    let (fanout_off, fanout_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDFANOUT)?;
883    let (oidl_off, oid_l_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
884    let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
885    if fanout_len != 256 * 4 || oid_l_len % 20 != 0 || ooff_len != oid_l_len / 20 * 8 {
886        return Err(Error::CorruptObject("truncated MIDX OID chunks".to_owned()));
887    }
888    let num_objects = oid_l_len / 20;
889    let first = oid.as_bytes()[0] as usize;
890    let j0 = if first == 0 {
891        0usize
892    } else {
893        u32::from_be_bytes(
894            data[fanout_off + (first - 1) * 4..fanout_off + first * 4]
895                .try_into()
896                .unwrap(),
897        ) as usize
898    };
899    let j1 = u32::from_be_bytes(
900        data[fanout_off + first * 4..fanout_off + (first + 1) * 4]
901            .try_into()
902            .unwrap(),
903    ) as usize;
904    let mut lo = j0;
905    let mut hi = j1;
906    while lo < hi {
907        let mid = (lo + hi) / 2;
908        let base = oidl_off + mid * 20;
909        let cmp = data[base..base + 20].cmp(oid.as_bytes());
910        if cmp == std::cmp::Ordering::Less {
911            lo = mid + 1;
912        } else {
913            hi = mid;
914        }
915    }
916    if lo >= num_objects {
917        return Err(Error::CorruptObject(format!(
918            "object {} not in multi-pack-index",
919            oid.to_hex()
920        )));
921    }
922    let base = oidl_off + lo * 20;
923    if data[base..base + 20] != *oid.as_bytes() {
924        return Err(Error::CorruptObject(format!(
925            "object {} not in multi-pack-index",
926            oid.to_hex()
927        )));
928    }
929    let ob = ooff_off + lo * 8;
930    let pack_id = u32::from_be_bytes(data[ob..ob + 4].try_into().unwrap());
931    let off32 = u32::from_be_bytes(data[ob + 4..ob + 8].try_into().unwrap());
932    Ok((pack_id, u64::from(off32)))
933}
934
935/// Returns whether `oid` appears in the active MIDX OID table for `objects_dir`.
936///
937/// [`None`] means there is no MIDX at the pack tip. [`Some`] is the lookup result when a MIDX exists.
938pub fn midx_oid_listed_in_tip(objects_dir: &Path, oid: &ObjectId) -> Result<Option<bool>> {
939    let pack_dir = objects_dir.join("pack");
940    let Some(midx_path) = resolve_tip_midx_path(&pack_dir) else {
941        return Ok(None);
942    };
943    let data = fs::read(&midx_path).map_err(Error::Io)?;
944    let (_, hdr_end, hash_bytes) = parse_midx_header(&data)?;
945    if hash_bytes != 1 {
946        eprintln!(
947            "error: multi-pack-index hash version {} does not match version 1",
948            hash_bytes
949        );
950        return Err(Error::CorruptObject(
951            "multi-pack-index hash version mismatch".to_owned(),
952        ));
953    }
954    let (oidf_off, oidf_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDFANOUT)?;
955    if oidf_len != 256 * 4 {
956        eprintln!("error: multi-pack-index OID fanout is of the wrong size");
957        return Err(Error::CorruptObject(
958            "multi-pack-index OID fanout is of the wrong size".to_owned(),
959        ));
960    }
961    let (oidl_off, oidl_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
962    let (_ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
963    let num_objects = ooff_len / 8;
964    if oidl_len != num_objects * 20 || ooff_len != num_objects * 8 {
965        if oidl_len != num_objects * 20 {
966            eprintln!("error: multi-pack-index OID lookup chunk is the wrong size");
967        } else {
968            eprintln!("error: multi-pack-index object offset chunk is the wrong size");
969        }
970        return Err(Error::CorruptObject("midx chunk size mismatch".to_owned()));
971    }
972
973    let first = oid.as_bytes()[0] as usize;
974    let lo = if first == 0 {
975        0u32
976    } else {
977        u32::from_be_bytes(
978            data[oidf_off + (first - 1) * 4..oidf_off + first * 4]
979                .try_into()
980                .unwrap(),
981        )
982    };
983    let hi = u32::from_be_bytes(
984        data[oidf_off + first * 4..oidf_off + (first + 1) * 4]
985            .try_into()
986            .unwrap(),
987    );
988    if lo > hi || hi as usize > num_objects {
989        eprintln!(
990            "error: oid fanout out of order: fanout[{}] = {:08x} > {:08x} = fanout[{}]",
991            first.saturating_sub(1),
992            lo,
993            hi,
994            first
995        );
996        return Err(Error::CorruptObject("oid fanout out of order".to_owned()));
997    }
998
999    let mut i = lo as usize;
1000    while i < hi as usize {
1001        let o = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])?;
1002        match o.cmp(oid) {
1003            std::cmp::Ordering::Equal => return Ok(Some(true)),
1004            std::cmp::Ordering::Greater => return Ok(Some(false)),
1005            std::cmp::Ordering::Less => i += 1,
1006        }
1007    }
1008    Ok(Some(false))
1009}
1010
1011/// When `core.multiPackIndex` is enabled, try to read `oid` from the active MIDX in `objects_dir`.
1012///
1013/// Returns [`None`] when no MIDX exists or `oid` is not listed. Returns [`Some(Err(..))`] when the
1014/// MIDX is present but malformed (callers surface Git-style `error:` / `fatal:` messages).
1015pub fn try_read_object_via_midx(
1016    objects_dir: &Path,
1017    oid: &ObjectId,
1018) -> Result<Option<crate::objects::Object>> {
1019    let pack_dir = objects_dir.join("pack");
1020    let Some(midx_path) = resolve_tip_midx_path(&pack_dir) else {
1021        return Ok(None);
1022    };
1023    let data = fs::read(&midx_path).map_err(Error::Io)?;
1024    let (_, hdr_end, hash_bytes) = parse_midx_header(&data)?;
1025    let num_packs_hdr = u32::from_be_bytes(data[8..12].try_into().unwrap());
1026    if hash_bytes != 1 {
1027        eprintln!(
1028            "error: multi-pack-index hash version {} does not match version 1",
1029            hash_bytes
1030        );
1031        return Err(Error::CorruptObject(
1032            "multi-pack-index hash version mismatch".to_owned(),
1033        ));
1034    }
1035    let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1036    let pack_names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
1037    if pack_names.len() != num_packs_hdr as usize {
1038        return Err(Error::CorruptObject(
1039            "multi-pack-index pack-name chunk is too short".to_owned(),
1040        ));
1041    }
1042    let (oidf_off, oidf_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDFANOUT)?;
1043    if oidf_len != 256 * 4 {
1044        eprintln!("error: multi-pack-index OID fanout is of the wrong size");
1045        return Err(Error::CorruptObject(
1046            "multi-pack-index OID fanout is of the wrong size".to_owned(),
1047        ));
1048    }
1049    let (oidl_off, oidl_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
1050    let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1051    let num_objects = ooff_len / 8;
1052    if oidl_len != num_objects * 20 {
1053        eprintln!("error: multi-pack-index OID lookup chunk is the wrong size");
1054        return Err(Error::CorruptObject(
1055            "multi-pack-index OID lookup chunk is the wrong size".to_owned(),
1056        ));
1057    }
1058    if ooff_len != num_objects * 8 {
1059        eprintln!("error: multi-pack-index object offset chunk is the wrong size");
1060        return Err(Error::CorruptObject(
1061            "multi-pack-index object offset chunk is the wrong size".to_owned(),
1062        ));
1063    }
1064    let loff = find_chunk(&data, hdr_end, MIDX_CHUNKID_LARGEOFFSETS).ok();
1065    let ridx = find_chunk(&data, hdr_end, MIDX_CHUNKID_REVINDEX).ok();
1066
1067    if let Some((_, rlen)) = ridx {
1068        if rlen != num_objects * 4 {
1069            eprintln!("error: multi-pack-index reverse-index chunk is the wrong size");
1070            eprintln!("warning: multi-pack bitmap is missing required reverse index");
1071        }
1072    }
1073
1074    let first = oid.as_bytes()[0] as usize;
1075    let lo = if first == 0 {
1076        0u32
1077    } else {
1078        u32::from_be_bytes(
1079            data[oidf_off + (first - 1) * 4..oidf_off + first * 4]
1080                .try_into()
1081                .unwrap(),
1082        )
1083    };
1084    let hi = u32::from_be_bytes(
1085        data[oidf_off + first * 4..oidf_off + (first + 1) * 4]
1086            .try_into()
1087            .unwrap(),
1088    );
1089    if lo > hi || hi as usize > num_objects {
1090        eprintln!(
1091            "error: oid fanout out of order: fanout[{}] = {:08x} > {:08x} = fanout[{}]",
1092            first.saturating_sub(1),
1093            lo,
1094            hi,
1095            first
1096        );
1097        return Err(Error::CorruptObject("oid fanout out of order".to_owned()));
1098    }
1099
1100    let mut pos = None;
1101    let mut i = lo as usize;
1102    while i < hi as usize {
1103        let o = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])?;
1104        let c = o.cmp(oid);
1105        if c == std::cmp::Ordering::Equal {
1106            pos = Some(i);
1107            break;
1108        }
1109        if c == std::cmp::Ordering::Greater {
1110            break;
1111        }
1112        i += 1;
1113    }
1114    let Some(pos) = pos else {
1115        return Ok(None);
1116    };
1117
1118    let obase = ooff_off + pos * 8;
1119    let pack_id = u32::from_be_bytes(data[obase..obase + 4].try_into().unwrap());
1120    let raw_off = u32::from_be_bytes(data[obase + 4..obase + 8].try_into().unwrap());
1121    let _offset = if (raw_off & MIDX_LARGE_OFFSET_NEEDED) != 0 {
1122        let Some((loff_off, loff_len)) = loff else {
1123            return Err(Error::CorruptObject(
1124                "multi-pack-index large offset missing LOFF chunk".to_owned(),
1125            ));
1126        };
1127        let idx = (raw_off & !MIDX_LARGE_OFFSET_NEEDED) as usize;
1128        let need = (idx + 1) * 8;
1129        if loff_len < need {
1130            return Err(Error::CorruptObject(
1131                "multi-pack-index large offset out of bounds".to_owned(),
1132            ));
1133        }
1134        u64::from_be_bytes(
1135            data[loff_off + idx * 8..loff_off + (idx + 1) * 8]
1136                .try_into()
1137                .unwrap(),
1138        )
1139    } else {
1140        raw_off as u64
1141    };
1142
1143    let idx_name = pack_names
1144        .get(pack_id as usize)
1145        .ok_or_else(|| Error::CorruptObject("bad pack-int-id".to_owned()))?;
1146    let idx_path = pack_dir.join(idx_name);
1147    let idx = crate::pack::read_pack_index(&idx_path)?;
1148    crate::pack::read_object_from_pack(&idx, oid).map(Some)
1149}
1150
1151pub fn read_midx_preferred_idx_name(objects_dir: &Path) -> Result<String> {
1152    let pack_dir = objects_dir.join("pack");
1153    let path = resolve_tip_midx_path(&pack_dir)
1154        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
1155    let data = fs::read(&path).map_err(Error::Io)?;
1156    let (_, hdr_end, _) = parse_midx_header(&data)?;
1157    let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1158    let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
1159    let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1160    let (ridx_off, ridx_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_REVINDEX)?;
1161
1162    if ridx_len < 4 || ooff_len < 8 {
1163        return Err(Error::CorruptObject("truncated MIDX RIDX/OOFF".to_owned()));
1164    }
1165    let first_oid_idx =
1166        u32::from_be_bytes(data[ridx_off..ridx_off + 4].try_into().unwrap()) as usize;
1167    let entry_base = ooff_off + first_oid_idx * 8;
1168    if entry_base + 8 > data.len() || entry_base + 8 > ooff_off + ooff_len {
1169        return Err(Error::CorruptObject(
1170            "bad MIDX object-offsets index".to_owned(),
1171        ));
1172    }
1173    let pack_id = u32::from_be_bytes(data[entry_base..entry_base + 4].try_into().unwrap());
1174    let idx = usize::try_from(pack_id)
1175        .map_err(|_| Error::CorruptObject("pack id overflow in multi-pack-index".to_owned()))?;
1176    names
1177        .get(idx)
1178        .cloned()
1179        .ok_or_else(|| Error::CorruptObject("preferred pack id out of range".to_owned()))
1180}
1181
1182/// Build `objects/pack/multi-pack-index` for all pack indexes in `pack_dir`.
1183///
1184/// Returns an error if there are no `.idx` files, if an object offset does not
1185/// fit in 31 bits (no `LOFF` chunk yet), or if I/O fails.
1186/// Remove every multi-pack-index file under `pack_dir` (root file, sidecars, and
1187/// `multi-pack-index.d/`). Used by full `repack -a` so stale incremental chains do not survive.
1188pub fn clear_pack_midx_state(pack_dir: &Path) -> Result<()> {
1189    let _ = fs::remove_file(pack_dir.join("multi-pack-index"));
1190    scrub_root_midx_sidecars_except(pack_dir, None)?;
1191    let midx_d = midx_d_dir(pack_dir);
1192    if midx_d.exists() {
1193        let _ = fs::remove_dir_all(&midx_d);
1194    }
1195    Ok(())
1196}
1197
1198pub fn write_multi_pack_index(pack_dir: &Path) -> Result<()> {
1199    write_multi_pack_index_with_options(pack_dir, &WriteMultiPackIndexOptions::default())
1200}
1201
/// Write `multi-pack-index` with optional preferred pack, placeholders, and incremental chain.
///
/// The packs covered are the `pack-*.idx` files found in `pack_dir` (sorted by name), optionally
/// narrowed and reordered by `opts.pack_names_subset_ordered`. With `opts.incremental` set, only
/// packs that no existing layer names go into a new layer file inside the split-layer directory
/// and the chain file is rewritten; if there is no such pack the call returns without writing
/// anything. Without `opts.incremental`, a single `multi-pack-index` file is written into
/// `pack_dir` and the split-layer directory is emptied.
///
/// When bitmap placeholders are requested, an empty `.bitmap` sidecar is written next to the
/// MIDX; with `opts.write_rev_placeholder` a `.rev` sidecar is written as well.
///
/// # Errors
///
/// Returns an error when no `pack-*.idx` file exists, when a requested subset entry or the
/// preferred pack cannot be matched, when the preferred pack index is out of range, when a pack
/// index cannot be read, when building the MIDX bytes fails, or when a required filesystem
/// operation fails.
pub fn write_multi_pack_index_with_options(
    pack_dir: &Path,
    opts: &WriteMultiPackIndexOptions,
) -> Result<()> {
    // Every `pack-*.idx` directory entry is a candidate; unreadable entries are skipped.
    let mut idx_names: Vec<String> = fs::read_dir(pack_dir)
        .map_err(Error::Io)?
        .filter_map(|e| e.ok())
        .filter_map(|e| {
            let name = e.file_name().to_string_lossy().to_string();
            if name.ends_with(".idx") && name.starts_with("pack-") {
                Some(name)
            } else {
                None
            }
        })
        .collect();
    // Pack ids are assigned by position further down, so sort for a stable order.
    idx_names.sort();

    if idx_names.is_empty() {
        return Err(Error::CorruptObject(
            "no pack-*.idx files found in pack directory".to_owned(),
        ));
    }

    // An explicit pack list replaces the directory listing: order is taken from the list,
    // duplicates are dropped, and every requested name must exist on disk.
    let idx_names: Vec<String> = if let Some(sub) = &opts.pack_names_subset_ordered {
        let mut out = Vec::new();
        for line in sub {
            let want = normalize_pack_idx_basename(line)?;
            let found = idx_names
                .iter()
                .find(|n| **n == want)
                .cloned()
                .ok_or_else(|| {
                    Error::CorruptObject(format!("pack index not in repository: {want}"))
                })?;
            if !out.contains(&found) {
                out.push(found);
            }
        }
        if out.is_empty() {
            return Err(Error::CorruptObject(
                "stdin-packs list produced empty pack set".to_owned(),
            ));
        }
        out
    } else {
        idx_names
    };

    // NOTE(review): judging by the helper's name and the uses below, these are the object ids
    // and pack names already covered by the existing layers — confirm in the helper.
    let (base_oids, base_pack_names) = if opts.incremental {
        collect_incremental_base(pack_dir)?
    } else {
        (HashSet::new(), HashSet::new())
    };

    // A new layer only covers packs that no base layer already names.
    let layer_idx_names: Vec<String> = if opts.incremental {
        idx_names
            .iter()
            .filter(|n| {
                !base_pack_names
                    .iter()
                    .any(|bp| pack_names_match_layer(bp, n))
            })
            .cloned()
            .collect()
    } else {
        idx_names.clone()
    };

    // Nothing new to index: leave the existing files untouched.
    if opts.incremental && layer_idx_names.is_empty() {
        return Ok(());
    }

    // Both branches select the same names, because `layer_idx_names` is a clone of `idx_names`
    // when the write is not incremental.
    let work_names = if opts.incremental {
        &layer_idx_names[..]
    } else {
        &idx_names[..]
    };

    // Preferred pack precedence: explicit index, then explicit name, then the mtime-based pick
    // (the latter only when bitmap placeholders are requested).
    let mut preferred_idx = opts.preferred_pack_idx.map(|p| p as usize);
    if preferred_idx.is_none() {
        if let Some(raw) = opts.preferred_pack_name.as_deref() {
            let pos = work_names
                .iter()
                .position(|n| cmp_idx_or_pack_name(raw, n).is_eq())
                .ok_or_else(|| {
                    Error::CorruptObject(format!(
                        "preferred pack '{raw}' not found in multi-pack-index input"
                    ))
                })?;
            preferred_idx = Some(pos);
        }
    }
    if preferred_idx.is_none() && opts.write_bitmap_placeholders && !work_names.is_empty() {
        preferred_idx = preferred_pack_index_by_mtime(pack_dir, work_names)?;
    }
    // The preferred index refers to a position in `work_names`.
    if let Some(p) = preferred_idx {
        if p >= work_names.len() {
            return Err(Error::CorruptObject(
                "preferred pack index out of range".to_owned(),
            ));
        }
    }

    // Load each pack index; the position in `indexes` is the pack id used below.
    let mut indexes: Vec<PackIndex> = Vec::with_capacity(work_names.len());
    for name in work_names {
        let path = pack_dir.join(name);
        indexes.push(read_pack_index(&path)?);
    }

    let pack_mtimes_layer: Vec<std::time::SystemTime> =
        indexes.iter().map(pack_mtime_for_midx).collect();
    let preferred_u32 = preferred_idx.map(|p| p as u32);

    // Keep one entry per object id across the packs. In this function the map is only consulted
    // for emptiness (see `bitmap_placeholders`); the bytes are produced by `build_midx_bytes`.
    let mut best: HashMap<ObjectId, MidxEntry> = HashMap::new();
    for (pack_id, idx) in indexes.iter().enumerate() {
        let pack_id = u32::try_from(pack_id).map_err(|_| {
            Error::CorruptObject("too many pack files for multi-pack-index".to_owned())
        })?;
        let mtime = pack_mtimes_layer[pack_id as usize];
        for e in &idx.entries {
            // Ids that are not 20 bytes long or fail to parse are skipped.
            if e.oid.len() != 20 {
                continue;
            }
            let Ok(oid) = ObjectId::from_bytes(&e.oid) else {
                continue;
            };
            // Objects already present in a base layer are left out of the new layer's count.
            if opts.incremental && base_oids.contains(&oid) {
                continue;
            }
            let cand = MidxEntry {
                oid,
                pack_id,
                offset: e.offset,
                pack_mtime: mtime,
            };
            match best.get(&oid) {
                None => {
                    best.insert(oid, cand);
                }
                Some(cur) => {
                    if midx_pick_better_entry(cur, pack_id, e.offset, mtime, preferred_u32) {
                        best.insert(oid, cand);
                    }
                }
            }
        }
    }

    // An incremental layer that contributes no objects gets no bitmap placeholder.
    let bitmap_placeholders =
        opts.write_bitmap_placeholders && (!opts.incremental || !best.is_empty());

    // NOTE(review): the flag name suggests the embedded reverse index is left out when a `.rev`
    // sidecar is requested — confirm in `build_midx_bytes`.
    let omit_embedded_ridx = opts.write_rev_placeholder;
    let (out, rev_sidecar_order) = build_midx_bytes(
        work_names,
        &indexes,
        preferred_idx,
        bitmap_placeholders,
        omit_embedded_ridx,
    )?;

    // The last 20 bytes of the serialized MIDX are taken as its checksum; the hex form names the
    // layer and sidecar files.
    let hash = &out[out.len() - 20..];
    let hash_hex = hex::encode(hash);
    let hash_arr: [u8; 20] = hash
        .try_into()
        .map_err(|_| Error::CorruptObject("midx hash length mismatch".to_owned()))?;

    if opts.incremental {
        let root_midx = pack_dir.join("multi-pack-index");
        let chain_path = chain_file_path(pack_dir);
        let chain_existed = chain_path.exists();

        // A root MIDX without a chain file becomes the first chain entry; otherwise the existing
        // chain is extended (a chain that cannot be read is treated as empty).
        let mut chain = if root_midx.exists() && !chain_existed {
            let root_hex = midx_checksum_hex_from_path(&root_midx)?;
            link_root_midx_into_chain(pack_dir, &root_hex)?;
            vec![root_hex]
        } else {
            read_chain_layer_hashes(pack_dir).unwrap_or_default()
        };

        chain.push(hash_hex.clone());

        let midx_d = midx_d_dir(pack_dir);
        fs::create_dir_all(&midx_d).map_err(Error::Io)?;

        let layer_path = midx_d.join(format!("multi-pack-index-{hash_hex}.midx"));
        fs::write(&layer_path, &out).map_err(Error::Io)?;

        // The chain file lists one layer hash per line, ending with the layer just written.
        let mut chain_data = String::new();
        for h in &chain {
            chain_data.push_str(h);
            chain_data.push('\n');
        }
        fs::write(chain_file_path(pack_dir), chain_data.as_bytes()).map_err(Error::Io)?;

        clear_stale_split_layers(pack_dir, &chain)?;

        // The root MIDX and its sidecars are removed once the chain is in place; a failed
        // removal of the root file (e.g. it does not exist) is ignored.
        let _ = fs::remove_file(pack_dir.join("multi-pack-index"));
        scrub_root_midx_sidecars(pack_dir)?;
        if bitmap_placeholders {
            // Same value as `hash_hex`.
            let full = hex::encode(hash);
            fs::write(midx_d.join(format!("multi-pack-index-{full}.bitmap")), [])
                .map_err(Error::Io)?;
            if opts.write_rev_placeholder {
                // The `.rev` sidecar holds the order returned by the builder when there is one,
                // and is an empty file otherwise.
                let rev_path = midx_d.join(format!("multi-pack-index-{full}.rev"));
                if let Some(order) = rev_sidecar_order.as_ref() {
                    write_midx_rev_sidecar(&rev_path, order, &hash_arr)?;
                } else {
                    fs::write(rev_path, []).map_err(Error::Io)?;
                }
            }
        }
    } else {
        // A full write replaces any split layers: empty the layer directory (individual removal
        // failures are ignored) and make sure the directory exists afterwards.
        let midx_d = midx_d_dir(pack_dir);
        if midx_d.exists() {
            for ent in fs::read_dir(&midx_d).map_err(Error::Io)? {
                let ent = ent.map_err(Error::Io)?;
                let _ = if ent.file_type().map_err(Error::Io)?.is_dir() {
                    fs::remove_dir_all(ent.path())
                } else {
                    fs::remove_file(ent.path())
                };
            }
        }
        fs::create_dir_all(&midx_d).map_err(Error::Io)?;

        let dest = pack_dir.join("multi-pack-index");
        fs::write(&dest, &out).map_err(Error::Io)?;

        // Drop root sidecars that belong to other checksums.
        scrub_root_midx_sidecars_except(pack_dir, Some(&hash_hex))?;

        if opts.write_bitmap_placeholders {
            fs::write(
                pack_dir.join(format!("multi-pack-index-{hash_hex}.bitmap")),
                [],
            )
            .map_err(Error::Io)?;
            if opts.write_rev_placeholder {
                // Same `.rev` handling as in the incremental branch.
                let rev_path = pack_dir.join(format!("multi-pack-index-{hash_hex}.rev"));
                if let Some(order) = rev_sidecar_order.as_ref() {
                    write_midx_rev_sidecar(&rev_path, order, &hash_arr)?;
                } else {
                    fs::write(rev_path, []).map_err(Error::Io)?;
                }
            }
        }
    }

    Ok(())
}
1453
1454fn pack_names_match_layer(base_name: &str, disk_idx: &str) -> bool {
1455    if base_name == disk_idx {
1456        return true;
1457    }
1458    cmp_idx_or_pack_name(disk_idx, base_name).is_eq()
1459}
1460
1461fn scrub_root_midx_sidecars(pack_dir: &Path) -> Result<()> {
1462    scrub_root_midx_sidecars_except(pack_dir, None)
1463}
1464
1465fn scrub_root_midx_sidecars_except(pack_dir: &Path, keep_hex: Option<&str>) -> Result<()> {
1466    let Ok(rd) = fs::read_dir(pack_dir) else {
1467        return Ok(());
1468    };
1469    for ent in rd {
1470        let ent = ent.map_err(Error::Io)?;
1471        let name = ent.file_name().to_string_lossy().to_string();
1472        let Some(rest) = name.strip_prefix("multi-pack-index-") else {
1473            continue;
1474        };
1475        if !(rest.ends_with(".bitmap") || rest.ends_with(".rev")) {
1476            continue;
1477        }
1478        let hash_part = rest
1479            .strip_suffix(".bitmap")
1480            .or_else(|| rest.strip_suffix(".rev"))
1481            .unwrap_or(rest);
1482        if hash_part.len() != 40 {
1483            continue;
1484        }
1485        if keep_hex.is_some_and(|k| k == hash_part) {
1486            continue;
1487        }
1488        let _ = fs::remove_file(ent.path());
1489    }
1490    Ok(())
1491}