Skip to main content

grit_lib/
midx.rs

1//! Multi-pack-index (MIDX) file writing and minimal reading.
2//!
3//! Writes a Git-compatible `multi-pack-index` file (version 1, SHA-1) covering
4//! selected `pack-*.idx` files. Objects that appear in multiple packs keep the
5//! preferred pack's copy when `preferred_pack_idx` is set (matching Git's
6//! geometric repack tests).
7//!
8//! Incremental writes follow Git's split layout: layers live under
9//! `pack/multi-pack-index.d/multi-pack-index-<sha1>.midx` with ordering in
10//! `multi-pack-index-chain` (oldest hash first, newest last).
11
12use std::collections::{HashMap, HashSet};
13use std::fs;
14use std::io::{BufRead, BufReader};
15use std::path::Path;
16
17use sha1::{Digest, Sha1};
18
19use crate::error::{Error, Result};
20use crate::objects::ObjectId;
21use crate::pack::{read_pack_index, PackIndex};
22
// File-level MIDX layout. Four-byte magics are spelled as ASCII tags so the
// on-disk identifier is readable directly in the source.
const MIDX_SIGNATURE: u32 = u32::from_be_bytes(*b"MIDX");
const MIDX_VERSION_V1: u8 = 1;
const HASH_VERSION_SHA1: u8 = 1;
const MIDX_HEADER_SIZE: usize = 12;
const CHUNK_TOC_ENTRY_SIZE: usize = 12;

// Chunk identifiers used in the table of contents.
const MIDX_CHUNKID_PACKNAMES: u32 = u32::from_be_bytes(*b"PNAM");
const MIDX_CHUNKID_OIDFANOUT: u32 = u32::from_be_bytes(*b"OIDF");
const MIDX_CHUNKID_OIDLOOKUP: u32 = u32::from_be_bytes(*b"OIDL");
const MIDX_CHUNKID_OBJECTOFFSETS: u32 = u32::from_be_bytes(*b"OOFF");
const MIDX_CHUNKID_LARGEOFFSETS: u32 = u32::from_be_bytes(*b"LOFF");
const MIDX_CHUNKID_REVINDEX: u32 = u32::from_be_bytes(*b"RIDX");
const MIDX_CHUNKID_BITMAPPED_PACKS: u32 = u32::from_be_bytes(*b"BTMP");

// Git `pack-revindex.h` / `pack-write.c` (standalone `.rev` next to MIDX).
const RIDX_SIGNATURE: u32 = u32::from_be_bytes(*b"RIDX");
const RIDX_VERSION: u32 = 1;
const RIDX_HEADER_SIZE: usize = 12;
const MIDX_CHUNK_ALIGNMENT: usize = 4;

// `git midx.h` (MIDX_LARGE_OFFSET_NEEDED): top bit of a 32-bit offset field.
const MIDX_LARGE_OFFSET_NEEDED: u32 = 1 << 31;
44
45struct MidxEntry {
46    oid: ObjectId,
47    pack_id: u32,
48    offset: u64,
49    pack_mtime: std::time::SystemTime,
50}
51
/// Knobs for the multi-pack-index writer beyond the plain "index every pack" mode.
///
/// All fields default to "off" / `None`.
#[derive(Debug, Clone, Default)]
pub struct WriteMultiPackIndexOptions {
    /// Preferred pack, given as an index into the sorted pack name list. Objects
    /// that also exist in other packs are attributed to this pack.
    pub preferred_pack_idx: Option<u32>,
    /// Preferred pack given by basename (e.g. `pack-abc.idx` or `pack-abc.pack`).
    /// It is resolved against the working pack name list, after any subset
    /// filtering has been applied.
    pub preferred_pack_name: Option<String>,
    /// Restrict the MIDX to exactly these `pack-*.idx` basenames, kept in the
    /// given order (Git `--stdin-packs`).
    pub pack_names_subset_ordered: Option<Vec<String>>,
    /// Append RIDX and empty BTMP chunks so `test-tool read-midx --bitmap` succeeds.
    pub write_bitmap_placeholders: bool,
    /// Write a new layer under `multi-pack-index.d/` and extend the chain file
    /// rather than replacing `pack/multi-pack-index`.
    pub incremental: bool,
    /// Together with [`Self::write_bitmap_placeholders`], additionally create an
    /// empty `.rev` sidecar (Git `GIT_TEST_MIDX_WRITE_REV` compatibility).
    pub write_rev_placeholder: bool,
}
72
73fn normalize_pack_idx_basename(raw: &str) -> Result<String> {
74    let t = raw.trim();
75    let t = std::path::Path::new(t)
76        .file_name()
77        .and_then(|s| s.to_str())
78        .unwrap_or(t);
79    let t = t.strip_prefix("./").unwrap_or(t);
80    if t.ends_with(".idx") {
81        Ok(t.to_string())
82    } else if t.ends_with(".pack") {
83        Ok(format!("{}.idx", t.strip_suffix(".pack").unwrap_or(t)))
84    } else {
85        Ok(format!("{t}.idx"))
86    }
87}
88
89/// Read a big-endian `u32` from `data` at byte offset `off`.
90///
91/// Returns [`Error::CorruptObject`] if `data` does not contain 4 bytes at `off`,
92/// replacing the previous fixed-width-slice `.try_into().unwrap()` with real
93/// bounds handling (the success-path value is unchanged).
94fn read_be_u32(data: &[u8], off: usize) -> Result<u32> {
95    let end = off.checked_add(4).filter(|&e| e <= data.len());
96    let Some(end) = end else {
97        return Err(Error::CorruptObject(
98            "truncated MIDX data reading u32".to_owned(),
99        ));
100    };
101    let bytes: [u8; 4] = data[off..end]
102        .try_into()
103        .map_err(|_| Error::CorruptObject("truncated MIDX data reading u32".to_owned()))?;
104    Ok(u32::from_be_bytes(bytes))
105}
106
107/// Read a big-endian `u64` from `data` at byte offset `off`.
108///
109/// Returns [`Error::CorruptObject`] if `data` does not contain 8 bytes at `off`,
110/// replacing the previous fixed-width-slice `.try_into().unwrap()` with real
111/// bounds handling (the success-path value is unchanged).
112fn read_be_u64(data: &[u8], off: usize) -> Result<u64> {
113    let end = off.checked_add(8).filter(|&e| e <= data.len());
114    let Some(end) = end else {
115        return Err(Error::CorruptObject(
116            "truncated MIDX data reading u64".to_owned(),
117        ));
118    };
119    let bytes: [u8; 8] = data[off..end]
120        .try_into()
121        .map_err(|_| Error::CorruptObject("truncated MIDX data reading u64".to_owned()))?;
122    Ok(u64::from_be_bytes(bytes))
123}
124
/// The part of the fixed MIDX header that readers in this module keep.
struct MidxFileHeader {
    /// Number of chunk entries announced by header byte 6.
    num_chunks: u8,
}
128
129fn parse_midx_header(data: &[u8]) -> Result<(MidxFileHeader, usize, u8)> {
130    if data.len() < MIDX_HEADER_SIZE + 20 {
131        return Err(Error::CorruptObject("midx file too small".to_owned()));
132    }
133    let sig = read_be_u32(data, 0)?;
134    if sig != MIDX_SIGNATURE {
135        return Err(Error::CorruptObject("bad MIDX signature".to_owned()));
136    }
137    let version = data[4];
138    if version != MIDX_VERSION_V1 {
139        return Err(Error::CorruptObject(format!(
140            "unsupported MIDX version {version}"
141        )));
142    }
143    let object_hash_bytes = data[5];
144    let num_chunks = data[6];
145    let _num_packs = read_be_u32(data, 8)?;
146    Ok((
147        MidxFileHeader { num_chunks },
148        MIDX_HEADER_SIZE,
149        object_hash_bytes,
150    ))
151}
152
153fn parse_pack_names_blob(pn: &[u8]) -> Result<Vec<String>> {
154    let mut names = Vec::new();
155    let mut start = 0usize;
156    for (i, &b) in pn.iter().enumerate() {
157        if b == 0 && i >= start {
158            if i > start {
159                let s = std::str::from_utf8(&pn[start..i])
160                    .map_err(|_| Error::CorruptObject("non-utf8 pack name in MIDX".to_owned()))?;
161                names.push(s.to_string());
162            }
163            start = i + 1;
164        }
165    }
166    Ok(names)
167}
168
/// Order a pack basename (ending in `.pack` or `.idx`) against an MIDX pack
/// name (ending in `.idx`).
///
/// After skipping the common prefix, a leftover of `pack` versus `idx` counts
/// as equal; otherwise the remaining bytes are compared lexicographically.
fn cmp_idx_or_pack_name(idx_or_pack_name: &str, idx_name: &str) -> std::cmp::Ordering {
    let lhs = idx_or_pack_name.as_bytes();
    let rhs = idx_name.as_bytes();
    let shared = lhs.iter().zip(rhs).take_while(|(x, y)| x == y).count();
    let (lhs_tail, rhs_tail) = (&lhs[shared..], &rhs[shared..]);
    match (lhs_tail, rhs_tail) {
        (b"pack", b"idx") => std::cmp::Ordering::Equal,
        _ => lhs_tail.cmp(rhs_tail),
    }
}
185
186fn preferred_pack_index_by_mtime(pack_dir: &Path, names: &[String]) -> Result<Option<usize>> {
187    let mut best: Option<(usize, std::time::SystemTime)> = None;
188    for (i, n) in names.iter().enumerate() {
189        let meta = fs::metadata(pack_dir.join(n)).map_err(Error::Io)?;
190        let mtime = meta.modified().map_err(Error::Io)?;
191        match best {
192            None => best = Some((i, mtime)),
193            Some((_, t)) if mtime < t => best = Some((i, mtime)),
194            _ => {}
195        }
196    }
197    Ok(best.map(|(i, _)| i))
198}
199
/// Directory that holds incremental MIDX layers: `<pack_dir>/multi-pack-index.d`.
fn midx_d_dir(pack_dir: &Path) -> std::path::PathBuf {
    let mut dir = pack_dir.to_path_buf();
    dir.push("multi-pack-index.d");
    dir
}
203
204fn chain_file_path(pack_dir: &Path) -> std::path::PathBuf {
205    midx_d_dir(pack_dir).join("multi-pack-index-chain")
206}
207
208fn read_chain_layer_hashes(pack_dir: &Path) -> Result<Vec<String>> {
209    let path = chain_file_path(pack_dir);
210    let f = fs::File::open(&path).map_err(Error::Io)?;
211    let mut out = Vec::new();
212    for line in BufReader::new(f).lines() {
213        let line = line.map_err(Error::Io)?;
214        let t = line.trim();
215        if t.is_empty() {
216            continue;
217        }
218        if t.len() != 40 || !t.chars().all(|c| c.is_ascii_hexdigit()) {
219            return Err(Error::CorruptObject(format!(
220                "invalid multi-pack-index chain line: {t}"
221            )));
222        }
223        out.push(t.to_ascii_lowercase());
224    }
225    Ok(out)
226}
227
228/// Resolve the path to the newest MIDX layer (root `multi-pack-index` or last chain entry).
229pub fn resolve_tip_midx_path(pack_dir: &Path) -> Option<std::path::PathBuf> {
230    let root = pack_dir.join("multi-pack-index");
231    if root.exists() {
232        return Some(root);
233    }
234    let hashes = read_chain_layer_hashes(pack_dir).ok()?;
235    let last = hashes.last()?;
236    Some(midx_d_dir(pack_dir).join(format!("multi-pack-index-{last}.midx")))
237}
238
239fn load_midx_file(path: &Path) -> Result<Vec<u8>> {
240    let data = fs::read(path).map_err(Error::Io)?;
241    let _ = parse_midx_header(&data)?;
242    Ok(data)
243}
244
245fn oids_and_packs_from_midx_data(data: &[u8]) -> Result<(HashSet<ObjectId>, Vec<String>)> {
246    let (_, hdr_end, _) = parse_midx_header(data)?;
247    let (pn_off, pn_len) = find_chunk(data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
248    let pack_names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
249    let (_ooff_off, ooff_len) = find_chunk(data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
250    let (oidl_off, oidl_len) = find_chunk(data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
251    let num_objects = ooff_len / 8;
252    if oidl_len != num_objects * 20 {
253        return Err(Error::CorruptObject(
254            "MIDX oid-lookup size mismatch".to_owned(),
255        ));
256    }
257    let mut oids = HashSet::with_capacity(num_objects);
258    for i in 0..num_objects {
259        let start = oidl_off + i * 20;
260        let oid = ObjectId::from_bytes(&data[start..start + 20])?;
261        oids.insert(oid);
262    }
263    Ok((oids, pack_names))
264}
265
266fn collect_incremental_base(pack_dir: &Path) -> Result<(HashSet<ObjectId>, HashSet<String>)> {
267    let mut oids = HashSet::new();
268    let mut packs = HashSet::new();
269    let root = pack_dir.join("multi-pack-index");
270    let chain_path = chain_file_path(pack_dir);
271    if chain_path.exists() {
272        for h in read_chain_layer_hashes(pack_dir)? {
273            let p = midx_d_dir(pack_dir).join(format!("multi-pack-index-{h}.midx"));
274            let data = load_midx_file(&p)?;
275            let (layer_oids, names) = oids_and_packs_from_midx_data(&data)?;
276            oids.extend(layer_oids);
277            for n in names {
278                packs.insert(n);
279            }
280        }
281        return Ok((oids, packs));
282    }
283    if root.exists() {
284        let data = load_midx_file(&root)?;
285        let (o, names) = oids_and_packs_from_midx_data(&data)?;
286        oids = o;
287        for n in names {
288            packs.insert(n);
289        }
290    }
291    Ok((oids, packs))
292}
293
294fn midx_checksum_hex_from_path(path: &Path) -> Result<String> {
295    let data = fs::read(path).map_err(Error::Io)?;
296    if data.len() < 20 {
297        return Err(Error::CorruptObject(
298            "midx too small for checksum".to_owned(),
299        ));
300    }
301    let hash = &data[data.len() - 20..];
302    Ok(hex::encode(hash))
303}
304
305fn hard_link_or_copy(src: &Path, dst: &Path) -> Result<()> {
306    let _ = fs::remove_file(dst);
307    if fs::hard_link(src, dst).is_ok() {
308        return Ok(());
309    }
310    fs::copy(src, dst).map_err(Error::Io)?;
311    Ok(())
312}
313
314fn link_root_midx_into_chain(pack_dir: &Path, root_checksum_hex: &str) -> Result<()> {
315    let midx_d = midx_d_dir(pack_dir);
316    fs::create_dir_all(&midx_d).map_err(Error::Io)?;
317    let dst_midx = midx_d.join(format!("multi-pack-index-{root_checksum_hex}.midx"));
318    hard_link_or_copy(&pack_dir.join("multi-pack-index"), &dst_midx)?;
319    let exts = ["bitmap", "rev"];
320    for ext in exts {
321        let src = pack_dir.join(format!("multi-pack-index-{root_checksum_hex}.{ext}"));
322        if src.exists() {
323            let dst = midx_d.join(format!("multi-pack-index-{root_checksum_hex}.{ext}"));
324            hard_link_or_copy(&src, &dst)?;
325        }
326    }
327    Ok(())
328}
329
330fn clear_stale_split_layers(pack_dir: &Path, keep: &[String]) -> Result<()> {
331    let midx_d = midx_d_dir(pack_dir);
332    if !midx_d.exists() {
333        return Ok(());
334    }
335    let keep: HashSet<&str> = keep.iter().map(|s| s.as_str()).collect();
336    for ent in fs::read_dir(&midx_d).map_err(Error::Io)? {
337        let ent = ent.map_err(Error::Io)?;
338        let name = ent.file_name().to_string_lossy().to_string();
339        let Some(rest) = name.strip_prefix("multi-pack-index-") else {
340            continue;
341        };
342        let Some((hash_part, _ext)) = rest.split_once('.') else {
343            continue;
344        };
345        if hash_part.len() == 40 && !keep.contains(hash_part) {
346            let _ = fs::remove_file(ent.path());
347        }
348    }
349    Ok(())
350}
351
352fn pack_mtime_for_midx(idx: &PackIndex) -> std::time::SystemTime {
353    fs::metadata(&idx.pack_path)
354        .and_then(|m| m.modified())
355        .unwrap_or(std::time::SystemTime::UNIX_EPOCH)
356}
357
358fn midx_pick_better_entry(
359    cur: &MidxEntry,
360    cand_pack: u32,
361    cand_offset: u64,
362    cand_mtime: std::time::SystemTime,
363    preferred_pack: Option<u32>,
364) -> bool {
365    let cur_pref = preferred_pack == Some(cur.pack_id);
366    let new_pref = preferred_pack == Some(cand_pack);
367    if new_pref && !cur_pref {
368        return true;
369    }
370    if cur_pref && !new_pref {
371        return false;
372    }
373    match cand_mtime.cmp(&cur.pack_mtime) {
374        std::cmp::Ordering::Greater => true,
375        std::cmp::Ordering::Less => false,
376        std::cmp::Ordering::Equal => {
377            if cand_pack != cur.pack_id {
378                cand_pack < cur.pack_id
379            } else {
380                cand_offset < cur.offset
381            }
382        }
383    }
384}
385
386fn build_midx_bytes(
387    idx_names: &[String],
388    indexes: &[PackIndex],
389    preferred_idx: Option<usize>,
390    write_bitmap_placeholders: bool,
391    omit_embedded_ridx_chunk: bool,
392) -> Result<(Vec<u8>, Option<Vec<u32>>)> {
393    let preferred_pack_idx = preferred_idx.map(|p| p as u32);
394    let pack_mtimes: Vec<std::time::SystemTime> = indexes.iter().map(pack_mtime_for_midx).collect();
395
396    let mut best: HashMap<ObjectId, MidxEntry> = HashMap::new();
397    for (pack_id, idx) in indexes.iter().enumerate() {
398        let pack_id = u32::try_from(pack_id).map_err(|_| {
399            Error::CorruptObject("too many pack files for multi-pack-index".to_owned())
400        })?;
401        let mtime = pack_mtimes[pack_id as usize];
402        for e in &idx.entries {
403            if e.oid.len() != 20 {
404                continue;
405            }
406            let Ok(oid) = ObjectId::from_bytes(&e.oid) else {
407                continue;
408            };
409            let cand = MidxEntry {
410                oid,
411                pack_id,
412                offset: e.offset,
413                pack_mtime: mtime,
414            };
415            match best.get(&oid) {
416                None => {
417                    best.insert(oid, cand);
418                }
419                Some(cur) => {
420                    if midx_pick_better_entry(cur, pack_id, e.offset, mtime, preferred_pack_idx) {
421                        best.insert(oid, cand);
422                    }
423                }
424            }
425        }
426    }
427
428    let mut entries: Vec<MidxEntry> = best.into_values().collect();
429    entries.sort_by_key(|a| a.oid);
430
431    let mut large_offsets: Vec<u64> = Vec::new();
432    for e in &entries {
433        if e.offset > u64::from(u32::MAX) {
434            return Err(Error::CorruptObject(
435                "object offset does not fit in multi-pack-index".to_owned(),
436            ));
437        }
438    }
439
440    let num_packs = indexes.len() as u32;
441
442    let mut pack_names_blob = Vec::new();
443    for name in idx_names {
444        pack_names_blob.extend_from_slice(name.as_bytes());
445        pack_names_blob.push(0);
446    }
447    let pad = (MIDX_CHUNK_ALIGNMENT - (pack_names_blob.len() % MIDX_CHUNK_ALIGNMENT))
448        % MIDX_CHUNK_ALIGNMENT;
449    pack_names_blob.extend(std::iter::repeat_n(0u8, pad));
450    let chunk_pnam = pack_names_blob;
451
452    let mut chunk_oidf = vec![0u8; 256 * 4];
453    let mut j = 0usize;
454    for i in 0..256 {
455        while j < entries.len() && entries[j].oid.as_bytes()[0] <= i as u8 {
456            j += 1;
457        }
458        chunk_oidf[i * 4..(i + 1) * 4].copy_from_slice(&(j as u32).to_be_bytes());
459    }
460
461    let mut chunk_oidl = Vec::with_capacity(entries.len() * 20);
462    for e in &entries {
463        chunk_oidl.extend_from_slice(e.oid.as_bytes());
464    }
465
466    let mut chunk_ooff = Vec::with_capacity(entries.len() * 8);
467    for e in &entries {
468        chunk_ooff.extend_from_slice(&e.pack_id.to_be_bytes());
469        let needs_large = e.offset >= u64::from(MIDX_LARGE_OFFSET_NEEDED);
470        let encoded = if needs_large {
471            let slot = u32::try_from(large_offsets.len()).map_err(|_| {
472                Error::CorruptObject("too many large offsets in multi-pack-index".to_owned())
473            })?;
474            large_offsets.push(e.offset);
475            MIDX_LARGE_OFFSET_NEEDED | slot
476        } else {
477            u32::try_from(e.offset).map_err(|_| {
478                Error::CorruptObject("object offset overflow in multi-pack-index".to_owned())
479            })?
480        };
481        chunk_ooff.extend_from_slice(&encoded.to_be_bytes());
482    }
483
484    let chunk_loff: Vec<u8> = if large_offsets.is_empty() {
485        Vec::new()
486    } else {
487        let mut v = Vec::with_capacity(large_offsets.len() * 8);
488        for off in &large_offsets {
489            v.extend_from_slice(&off.to_be_bytes());
490        }
491        v
492    };
493
494    let pref = preferred_pack_idx;
495    let mut order: Vec<u32> = (0..entries.len() as u32).collect();
496    order.sort_by(|&ai, &bi| {
497        let a = &entries[ai as usize];
498        let b = &entries[bi as usize];
499        let a_pref = pref == Some(a.pack_id);
500        let b_pref = pref == Some(b.pack_id);
501        b_pref
502            .cmp(&a_pref)
503            .then_with(|| a.pack_id.cmp(&b.pack_id))
504            .then_with(|| a.offset.cmp(&b.offset))
505            .then_with(|| ai.cmp(&bi))
506    });
507
508    let mut chunk_ridx = Vec::with_capacity(entries.len() * 4);
509    for oid_idx in &order {
510        chunk_ridx.extend_from_slice(&oid_idx.to_be_bytes());
511    }
512
513    // BTMP: per-pack (bitmap_pos, bitmap_nr) in the pseudo-bitmap namespace, matching Git's
514    // `write_midx_bitmapped_packs` (cumulative start + object count per pack).
515    let rev_sidecar_order = if omit_embedded_ridx_chunk && write_bitmap_placeholders {
516        Some(order.clone())
517    } else {
518        None
519    };
520    let chunk_btmp: Vec<u8> = if write_bitmap_placeholders {
521        let mut v = Vec::new();
522        let mut cumulative = 0u32;
523        for idx in indexes {
524            let n = u32::try_from(idx.entries.len()).map_err(|_| {
525                Error::CorruptObject("too many objects in pack for MIDX BTMP".to_owned())
526            })?;
527            v.extend_from_slice(&cumulative.to_be_bytes());
528            v.extend_from_slice(&n.to_be_bytes());
529            cumulative = cumulative.saturating_add(n);
530        }
531        let pad = (MIDX_CHUNK_ALIGNMENT - (v.len() % MIDX_CHUNK_ALIGNMENT)) % MIDX_CHUNK_ALIGNMENT;
532        v.extend(std::iter::repeat_n(0u8, pad));
533        v
534    } else {
535        Vec::new()
536    };
537
538    let mut chunks: Vec<(u32, Vec<u8>)> = vec![
539        (MIDX_CHUNKID_PACKNAMES, chunk_pnam),
540        (MIDX_CHUNKID_OIDFANOUT, chunk_oidf),
541        (MIDX_CHUNKID_OIDLOOKUP, chunk_oidl),
542        (MIDX_CHUNKID_OBJECTOFFSETS, chunk_ooff),
543    ];
544    if !chunk_loff.is_empty() {
545        chunks.push((MIDX_CHUNKID_LARGEOFFSETS, chunk_loff));
546    }
547    if (pref.is_some() || write_bitmap_placeholders) && !omit_embedded_ridx_chunk {
548        chunks.push((MIDX_CHUNKID_REVINDEX, chunk_ridx));
549    }
550    if write_bitmap_placeholders {
551        chunks.push((MIDX_CHUNKID_BITMAPPED_PACKS, chunk_btmp));
552    }
553
554    let num_chunks: u8 = chunks
555        .len()
556        .try_into()
557        .map_err(|_| Error::CorruptObject("too many MIDX chunks".to_owned()))?;
558
559    let mut body = Vec::new();
560    let mut cur_offset =
561        MIDX_HEADER_SIZE as u64 + ((chunks.len() + 1) * CHUNK_TOC_ENTRY_SIZE) as u64;
562
563    for (id, data) in &chunks {
564        body.extend_from_slice(&id.to_be_bytes());
565        body.extend_from_slice(&cur_offset.to_be_bytes());
566        cur_offset += data.len() as u64;
567    }
568    body.extend_from_slice(&0u32.to_be_bytes());
569    body.extend_from_slice(&cur_offset.to_be_bytes());
570
571    for (_, data) in &chunks {
572        body.extend_from_slice(data);
573    }
574
575    let mut out = Vec::with_capacity(MIDX_HEADER_SIZE + body.len() + 20);
576    out.extend_from_slice(&MIDX_SIGNATURE.to_be_bytes());
577    out.push(MIDX_VERSION_V1);
578    out.push(HASH_VERSION_SHA1);
579    out.push(num_chunks);
580    out.push(0);
581    out.extend_from_slice(&num_packs.to_be_bytes());
582    out.extend_from_slice(&body);
583
584    let mut hasher = Sha1::new();
585    hasher.update(&out);
586    let hash = hasher.finalize();
587    out.extend_from_slice(&hash);
588
589    Ok((out, rev_sidecar_order))
590}
591
592/// Standalone MIDX `.rev` file (Git `write_rev_file_order` / `RIDX_SIGNATURE`).
593fn write_midx_rev_sidecar(
594    path: &Path,
595    pack_order: &[u32],
596    midx_file_hash: &[u8; 20],
597) -> Result<()> {
598    let mut body = Vec::with_capacity(RIDX_HEADER_SIZE + pack_order.len() * 4 + 20);
599    body.extend_from_slice(&RIDX_SIGNATURE.to_be_bytes());
600    body.extend_from_slice(&RIDX_VERSION.to_be_bytes());
601    body.extend_from_slice(&1u32.to_be_bytes());
602    for idx in pack_order {
603        body.extend_from_slice(&idx.to_be_bytes());
604    }
605    body.extend_from_slice(midx_file_hash);
606    fs::write(path, body).map_err(Error::Io)
607}
608
609fn find_chunk(data: &[u8], header_end: usize, chunk_id: u32) -> Result<(usize, usize)> {
610    let (hdr, _, _) = parse_midx_header(data)?;
611    let n = hdr.num_chunks as usize;
612    let pos = header_end;
613    let toc_end = pos + (n + 1) * CHUNK_TOC_ENTRY_SIZE;
614    if data.len() < toc_end + 20 {
615        return Err(Error::CorruptObject(
616            "truncated MIDX chunk table".to_owned(),
617        ));
618    }
619    for i in 0..n {
620        let base = pos + i * CHUNK_TOC_ENTRY_SIZE;
621        let id = read_be_u32(data, base)?;
622        let off = read_be_u64(data, base + 4)? as usize;
623        if id == chunk_id {
624            let next_off = if i + 1 < n {
625                let nb = pos + (i + 1) * CHUNK_TOC_ENTRY_SIZE;
626                read_be_u64(data, nb + 4)? as usize
627            } else {
628                let term = pos + n * CHUNK_TOC_ENTRY_SIZE;
629                read_be_u64(data, term + 4)? as usize
630            };
631            return Ok((off, next_off.saturating_sub(off)));
632        }
633    }
634    Err(Error::CorruptObject(format!(
635        "MIDX chunk {chunk_id:08x} not found"
636    )))
637}
638
639/// Return the `pack-*.idx` basename for the MIDX preferred pack (RIDX position 0).
640///
641/// `objects_dir` is the repository object database (e.g. `.git/objects`), not `objects/pack`.
642///
643/// Used by `test-tool read-midx --preferred-pack` compatibility.
644/// Pack index basenames (`pack-*.idx`) stored in the MIDX pack-names chunk.
645pub fn read_midx_pack_idx_names(objects_dir: &Path) -> Result<Vec<String>> {
646    let pack_dir = objects_dir.join("pack");
647    let path = resolve_tip_midx_path(&pack_dir)
648        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
649    let data = fs::read(&path).map_err(Error::Io)?;
650    let (_, hdr_end, _) = parse_midx_header(&data)?;
651    let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
652    parse_pack_names_blob(&data[pn_off..pn_off + pn_len])
653}
654
655/// A single MIDX-referenced object together with the pack it is attributed to.
656pub struct MidxObjectRef {
657    pub oid: ObjectId,
658    /// Index into the pack-names list returned alongside this.
659    pub pack_int_id: usize,
660}
661
662/// Read the tip MIDX and return `(pack_names, objects)`, where each object names
663/// the pack it is attributed to (`pack_int_id`). Mirrors the per-object
664/// `nth_midxed_pack_int_id` iteration in Git used by expire/repack.
665pub fn read_midx_objects(objects_dir: &Path) -> Result<(Vec<String>, Vec<MidxObjectRef>)> {
666    let pack_dir = objects_dir.join("pack");
667    let path = resolve_tip_midx_path(&pack_dir)
668        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
669    let data = fs::read(&path).map_err(Error::Io)?;
670    let (_, hdr_end, _) = parse_midx_header(&data)?;
671    let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
672    let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
673    let (oidl_off, oidl_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
674    let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
675    if oidl_len % 20 != 0 || ooff_len % 8 != 0 {
676        return Err(Error::CorruptObject(
677            "bad MIDX oid-lookup / object-offsets size".to_owned(),
678        ));
679    }
680    let num = oidl_len / 20;
681    if num * 8 != ooff_len {
682        return Err(Error::CorruptObject(
683            "MIDX oid count does not match object-offsets".to_owned(),
684        ));
685    }
686    let mut objects = Vec::with_capacity(num);
687    for i in 0..num {
688        let oid = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])
689            .map_err(|e| Error::CorruptObject(e.to_string()))?;
690        let base = ooff_off + i * 8;
691        let pack_id = read_be_u32(&data, base)? as usize;
692        objects.push(MidxObjectRef {
693            oid,
694            pack_int_id: pack_id,
695        });
696    }
697    Ok((names, objects))
698}
699
700/// Trailing 40-character SHA-1 hex of the active MIDX (root or chain tip).
701pub fn midx_checksum_hex(objects_dir: &Path) -> Result<String> {
702    let pack_dir = objects_dir.join("pack");
703    let path = resolve_tip_midx_path(&pack_dir)
704        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
705    midx_checksum_hex_from_path(&path)
706}
707
708/// Human-readable dump of the MIDX (matches `test-tool read-midx` layout closely enough for grep-based tests).
709/// Emit one line per MIDX object: `{oid} {offset}\t{pack-idx-name}` (matches Git `test-read-midx.c`).
710pub fn format_midx_show_objects(objects_dir: &Path) -> Result<String> {
711    let mut out = format_midx_dump(objects_dir)?;
712    let pack_dir = objects_dir.join("pack");
713    let path = resolve_tip_midx_path(&pack_dir)
714        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
715    let data = fs::read(&path).map_err(Error::Io)?;
716    let (_, hdr_end, _) = parse_midx_header(&data)?;
717    let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
718    let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
719    let (oidl_off, oidl_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
720    let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
721    if oidl_len % 20 != 0 || ooff_len % 8 != 0 {
722        return Err(Error::CorruptObject(
723            "bad MIDX oid-lookup / object-offsets size".to_owned(),
724        ));
725    }
726    let num = oidl_len / 20;
727    if num * 8 != ooff_len {
728        return Err(Error::CorruptObject(
729            "MIDX oid count does not match object-offsets".to_owned(),
730        ));
731    }
732    for i in 0..num {
733        let oid = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])
734            .map_err(|e| Error::CorruptObject(e.to_string()))?;
735        let base = ooff_off + i * 8;
736        let pack_id = read_be_u32(&data, base)? as usize;
737        let offset = u64::from(read_be_u32(&data, base + 4)?);
738        let pack_name = names
739            .get(pack_id)
740            .ok_or_else(|| Error::CorruptObject("pack id out of range in MIDX".to_owned()))?;
741        out.push_str(&format!("{} {}\t{}\n", oid.to_hex(), offset, pack_name));
742    }
743    Ok(out)
744}
745
746pub fn format_midx_dump(objects_dir: &Path) -> Result<String> {
747    let pack_dir = objects_dir.join("pack");
748    let path = resolve_tip_midx_path(&pack_dir)
749        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
750    let data = fs::read(&path).map_err(Error::Io)?;
751    let (hdr, hdr_end, _) = parse_midx_header(&data)?;
752    let sig = read_be_u32(&data, 0)?;
753    let version = data[4];
754    let hash_len = data[5];
755    let num_chunks = hdr.num_chunks;
756    let num_packs = read_be_u32(&data, 8)?;
757
758    let mut chunk_tags: Vec<&'static str> = Vec::new();
759    let n = num_chunks as usize;
760    let pos = hdr_end;
761    let toc_end = pos + (n + 1) * CHUNK_TOC_ENTRY_SIZE;
762    if data.len() < toc_end + 20 {
763        return Err(Error::CorruptObject(
764            "truncated MIDX chunk table".to_owned(),
765        ));
766    }
767    for i in 0..n {
768        let base = pos + i * CHUNK_TOC_ENTRY_SIZE;
769        let id = read_be_u32(&data, base)?;
770        let tag = match id {
771            x if x == MIDX_CHUNKID_PACKNAMES => "pack-names",
772            x if x == MIDX_CHUNKID_OIDFANOUT => "oid-fanout",
773            x if x == MIDX_CHUNKID_OIDLOOKUP => "oid-lookup",
774            x if x == MIDX_CHUNKID_OBJECTOFFSETS => "object-offsets",
775            x if x == MIDX_CHUNKID_REVINDEX => "revindex",
776            x if x == 0x4254_4d50 => "bitmapped-packs",
777            _ => "unknown",
778        };
779        chunk_tags.push(tag);
780    }
781
782    let (_ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
783    let num_objects = ooff_len / 8;
784
785    let pack_names = read_midx_pack_idx_names(objects_dir)?;
786
787    let mut out = String::new();
788    out.push_str(&format!(
789        "header: {:08x} {} {} {} {}\n",
790        sig, version, hash_len, num_chunks, num_packs
791    ));
792    out.push_str("chunks:");
793    for t in &chunk_tags {
794        out.push(' ');
795        out.push_str(t);
796    }
797    out.push('\n');
798    out.push_str(&format!("num_objects: {num_objects}\n"));
799    out.push_str("packs:\n");
800    for n in &pack_names {
801        out.push_str(n);
802        out.push('\n');
803    }
804    out.push_str(&format!("object-dir: {}\n", objects_dir.display()));
805    Ok(out)
806}
807
/// OID rows from the active multi-pack-index, plus reverse-index order for pack-reuse bitmap bits.
///
/// Git assigns each object a **global bitmap bit** equal to its position in the MIDX reverse index
/// (`RIDX` chunk) traversal order — not its position in the pack `.idx` file. Helpers on this struct
/// map [`ObjectId`] → global bit the same way as `midx-write.c` (`midx_pack_order`).
///
/// When built by [`load_midx_reuse_tables`] all four vectors have one entry per MIDX object, so
/// they share the same length. The fields are public; code that constructs a value by hand must
/// keep `oids` sorted (lookups binary-search it) and keep `oid_idx_to_rank` at least as long as
/// `oids`.
#[derive(Debug, Clone)]
pub struct MidxReuseTables {
    /// OIDs in MIDX lexicographic order (same order as the OID lookup chunk).
    pub oids: Vec<ObjectId>,
    /// `(pack_int_id, in-pack offset)` parallel to `oids`.
    pub pack_and_offset: Vec<(u32, u64)>,
    /// `rid_order[rank]` is the OID-table index of the object at global bitmap rank `rank`.
    pub rid_order: Vec<u32>,
    /// Inverse map: global bitmap rank for each OID-table index.
    ///
    /// Indexed by position in `oids`; the value is the matching index into `rid_order`.
    pub oid_idx_to_rank: Vec<u32>,
}
824
825/// Load OID / object-offset / reverse-index tables from the tip MIDX (root or chain tip).
826///
827/// Returns [`None`] when there is no MIDX or no `RIDX` chunk (no pseudo-bitmap ordering).
828pub fn load_midx_reuse_tables(objects_dir: &Path) -> Result<Option<MidxReuseTables>> {
829    let pack_dir = objects_dir.join("pack");
830    let Some(path) = resolve_tip_midx_path(&pack_dir) else {
831        return Ok(None);
832    };
833    let data = fs::read(&path).map_err(Error::Io)?;
834    let (_, hdr_end, _) = parse_midx_header(&data)?;
835    let (oidl_off, oid_l_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
836    let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
837    let Ok((ridx_off, ridx_len)) = find_chunk(&data, hdr_end, MIDX_CHUNKID_REVINDEX) else {
838        return Ok(None);
839    };
840    if oid_l_len % 20 != 0 || ooff_len != oid_l_len / 20 * 8 {
841        return Err(Error::CorruptObject(
842            "MIDX OID / offset chunk size mismatch".to_owned(),
843        ));
844    }
845    let num_objects = oid_l_len / 20;
846    if ridx_len != num_objects.saturating_mul(4) {
847        return Err(Error::CorruptObject(
848            "MIDX reverse index length does not match object count".to_owned(),
849        ));
850    }
851    if num_objects == 0 {
852        return Ok(None);
853    }
854
855    let mut oids = Vec::with_capacity(num_objects);
856    for i in 0..num_objects {
857        let base = oidl_off + i * 20;
858        oids.push(ObjectId::from_bytes(&data[base..base + 20])?);
859    }
860
861    let mut pack_and_offset = Vec::with_capacity(num_objects);
862    for i in 0..num_objects {
863        let ob = ooff_off + i * 8;
864        let pack_id = read_be_u32(&data, ob)?;
865        let off32 = read_be_u32(&data, ob + 4)?;
866        pack_and_offset.push((pack_id, u64::from(off32)));
867    }
868
869    let mut rid_order = Vec::with_capacity(num_objects);
870    for i in 0..num_objects {
871        let base = ridx_off + i * 4;
872        rid_order.push(read_be_u32(&data, base)?);
873    }
874
875    let mut oid_idx_to_rank = vec![0u32; num_objects];
876    for (rank, &oid_idx) in rid_order.iter().enumerate() {
877        let idx = usize::try_from(oid_idx)
878            .map_err(|_| Error::CorruptObject("bad MIDX reverse index entry".to_owned()))?;
879        if idx >= num_objects {
880            return Err(Error::CorruptObject(
881                "MIDX reverse index out of range".to_owned(),
882            ));
883        }
884        oid_idx_to_rank[idx] = u32::try_from(rank)
885            .map_err(|_| Error::CorruptObject("too many MIDX objects".to_owned()))?;
886    }
887
888    Ok(Some(MidxReuseTables {
889        oids,
890        pack_and_offset,
891        rid_order,
892        oid_idx_to_rank,
893    }))
894}
895
896impl MidxReuseTables {
897    /// Global pseudo-bitmap index for `oid`, or [`None`] if the object is not in this MIDX.
898    #[must_use]
899    pub fn global_bitmap_bit(&self, oid: &ObjectId) -> Option<u32> {
900        let oid_idx = self.oids.binary_search(oid).ok()?;
901        Some(self.oid_idx_to_rank[oid_idx])
902    }
903}
904
/// One pack's slice of the MIDX pseudo-bitmap namespace (`BTMP` chunk).
///
/// Each on-disk entry is a pair of big-endian `u32` values, `bitmap_pos` followed by
/// `bitmap_nr`; [`read_midx_btmp_ranges`] yields one value of this type per entry, in chunk order.
#[derive(Debug, Clone, Copy)]
pub struct MidxBtmpPackRange {
    /// Pack index in the MIDX pack-names list.
    ///
    /// Set by the reader to the entry's position within the chunk.
    pub pack_id: u32,
    /// First bit index assigned to this pack (cumulative object order).
    pub bitmap_pos: u32,
    /// Number of objects in this pack (same as `.idx` entry count).
    pub bitmap_nr: u32,
}
915
916/// Read per-pack `(bitmap_pos, bitmap_nr)` from the active MIDX `BTMP` chunk.
917///
918/// Returns an empty vector when the MIDX has no bitmapped-packs chunk.
919pub fn read_midx_btmp_ranges(objects_dir: &Path) -> Result<Vec<MidxBtmpPackRange>> {
920    let pack_dir = objects_dir.join("pack");
921    let Some(path) = resolve_tip_midx_path(&pack_dir) else {
922        return Ok(Vec::new());
923    };
924    let data = fs::read(&path).map_err(Error::Io)?;
925    let (_, hdr_end, _) = parse_midx_header(&data)?;
926    let Ok((btmp_off, btmp_len)) = find_chunk(&data, hdr_end, MIDX_CHUNKID_BITMAPPED_PACKS) else {
927        return Ok(Vec::new());
928    };
929    if btmp_len == 0 || btmp_len % 8 != 0 {
930        return Err(Error::CorruptObject(
931            "invalid MIDX BTMP chunk length".to_owned(),
932        ));
933    }
934    let num_packs = read_be_u32(&data, 8)?;
935    let n_entries = btmp_len / 8;
936    if u32::try_from(n_entries).ok() != Some(num_packs) {
937        return Err(Error::CorruptObject(
938            "MIDX BTMP entry count does not match num_packs".to_owned(),
939        ));
940    }
941    let mut out = Vec::with_capacity(n_entries);
942    for i in 0..n_entries {
943        let base = btmp_off + i * 8;
944        let bitmap_pos = read_be_u32(&data, base)?;
945        let bitmap_nr = read_be_u32(&data, base + 4)?;
946        out.push(MidxBtmpPackRange {
947            pack_id: u32::try_from(i)
948                .map_err(|_| Error::CorruptObject("too many packs in MIDX BTMP".to_owned()))?,
949            bitmap_pos,
950            bitmap_nr,
951        });
952    }
953    Ok(out)
954}
955
956/// Look up which pack and in-pack offset holds `oid` according to the active MIDX.
957pub fn midx_lookup_pack_and_offset(objects_dir: &Path, oid: &ObjectId) -> Result<(u32, u64)> {
958    let pack_dir = objects_dir.join("pack");
959    let path = resolve_tip_midx_path(&pack_dir)
960        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
961    let data = fs::read(&path).map_err(Error::Io)?;
962    let (_, hdr_end, _) = parse_midx_header(&data)?;
963    let (fanout_off, fanout_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDFANOUT)?;
964    let (oidl_off, oid_l_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
965    let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
966    if fanout_len != 256 * 4 || oid_l_len % 20 != 0 || ooff_len != oid_l_len / 20 * 8 {
967        return Err(Error::CorruptObject("truncated MIDX OID chunks".to_owned()));
968    }
969    let num_objects = oid_l_len / 20;
970    let first = oid.as_bytes()[0] as usize;
971    let j0 = if first == 0 {
972        0usize
973    } else {
974        read_be_u32(&data, fanout_off + (first - 1) * 4)? as usize
975    };
976    let j1 = read_be_u32(&data, fanout_off + first * 4)? as usize;
977    let mut lo = j0;
978    let mut hi = j1;
979    while lo < hi {
980        let mid = (lo + hi) / 2;
981        let base = oidl_off + mid * 20;
982        let cmp = data[base..base + 20].cmp(oid.as_bytes());
983        if cmp == std::cmp::Ordering::Less {
984            lo = mid + 1;
985        } else {
986            hi = mid;
987        }
988    }
989    if lo >= num_objects {
990        return Err(Error::CorruptObject(format!(
991            "object {} not in multi-pack-index",
992            oid.to_hex()
993        )));
994    }
995    let base = oidl_off + lo * 20;
996    if data[base..base + 20] != *oid.as_bytes() {
997        return Err(Error::CorruptObject(format!(
998            "object {} not in multi-pack-index",
999            oid.to_hex()
1000        )));
1001    }
1002    let ob = ooff_off + lo * 8;
1003    let pack_id = read_be_u32(&data, ob)?;
1004    let off32 = read_be_u32(&data, ob + 4)?;
1005    Ok((pack_id, u64::from(off32)))
1006}
1007
1008/// Returns whether `oid` appears in the active MIDX OID table for `objects_dir`.
1009///
1010/// [`None`] means there is no MIDX at the pack tip. [`Some`] is the lookup result when a MIDX exists.
1011pub fn midx_oid_listed_in_tip(objects_dir: &Path, oid: &ObjectId) -> Result<Option<bool>> {
1012    let pack_dir = objects_dir.join("pack");
1013    let Some(midx_path) = resolve_tip_midx_path(&pack_dir) else {
1014        return Ok(None);
1015    };
1016    let data = fs::read(&midx_path).map_err(Error::Io)?;
1017    let (_, hdr_end, hash_bytes) = parse_midx_header(&data)?;
1018    if hash_bytes != 1 {
1019        eprintln!(
1020            "error: multi-pack-index hash version {} does not match version 1",
1021            hash_bytes
1022        );
1023        return Err(Error::CorruptObject(
1024            "multi-pack-index hash version mismatch".to_owned(),
1025        ));
1026    }
1027    let (oidf_off, oidf_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDFANOUT)?;
1028    if oidf_len != 256 * 4 {
1029        eprintln!("error: multi-pack-index OID fanout is of the wrong size");
1030        return Err(Error::CorruptObject(
1031            "multi-pack-index OID fanout is of the wrong size".to_owned(),
1032        ));
1033    }
1034    let (oidl_off, oidl_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
1035    let (_ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1036    let num_objects = ooff_len / 8;
1037    if oidl_len != num_objects * 20 || ooff_len != num_objects * 8 {
1038        if oidl_len != num_objects * 20 {
1039            eprintln!("error: multi-pack-index OID lookup chunk is the wrong size");
1040        } else {
1041            eprintln!("error: multi-pack-index object offset chunk is the wrong size");
1042        }
1043        return Err(Error::CorruptObject("midx chunk size mismatch".to_owned()));
1044    }
1045
1046    let first = oid.as_bytes()[0] as usize;
1047    let lo = if first == 0 {
1048        0u32
1049    } else {
1050        read_be_u32(&data, oidf_off + (first - 1) * 4)?
1051    };
1052    let hi = read_be_u32(&data, oidf_off + first * 4)?;
1053    if lo > hi || hi as usize > num_objects {
1054        eprintln!(
1055            "error: oid fanout out of order: fanout[{}] = {:08x} > {:08x} = fanout[{}]",
1056            first.saturating_sub(1),
1057            lo,
1058            hi,
1059            first
1060        );
1061        return Err(Error::CorruptObject("oid fanout out of order".to_owned()));
1062    }
1063
1064    let mut i = lo as usize;
1065    while i < hi as usize {
1066        let o = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])?;
1067        match o.cmp(oid) {
1068            std::cmp::Ordering::Equal => return Ok(Some(true)),
1069            std::cmp::Ordering::Greater => return Ok(Some(false)),
1070            std::cmp::Ordering::Less => i += 1,
1071        }
1072    }
1073    Ok(Some(false))
1074}
1075
1076/// When `core.multiPackIndex` is enabled, try to read `oid` from the active MIDX in `objects_dir`.
1077///
1078/// Returns [`None`] when no MIDX exists or `oid` is not listed. Returns [`Some(Err(..))`] when the
1079/// MIDX is present but malformed (callers surface Git-style `error:` / `fatal:` messages).
1080pub fn try_read_object_via_midx(
1081    objects_dir: &Path,
1082    oid: &ObjectId,
1083) -> Result<Option<crate::objects::Object>> {
1084    let pack_dir = objects_dir.join("pack");
1085    let Some(midx_path) = resolve_tip_midx_path(&pack_dir) else {
1086        return Ok(None);
1087    };
1088    let data = fs::read(&midx_path).map_err(Error::Io)?;
1089    let (_, hdr_end, hash_bytes) = parse_midx_header(&data)?;
1090    let num_packs_hdr = read_be_u32(&data, 8)?;
1091    if hash_bytes != 1 {
1092        eprintln!(
1093            "error: multi-pack-index hash version {} does not match version 1",
1094            hash_bytes
1095        );
1096        return Err(Error::CorruptObject(
1097            "multi-pack-index hash version mismatch".to_owned(),
1098        ));
1099    }
1100    let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1101    let pack_names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
1102    if pack_names.len() != num_packs_hdr as usize {
1103        return Err(Error::CorruptObject(
1104            "multi-pack-index pack-name chunk is too short".to_owned(),
1105        ));
1106    }
1107    let (oidf_off, oidf_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDFANOUT)?;
1108    if oidf_len != 256 * 4 {
1109        eprintln!("error: multi-pack-index OID fanout is of the wrong size");
1110        return Err(Error::CorruptObject(
1111            "multi-pack-index OID fanout is of the wrong size".to_owned(),
1112        ));
1113    }
1114    let (oidl_off, oidl_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
1115    let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1116    let num_objects = ooff_len / 8;
1117    if oidl_len != num_objects * 20 {
1118        eprintln!("error: multi-pack-index OID lookup chunk is the wrong size");
1119        return Err(Error::CorruptObject(
1120            "multi-pack-index OID lookup chunk is the wrong size".to_owned(),
1121        ));
1122    }
1123    if ooff_len != num_objects * 8 {
1124        eprintln!("error: multi-pack-index object offset chunk is the wrong size");
1125        return Err(Error::CorruptObject(
1126            "multi-pack-index object offset chunk is the wrong size".to_owned(),
1127        ));
1128    }
1129    let loff = find_chunk(&data, hdr_end, MIDX_CHUNKID_LARGEOFFSETS).ok();
1130    let ridx = find_chunk(&data, hdr_end, MIDX_CHUNKID_REVINDEX).ok();
1131
1132    if let Some((_, rlen)) = ridx {
1133        if rlen != num_objects * 4 {
1134            eprintln!("error: multi-pack-index reverse-index chunk is the wrong size");
1135            eprintln!("warning: multi-pack bitmap is missing required reverse index");
1136        }
1137    }
1138
1139    let first = oid.as_bytes()[0] as usize;
1140    let lo = if first == 0 {
1141        0u32
1142    } else {
1143        read_be_u32(&data, oidf_off + (first - 1) * 4)?
1144    };
1145    let hi = read_be_u32(&data, oidf_off + first * 4)?;
1146    if lo > hi || hi as usize > num_objects {
1147        eprintln!(
1148            "error: oid fanout out of order: fanout[{}] = {:08x} > {:08x} = fanout[{}]",
1149            first.saturating_sub(1),
1150            lo,
1151            hi,
1152            first
1153        );
1154        return Err(Error::CorruptObject("oid fanout out of order".to_owned()));
1155    }
1156
1157    let mut pos = None;
1158    let mut i = lo as usize;
1159    while i < hi as usize {
1160        let o = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])?;
1161        let c = o.cmp(oid);
1162        if c == std::cmp::Ordering::Equal {
1163            pos = Some(i);
1164            break;
1165        }
1166        if c == std::cmp::Ordering::Greater {
1167            break;
1168        }
1169        i += 1;
1170    }
1171    let Some(pos) = pos else {
1172        return Ok(None);
1173    };
1174
1175    let obase = ooff_off + pos * 8;
1176    let pack_id = read_be_u32(&data, obase)?;
1177    let raw_off = read_be_u32(&data, obase + 4)?;
1178    let _offset = if (raw_off & MIDX_LARGE_OFFSET_NEEDED) != 0 {
1179        let Some((loff_off, loff_len)) = loff else {
1180            return Err(Error::CorruptObject(
1181                "multi-pack-index large offset missing LOFF chunk".to_owned(),
1182            ));
1183        };
1184        let idx = (raw_off & !MIDX_LARGE_OFFSET_NEEDED) as usize;
1185        let need = (idx + 1) * 8;
1186        if loff_len < need {
1187            return Err(Error::CorruptObject(
1188                "multi-pack-index large offset out of bounds".to_owned(),
1189            ));
1190        }
1191        read_be_u64(&data, loff_off + idx * 8)?
1192    } else {
1193        u64::from(raw_off)
1194    };
1195
1196    let idx_name = pack_names
1197        .get(pack_id as usize)
1198        .ok_or_else(|| Error::CorruptObject("bad pack-int-id".to_owned()))?;
1199    let idx_path = pack_dir.join(idx_name);
1200    // A multi-pack-index can outlive packs it names (e.g. a `repack -d` deleted a
1201    // pack but did not rewrite the MIDX). Git tolerates such stale entries by
1202    // skipping the missing pack; mirror that by falling through to other object
1203    // sources instead of surfacing the open error.
1204    if !idx_path.exists() {
1205        return Ok(None);
1206    }
1207    let idx = crate::pack::read_pack_index(&idx_path)?;
1208    crate::pack::read_object_from_pack(&idx, oid).map(Some)
1209}
1210
1211pub fn read_midx_preferred_idx_name(objects_dir: &Path) -> Result<String> {
1212    let pack_dir = objects_dir.join("pack");
1213    let path = resolve_tip_midx_path(&pack_dir)
1214        .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
1215    let data = fs::read(&path).map_err(Error::Io)?;
1216    let (_, hdr_end, _) = parse_midx_header(&data)?;
1217    let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1218    let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
1219    let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1220    let (ridx_off, ridx_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_REVINDEX)?;
1221
1222    if ridx_len < 4 || ooff_len < 8 {
1223        return Err(Error::CorruptObject("truncated MIDX RIDX/OOFF".to_owned()));
1224    }
1225    let first_oid_idx = read_be_u32(&data, ridx_off)? as usize;
1226    let entry_base = ooff_off + first_oid_idx * 8;
1227    if entry_base + 8 > data.len() || entry_base + 8 > ooff_off + ooff_len {
1228        return Err(Error::CorruptObject(
1229            "bad MIDX object-offsets index".to_owned(),
1230        ));
1231    }
1232    let pack_id = read_be_u32(&data, entry_base)?;
1233    let idx = usize::try_from(pack_id)
1234        .map_err(|_| Error::CorruptObject("pack id overflow in multi-pack-index".to_owned()))?;
1235    names
1236        .get(idx)
1237        .cloned()
1238        .ok_or_else(|| Error::CorruptObject("preferred pack id out of range".to_owned()))
1239}
1240
1241/// Build `objects/pack/multi-pack-index` for all pack indexes in `pack_dir`.
1242///
1243/// Returns an error if there are no `.idx` files, if an object offset does not
1244/// fit in 31 bits (no `LOFF` chunk yet), or if I/O fails.
1245/// Remove every multi-pack-index file under `pack_dir` (root file, sidecars, and
1246/// `multi-pack-index.d/`). Used by full `repack -a` so stale incremental chains do not survive.
1247pub fn clear_pack_midx_state(pack_dir: &Path) -> Result<()> {
1248    let _ = fs::remove_file(pack_dir.join("multi-pack-index"));
1249    scrub_root_midx_sidecars_except(pack_dir, None)?;
1250    let midx_d = midx_d_dir(pack_dir);
1251    if midx_d.exists() {
1252        let _ = fs::remove_dir_all(&midx_d);
1253    }
1254    Ok(())
1255}
1256
1257pub fn write_multi_pack_index(pack_dir: &Path) -> Result<()> {
1258    write_multi_pack_index_with_options(pack_dir, &WriteMultiPackIndexOptions::default())
1259}
1260
/// Write `multi-pack-index` with optional preferred pack, placeholders, and incremental chain.
///
/// Steps, in order:
/// 1. List every `*.idx` in `pack_dir` that has a companion `.pack`, sorted by name; optionally
///    narrow and reorder that list with `opts.pack_names_subset_ordered`.
/// 2. In incremental mode, drop packs already named by the existing base and return `Ok(())`
///    without writing anything when no new pack remains.
/// 3. Pick the preferred pack: explicit index, else name, else (only when bitmap placeholders are
///    requested) by mtime.
/// 4. Serialize the MIDX and write it either as a new chain layer under `multi-pack-index.d/`
///    (incremental) or as the root `multi-pack-index` (full), then refresh sidecar files.
///
/// # Errors
///
/// Returns [`Error::CorruptObject`] when no eligible `.idx` exists, when a requested subset entry
/// is not in the directory or the subset ends up empty, or when the preferred pack cannot be
/// found or is out of range. I/O failures surface as [`Error::Io`]; errors from the helpers
/// called here are propagated.
pub fn write_multi_pack_index_with_options(
    pack_dir: &Path,
    opts: &WriteMultiPackIndexOptions,
) -> Result<()> {
    // Git's MIDX covers every pack index in the directory regardless of its
    // basename (the `.git/objects/pack/test-*.idx` packs created by t7900's
    // incremental-repack test, for instance), so include any `*.idx` whose
    // companion `.pack` exists.
    let mut idx_names: Vec<String> = fs::read_dir(pack_dir)
        .map_err(Error::Io)?
        .filter_map(|e| e.ok())
        .filter_map(|e| {
            let name = e.file_name().to_string_lossy().to_string();
            let stem = name.strip_suffix(".idx")?;
            if pack_dir.join(format!("{stem}.pack")).exists() {
                Some(name)
            } else {
                None
            }
        })
        .collect();
    idx_names.sort();

    if idx_names.is_empty() {
        return Err(Error::CorruptObject(
            "no pack-*.idx files found in pack directory".to_owned(),
        ));
    }

    // Optional caller-supplied subset: keeps the caller's order, drops duplicates, and
    // requires every requested name to exist on disk.
    let idx_names: Vec<String> = if let Some(sub) = &opts.pack_names_subset_ordered {
        let mut out = Vec::new();
        for line in sub {
            let want = normalize_pack_idx_basename(line)?;
            let found = idx_names
                .iter()
                .find(|n| **n == want)
                .cloned()
                .ok_or_else(|| {
                    Error::CorruptObject(format!("pack index not in repository: {want}"))
                })?;
            if !out.contains(&found) {
                out.push(found);
            }
        }
        if out.is_empty() {
            return Err(Error::CorruptObject(
                "stdin-packs list produced empty pack set".to_owned(),
            ));
        }
        out
    } else {
        idx_names
    };

    // Objects and pack names already covered by the existing base (incremental mode only).
    let (base_oids, base_pack_names) = if opts.incremental {
        collect_incremental_base(pack_dir)?
    } else {
        (HashSet::new(), HashSet::new())
    };

    // A new incremental layer only covers packs the base does not already name.
    let layer_idx_names: Vec<String> = if opts.incremental {
        idx_names
            .iter()
            .filter(|n| {
                !base_pack_names
                    .iter()
                    .any(|bp| pack_names_match_layer(bp, n))
            })
            .cloned()
            .collect()
    } else {
        idx_names.clone()
    };

    // Nothing new to index: leave the existing chain untouched.
    if opts.incremental && layer_idx_names.is_empty() {
        return Ok(());
    }

    let work_names = if opts.incremental {
        &layer_idx_names[..]
    } else {
        &idx_names[..]
    };

    // Preferred pack resolution: explicit index wins, then name, then mtime heuristic.
    let mut preferred_idx = opts.preferred_pack_idx.map(|p| p as usize);
    if preferred_idx.is_none() {
        if let Some(raw) = opts.preferred_pack_name.as_deref() {
            let pos = work_names
                .iter()
                .position(|n| cmp_idx_or_pack_name(raw, n).is_eq())
                .ok_or_else(|| {
                    Error::CorruptObject(format!(
                        "preferred pack '{raw}' not found in multi-pack-index input"
                    ))
                })?;
            preferred_idx = Some(pos);
        }
    }
    if preferred_idx.is_none() && opts.write_bitmap_placeholders && !work_names.is_empty() {
        preferred_idx = preferred_pack_index_by_mtime(pack_dir, work_names)?;
    }
    if let Some(p) = preferred_idx {
        if p >= work_names.len() {
            return Err(Error::CorruptObject(
                "preferred pack index out of range".to_owned(),
            ));
        }
    }

    let mut indexes: Vec<PackIndex> = Vec::with_capacity(work_names.len());
    for name in work_names {
        let path = pack_dir.join(name);
        indexes.push(read_pack_index(&path)?);
    }

    let pack_mtimes_layer: Vec<std::time::SystemTime> =
        indexes.iter().map(pack_mtime_for_midx).collect();
    let preferred_u32 = preferred_idx.map(|p| p as u32);

    // Deduplicate objects across packs, skipping anything the base already holds.
    // In this function `best` is consulted afterwards only for emptiness;
    // `build_midx_bytes` is handed the raw indexes, not this map.
    let mut best: HashMap<ObjectId, MidxEntry> = HashMap::new();
    for (pack_id, idx) in indexes.iter().enumerate() {
        let pack_id = u32::try_from(pack_id).map_err(|_| {
            Error::CorruptObject("too many pack files for multi-pack-index".to_owned())
        })?;
        let mtime = pack_mtimes_layer[pack_id as usize];
        for e in &idx.entries {
            // Only 20-byte (SHA-1) object ids are considered.
            if e.oid.len() != 20 {
                continue;
            }
            let Ok(oid) = ObjectId::from_bytes(&e.oid) else {
                continue;
            };
            if opts.incremental && base_oids.contains(&oid) {
                continue;
            }
            let cand = MidxEntry {
                oid,
                pack_id,
                offset: e.offset,
                pack_mtime: mtime,
            };
            match best.get(&oid) {
                None => {
                    best.insert(oid, cand);
                }
                Some(cur) => {
                    if midx_pick_better_entry(cur, pack_id, e.offset, mtime, preferred_u32) {
                        best.insert(oid, cand);
                    }
                }
            }
        }
    }

    // An incremental layer that contributes no new objects gets no bitmap placeholders.
    let bitmap_placeholders =
        opts.write_bitmap_placeholders && (!opts.incremental || !best.is_empty());

    let omit_embedded_ridx = opts.write_rev_placeholder;
    let (out, rev_sidecar_order) = build_midx_bytes(
        work_names,
        &indexes,
        preferred_idx,
        bitmap_placeholders,
        omit_embedded_ridx,
    )?;

    // The last 20 bytes of the serialized MIDX are taken as its checksum; the hex form
    // names the layer and sidecar files.
    let hash = &out[out.len() - 20..];
    let hash_hex = hex::encode(hash);
    let hash_arr: [u8; 20] = hash
        .try_into()
        .map_err(|_| Error::CorruptObject("midx hash length mismatch".to_owned()))?;

    if opts.incremental {
        let root_midx = pack_dir.join("multi-pack-index");
        let chain_path = chain_file_path(pack_dir);
        let chain_existed = chain_path.exists();

        // A root MIDX with no chain yet becomes the first layer of the new chain;
        // otherwise continue the existing chain (an unreadable chain is treated as empty).
        let mut chain = if root_midx.exists() && !chain_existed {
            let root_hex = midx_checksum_hex_from_path(&root_midx)?;
            link_root_midx_into_chain(pack_dir, &root_hex)?;
            vec![root_hex]
        } else {
            read_chain_layer_hashes(pack_dir).unwrap_or_default()
        };

        chain.push(hash_hex.clone());

        let midx_d = midx_d_dir(pack_dir);
        fs::create_dir_all(&midx_d).map_err(Error::Io)?;

        let layer_path = midx_d.join(format!("multi-pack-index-{hash_hex}.midx"));
        fs::write(&layer_path, &out).map_err(Error::Io)?;

        // Chain file: one layer hash per line, in `chain` order (newest appended last).
        let mut chain_data = String::new();
        for h in &chain {
            chain_data.push_str(h);
            chain_data.push('\n');
        }
        fs::write(chain_file_path(pack_dir), chain_data.as_bytes()).map_err(Error::Io)?;

        clear_stale_split_layers(pack_dir, &chain)?;

        // The root file and its sidecars are superseded by the chain.
        let _ = fs::remove_file(pack_dir.join("multi-pack-index"));
        scrub_root_midx_sidecars(pack_dir)?;
        if bitmap_placeholders {
            let full = hex::encode(hash);
            fs::write(midx_d.join(format!("multi-pack-index-{full}.bitmap")), [])
                .map_err(Error::Io)?;
            if opts.write_rev_placeholder {
                let rev_path = midx_d.join(format!("multi-pack-index-{full}.rev"));
                if let Some(order) = rev_sidecar_order.as_ref() {
                    write_midx_rev_sidecar(&rev_path, order, &hash_arr)?;
                } else {
                    fs::write(rev_path, []).map_err(Error::Io)?;
                }
            }
        }
    } else {
        // Full write: empty out the split directory (best-effort per entry), then recreate it.
        let midx_d = midx_d_dir(pack_dir);
        if midx_d.exists() {
            for ent in fs::read_dir(&midx_d).map_err(Error::Io)? {
                let ent = ent.map_err(Error::Io)?;
                let _ = if ent.file_type().map_err(Error::Io)?.is_dir() {
                    fs::remove_dir_all(ent.path())
                } else {
                    fs::remove_file(ent.path())
                };
            }
        }
        fs::create_dir_all(&midx_d).map_err(Error::Io)?;

        let dest = pack_dir.join("multi-pack-index");
        fs::write(&dest, &out).map_err(Error::Io)?;

        // Drop sidecars of older root MIDX files, keeping any that match the new checksum.
        scrub_root_midx_sidecars_except(pack_dir, Some(&hash_hex))?;

        if opts.write_bitmap_placeholders {
            fs::write(
                pack_dir.join(format!("multi-pack-index-{hash_hex}.bitmap")),
                [],
            )
            .map_err(Error::Io)?;
            if opts.write_rev_placeholder {
                let rev_path = pack_dir.join(format!("multi-pack-index-{hash_hex}.rev"));
                if let Some(order) = rev_sidecar_order.as_ref() {
                    write_midx_rev_sidecar(&rev_path, order, &hash_arr)?;
                } else {
                    fs::write(rev_path, []).map_err(Error::Io)?;
                }
            }
        }
    }

    Ok(())
}
1517
1518fn pack_names_match_layer(base_name: &str, disk_idx: &str) -> bool {
1519    if base_name == disk_idx {
1520        return true;
1521    }
1522    cmp_idx_or_pack_name(disk_idx, base_name).is_eq()
1523}
1524
1525fn scrub_root_midx_sidecars(pack_dir: &Path) -> Result<()> {
1526    scrub_root_midx_sidecars_except(pack_dir, None)
1527}
1528
1529fn scrub_root_midx_sidecars_except(pack_dir: &Path, keep_hex: Option<&str>) -> Result<()> {
1530    let Ok(rd) = fs::read_dir(pack_dir) else {
1531        return Ok(());
1532    };
1533    for ent in rd {
1534        let ent = ent.map_err(Error::Io)?;
1535        let name = ent.file_name().to_string_lossy().to_string();
1536        let Some(rest) = name.strip_prefix("multi-pack-index-") else {
1537            continue;
1538        };
1539        if !(rest.ends_with(".bitmap") || rest.ends_with(".rev")) {
1540            continue;
1541        }
1542        let hash_part = rest
1543            .strip_suffix(".bitmap")
1544            .or_else(|| rest.strip_suffix(".rev"))
1545            .unwrap_or(rest);
1546        if hash_part.len() != 40 {
1547            continue;
1548        }
1549        if keep_hex.is_some_and(|k| k == hash_part) {
1550            continue;
1551        }
1552        let _ = fs::remove_file(ent.path());
1553    }
1554    Ok(())
1555}