1use std::collections::{HashMap, HashSet};
13use std::fs;
14use std::io::{BufRead, BufReader};
15use std::path::Path;
16
17use sha1::{Digest, Sha1};
18use sha2::{Digest as Sha256Digest, Sha256};
19
20use crate::error::{Error, Result};
21use crate::objects::ObjectId;
22use crate::pack::{read_pack_index_no_verify, PackIndex};
23
/// File signature of a multi-pack-index: the ASCII bytes "MIDX".
const MIDX_SIGNATURE: u32 = 0x4d49_4458;
/// Format versions accepted by the header parser.
const MIDX_VERSION_V1: u8 = 1;
const MIDX_VERSION_V2: u8 = 2;
/// Values of the hash-version byte stored at header offset 5.
const HASH_VERSION_SHA1: u8 = 1;
const HASH_VERSION_SHA256: u8 = 2;
/// Size of the fixed header: signature (4), version, hash version,
/// chunk count, one zero byte, and pack count (4).
const MIDX_HEADER_SIZE: usize = 12;
/// Size of one table-of-contents entry: 4-byte chunk id + 8-byte offset.
const CHUNK_TOC_ENTRY_SIZE: usize = 12;
// Chunk ids are four ASCII characters packed big-endian.
const MIDX_CHUNKID_PACKNAMES: u32 = 0x504e_414d; // "PNAM"
const MIDX_CHUNKID_OIDFANOUT: u32 = 0x4f49_4446; // "OIDF"
const MIDX_CHUNKID_OIDLOOKUP: u32 = 0x4f49_444c; // "OIDL"
const MIDX_CHUNKID_OBJECTOFFSETS: u32 = 0x4f4f_4646; // "OOFF"
const MIDX_CHUNKID_LARGEOFFSETS: u32 = 0x4c4f_4646; // "LOFF"
const MIDX_CHUNKID_REVINDEX: u32 = 0x5249_4458; // "RIDX"
const MIDX_CHUNKID_BITMAPPED_PACKS: u32 = 0x4254_4d50; // "BTMP"

/// Signature of the reverse-index sidecar file: the ASCII bytes "RIDX".
const RIDX_SIGNATURE: u32 = 0x5249_4458;
const RIDX_VERSION: u32 = 1;
/// Sidecar header: signature, version and hash id, each a big-endian u32.
const RIDX_HEADER_SIZE: usize = 12;
/// Chunk offsets must be a multiple of this many bytes.
const MIDX_CHUNK_ALIGNMENT: usize = 4;

/// High bit of the 32-bit offset field; when set by the writer, the remaining
/// bits are an index into the large-offsets chunk.
const MIDX_LARGE_OFFSET_NEEDED: u32 = 0x8000_0000;
47
/// One object location selected while building a multi-pack-index.
struct MidxEntry {
    /// Object id of the entry.
    oid: ObjectId,
    /// Position of the owning pack in the list of packs being written.
    pack_id: u32,
    /// Offset of the object as recorded in that pack's index.
    offset: u64,
    /// Modification time of the owning pack, used to choose between duplicates.
    pack_mtime: std::time::SystemTime,
}
54
/// Options for writing a multi-pack-index.
///
/// NOTE(review): the code that consumes these options is outside this view;
/// the per-field notes below are inferred from the field names and types and
/// should be confirmed against the writer.
#[derive(Debug, Clone, Default)]
pub struct WriteMultiPackIndexOptions {
    /// Presumably the preferred pack, given by position in the pack list.
    pub preferred_pack_idx: Option<u32>,
    /// Presumably the preferred pack, given by file name.
    pub preferred_pack_name: Option<String>,
    /// Presumably an explicit, ordered subset of packs to include.
    pub pack_names_subset_ordered: Option<Vec<String>>,
    /// Presumably requests the bitmapped-packs chunk / bitmap placeholders.
    pub write_bitmap_placeholders: bool,
    /// Presumably requests an incremental (chained) multi-pack-index layer.
    pub incremental: bool,
    /// Presumably requests a reverse-index placeholder.
    pub write_rev_placeholder: bool,
    /// Presumably the MIDX format version to write; `None` leaves the choice to the writer.
    pub version: Option<u8>,
}
78
79fn normalize_pack_idx_basename(raw: &str) -> Result<String> {
80 let t = raw.trim();
81 let t = std::path::Path::new(t)
82 .file_name()
83 .and_then(|s| s.to_str())
84 .unwrap_or(t);
85 let t = t.strip_prefix("./").unwrap_or(t);
86 if t.ends_with(".idx") {
87 Ok(t.to_string())
88 } else if t.ends_with(".pack") {
89 Ok(format!("{}.idx", t.strip_suffix(".pack").unwrap_or(t)))
90 } else {
91 Ok(format!("{t}.idx"))
92 }
93}
94
95fn read_be_u32(data: &[u8], off: usize) -> Result<u32> {
101 let end = off.checked_add(4).filter(|&e| e <= data.len());
102 let Some(end) = end else {
103 return Err(Error::CorruptObject(
104 "truncated MIDX data reading u32".to_owned(),
105 ));
106 };
107 let bytes: [u8; 4] = data[off..end]
108 .try_into()
109 .map_err(|_| Error::CorruptObject("truncated MIDX data reading u32".to_owned()))?;
110 Ok(u32::from_be_bytes(bytes))
111}
112
113fn read_be_u64(data: &[u8], off: usize) -> Result<u64> {
119 let end = off.checked_add(8).filter(|&e| e <= data.len());
120 let Some(end) = end else {
121 return Err(Error::CorruptObject(
122 "truncated MIDX data reading u64".to_owned(),
123 ));
124 };
125 let bytes: [u8; 8] = data[off..end]
126 .try_into()
127 .map_err(|_| Error::CorruptObject("truncated MIDX data reading u64".to_owned()))?;
128 Ok(u64::from_be_bytes(bytes))
129}
130
/// The part of the fixed MIDX header that later parsing needs.
struct MidxFileHeader {
    /// Number of chunks listed in the table of contents (header byte 6).
    num_chunks: u8,
}
134
135fn parse_midx_header(data: &[u8]) -> Result<(MidxFileHeader, usize, u8)> {
136 if data.len() < MIDX_HEADER_SIZE + 20 {
137 return Err(Error::CorruptObject("midx file too small".to_owned()));
138 }
139 let sig = read_be_u32(data, 0)?;
140 if sig != MIDX_SIGNATURE {
141 return Err(Error::CorruptObject("bad MIDX signature".to_owned()));
142 }
143 let version = data[4];
144 if version != MIDX_VERSION_V1 && version != MIDX_VERSION_V2 {
145 return Err(Error::CorruptObject(format!(
146 "multi-pack-index version {version} not recognized"
147 )));
148 }
149 let object_hash_bytes = data[5];
150 let num_chunks = data[6];
151 let _num_packs = read_be_u32(data, 8)?;
152 Ok((
153 MidxFileHeader { num_chunks },
154 MIDX_HEADER_SIZE,
155 object_hash_bytes,
156 ))
157}
158
159fn parse_pack_names_blob(pn: &[u8]) -> Result<Vec<String>> {
160 let mut names = Vec::new();
161 let mut start = 0usize;
162 for (i, &b) in pn.iter().enumerate() {
163 if b == 0 && i >= start {
164 if i > start {
165 let s = std::str::from_utf8(&pn[start..i])
166 .map_err(|_| Error::CorruptObject("non-utf8 pack name in MIDX".to_owned()))?;
167 names.push(s.to_string());
168 }
169 start = i + 1;
170 }
171 }
172 Ok(names)
173}
174
/// Compares a `.idx` or `.pack` file name against a `.idx` file name.
///
/// The names are compared bytewise, except that they count as equal when they
/// differ only in that the first ends in `pack` where the second ends in `idx`.
fn cmp_idx_or_pack_name(idx_or_pack_name: &str, idx_name: &str) -> std::cmp::Ordering {
    let lhs = idx_or_pack_name.as_bytes();
    let rhs = idx_name.as_bytes();
    // Length of the common prefix.
    let shared = lhs
        .iter()
        .zip(rhs)
        .take_while(|(x, y)| x == y)
        .count();
    let (lhs_tail, rhs_tail) = (&lhs[shared..], &rhs[shared..]);
    if lhs_tail == b"pack" && rhs_tail == b"idx" {
        std::cmp::Ordering::Equal
    } else {
        lhs_tail.cmp(rhs_tail)
    }
}
191
192fn preferred_pack_index_by_mtime(pack_dir: &Path, names: &[String]) -> Result<Option<usize>> {
193 let mut best: Option<(usize, std::time::SystemTime)> = None;
194 for (i, n) in names.iter().enumerate() {
195 let meta = fs::metadata(pack_dir.join(n)).map_err(Error::Io)?;
196 let mtime = meta.modified().map_err(Error::Io)?;
197 match best {
198 None => best = Some((i, mtime)),
199 Some((_, t)) if mtime < t => best = Some((i, mtime)),
200 _ => {}
201 }
202 }
203 Ok(best.map(|(i, _)| i))
204}
205
/// Path of the `multi-pack-index.d` directory inside `pack_dir`.
fn midx_d_dir(pack_dir: &Path) -> std::path::PathBuf {
    let mut dir = pack_dir.to_path_buf();
    dir.push("multi-pack-index.d");
    dir
}
209
210fn chain_file_path(pack_dir: &Path) -> std::path::PathBuf {
211 midx_d_dir(pack_dir).join("multi-pack-index-chain")
212}
213
214fn read_chain_layer_hashes(pack_dir: &Path) -> Result<Vec<String>> {
215 let path = chain_file_path(pack_dir);
216 let f = fs::File::open(&path).map_err(Error::Io)?;
217 let mut out = Vec::new();
218 for line in BufReader::new(f).lines() {
219 let line = line.map_err(Error::Io)?;
220 let t = line.trim();
221 if t.is_empty() {
222 continue;
223 }
224 if t.len() != 40 || !t.chars().all(|c| c.is_ascii_hexdigit()) {
225 return Err(Error::CorruptObject(format!(
226 "invalid multi-pack-index chain line: {t}"
227 )));
228 }
229 out.push(t.to_ascii_lowercase());
230 }
231 Ok(out)
232}
233
234fn repo_midx_hash_version(pack_dir: &Path) -> u8 {
242 let Some(objects_dir) = pack_dir.parent() else {
244 return HASH_VERSION_SHA1;
245 };
246 repo_midx_hash_version_for_objects_dir(objects_dir)
247}
248
249mod midx_cache {
262 use crate::error::{Error, Result};
263 use std::collections::HashMap;
264 use std::fs;
265 use std::path::{Path, PathBuf};
266 use std::sync::{Arc, Mutex, OnceLock};
267 use std::time::SystemTime;
268
269 type Stamp = (SystemTime, u64);
270
271 #[derive(Default)]
272 struct State {
273 bytes: HashMap<PathBuf, (Stamp, Arc<Vec<u8>>)>,
274 hash_version: HashMap<PathBuf, (Option<Stamp>, u8)>,
275 }
276
277 static CACHE: OnceLock<Mutex<State>> = OnceLock::new();
278
279 fn lock() -> std::sync::MutexGuard<'static, State> {
280 CACHE
281 .get_or_init(|| Mutex::new(State::default()))
282 .lock()
283 .unwrap_or_else(std::sync::PoisonError::into_inner)
284 }
285
286 fn stamp(path: &Path) -> Option<Stamp> {
287 let m = fs::metadata(path).ok()?;
288 Some((m.modified().unwrap_or(SystemTime::UNIX_EPOCH), m.len()))
289 }
290
291 pub fn get_bytes(path: &Path) -> Result<Arc<Vec<u8>>> {
293 let sig = stamp(path);
294 if let Some(sig) = sig {
295 let g = lock();
296 if let Some((s, b)) = g.bytes.get(path) {
297 if *s == sig {
298 return Ok(Arc::clone(b));
299 }
300 }
301 }
302 let data = Arc::new(fs::read(path).map_err(Error::Io)?);
303 if let Some(sig) = sig {
304 lock()
305 .bytes
306 .insert(path.to_path_buf(), (sig, Arc::clone(&data)));
307 }
308 Ok(data)
309 }
310
311 pub fn hash_version(config_path: &Path, compute: impl FnOnce() -> u8) -> u8 {
315 let sig = stamp(config_path);
316 {
317 let g = lock();
318 if let Some((s, v)) = g.hash_version.get(config_path) {
319 if *s == sig {
320 return *v;
321 }
322 }
323 }
324 let v = compute();
325 lock()
326 .hash_version
327 .insert(config_path.to_path_buf(), (sig, v));
328 v
329 }
330
331 pub fn evict_pack_dir(pack_dir: &Path) {
333 lock().bytes.retain(|p, _| !p.starts_with(pack_dir));
334 }
335}
336
337fn repo_midx_hash_version_for_objects_dir(objects_dir: &Path) -> u8 {
341 let Some(gitdir) = objects_dir.parent() else {
342 return HASH_VERSION_SHA1;
343 };
344 let config_path = gitdir.join("config");
345 midx_cache::hash_version(&config_path, || {
346 sniff_objectformat_hash_version(&config_path)
347 })
348}
349
350fn sniff_objectformat_hash_version(config_path: &Path) -> u8 {
352 let Ok(text) = fs::read_to_string(config_path) else {
353 return HASH_VERSION_SHA1;
354 };
355 let mut in_extensions = false;
359 for raw in text.lines() {
360 let line = raw.trim();
361 if line.starts_with('[') {
362 let section = line.trim_start_matches('[').trim_end_matches(']');
363 let name = section.split_whitespace().next().unwrap_or("");
364 in_extensions = name.eq_ignore_ascii_case("extensions");
365 continue;
366 }
367 if !in_extensions {
368 continue;
369 }
370 if let Some((key, value)) = line.split_once('=') {
371 if key.trim().eq_ignore_ascii_case("objectformat")
372 && value.trim().eq_ignore_ascii_case("sha256")
373 {
374 return HASH_VERSION_SHA256;
375 }
376 }
377 }
378 HASH_VERSION_SHA1
379}
380
381pub fn resolve_tip_midx_path(pack_dir: &Path) -> Option<std::path::PathBuf> {
382 let root = pack_dir.join("multi-pack-index");
383 if root.exists() {
384 return Some(root);
385 }
386 let hashes = read_chain_layer_hashes(pack_dir).ok()?;
387 let last = hashes.last()?;
388 Some(midx_d_dir(pack_dir).join(format!("multi-pack-index-{last}.midx")))
389}
390
391pub fn resolve_midx_layer_path(pack_dir: &Path, checksum: &str) -> Option<std::path::PathBuf> {
395 let checksum = checksum.to_ascii_lowercase();
396 if let Ok(hashes) = read_chain_layer_hashes(pack_dir) {
397 if hashes.contains(&checksum) {
398 return Some(midx_d_dir(pack_dir).join(format!("multi-pack-index-{checksum}.midx")));
399 }
400 }
401 let root = pack_dir.join("multi-pack-index");
402 if root.exists() {
403 if let Ok(hex) = midx_checksum_hex_from_path(&root) {
404 if hex == checksum {
405 return Some(root);
406 }
407 }
408 }
409 None
410}
411
412fn load_midx_file(path: &Path) -> Result<Vec<u8>> {
413 let data = fs::read(path).map_err(Error::Io)?;
414 let _ = parse_midx_header(&data)?;
415 Ok(data)
416}
417
/// Object-id length in bytes implied by the hash-version byte (offset 5) of
/// MIDX data: 32 when that byte is 2, otherwise 20 — also when the data is
/// too short to contain it.
fn midx_hash_len(data: &[u8]) -> usize {
    match data.get(5) {
        Some(&2) => 32,
        _ => 20,
    }
}
427
428fn oids_and_packs_from_midx_data(data: &[u8]) -> Result<(HashSet<ObjectId>, Vec<String>)> {
429 let hash_len = midx_hash_len(data);
430 let (_, hdr_end, _) = parse_midx_header(data)?;
431 let (pn_off, pn_len) = find_chunk(data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
432 let pack_names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
433 let (_ooff_off, ooff_len) = find_chunk(data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
434 let (oidl_off, oidl_len) = find_chunk(data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
435 let num_objects = ooff_len / 8;
436 if oidl_len != num_objects * hash_len {
437 return Err(Error::CorruptObject(
438 "MIDX oid-lookup size mismatch".to_owned(),
439 ));
440 }
441 let mut oids = HashSet::with_capacity(num_objects);
442 for i in 0..num_objects {
443 let start = oidl_off + i * hash_len;
444 let oid = ObjectId::from_bytes(&data[start..start + hash_len])?;
445 oids.insert(oid);
446 }
447 Ok((oids, pack_names))
448}
449
450fn collect_incremental_base(pack_dir: &Path) -> Result<(HashSet<ObjectId>, HashSet<String>)> {
451 let mut oids = HashSet::new();
452 let mut packs = HashSet::new();
453 let root = pack_dir.join("multi-pack-index");
454 let chain_path = chain_file_path(pack_dir);
455 if chain_path.exists() {
456 for h in read_chain_layer_hashes(pack_dir)? {
457 let p = midx_d_dir(pack_dir).join(format!("multi-pack-index-{h}.midx"));
458 let data = load_midx_file(&p)?;
459 let (layer_oids, names) = oids_and_packs_from_midx_data(&data)?;
460 oids.extend(layer_oids);
461 for n in names {
462 packs.insert(n);
463 }
464 }
465 return Ok((oids, packs));
466 }
467 if root.exists() {
468 let data = load_midx_file(&root)?;
469 let (o, names) = oids_and_packs_from_midx_data(&data)?;
470 oids = o;
471 for n in names {
472 packs.insert(n);
473 }
474 }
475 Ok((oids, packs))
476}
477
478fn midx_checksum_hex_from_path(path: &Path) -> Result<String> {
479 let data = fs::read(path).map_err(Error::Io)?;
480 if data.len() < 20 {
481 return Err(Error::CorruptObject(
482 "midx too small for checksum".to_owned(),
483 ));
484 }
485 let hash = &data[data.len() - 20..];
486 Ok(hex::encode(hash))
487}
488
489fn hard_link_or_copy(src: &Path, dst: &Path) -> Result<()> {
490 let _ = fs::remove_file(dst);
491 if fs::hard_link(src, dst).is_ok() {
492 return Ok(());
493 }
494 fs::copy(src, dst).map_err(Error::Io)?;
495 Ok(())
496}
497
498fn link_root_midx_into_chain(pack_dir: &Path, root_checksum_hex: &str) -> Result<()> {
499 let midx_d = midx_d_dir(pack_dir);
500 fs::create_dir_all(&midx_d).map_err(Error::Io)?;
501 let dst_midx = midx_d.join(format!("multi-pack-index-{root_checksum_hex}.midx"));
502 hard_link_or_copy(&pack_dir.join("multi-pack-index"), &dst_midx)?;
503 let exts = ["bitmap", "rev"];
504 for ext in exts {
505 let src = pack_dir.join(format!("multi-pack-index-{root_checksum_hex}.{ext}"));
506 if src.exists() {
507 let dst = midx_d.join(format!("multi-pack-index-{root_checksum_hex}.{ext}"));
508 hard_link_or_copy(&src, &dst)?;
509 }
510 }
511 Ok(())
512}
513
514fn clear_stale_split_layers(pack_dir: &Path, keep: &[String]) -> Result<()> {
515 let midx_d = midx_d_dir(pack_dir);
516 if !midx_d.exists() {
517 return Ok(());
518 }
519 let keep: HashSet<&str> = keep.iter().map(|s| s.as_str()).collect();
520 for ent in fs::read_dir(&midx_d).map_err(Error::Io)? {
521 let ent = ent.map_err(Error::Io)?;
522 let name = ent.file_name().to_string_lossy().to_string();
523 let Some(rest) = name.strip_prefix("multi-pack-index-") else {
524 continue;
525 };
526 let Some((hash_part, _ext)) = rest.split_once('.') else {
527 continue;
528 };
529 if hash_part.len() == 40 && !keep.contains(hash_part) {
530 let _ = fs::remove_file(ent.path());
531 }
532 }
533 Ok(())
534}
535
536fn clear_incremental_midx_files(pack_dir: &Path) -> Result<()> {
546 let midx_d = midx_d_dir(pack_dir);
547 let _ = fs::remove_file(chain_file_path(pack_dir));
549 if !midx_d.exists() {
550 return Ok(());
551 }
552 for ent in fs::read_dir(&midx_d).map_err(Error::Io)? {
553 let ent = ent.map_err(Error::Io)?;
554 let name = ent.file_name().to_string_lossy().to_string();
555 if name.starts_with("multi-pack-index-")
556 && (name.ends_with(".midx") || name.ends_with(".bitmap") || name.ends_with(".rev"))
557 {
558 let _ = fs::remove_file(ent.path());
559 }
560 }
561 Ok(())
562}
563
564fn pack_mtime_for_midx(idx: &PackIndex) -> std::time::SystemTime {
565 fs::metadata(&idx.pack_path)
566 .and_then(|m| m.modified())
567 .unwrap_or(std::time::SystemTime::UNIX_EPOCH)
568}
569
570fn midx_pick_better_entry(
571 cur: &MidxEntry,
572 cand_pack: u32,
573 cand_offset: u64,
574 cand_mtime: std::time::SystemTime,
575 preferred_pack: Option<u32>,
576) -> bool {
577 let cur_pref = preferred_pack == Some(cur.pack_id);
578 let new_pref = preferred_pack == Some(cand_pack);
579 if new_pref && !cur_pref {
580 return true;
581 }
582 if cur_pref && !new_pref {
583 return false;
584 }
585 match cand_mtime.cmp(&cur.pack_mtime) {
586 std::cmp::Ordering::Greater => true,
587 std::cmp::Ordering::Less => false,
588 std::cmp::Ordering::Equal => {
589 if cand_pack != cur.pack_id {
590 cand_pack < cur.pack_id
591 } else {
592 cand_offset < cur.offset
593 }
594 }
595 }
596}
597
/// Serializes a multi-pack-index for the given packs into a byte vector.
///
/// * `idx_names` - names written to the pack-names chunk, in order.
/// * `indexes` - pack indexes; the position in this slice is the pack id.
/// * `preferred_idx` - position of the preferred pack, if any.
/// * `write_bitmap_placeholders` - also emit the bitmapped-packs chunk.
/// * `omit_embedded_ridx_chunk` - never embed the reverse-index chunk.
/// * `version` - 1 writes version 1; any other value writes version 2.
/// * `hash_version` - 2 selects 32-byte ids and a SHA-256 trailer, anything
///   else 20-byte ids and a SHA-1 trailer.
/// * `exclude_oids` - objects to leave out of the index.
///
/// Returns the file bytes (including the trailing checksum) and, only when
/// both `omit_embedded_ridx_chunk` and `write_bitmap_placeholders` are set,
/// the pack-order permutation for a reverse-index sidecar.
///
/// # Errors
/// `Error::CorruptObject` when there are too many packs, large offsets or
/// chunks to encode.
// The argument list mirrors the independent knobs of the writer.
#[allow(clippy::too_many_arguments)]
fn build_midx_bytes_filtered(
    idx_names: &[String],
    indexes: &[PackIndex],
    preferred_idx: Option<usize>,
    write_bitmap_placeholders: bool,
    omit_embedded_ridx_chunk: bool,
    version: u8,
    hash_version: u8,
    exclude_oids: Option<&HashSet<ObjectId>>,
) -> Result<(Vec<u8>, Option<Vec<u32>>)> {
    let hash_len = if hash_version == 2 { 32 } else { 20 };
    let preferred_pack_idx = preferred_idx.map(|p| p as u32);
    let pack_mtimes: Vec<std::time::SystemTime> = indexes.iter().map(pack_mtime_for_midx).collect();

    // Pick one location per object across all packs.
    let mut best: HashMap<ObjectId, MidxEntry> = HashMap::new();
    for (pack_id, idx) in indexes.iter().enumerate() {
        let pack_id = u32::try_from(pack_id).map_err(|_| {
            Error::CorruptObject("too many pack files for multi-pack-index".to_owned())
        })?;
        let mtime = pack_mtimes[pack_id as usize];
        for e in &idx.entries {
            // Entries whose id has the wrong length or does not parse are skipped.
            if e.oid.len() != hash_len {
                continue;
            }
            let Ok(oid) = ObjectId::from_bytes(&e.oid) else {
                continue;
            };
            if let Some(ex) = exclude_oids {
                if ex.contains(&oid) {
                    continue;
                }
            }
            let cand = MidxEntry {
                oid,
                pack_id,
                offset: e.offset,
                pack_mtime: mtime,
            };
            match best.get(&oid) {
                None => {
                    best.insert(oid, cand);
                }
                Some(cur) => {
                    if midx_pick_better_entry(cur, pack_id, e.offset, mtime, preferred_pack_idx) {
                        best.insert(oid, cand);
                    }
                }
            }
        }
    }

    // The lookup table is ordered by object id.
    let mut entries: Vec<MidxEntry> = best.into_values().collect();
    entries.sort_by_key(|a| a.oid);

    // The large-offsets chunk is only used when some offset does not fit in 32 bits.
    let large_offsets_needed = entries.iter().any(|e| e.offset > u64::from(u32::MAX));

    let num_packs = indexes.len() as u32;

    // Pack names: NUL-terminated, then padded with NULs to the chunk alignment.
    let mut pack_names_blob = Vec::new();
    for name in idx_names {
        pack_names_blob.extend_from_slice(name.as_bytes());
        pack_names_blob.push(0);
    }
    let pad = (MIDX_CHUNK_ALIGNMENT - (pack_names_blob.len() % MIDX_CHUNK_ALIGNMENT))
        % MIDX_CHUNK_ALIGNMENT;
    pack_names_blob.extend(std::iter::repeat_n(0u8, pad));
    let chunk_pnam = pack_names_blob;

    // Fanout: slot i holds the number of objects whose first id byte is <= i.
    let mut chunk_oidf = vec![0u8; 256 * 4];
    let mut j = 0usize;
    for i in 0..256 {
        while j < entries.len() && entries[j].oid.as_bytes()[0] <= i as u8 {
            j += 1;
        }
        chunk_oidf[i * 4..(i + 1) * 4].copy_from_slice(&(j as u32).to_be_bytes());
    }

    // OID lookup: the sorted ids back to back.
    // NOTE(review): the capacity hint assumes 20-byte ids; it is only a hint.
    let mut chunk_oidl = Vec::with_capacity(entries.len() * 20);
    for e in &entries {
        chunk_oidl.extend_from_slice(e.oid.as_bytes());
    }

    // Object offsets: pack id followed by a 32-bit offset field per object.
    let mut large_offsets: Vec<u64> = Vec::new();
    let mut chunk_ooff = Vec::with_capacity(entries.len() * 8);
    for e in &entries {
        chunk_ooff.extend_from_slice(&e.pack_id.to_be_bytes());
        // With a large-offsets chunk, any offset using bit 31 or above is
        // stored there and the field holds its slot plus the flag bit.
        let encoded = if large_offsets_needed && e.offset >> 31 != 0 {
            let slot = u32::try_from(large_offsets.len()).map_err(|_| {
                Error::CorruptObject("too many large offsets in multi-pack-index".to_owned())
            })?;
            large_offsets.push(e.offset);
            MIDX_LARGE_OFFSET_NEEDED | slot
        } else {
            e.offset as u32
        };
        chunk_ooff.extend_from_slice(&encoded.to_be_bytes());
    }

    let chunk_loff: Vec<u8> = if large_offsets.is_empty() {
        Vec::new()
    } else {
        let mut v = Vec::with_capacity(large_offsets.len() * 8);
        for off in &large_offsets {
            v.extend_from_slice(&off.to_be_bytes());
        }
        v
    };

    // Pack order: preferred pack first, then by pack id, then by offset.
    // `order` holds positions into the id-sorted `entries`.
    let pref = preferred_pack_idx;
    let mut order: Vec<u32> = (0..entries.len() as u32).collect();
    order.sort_by(|&ai, &bi| {
        let a = &entries[ai as usize];
        let b = &entries[bi as usize];
        let a_pref = pref == Some(a.pack_id);
        let b_pref = pref == Some(b.pack_id);
        b_pref
            .cmp(&a_pref)
            .then_with(|| a.pack_id.cmp(&b.pack_id))
            .then_with(|| a.offset.cmp(&b.offset))
            .then_with(|| ai.cmp(&bi))
    });

    let mut chunk_ridx = Vec::with_capacity(entries.len() * 4);
    for oid_idx in &order {
        chunk_ridx.extend_from_slice(&oid_idx.to_be_bytes());
    }

    // The order is handed back only when it is not embedded and bitmaps are requested.
    let rev_sidecar_order = if omit_embedded_ridx_chunk && write_bitmap_placeholders {
        Some(order.clone())
    } else {
        None
    };
    // Bitmapped packs: per pack, the rank of its first object in pack order
    // (0 when the pack contributes none) and its object count.
    let chunk_btmp: Vec<u8> = if write_bitmap_placeholders {
        let num_packs_usize = indexes.len();
        let mut bitmap_pos = vec![u32::MAX; num_packs_usize];
        let mut bitmap_nr = vec![0u32; num_packs_usize];
        for (rank, &oid_idx) in order.iter().enumerate() {
            let pack = entries[oid_idx as usize].pack_id as usize;
            if let Some(p) = bitmap_pos.get_mut(pack) {
                if *p == u32::MAX {
                    *p = rank as u32;
                }
            }
            if let Some(n) = bitmap_nr.get_mut(pack) {
                *n += 1;
            }
        }
        let mut v = Vec::new();
        for pack in 0..num_packs_usize {
            let pos = if bitmap_pos[pack] == u32::MAX {
                0
            } else {
                bitmap_pos[pack]
            };
            v.extend_from_slice(&pos.to_be_bytes());
            v.extend_from_slice(&bitmap_nr[pack].to_be_bytes());
        }
        let pad = (MIDX_CHUNK_ALIGNMENT - (v.len() % MIDX_CHUNK_ALIGNMENT)) % MIDX_CHUNK_ALIGNMENT;
        v.extend(std::iter::repeat_n(0u8, pad));
        v
    } else {
        Vec::new()
    };

    // Required chunks first, optional ones appended in a fixed order.
    let mut chunks: Vec<(u32, Vec<u8>)> = vec![
        (MIDX_CHUNKID_PACKNAMES, chunk_pnam),
        (MIDX_CHUNKID_OIDFANOUT, chunk_oidf),
        (MIDX_CHUNKID_OIDLOOKUP, chunk_oidl),
        (MIDX_CHUNKID_OBJECTOFFSETS, chunk_ooff),
    ];
    if !chunk_loff.is_empty() {
        chunks.push((MIDX_CHUNKID_LARGEOFFSETS, chunk_loff));
    }
    if (pref.is_some() || write_bitmap_placeholders) && !omit_embedded_ridx_chunk {
        chunks.push((MIDX_CHUNKID_REVINDEX, chunk_ridx));
    }
    if write_bitmap_placeholders {
        chunks.push((MIDX_CHUNKID_BITMAPPED_PACKS, chunk_btmp));
    }

    let num_chunks: u8 = chunks
        .len()
        .try_into()
        .map_err(|_| Error::CorruptObject("too many MIDX chunks".to_owned()))?;

    // Table of contents: one (id, offset) entry per chunk plus a zero-id
    // terminator whose offset marks the end of the last chunk.
    let mut body = Vec::new();
    let mut cur_offset =
        MIDX_HEADER_SIZE as u64 + ((chunks.len() + 1) * CHUNK_TOC_ENTRY_SIZE) as u64;

    for (id, data) in &chunks {
        body.extend_from_slice(&id.to_be_bytes());
        body.extend_from_slice(&cur_offset.to_be_bytes());
        cur_offset += data.len() as u64;
    }
    body.extend_from_slice(&0u32.to_be_bytes());
    body.extend_from_slice(&cur_offset.to_be_bytes());

    for (_, data) in &chunks {
        body.extend_from_slice(data);
    }

    // Header: signature, version, hash version, chunk count, a zero byte, pack count.
    let mut out = Vec::with_capacity(MIDX_HEADER_SIZE + body.len() + 20);
    out.extend_from_slice(&MIDX_SIGNATURE.to_be_bytes());
    out.push(if version == MIDX_VERSION_V1 {
        MIDX_VERSION_V1
    } else {
        MIDX_VERSION_V2
    });
    out.push(hash_version);
    out.push(num_chunks);
    out.push(0);
    out.extend_from_slice(&num_packs.to_be_bytes());
    out.extend_from_slice(&body);

    // Trailer: checksum over everything written so far.
    if hash_version == 2 {
        let mut hasher = Sha256::new();
        Sha256Digest::update(&mut hasher, &out);
        out.extend_from_slice(&hasher.finalize());
    } else {
        let mut hasher = Sha1::new();
        hasher.update(&out);
        out.extend_from_slice(&hasher.finalize());
    }

    Ok((out, rev_sidecar_order))
}
847
848fn write_midx_rev_sidecar(path: &Path, pack_order: &[u32], midx_file_hash: &[u8]) -> Result<()> {
853 let hash_id: u32 = if midx_file_hash.len() == 32 { 2 } else { 1 };
854 let mut body =
855 Vec::with_capacity(RIDX_HEADER_SIZE + pack_order.len() * 4 + midx_file_hash.len());
856 body.extend_from_slice(&RIDX_SIGNATURE.to_be_bytes());
857 body.extend_from_slice(&RIDX_VERSION.to_be_bytes());
858 body.extend_from_slice(&hash_id.to_be_bytes());
859 for idx in pack_order {
860 body.extend_from_slice(&idx.to_be_bytes());
861 }
862 body.extend_from_slice(midx_file_hash);
863 fs::write(path, body).map_err(Error::Io)
864}
865
866fn find_chunk(data: &[u8], header_end: usize, chunk_id: u32) -> Result<(usize, usize)> {
867 let (hdr, _, _) = parse_midx_header(data)?;
868 let n = hdr.num_chunks as usize;
869 let pos = header_end;
870 let toc_end = pos + (n + 1) * CHUNK_TOC_ENTRY_SIZE;
871 if data.len() < toc_end + 20 {
872 return Err(Error::CorruptObject(
873 "truncated MIDX chunk table".to_owned(),
874 ));
875 }
876 for i in 0..n {
877 let base = pos + i * CHUNK_TOC_ENTRY_SIZE;
878 let id = read_be_u32(data, base)?;
879 let off = read_be_u64(data, base + 4)? as usize;
880 if id == chunk_id {
881 let next_off = if i + 1 < n {
882 let nb = pos + (i + 1) * CHUNK_TOC_ENTRY_SIZE;
883 read_be_u64(data, nb + 4)? as usize
884 } else {
885 let term = pos + n * CHUNK_TOC_ENTRY_SIZE;
886 read_be_u64(data, term + 4)? as usize
887 };
888 return Ok((off, next_off.saturating_sub(off)));
889 }
890 }
891 Err(Error::CorruptObject(format!(
892 "MIDX chunk {chunk_id:08x} not found"
893 )))
894}
895
/// Error raised while loading a multi-pack-index; wraps the message text.
#[derive(Debug, Clone)]
pub struct MidxLoadError(pub String);

impl std::fmt::Display for MidxLoadError {
    /// Writes the wrapped message verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.0)
    }
}
907
/// One row of the parsed chunk table of contents.
struct TocEntry {
    /// Chunk id; 0 for the terminating row.
    id: u32,
    /// Offset of the chunk from the start of the file, as stored in the table.
    offset: usize,
}
913
914fn parse_midx_toc(
918 data: &[u8],
919 hash_len: usize,
920 errors: &mut Vec<String>,
921) -> std::result::Result<Vec<TocEntry>, MidxLoadError> {
922 if data.len() < MIDX_HEADER_SIZE + hash_len {
923 return Err(MidxLoadError("multi-pack-index file too small".to_owned()));
924 }
925 let num_chunks = data[6] as usize;
926 let toc_off = MIDX_HEADER_SIZE;
927 let needed = toc_off + (num_chunks + 1) * CHUNK_TOC_ENTRY_SIZE;
928 if data.len() < needed {
929 return Err(MidxLoadError(
930 "multi-pack-index chunk table is truncated".to_owned(),
931 ));
932 }
933 let file_size = data.len();
934 let mut chunks: Vec<TocEntry> = Vec::with_capacity(num_chunks);
935
936 let read_be64 = |off: usize| -> u64 {
937 let mut b = [0u8; 8];
938 b.copy_from_slice(&data[off..off + 8]);
939 u64::from_be_bytes(b)
940 };
941 let read_be32 = |off: usize| -> u32 {
942 let mut b = [0u8; 4];
943 b.copy_from_slice(&data[off..off + 4]);
944 u32::from_be_bytes(b)
945 };
946
947 for i in 0..num_chunks {
948 let entry = toc_off + i * CHUNK_TOC_ENTRY_SIZE;
949 let chunk_id = read_be32(entry);
950 let chunk_offset = read_be64(entry + 4);
951
952 if chunk_id == 0 {
953 errors.push("terminating chunk id appears earlier than expected".to_owned());
954 return Err(MidxLoadError(
955 "multi-pack-index required pack-name chunk missing or corrupted".to_owned(),
956 ));
957 }
958 if !(chunk_offset as usize).is_multiple_of(MIDX_CHUNK_ALIGNMENT) {
959 errors.push(format!(
960 "chunk id {chunk_id:x} not {MIDX_CHUNK_ALIGNMENT}-byte aligned"
961 ));
962 return Err(MidxLoadError(
963 "multi-pack-index required pack-name chunk missing or corrupted".to_owned(),
964 ));
965 }
966
967 let next_entry = toc_off + (i + 1) * CHUNK_TOC_ENTRY_SIZE;
968 let next_chunk_offset = read_be64(next_entry + 4);
969
970 if next_chunk_offset < chunk_offset
971 || next_chunk_offset > (file_size as u64).saturating_sub(hash_len as u64)
972 {
973 errors.push(format!(
974 "improper chunk offset(s) {chunk_offset:x} and {next_chunk_offset:x}"
975 ));
976 return Err(MidxLoadError(
977 "multi-pack-index required pack-name chunk missing or corrupted".to_owned(),
978 ));
979 }
980
981 if chunks.iter().any(|c| c.id == chunk_id) {
982 errors.push(format!("duplicate chunk ID {chunk_id:x} found"));
983 return Err(MidxLoadError(
984 "multi-pack-index required pack-name chunk missing or corrupted".to_owned(),
985 ));
986 }
987
988 chunks.push(TocEntry {
989 id: chunk_id,
990 offset: chunk_offset as usize,
991 });
992 }
993
994 let term_entry = toc_off + num_chunks * CHUNK_TOC_ENTRY_SIZE;
996 let final_id = read_be32(term_entry);
997 if final_id != 0 {
998 errors.push(format!("final chunk has non-zero id {final_id:x}"));
999 return Err(MidxLoadError(
1000 "multi-pack-index required pack-name chunk missing or corrupted".to_owned(),
1001 ));
1002 }
1003
1004 let term_offset = read_be64(term_entry + 4) as usize;
1007 chunks.push(TocEntry {
1008 id: 0,
1009 offset: term_offset,
1010 });
1011
1012 Ok(chunks)
1013}
1014
1015fn toc_chunk_range(chunks: &[TocEntry], data_len: usize, id: u32) -> Option<(usize, usize)> {
1017 for (i, c) in chunks.iter().enumerate() {
1018 if c.id == id {
1019 let next = if i + 1 < chunks.len() {
1020 chunks[i + 1].offset
1021 } else {
1022 data_len.saturating_sub(20)
1023 };
1024 return Some((c.offset, next.saturating_sub(c.offset)));
1025 }
1026 }
1027 None
1028}
1029
1030pub fn verify_midx(objects_dir: &Path) -> std::result::Result<(), Vec<String>> {
1037 let pack_dir = objects_dir.join("pack");
1038 let path = match resolve_tip_midx_path(&pack_dir) {
1039 Some(p) => p,
1040 None => return Ok(()),
1041 };
1042 let data = match fs::read(&path) {
1043 Ok(d) => d,
1044 Err(_) => return Ok(()),
1045 };
1046
1047 let mut fatal: Vec<String> = Vec::new();
1048 let mut errors: Vec<String> = Vec::new();
1049
1050 if data.len() < MIDX_HEADER_SIZE + 20 {
1052 return Err(vec!["multi-pack-index file is too small".to_owned()]);
1053 }
1054 let sig = u32::from_be_bytes([data[0], data[1], data[2], data[3]]);
1055 if sig != MIDX_SIGNATURE {
1056 return Err(vec![format!(
1057 "multi-pack-index signature 0x{sig:08x} does not match signature 0x{MIDX_SIGNATURE:08x}"
1058 )]);
1059 }
1060 let version = data[4];
1061 if version != MIDX_VERSION_V1 && version != MIDX_VERSION_V2 {
1062 return Err(vec![format!(
1063 "multi-pack-index version {version} not recognized"
1064 )]);
1065 }
1066 let hash_version = data[5];
1067 let expected_hash_version = repo_midx_hash_version_for_objects_dir(objects_dir);
1068 if hash_version != expected_hash_version {
1069 return Err(vec![format!(
1070 "multi-pack-index hash version {hash_version} does not match version {expected_hash_version}"
1071 )]);
1072 }
1073 let hash_len = if hash_version == 2 { 32usize } else { 20usize };
1074 let num_packs = u32::from_be_bytes([data[8], data[9], data[10], data[11]]) as usize;
1075
1076 let chunks = match parse_midx_toc(&data, hash_len, &mut errors) {
1078 Ok(c) => c,
1079 Err(e) => {
1080 errors.push(e.0);
1081 return Err(errors);
1082 }
1083 };
1084
1085 let Some((pn_off, pn_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_PACKNAMES)
1087 else {
1088 errors.push("multi-pack-index required pack-name chunk missing or corrupted".to_owned());
1089 return Err(errors);
1090 };
1091
1092 let Some((fan_off, fan_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OIDFANOUT)
1094 else {
1095 errors.push("multi-pack-index required OID fanout chunk missing or corrupted".to_owned());
1096 return Err(errors);
1097 };
1098 if fan_len != 256 * 4 {
1099 errors.push("multi-pack-index OID fanout is of the wrong size".to_owned());
1100 errors.push("multi-pack-index required OID fanout chunk missing or corrupted".to_owned());
1101 return Err(errors);
1102 }
1103 let fanout = |i: usize| -> u32 {
1104 let b = fan_off + i * 4;
1105 u32::from_be_bytes([data[b], data[b + 1], data[b + 2], data[b + 3]])
1106 };
1107 for i in 0..255 {
1108 let f1 = fanout(i);
1109 let f2 = fanout(i + 1);
1110 if f1 > f2 {
1111 errors.push(format!(
1112 "oid fanout out of order: fanout[{i}] = {f1:x} > {f2:x} = fanout[{}]",
1113 i + 1
1114 ));
1115 errors
1116 .push("multi-pack-index required OID fanout chunk missing or corrupted".to_owned());
1117 return Err(errors);
1118 }
1119 }
1120 let num_objects = fanout(255) as usize;
1121
1122 let Some((oidl_off, oidl_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OIDLOOKUP)
1124 else {
1125 errors.push("multi-pack-index required OID lookup chunk missing or corrupted".to_owned());
1126 return Err(errors);
1127 };
1128 if oidl_len != hash_len * num_objects {
1129 errors.push("multi-pack-index OID lookup chunk is the wrong size".to_owned());
1130 errors.push("multi-pack-index required OID lookup chunk missing or corrupted".to_owned());
1131 return Err(errors);
1132 }
1133
1134 let Some((ooff_off, ooff_len)) =
1136 toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OBJECTOFFSETS)
1137 else {
1138 errors
1139 .push("multi-pack-index required object offsets chunk missing or corrupted".to_owned());
1140 return Err(errors);
1141 };
1142 if ooff_len != num_objects * 8 {
1143 errors.push("multi-pack-index object offset chunk is the wrong size".to_owned());
1144 errors
1145 .push("multi-pack-index required object offsets chunk missing or corrupted".to_owned());
1146 return Err(errors);
1147 }
1148
1149 let large_off = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_LARGEOFFSETS);
1150
1151 let names = match parse_pack_names_blob(&data[pn_off..pn_off + pn_len]) {
1153 Ok(n) => n,
1154 Err(_) => {
1155 errors.push("multi-pack-index pack-name chunk is too short".to_owned());
1156 return Err(errors);
1157 }
1158 };
1159 if version == MIDX_VERSION_V1 {
1160 for i in 1..names.len() {
1161 if names[i] <= names[i - 1] {
1162 fatal.push(format!(
1163 "multi-pack-index pack names out of order: '{}' before '{}'",
1164 names[i - 1],
1165 names[i]
1166 ));
1167 errors.extend(fatal);
1169 return Err(errors);
1170 }
1171 }
1172 }
1173
1174 if !midx_checksum_is_valid(&data) {
1176 errors.push("incorrect checksum".to_owned());
1177 }
1178
1179 let mut pack_indexes: Vec<Option<PackIndex>> = Vec::with_capacity(num_packs);
1181 for i in 0..num_packs {
1182 let loaded = match names.get(i) {
1188 Some(name) => read_pack_index_no_verify(&pack_dir.join(name)).ok(),
1189 None => None,
1190 };
1191 if loaded.is_none() {
1192 errors.push(format!("failed to load pack in position {i}"));
1193 }
1194 pack_indexes.push(loaded);
1195 }
1196
1197 if num_objects == 0 {
1198 errors.push("the midx contains no oid".to_owned());
1199 if errors.is_empty() {
1200 return Ok(());
1201 }
1202 return Err(errors);
1203 }
1204
1205 let oid_at =
1207 |i: usize| -> &[u8] { &data[oidl_off + i * hash_len..oidl_off + (i + 1) * hash_len] };
1208 for i in 0..num_objects.saturating_sub(1) {
1209 let a = oid_at(i);
1210 let b = oid_at(i + 1);
1211 if a >= b {
1212 errors.push(format!(
1213 "oid lookup out of order: oid[{i}] = {} >= {} = oid[{}]",
1214 hex::encode(a),
1215 hex::encode(b),
1216 i + 1
1217 ));
1218 }
1219 }
1220
1221 for i in 0..num_objects {
1223 let ob = ooff_off + i * 8;
1224 let pack_int_id = u32::from_be_bytes([data[ob], data[ob + 1], data[ob + 2], data[ob + 3]]);
1225 let off_raw = u32::from_be_bytes([data[ob + 4], data[ob + 5], data[ob + 6], data[ob + 7]]);
1226 let oid_hex = hex::encode(oid_at(i));
1227
1228 if pack_int_id as usize >= num_packs {
1229 errors.push(format!(
1230 "bad pack-int-id: {pack_int_id} ({num_packs} total packs)"
1231 ));
1232 errors.push(format!(
1233 "failed to load pack entry for oid[{i}] = {oid_hex}"
1234 ));
1235 continue;
1236 }
1237
1238 let m_offset: u64 = if off_raw & MIDX_LARGE_OFFSET_NEEDED != 0 {
1240 let slot = (off_raw & !MIDX_LARGE_OFFSET_NEEDED) as usize;
1241 match large_off {
1242 Some((lo_off, lo_len)) if (slot + 1) * 8 <= lo_len => {
1243 let b = lo_off + slot * 8;
1244 let mut arr = [0u8; 8];
1245 arr.copy_from_slice(&data[b..b + 8]);
1246 u64::from_be_bytes(arr)
1247 }
1248 _ => {
1249 errors.push("multi-pack-index large offset out of bounds".to_owned());
1250 continue;
1251 }
1252 }
1253 } else {
1254 u64::from(off_raw)
1255 };
1256
1257 let Some(Some(idx)) = pack_indexes.get(pack_int_id as usize) else {
1258 errors.push(format!(
1259 "failed to load pack entry for oid[{i}] = {oid_hex}"
1260 ));
1261 continue;
1262 };
1263 let Ok(oid) = ObjectId::from_bytes(oid_at(i)) else {
1264 errors.push(format!(
1265 "failed to load pack entry for oid[{i}] = {oid_hex}"
1266 ));
1267 continue;
1268 };
1269 match idx.find_offset(&oid) {
1270 Some(p_offset) => {
1271 if m_offset != p_offset {
1272 errors.push(format!(
1273 "incorrect object offset for oid[{i}] = {oid_hex}: {m_offset:x} != {p_offset:x}"
1274 ));
1275 }
1276 }
1277 None => {
1278 errors.push(format!(
1279 "failed to load pack entry for oid[{i}] = {oid_hex}"
1280 ));
1281 }
1282 }
1283 }
1284
1285 if errors.is_empty() {
1286 Ok(())
1287 } else {
1288 Err(errors)
1289 }
1290}
1291
1292fn midx_checksum_is_valid(data: &[u8]) -> bool {
1295 let hash_len = midx_hash_len(data);
1296 if data.len() < hash_len {
1297 return false;
1298 }
1299 let body = &data[..data.len() - hash_len];
1300 let stored = &data[data.len() - hash_len..];
1301 if hash_len == 32 {
1302 let mut hasher = Sha256::new();
1303 Sha256Digest::update(&mut hasher, body);
1304 hasher.finalize().as_slice() == stored
1305 } else {
1306 let mut hasher = Sha1::new();
1307 hasher.update(body);
1308 hasher.finalize().as_slice() == stored
1309 }
1310}
1311
1312pub fn read_midx_pack_idx_names(objects_dir: &Path) -> Result<Vec<String>> {
1319 let pack_dir = objects_dir.join("pack");
1320 let path = resolve_tip_midx_path(&pack_dir)
1321 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
1322 let data = fs::read(&path).map_err(Error::Io)?;
1323 let (_, hdr_end, _) = parse_midx_header(&data)?;
1324 let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1325 parse_pack_names_blob(&data[pn_off..pn_off + pn_len])
1326}
1327
1328pub struct MidxObjectRef {
1330 pub oid: ObjectId,
1331 pub pack_int_id: usize,
1333}
1334
1335pub fn read_midx_objects(objects_dir: &Path) -> Result<(Vec<String>, Vec<MidxObjectRef>)> {
1339 let pack_dir = objects_dir.join("pack");
1340 let path = resolve_tip_midx_path(&pack_dir)
1341 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
1342 let data = fs::read(&path).map_err(Error::Io)?;
1343 let (_, hdr_end, _) = parse_midx_header(&data)?;
1344 let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1345 let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
1346 let hash_len = midx_hash_len(&data);
1347 let (oidl_off, oidl_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
1348 let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1349 if oidl_len % hash_len != 0 || ooff_len % 8 != 0 {
1350 return Err(Error::CorruptObject(
1351 "bad MIDX oid-lookup / object-offsets size".to_owned(),
1352 ));
1353 }
1354 let num = oidl_len / hash_len;
1355 if num * 8 != ooff_len {
1356 return Err(Error::CorruptObject(
1357 "MIDX oid count does not match object-offsets".to_owned(),
1358 ));
1359 }
1360 let mut objects = Vec::with_capacity(num);
1361 for i in 0..num {
1362 let oid = ObjectId::from_bytes(&data[oidl_off + i * hash_len..oidl_off + (i + 1) * hash_len])
1363 .map_err(|e| Error::CorruptObject(e.to_string()))?;
1364 let base = ooff_off + i * 8;
1365 let pack_id = read_be_u32(&data, base)? as usize;
1366 objects.push(MidxObjectRef {
1367 oid,
1368 pack_int_id: pack_id,
1369 });
1370 }
1371 Ok((names, objects))
1372}
1373
1374pub fn midx_checksum_hex(objects_dir: &Path) -> Result<String> {
1376 let pack_dir = objects_dir.join("pack");
1377 let path = resolve_tip_midx_path(&pack_dir)
1378 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
1379 midx_checksum_hex_from_path(&path)
1380}
1381
1382fn resolve_read_midx_path(pack_dir: &Path, checksum: Option<&str>) -> Result<std::path::PathBuf> {
1387 match checksum {
1388 Some(cs) => resolve_midx_layer_path(pack_dir, cs)
1389 .ok_or_else(|| Error::CorruptObject(format!("could not find MIDX with checksum {cs}"))),
1390 None => resolve_tip_midx_path(pack_dir)
1391 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned())),
1392 }
1393}
1394
1395pub fn format_midx_show_objects(objects_dir: &Path) -> Result<String> {
1398 format_midx_show_objects_layer(objects_dir, None)
1399}
1400
1401pub fn format_midx_show_objects_layer(
1403 objects_dir: &Path,
1404 checksum: Option<&str>,
1405) -> Result<String> {
1406 let mut out = format_midx_dump_layer(objects_dir, checksum)?;
1407 let pack_dir = objects_dir.join("pack");
1408 let path = resolve_read_midx_path(&pack_dir, checksum)?;
1409 let data = fs::read(&path).map_err(Error::Io)?;
1410 let (_, hdr_end, _) = parse_midx_header(&data)?;
1411 let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1412 let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
1413 let hash_len = midx_hash_len(&data);
1414 let (oidl_off, oidl_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
1415 let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1416 if oidl_len % hash_len != 0 || ooff_len % 8 != 0 {
1417 return Err(Error::CorruptObject(
1418 "bad MIDX oid-lookup / object-offsets size".to_owned(),
1419 ));
1420 }
1421 let num = oidl_len / hash_len;
1422 if num * 8 != ooff_len {
1423 return Err(Error::CorruptObject(
1424 "MIDX oid count does not match object-offsets".to_owned(),
1425 ));
1426 }
1427 for i in 0..num {
1428 let oid = ObjectId::from_bytes(&data[oidl_off + i * hash_len..oidl_off + (i + 1) * hash_len])
1429 .map_err(|e| Error::CorruptObject(e.to_string()))?;
1430 let base = ooff_off + i * 8;
1431 let pack_id = read_be_u32(&data, base)? as usize;
1432 let offset = u64::from(read_be_u32(&data, base + 4)?);
1433 let idx_name = names
1434 .get(pack_id)
1435 .ok_or_else(|| Error::CorruptObject("pack id out of range in MIDX".to_owned()))?;
1436 let stem = idx_name.strip_suffix(".idx").unwrap_or(idx_name);
1440 let dir_disp = objects_dir.display().to_string();
1441 let dir_disp = if objects_dir.is_absolute() || dir_disp.starts_with("./") {
1442 dir_disp
1443 } else {
1444 format!("./{dir_disp}")
1445 };
1446 out.push_str(&format!(
1447 "{} {}\t{}/pack/{}.pack\n",
1448 oid.to_hex(),
1449 offset,
1450 dir_disp,
1451 stem
1452 ));
1453 }
1454 Ok(out)
1455}
1456
1457pub fn format_midx_dump(objects_dir: &Path) -> Result<String> {
1458 format_midx_dump_layer(objects_dir, None)
1459}
1460
1461pub fn format_midx_dump_layer(objects_dir: &Path, checksum: Option<&str>) -> Result<String> {
1464 let pack_dir = objects_dir.join("pack");
1465 let path = resolve_read_midx_path(&pack_dir, checksum)?;
1466 let data = fs::read(&path).map_err(Error::Io)?;
1467 let (hdr, hdr_end, _) = parse_midx_header(&data)?;
1468 let sig = read_be_u32(&data, 0)?;
1469 let version = data[4];
1470 let hash_len: u8 = match data[5] {
1473 1 => 20,
1474 2 => 32,
1475 other => other,
1476 };
1477 let num_chunks = hdr.num_chunks;
1478 let num_packs = read_be_u32(&data, 8)?;
1479
1480 let mut chunk_tags: Vec<&'static str> = Vec::new();
1481 let n = num_chunks as usize;
1482 let pos = hdr_end;
1483 let toc_end = pos + (n + 1) * CHUNK_TOC_ENTRY_SIZE;
1484 if data.len() < toc_end + 20 {
1485 return Err(Error::CorruptObject(
1486 "truncated MIDX chunk table".to_owned(),
1487 ));
1488 }
1489 for i in 0..n {
1490 let base = pos + i * CHUNK_TOC_ENTRY_SIZE;
1491 let id = read_be_u32(&data, base)?;
1492 let tag = match id {
1493 x if x == MIDX_CHUNKID_PACKNAMES => "pack-names",
1494 x if x == MIDX_CHUNKID_OIDFANOUT => "oid-fanout",
1495 x if x == MIDX_CHUNKID_OIDLOOKUP => "oid-lookup",
1496 x if x == MIDX_CHUNKID_OBJECTOFFSETS => "object-offsets",
1497 x if x == MIDX_CHUNKID_LARGEOFFSETS => "large-offsets",
1498 x if x == MIDX_CHUNKID_REVINDEX => "revindex",
1499 x if x == 0x4254_4d50 => "bitmapped-packs",
1500 _ => "unknown",
1501 };
1502 chunk_tags.push(tag);
1503 }
1504
1505 let (_ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1506 let num_objects = ooff_len / 8;
1507
1508 let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1509 let pack_names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
1510
1511 let mut out = String::new();
1512 out.push_str(&format!(
1513 "header: {:08x} {} {} {} {}\n",
1514 sig, version, hash_len, num_chunks, num_packs
1515 ));
1516 out.push_str("chunks:");
1517 for t in &chunk_tags {
1518 out.push(' ');
1519 out.push_str(t);
1520 }
1521 out.push('\n');
1522 out.push_str(&format!("num_objects: {num_objects}\n"));
1523 out.push_str("packs:\n");
1524 for n in &pack_names {
1525 out.push_str(n);
1526 out.push('\n');
1527 }
1528 out.push_str(&format!("object-dir: {}\n", objects_dir.display()));
1529 Ok(out)
1530}
1531
1532#[derive(Debug, Clone)]
1538pub struct MidxReuseTables {
1539 pub oids: Vec<ObjectId>,
1541 pub pack_and_offset: Vec<(u32, u64)>,
1543 pub rid_order: Vec<u32>,
1545 pub oid_idx_to_rank: Vec<u32>,
1547}
1548
1549pub fn load_midx_reuse_tables(objects_dir: &Path) -> Result<Option<MidxReuseTables>> {
1553 let pack_dir = objects_dir.join("pack");
1554 let Some(path) = resolve_tip_midx_path(&pack_dir) else {
1555 return Ok(None);
1556 };
1557 let data = fs::read(&path).map_err(Error::Io)?;
1558 let hash_len = midx_hash_len(&data);
1559 let (_, hdr_end, _) = parse_midx_header(&data)?;
1560 let (oidl_off, oid_l_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
1561 let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1562 let Ok((ridx_off, ridx_len)) = find_chunk(&data, hdr_end, MIDX_CHUNKID_REVINDEX) else {
1563 return Ok(None);
1564 };
1565 if oid_l_len % hash_len != 0 || ooff_len != oid_l_len / hash_len * 8 {
1566 return Err(Error::CorruptObject(
1567 "MIDX OID / offset chunk size mismatch".to_owned(),
1568 ));
1569 }
1570 let num_objects = oid_l_len / hash_len;
1571 if ridx_len != num_objects.saturating_mul(4) {
1572 return Err(Error::CorruptObject(
1573 "MIDX reverse index length does not match object count".to_owned(),
1574 ));
1575 }
1576 if num_objects == 0 {
1577 return Ok(None);
1578 }
1579
1580 let mut oids = Vec::with_capacity(num_objects);
1581 for i in 0..num_objects {
1582 let base = oidl_off + i * hash_len;
1583 oids.push(ObjectId::from_bytes(&data[base..base + hash_len])?);
1584 }
1585
1586 let mut pack_and_offset = Vec::with_capacity(num_objects);
1587 for i in 0..num_objects {
1588 let ob = ooff_off + i * 8;
1589 let pack_id = read_be_u32(&data, ob)?;
1590 let off32 = read_be_u32(&data, ob + 4)?;
1591 pack_and_offset.push((pack_id, u64::from(off32)));
1592 }
1593
1594 let mut rid_order = Vec::with_capacity(num_objects);
1595 for i in 0..num_objects {
1596 let base = ridx_off + i * 4;
1597 rid_order.push(read_be_u32(&data, base)?);
1598 }
1599
1600 let mut oid_idx_to_rank = vec![0u32; num_objects];
1601 for (rank, &oid_idx) in rid_order.iter().enumerate() {
1602 let idx = usize::try_from(oid_idx)
1603 .map_err(|_| Error::CorruptObject("bad MIDX reverse index entry".to_owned()))?;
1604 if idx >= num_objects {
1605 return Err(Error::CorruptObject(
1606 "MIDX reverse index out of range".to_owned(),
1607 ));
1608 }
1609 oid_idx_to_rank[idx] = u32::try_from(rank)
1610 .map_err(|_| Error::CorruptObject("too many MIDX objects".to_owned()))?;
1611 }
1612
1613 Ok(Some(MidxReuseTables {
1614 oids,
1615 pack_and_offset,
1616 rid_order,
1617 oid_idx_to_rank,
1618 }))
1619}
1620
1621impl MidxReuseTables {
1622 #[must_use]
1624 pub fn global_bitmap_bit(&self, oid: &ObjectId) -> Option<u32> {
1625 let oid_idx = self.oids.binary_search(oid).ok()?;
1626 Some(self.oid_idx_to_rank[oid_idx])
1627 }
1628
1629 #[must_use]
1634 pub fn canonical_pack(&self, oid: &ObjectId) -> Option<u32> {
1635 let oid_idx = self.oids.binary_search(oid).ok()?;
1636 Some(self.pack_and_offset[oid_idx].0)
1637 }
1638}
1639
/// One entry of the bitmapped-packs (BTMP) chunk of a multi-pack-index.
#[derive(Clone, Copy, Debug)]
pub struct MidxBtmpPackRange {
    /// Position of the entry within the chunk; the chunk holds one entry per pack.
    pub pack_id: u32,
    /// First 32-bit word of the entry.
    pub bitmap_pos: u32,
    /// Second 32-bit word of the entry.
    pub bitmap_nr: u32,
}
1650
1651pub fn read_midx_btmp_ranges(objects_dir: &Path) -> Result<Vec<MidxBtmpPackRange>> {
1655 let pack_dir = objects_dir.join("pack");
1656 let Some(path) = resolve_tip_midx_path(&pack_dir) else {
1657 return Ok(Vec::new());
1658 };
1659 let data = fs::read(&path).map_err(Error::Io)?;
1660 let (_, hdr_end, _) = parse_midx_header(&data)?;
1661 let Ok((btmp_off, btmp_len)) = find_chunk(&data, hdr_end, MIDX_CHUNKID_BITMAPPED_PACKS) else {
1662 return Ok(Vec::new());
1663 };
1664 if btmp_len == 0 || btmp_len % 8 != 0 {
1665 return Err(Error::CorruptObject(
1666 "invalid MIDX BTMP chunk length".to_owned(),
1667 ));
1668 }
1669 let num_packs = read_be_u32(&data, 8)?;
1670 let n_entries = btmp_len / 8;
1671 if u32::try_from(n_entries).ok() != Some(num_packs) {
1672 return Err(Error::CorruptObject(
1673 "MIDX BTMP entry count does not match num_packs".to_owned(),
1674 ));
1675 }
1676 let mut out = Vec::with_capacity(n_entries);
1677 for i in 0..n_entries {
1678 let base = btmp_off + i * 8;
1679 let bitmap_pos = read_be_u32(&data, base)?;
1680 let bitmap_nr = read_be_u32(&data, base + 4)?;
1681 out.push(MidxBtmpPackRange {
1682 pack_id: u32::try_from(i)
1683 .map_err(|_| Error::CorruptObject("too many packs in MIDX BTMP".to_owned()))?,
1684 bitmap_pos,
1685 bitmap_nr,
1686 });
1687 }
1688 Ok(out)
1689}
1690
1691pub fn format_midx_bitmapped_packs(objects_dir: &Path) -> Result<String> {
1696 let pack_dir = objects_dir.join("pack");
1697 let path = resolve_tip_midx_path(&pack_dir)
1698 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
1699 let data = fs::read(&path).map_err(Error::Io)?;
1700 let (_, hdr_end, _) = parse_midx_header(&data)?;
1701 let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1702 let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
1703 let Ok((btmp_off, btmp_len)) = find_chunk(&data, hdr_end, MIDX_CHUNKID_BITMAPPED_PACKS) else {
1704 return Err(Error::CorruptObject(
1705 "MIDX does not contain the BTMP chunk".to_owned(),
1706 ));
1707 };
1708 let n_entries = btmp_len / 8;
1709 let mut out = String::new();
1710 for i in 0..n_entries {
1711 let base = btmp_off + i * 8;
1712 let bitmap_pos = read_be_u32(&data, base)?;
1713 let bitmap_nr = read_be_u32(&data, base + 4)?;
1714 let idx_name = names.get(i).ok_or_else(|| {
1715 Error::CorruptObject("BTMP entry has no corresponding pack name".to_owned())
1716 })?;
1717 let stem = idx_name.strip_suffix(".idx").unwrap_or(idx_name);
1718 out.push_str(&format!("{stem}.pack\n"));
1719 out.push_str(&format!(" bitmap_pos: {bitmap_pos}\n"));
1720 out.push_str(&format!(" bitmap_nr: {bitmap_nr}\n"));
1721 }
1722 Ok(out)
1723}
1724
1725pub fn midx_lookup_pack_and_offset(objects_dir: &Path, oid: &ObjectId) -> Result<(u32, u64)> {
1727 let pack_dir = objects_dir.join("pack");
1728 let path = resolve_tip_midx_path(&pack_dir)
1729 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
1730 let data = fs::read(&path).map_err(Error::Io)?;
1731 let hash_len = midx_hash_len(&data);
1732 let (_, hdr_end, _) = parse_midx_header(&data)?;
1733 let (fanout_off, fanout_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDFANOUT)?;
1734 let (oidl_off, oid_l_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
1735 let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1736 if fanout_len != 256 * 4 || oid_l_len % hash_len != 0 || ooff_len != oid_l_len / hash_len * 8 {
1737 return Err(Error::CorruptObject("truncated MIDX OID chunks".to_owned()));
1738 }
1739 let num_objects = oid_l_len / hash_len;
1740 let first = oid.as_bytes()[0] as usize;
1741 let j0 = if first == 0 {
1742 0usize
1743 } else {
1744 read_be_u32(&data, fanout_off + (first - 1) * 4)? as usize
1745 };
1746 let j1 = read_be_u32(&data, fanout_off + first * 4)? as usize;
1747 let mut lo = j0;
1748 let mut hi = j1;
1749 while lo < hi {
1750 let mid = (lo + hi) / 2;
1751 let base = oidl_off + mid * hash_len;
1752 let cmp = data[base..base + hash_len].cmp(oid.as_bytes());
1753 if cmp == std::cmp::Ordering::Less {
1754 lo = mid + 1;
1755 } else {
1756 hi = mid;
1757 }
1758 }
1759 if lo >= num_objects {
1760 return Err(Error::CorruptObject(format!(
1761 "object {} not in multi-pack-index",
1762 oid.to_hex()
1763 )));
1764 }
1765 let base = oidl_off + lo * hash_len;
1766 if data[base..base + hash_len] != *oid.as_bytes() {
1767 return Err(Error::CorruptObject(format!(
1768 "object {} not in multi-pack-index",
1769 oid.to_hex()
1770 )));
1771 }
1772 let ob = ooff_off + lo * 8;
1773 let pack_id = read_be_u32(&data, ob)?;
1774 let off32 = read_be_u32(&data, ob + 4)?;
1775 Ok((pack_id, u64::from(off32)))
1776}
1777
1778pub fn midx_oid_listed_in_tip(objects_dir: &Path, oid: &ObjectId) -> Result<Option<bool>> {
1782 let pack_dir = objects_dir.join("pack");
1783 let Some(midx_path) = resolve_tip_midx_path(&pack_dir) else {
1784 return Ok(None);
1785 };
1786 let data = midx_cache::get_bytes(&midx_path)?;
1787 let hash_len = midx_hash_len(&data);
1788 let MidxReadView {
1789 oidf_off,
1790 oidl_off,
1791 num_objects,
1792 ..
1793 } = match midx_load_for_read(&data, repo_midx_hash_version_for_objects_dir(objects_dir)) {
1794 MidxLoadResult::Ok(v) => v,
1795 MidxLoadResult::Skip => return Ok(None),
1796 };
1797
1798 let first = oid.as_bytes()[0] as usize;
1799 let lo = if first == 0 {
1800 0u32
1801 } else {
1802 read_be_u32(&data, oidf_off + (first - 1) * 4)?
1803 };
1804 let hi = read_be_u32(&data, oidf_off + first * 4)?;
1805
1806 let mut i = lo as usize;
1807 while i < hi as usize && i < num_objects {
1808 let o = ObjectId::from_bytes(&data[oidl_off + i * hash_len..oidl_off + (i + 1) * hash_len])?;
1809 match o.cmp(oid) {
1810 std::cmp::Ordering::Equal => return Ok(Some(true)),
1811 std::cmp::Ordering::Greater => return Ok(Some(false)),
1812 std::cmp::Ordering::Less => i += 1,
1813 }
1814 }
1815 Ok(Some(false))
1816}
1817
/// Chunk locations and metadata produced by `midx_load_for_read` for a validated file.
struct MidxReadView {
    /// Byte offset of the OID fanout chunk.
    oidf_off: usize,
    /// Byte offset of the OID lookup chunk.
    oidl_off: usize,
    /// Byte offset of the object-offsets chunk.
    ooff_off: usize,
    /// `(offset, length)` of the large-offsets chunk, when the file has one.
    loff: Option<(usize, usize)>,
    /// Object count, taken from the last fanout entry.
    num_objects: usize,
    /// Names parsed from the pack-names chunk, in stored order.
    pack_names: Vec<String>,
}
1827
1828enum MidxLoadResult {
1829 Ok(MidxReadView),
1830 Skip,
1833}
1834
/// Prints `line` to stderr unless the exact same line was already printed by this function.
///
/// Printed lines are remembered in a process-wide set behind a mutex. If the mutex cannot
/// be locked the line is printed without being recorded.
fn midx_warn_once(line: &str) {
    use std::sync::{Mutex, OnceLock};

    static SEEN: OnceLock<Mutex<HashSet<String>>> = OnceLock::new();

    let registry = SEEN.get_or_init(|| Mutex::new(HashSet::new()));
    match registry.lock() {
        Ok(mut printed) => {
            if printed.insert(line.to_string()) {
                eprintln!("{line}");
            }
        }
        Err(_) => eprintln!("{line}"),
    }
}
1853
/// Writes `lines` to stderr and terminates the process with exit code 128.
///
/// The last line is prefixed with `fatal: `, every earlier line with `error: `. Write and
/// flush failures are ignored.
fn midx_die(lines: &[&str]) -> ! {
    use std::io::Write;

    let mut stderr = std::io::stderr().lock();
    if let Some((last, earlier)) = lines.split_last() {
        for l in earlier {
            let _ = writeln!(stderr, "error: {l}");
        }
        let l = last;
        let _ = writeln!(stderr, "fatal: {l}");
    }
    let _ = stderr.flush();
    std::process::exit(128)
}
1870
1871fn midx_load_for_read(data: &[u8], expected_hash_version: u8) -> MidxLoadResult {
1875 if data.len() < MIDX_HEADER_SIZE + 20 {
1876 return MidxLoadResult::Skip;
1877 }
1878 let sig = u32::from_be_bytes([data[0], data[1], data[2], data[3]]);
1879 if sig != MIDX_SIGNATURE {
1880 midx_die(&[&format!(
1881 "multi-pack-index signature 0x{sig:08x} does not match signature 0x{MIDX_SIGNATURE:08x}"
1882 )]);
1883 }
1884 let version = data[4];
1885 if version != MIDX_VERSION_V1 && version != MIDX_VERSION_V2 {
1886 midx_die(&[&format!(
1887 "multi-pack-index version {version} not recognized"
1888 )]);
1889 }
1890 let hash_version = data[5];
1891 if hash_version != expected_hash_version {
1892 midx_warn_once(&format!(
1896 "error: multi-pack-index hash version {hash_version} does not match version {expected_hash_version}"
1897 ));
1898 return MidxLoadResult::Skip;
1899 }
1900 let hash_len = if hash_version == 2 { 32usize } else { 20usize };
1901 let num_packs = u32::from_be_bytes([data[8], data[9], data[10], data[11]]) as usize;
1902
1903 let mut toc_errors: Vec<String> = Vec::new();
1907 let chunks = match parse_midx_toc(data, hash_len, &mut toc_errors) {
1908 Ok(c) => c,
1909 Err(_) => {
1910 for e in &toc_errors {
1911 midx_warn_once(&format!("error: {e}"));
1912 }
1913 return MidxLoadResult::Skip;
1914 }
1915 };
1916
1917 let Some((pn_off, pn_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_PACKNAMES)
1919 else {
1920 midx_die(&["multi-pack-index required pack-name chunk missing or corrupted"]);
1921 };
1922
1923 let Some((oidf_off, oidf_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OIDFANOUT)
1925 else {
1926 midx_die(&["multi-pack-index required OID fanout chunk missing or corrupted"]);
1927 };
1928 if oidf_len != 256 * 4 {
1929 midx_die(&[
1930 "multi-pack-index OID fanout is of the wrong size",
1931 "multi-pack-index required OID fanout chunk missing or corrupted",
1932 ]);
1933 }
1934 let fanout = |i: usize| -> u32 {
1935 let b = oidf_off + i * 4;
1936 u32::from_be_bytes([data[b], data[b + 1], data[b + 2], data[b + 3]])
1937 };
1938 for i in 0..255 {
1939 let f1 = fanout(i);
1940 let f2 = fanout(i + 1);
1941 if f1 > f2 {
1942 midx_die(&[
1943 &format!(
1944 "oid fanout out of order: fanout[{i}] = {f1:x} > {f2:x} = fanout[{}]",
1945 i + 1
1946 ),
1947 "multi-pack-index required OID fanout chunk missing or corrupted",
1948 ]);
1949 }
1950 }
1951 let num_objects = fanout(255) as usize;
1952
1953 let Some((oidl_off, oidl_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OIDLOOKUP)
1955 else {
1956 midx_die(&["multi-pack-index required OID lookup chunk missing or corrupted"]);
1957 };
1958 if oidl_len != hash_len * num_objects {
1959 midx_die(&[
1960 "multi-pack-index OID lookup chunk is the wrong size",
1961 "multi-pack-index required OID lookup chunk missing or corrupted",
1962 ]);
1963 }
1964
1965 let Some((ooff_off, ooff_len)) =
1967 toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OBJECTOFFSETS)
1968 else {
1969 midx_die(&["multi-pack-index required object offsets chunk missing or corrupted"]);
1970 };
1971 if ooff_len != num_objects * 8 {
1972 midx_die(&[
1973 "multi-pack-index object offset chunk is the wrong size",
1974 "multi-pack-index required object offsets chunk missing or corrupted",
1975 ]);
1976 }
1977
1978 let loff = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_LARGEOFFSETS);
1979
1980 if let Some((_, rlen)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_REVINDEX) {
1982 if rlen != num_objects * 4 {
1983 midx_warn_once("error: multi-pack-index reverse-index chunk is the wrong size");
1984 midx_warn_once("warning: multi-pack bitmap is missing required reverse index");
1985 }
1986 }
1987
1988 let mut pack_names: Vec<String> = Vec::with_capacity(num_packs);
1990 let blob = &data[pn_off..pn_off + pn_len];
1991 let mut start = 0usize;
1992 for _ in 0..num_packs {
1993 let Some(rel) = blob[start..].iter().position(|&b| b == 0) else {
1994 midx_die(&["multi-pack-index pack-name chunk is too short"]);
1995 };
1996 let name = match std::str::from_utf8(&blob[start..start + rel]) {
1997 Ok(s) => s.to_string(),
1998 Err(_) => midx_die(&["multi-pack-index pack-name chunk is too short"]),
1999 };
2000 if version == MIDX_VERSION_V1
2001 && !pack_names.is_empty()
2002 && name.as_str() <= pack_names.last().map(|s| s.as_str()).unwrap_or("")
2003 {
2004 midx_die(&[&format!(
2005 "multi-pack-index pack names out of order: '{}' before '{name}'",
2006 pack_names.last().cloned().unwrap_or_default()
2007 )]);
2008 }
2009 pack_names.push(name);
2010 start += rel + 1;
2011 }
2012
2013 MidxLoadResult::Ok(MidxReadView {
2014 oidf_off,
2015 oidl_off,
2016 ooff_off,
2017 loff,
2018 num_objects,
2019 pack_names,
2020 })
2021}
2022
2023pub fn validate_midx_referenced_packs(objects_dir: &Path) {
2032 use std::sync::Mutex;
2033 use std::sync::OnceLock;
2034 static DONE: OnceLock<Mutex<HashSet<std::path::PathBuf>>> = OnceLock::new();
2035 let done = DONE.get_or_init(|| Mutex::new(HashSet::new()));
2036 if let Ok(mut set) = done.lock() {
2037 if !set.insert(objects_dir.to_path_buf()) {
2038 return;
2039 }
2040 }
2041
2042 let pack_dir = objects_dir.join("pack");
2043 let Some(midx_path) = resolve_tip_midx_path(&pack_dir) else {
2044 return;
2045 };
2046 let Ok(data) = fs::read(&midx_path) else {
2047 return;
2048 };
2049 let MidxReadView { pack_names, .. } =
2050 match midx_load_for_read(&data, repo_midx_hash_version_for_objects_dir(objects_dir)) {
2051 MidxLoadResult::Ok(v) => v,
2052 MidxLoadResult::Skip => return,
2053 };
2054 for idx_name in &pack_names {
2055 let idx_path = pack_dir.join(idx_name);
2056 if !idx_path.exists() {
2060 continue;
2061 }
2062 if crate::pack::read_pack_index_no_verify(&idx_path).is_err() {
2068 let mut pack_path = idx_path.clone();
2069 pack_path.set_extension("pack");
2070 midx_warn_once(&format!(
2071 "error: packfile {} index unavailable",
2072 pack_path.display()
2073 ));
2074 }
2075 }
2076}
2077
2078pub fn try_read_object_via_midx(
2083 objects_dir: &Path,
2084 oid: &ObjectId,
2085) -> Result<Option<crate::objects::Object>> {
2086 let pack_dir = objects_dir.join("pack");
2087 let Some(midx_path) = resolve_tip_midx_path(&pack_dir) else {
2088 return Ok(None);
2089 };
2090 let data = midx_cache::get_bytes(&midx_path)?;
2091
2092 let MidxReadView {
2096 oidf_off,
2097 oidl_off,
2098 ooff_off,
2099 loff,
2100 num_objects,
2101 pack_names,
2102 } = match midx_load_for_read(&data, repo_midx_hash_version_for_objects_dir(objects_dir)) {
2103 MidxLoadResult::Ok(v) => v,
2104 MidxLoadResult::Skip => return Ok(None),
2105 };
2106
2107 let first = oid.as_bytes()[0] as usize;
2108 let lo = if first == 0 {
2109 0u32
2110 } else {
2111 read_be_u32(&data, oidf_off + (first - 1) * 4)?
2112 };
2113 let hi = read_be_u32(&data, oidf_off + first * 4)?;
2114
2115 let hash_len = midx_hash_len(&data);
2116 let mut pos = None;
2117 let mut i = lo as usize;
2118 while i < hi as usize && i < num_objects {
2119 let o = ObjectId::from_bytes(&data[oidl_off + i * hash_len..oidl_off + (i + 1) * hash_len])?;
2120 let c = o.cmp(oid);
2121 if c == std::cmp::Ordering::Equal {
2122 pos = Some(i);
2123 break;
2124 }
2125 if c == std::cmp::Ordering::Greater {
2126 break;
2127 }
2128 i += 1;
2129 }
2130 let Some(pos) = pos else {
2131 return Ok(None);
2132 };
2133
2134 let obase = ooff_off + pos * 8;
2135 let pack_id = read_be_u32(&data, obase)?;
2136 let raw_off = read_be_u32(&data, obase + 4)?;
2137 let _offset = if (raw_off & MIDX_LARGE_OFFSET_NEEDED) != 0 {
2138 let idx = (raw_off & !MIDX_LARGE_OFFSET_NEEDED) as usize;
2139 let need = (idx + 1) * 8;
2140 match loff {
2141 Some((loff_off, loff_len)) if loff_len >= need => {
2142 read_be_u64(&data, loff_off + idx * 8)?
2143 }
2144 _ => {
2145 midx_die(&["multi-pack-index large offset out of bounds"]);
2147 }
2148 }
2149 } else {
2150 u64::from(raw_off)
2151 };
2152
2153 let idx_name = pack_names
2154 .get(pack_id as usize)
2155 .ok_or_else(|| Error::CorruptObject("bad pack-int-id".to_owned()))?;
2156 let idx_path = pack_dir.join(idx_name);
2157 if !idx_path.exists() {
2162 return Ok(None);
2163 }
2164 let idx = match crate::pack::read_pack_index_cached(&idx_path) {
2172 Ok(idx) => idx,
2173 Err(_) => {
2174 let mut pack_path = idx_path.clone();
2175 pack_path.set_extension("pack");
2176 midx_warn_once(&format!(
2177 "error: packfile {} index unavailable",
2178 pack_path.display()
2179 ));
2180 return Ok(None);
2181 }
2182 };
2183 crate::pack::read_object_from_pack(&idx, oid).map(Some)
2184}
2185
2186pub fn read_midx_preferred_idx_name(objects_dir: &Path) -> Result<String> {
2187 let pack_dir = objects_dir.join("pack");
2188 let path = resolve_tip_midx_path(&pack_dir)
2189 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
2190 let data = fs::read(&path).map_err(Error::Io)?;
2191 let (_, hdr_end, _) = parse_midx_header(&data)?;
2192 let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
2193 let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
2194 let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
2195 let (ridx_off, ridx_len) = match find_chunk(&data, hdr_end, MIDX_CHUNKID_REVINDEX) {
2201 Ok(r) => r,
2202 Err(_) => {
2203 return Err(Error::CorruptObject(
2204 "could not determine MIDX preferred pack".to_owned(),
2205 ));
2206 }
2207 };
2208
2209 if ridx_len < 4 || ooff_len < 8 {
2210 return Err(Error::CorruptObject("truncated MIDX RIDX/OOFF".to_owned()));
2211 }
2212 let first_oid_idx = read_be_u32(&data, ridx_off)? as usize;
2213 let entry_base = ooff_off + first_oid_idx * 8;
2214 if entry_base + 8 > data.len() || entry_base + 8 > ooff_off + ooff_len {
2215 return Err(Error::CorruptObject(
2216 "bad MIDX object-offsets index".to_owned(),
2217 ));
2218 }
2219 let pack_id = read_be_u32(&data, entry_base)?;
2220 let idx = usize::try_from(pack_id)
2221 .map_err(|_| Error::CorruptObject("pack id overflow in multi-pack-index".to_owned()))?;
2222 names
2223 .get(idx)
2224 .cloned()
2225 .ok_or_else(|| Error::CorruptObject("preferred pack id out of range".to_owned()))
2226}
2227
2228pub fn clear_pack_midx_state(pack_dir: &Path) -> Result<()> {
2235 let _ = fs::remove_file(pack_dir.join("multi-pack-index"));
2236 scrub_root_midx_sidecars_except(pack_dir, None)?;
2237 let midx_d = midx_d_dir(pack_dir);
2238 if midx_d.exists() {
2239 let _ = fs::remove_dir_all(&midx_d);
2240 }
2241 Ok(())
2242}
2243
2244pub fn write_multi_pack_index(pack_dir: &Path) -> Result<()> {
2245 write_multi_pack_index_with_options(pack_dir, &WriteMultiPackIndexOptions::default())
2246}
2247
/// Builds a multi-pack-index for the packs in `pack_dir` and writes it to disk.
///
/// How `opts` is used:
/// - `pack_names_subset_ordered`: when set, only the listed packs that exist on
///   disk are indexed, in the listed order (duplicates and unknown names are
///   dropped silently). When unset, every `*.idx` with a sibling `.pack` is
///   indexed in sorted name order, and an existing, checksum-valid MIDX is first
///   checked for references to packs that are no longer present.
/// - `preferred_pack_idx` / `preferred_pack_name`: select the preferred pack; the
///   index wins over the name. An unknown name only produces a warning.
/// - `write_bitmap_placeholders`: writes an empty `.bitmap` sidecar (and picks a
///   preferred pack by mtime when none was given).
/// - `write_rev_placeholder`: asks the builder to leave out the embedded
///   reverse index and, together with bitmap placeholders, writes a `.rev`
///   sidecar.
/// - `incremental`: writes a new layer into the directory returned by
///   `midx_d_dir` and appends it to the chain file instead of replacing the
///   root `multi-pack-index`.
/// - `version`: MIDX format version, defaulting to `MIDX_VERSION_V2`.
///
/// # Errors
///
/// Returns [`Error::CorruptObject`] when a pack named by the existing MIDX is
/// missing, when there is nothing to index, when the preferred pack is out of
/// range or has no objects, or when there are too many packs for a `u32` id.
/// I/O failures while writing are returned as [`Error::Io`]; errors from the
/// helpers called with `?` are propagated.
pub fn write_multi_pack_index_with_options(
    pack_dir: &Path,
    opts: &WriteMultiPackIndexOptions,
) -> Result<()> {
    // Only when indexing "everything": refuse to proceed if the current MIDX
    // (readable and checksum-valid) names a pack whose `.pack` file is gone.
    // An unreadable or unparsable MIDX is skipped; a bad checksum only warns.
    if opts.pack_names_subset_ordered.is_none() {
        if let Some(existing) = resolve_tip_midx_path(pack_dir) {
            if let Ok(bytes) = fs::read(&existing) {
                if midx_checksum_is_valid(&bytes) {
                    if let Ok((_, existing_names)) = oids_and_packs_from_midx_data(&bytes) {
                        for (i, name) in existing_names.iter().enumerate() {
                            let stem = name.strip_suffix(".idx").unwrap_or(name);
                            if !pack_dir.join(format!("{stem}.pack")).exists() {
                                eprintln!("error: could not load pack {i}");
                                return Err(Error::CorruptObject(format!(
                                    "could not load pack {i}"
                                )));
                            }
                        }
                    }
                } else {
                    eprintln!("warning: ignoring existing multi-pack-index; checksum mismatch");
                }
            }
        }
    }

    // Candidate packs: every `*.idx` in the directory that has a matching
    // `.pack`. A directory that cannot be listed yields an empty list.
    let mut idx_names: Vec<String> = fs::read_dir(pack_dir)
        .map(|rd| {
            rd.filter_map(|e| e.ok())
                .filter_map(|e| {
                    let name = e.file_name().to_string_lossy().to_string();
                    let stem = name.strip_suffix(".idx")?;
                    if pack_dir.join(format!("{stem}.pack")).exists() {
                        Some(name)
                    } else {
                        None
                    }
                })
                .collect()
        })
        .unwrap_or_default();
    idx_names.sort();

    // With an explicit subset, keep the caller's order; entries that do not
    // match an on-disk index, and repeated entries, are dropped.
    let idx_names: Vec<String> = if let Some(sub) = &opts.pack_names_subset_ordered {
        let mut out = Vec::new();
        for line in sub {
            let want = normalize_pack_idx_basename(line)?;
            if let Some(found) = idx_names.iter().find(|n| **n == want).cloned() {
                if !out.contains(&found) {
                    out.push(found);
                }
            }
        }
        out
    } else {
        idx_names
    };

    // A preferred pack given by name that matches nothing is only warned about;
    // the flag later suppresses the by-name lookup.
    let mut preferred_warned = false;
    if let Some(raw) = opts.preferred_pack_name.as_deref() {
        if opts.preferred_pack_idx.is_none()
            && !idx_names
                .iter()
                .any(|n| cmp_idx_or_pack_name(raw, n).is_eq())
        {
            eprintln!("warning: unknown preferred pack: '{raw}'");
            preferred_warned = true;
        }
    }

    if idx_names.is_empty() {
        eprintln!("error: no pack files to index.");
        return Err(Error::CorruptObject("no pack files to index.".to_owned()));
    }

    // Incremental mode: gather the object ids and pack names already covered by
    // the existing base.
    let (base_oids, base_pack_names) = if opts.incremental {
        collect_incremental_base(pack_dir)?
    } else {
        (HashSet::new(), HashSet::new())
    };

    // The new layer only covers packs that no base pack name matches.
    let layer_idx_names: Vec<String> = if opts.incremental {
        idx_names
            .iter()
            .filter(|n| {
                !base_pack_names
                    .iter()
                    .any(|bp| pack_names_match_layer(bp, n))
            })
            .cloned()
            .collect()
    } else {
        idx_names.clone()
    };

    // Nothing new to add to the chain: succeed without writing anything.
    if opts.incremental && layer_idx_names.is_empty() {
        return Ok(());
    }

    let work_names = if opts.incremental {
        &layer_idx_names[..]
    } else {
        &idx_names[..]
    };

    // Preferred pack resolution order: explicit index, then name (unless it was
    // already reported as unknown), then mtime-based choice when bitmaps are
    // requested.
    let mut preferred_idx = opts.preferred_pack_idx.map(|p| p as usize);
    if preferred_idx.is_none() && !preferred_warned {
        if let Some(raw) = opts.preferred_pack_name.as_deref() {
            preferred_idx = work_names
                .iter()
                .position(|n| cmp_idx_or_pack_name(raw, n).is_eq());
        }
    }
    if preferred_idx.is_none() && opts.write_bitmap_placeholders && !work_names.is_empty() {
        preferred_idx = preferred_pack_index_by_mtime(pack_dir, work_names)?;
    }
    if let Some(p) = preferred_idx {
        if p >= work_names.len() {
            return Err(Error::CorruptObject(
                "preferred pack index out of range".to_owned(),
            ));
        }
    }

    // Load every pack index, in the same order as `work_names`.
    let mut indexes: Vec<PackIndex> = Vec::with_capacity(work_names.len());
    for name in work_names {
        let path = pack_dir.join(name);
        indexes.push(crate::pack::read_pack_index_no_verify(&path)?);
    }

    // A preferred pack must contain at least one object.
    if let Some(p) = preferred_idx {
        if indexes.get(p).map(|i| i.entries.len()).unwrap_or(0) == 0 {
            let name = work_names.get(p).cloned().unwrap_or_default();
            let pack_name = name.strip_suffix(".idx").unwrap_or(&name);
            eprintln!("error: cannot select preferred pack {pack_name}.pack with no objects");
            return Err(Error::CorruptObject(
                "cannot select preferred pack with no objects".to_owned(),
            ));
        }
    }

    let pack_mtimes_layer: Vec<std::time::SystemTime> =
        indexes.iter().map(pack_mtime_for_midx).collect();
    let preferred_u32 = preferred_idx.map(|p| p as u32);
    // Object-id length in bytes: 32 when the hash version is 2, otherwise 20.
    let select_hash_len = if repo_midx_hash_version(pack_dir) == 2 { 32 } else { 20 };

    // Pick one winning entry per object id across all packs. Within this
    // function the map is only consulted for emptiness (see
    // `bitmap_placeholders` below); the MIDX bytes themselves come from
    // `build_midx_bytes_filtered`.
    let mut best: HashMap<ObjectId, MidxEntry> = HashMap::new();
    for (pack_id, idx) in indexes.iter().enumerate() {
        let pack_id = u32::try_from(pack_id).map_err(|_| {
            Error::CorruptObject("too many pack files for multi-pack-index".to_owned())
        })?;
        let mtime = pack_mtimes_layer[pack_id as usize];
        for e in &idx.entries {
            // Skip entries whose id length does not match the selected hash.
            if e.oid.len() != select_hash_len {
                continue;
            }
            let Ok(oid) = ObjectId::from_bytes(&e.oid) else {
                continue;
            };
            // Objects already present in the base are not part of the new layer.
            if opts.incremental && base_oids.contains(&oid) {
                continue;
            }
            let cand = MidxEntry {
                oid,
                pack_id,
                offset: e.offset,
                pack_mtime: mtime,
            };
            match best.get(&oid) {
                None => {
                    best.insert(oid, cand);
                }
                Some(cur) => {
                    if midx_pick_better_entry(cur, pack_id, e.offset, mtime, preferred_u32) {
                        best.insert(oid, cand);
                    }
                }
            }
        }
    }

    // An incremental layer that contributes no objects gets no bitmap sidecars.
    let bitmap_placeholders =
        opts.write_bitmap_placeholders && (!opts.incremental || !best.is_empty());

    let omit_embedded_ridx = opts.write_rev_placeholder;
    // Only pass an exclusion set when there actually are base objects.
    let exclude = if opts.incremental && !base_oids.is_empty() {
        Some(&base_oids)
    } else {
        None
    };
    let (out, rev_sidecar_order) = build_midx_bytes_filtered(
        work_names,
        &indexes,
        preferred_idx,
        bitmap_placeholders,
        omit_embedded_ridx,
        opts.version.unwrap_or(MIDX_VERSION_V2),
        repo_midx_hash_version(pack_dir),
        exclude,
    )?;

    // The last `hash_len` bytes of the serialized MIDX are used as its
    // identifying hash in file names and in the chain file.
    let hash_len = if repo_midx_hash_version(pack_dir) == 2 { 32 } else { 20 };
    let hash = &out[out.len() - hash_len..];
    let hash_hex = hex::encode(hash);
    let hash_arr: Vec<u8> = hash.to_vec();

    if opts.incremental {
        let root_midx = pack_dir.join("multi-pack-index");
        let chain_path = chain_file_path(pack_dir);
        let chain_existed = chain_path.exists();

        // A root MIDX without a chain becomes the first chain entry; otherwise
        // continue the existing chain (an unreadable chain is treated as empty).
        let mut chain = if root_midx.exists() && !chain_existed {
            let root_hex = midx_checksum_hex_from_path(&root_midx)?;
            link_root_midx_into_chain(pack_dir, &root_hex)?;
            vec![root_hex]
        } else {
            read_chain_layer_hashes(pack_dir).unwrap_or_default()
        };

        chain.push(hash_hex.clone());

        let midx_d = midx_d_dir(pack_dir);
        fs::create_dir_all(&midx_d).map_err(Error::Io)?;

        // Write the layer file before the chain file that references it.
        let layer_path = midx_d.join(format!("multi-pack-index-{hash_hex}.midx"));
        fs::write(&layer_path, &out).map_err(Error::Io)?;

        // Chain file: one layer hash per line.
        let mut chain_data = String::new();
        for h in &chain {
            chain_data.push_str(h);
            chain_data.push('\n');
        }
        fs::write(chain_file_path(pack_dir), chain_data.as_bytes()).map_err(Error::Io)?;

        clear_stale_split_layers(pack_dir, &chain)?;

        // Best-effort removal of the root file, then drop all root sidecars.
        let _ = fs::remove_file(pack_dir.join("multi-pack-index"));
        scrub_root_midx_sidecars(pack_dir)?;
        if bitmap_placeholders {
            let full = hex::encode(hash);
            fs::write(midx_d.join(format!("multi-pack-index-{full}.bitmap")), [])
                .map_err(Error::Io)?;
            if opts.write_rev_placeholder {
                let rev_path = midx_d.join(format!("multi-pack-index-{full}.rev"));
                // Write a real sidecar when the builder returned an order,
                // otherwise an empty placeholder file.
                if let Some(order) = rev_sidecar_order.as_ref() {
                    write_midx_rev_sidecar(&rev_path, order, &hash_arr)?;
                } else {
                    fs::write(rev_path, []).map_err(Error::Io)?;
                }
            }
        }
    } else {
        let dest = pack_dir.join("multi-pack-index");

        // Skip the rewrite when the file on disk is already byte-identical, no
        // chain file exists, and any requested bitmap sidecar is present. This
        // early return also skips the cache eviction at the end.
        let bitmap_path = pack_dir.join(format!("multi-pack-index-{hash_hex}.bitmap"));
        let bitmap_ok = !opts.write_bitmap_placeholders || bitmap_path.exists();
        if bitmap_ok && !chain_file_path(pack_dir).exists() {
            if let Ok(existing) = fs::read(&dest) {
                if existing == out {
                    return Ok(());
                }
            }
        }

        clear_incremental_midx_files(pack_dir)?;

        fs::write(&dest, &out).map_err(Error::Io)?;

        // Remove sidecars that belong to any other MIDX hash.
        scrub_root_midx_sidecars_except(pack_dir, Some(&hash_hex))?;

        if opts.write_bitmap_placeholders {
            fs::write(
                pack_dir.join(format!("multi-pack-index-{hash_hex}.bitmap")),
                [],
            )
            .map_err(Error::Io)?;
            if opts.write_rev_placeholder {
                let rev_path = pack_dir.join(format!("multi-pack-index-{hash_hex}.rev"));
                if let Some(order) = rev_sidecar_order.as_ref() {
                    write_midx_rev_sidecar(&rev_path, order, &hash_arr)?;
                } else {
                    fs::write(rev_path, []).map_err(Error::Io)?;
                }
            }
        }
    }

    // Drop any cached MIDX state for this pack directory now that files changed.
    midx_cache::evict_pack_dir(pack_dir);
    Ok(())
}
2571
2572fn pack_names_match_layer(base_name: &str, disk_idx: &str) -> bool {
2573 if base_name == disk_idx {
2574 return true;
2575 }
2576 cmp_idx_or_pack_name(disk_idx, base_name).is_eq()
2577}
2578
/// Failure modes of multi-pack-index chain compaction.
#[derive(Debug)]
pub enum CompactError {
    /// There is no (non-empty) multi-pack-index chain to compact.
    NoChain,
    /// The named endpoint was not found; carries the endpoint as supplied.
    MissingEndpoint(String),
    /// Both endpoints name the same layer.
    IdenticalEndpoints,
    /// The first value (`from`) is not an ancestor of the second (`to`).
    NotAncestor(String, String),
    /// Compaction was requested with the v1 format.
    V1Format,
    /// Any other failure, carried as a message.
    Other(String),
}

impl std::fmt::Display for CompactError {
    /// Renders the user-facing message for each variant.
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::NoChain => out.write_str("no multi-pack-index chain to compact"),
            Self::MissingEndpoint(endpoint) => write!(out, "could not find MIDX: {endpoint}"),
            Self::IdenticalEndpoints => out.write_str("MIDX compaction endpoints must be unique"),
            Self::NotAncestor(older, newer) => {
                write!(out, "MIDX {older} must be an ancestor of {newer}")
            }
            Self::V1Format => out.write_str("cannot perform MIDX compaction with v1 format"),
            Self::Other(message) => out.write_str(message),
        }
    }
}
2615
2616impl From<Error> for CompactError {
2617 fn from(e: Error) -> Self {
2618 CompactError::Other(e.to_string())
2619 }
2620}
2621
2622fn collect_layer_oids(pack_dir: &Path, hashes: &[String]) -> Result<HashSet<ObjectId>> {
2625 let mut oids = HashSet::new();
2626 for h in hashes {
2627 let p = midx_d_dir(pack_dir).join(format!("multi-pack-index-{h}.midx"));
2628 let data = load_midx_file(&p)?;
2629 let (layer_oids, _) = oids_and_packs_from_midx_data(&data)?;
2630 oids.extend(layer_oids);
2631 }
2632 Ok(oids)
2633}
2634
2635fn layer_pack_names(pack_dir: &Path, hash: &str) -> Result<Vec<String>> {
2637 let p = midx_d_dir(pack_dir).join(format!("multi-pack-index-{hash}.midx"));
2638 let data = load_midx_file(&p)?;
2639 let (_, hdr_end, _) = parse_midx_header(&data)?;
2640 let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
2641 parse_pack_names_blob(&data[pn_off..pn_off + pn_len])
2642}
2643
2644pub fn compact_multi_pack_index(
2653 pack_dir: &Path,
2654 from_arg: &str,
2655 to_arg: &str,
2656 write_bitmaps: bool,
2657 write_rev: bool,
2658 version: Option<u8>,
2659) -> std::result::Result<(), CompactError> {
2660 if version == Some(MIDX_VERSION_V1) {
2661 return Err(CompactError::V1Format);
2662 }
2663
2664 let chain = read_chain_layer_hashes(pack_dir).map_err(|_| CompactError::NoChain)?;
2665 if chain.is_empty() {
2666 return Err(CompactError::NoChain);
2667 }
2668
2669 let from_hex = from_arg.to_ascii_lowercase();
2670 let to_hex = to_arg.to_ascii_lowercase();
2671
2672 let from_pos = chain.iter().position(|h| *h == from_hex);
2673 let to_pos = chain.iter().position(|h| *h == to_hex);
2674
2675 let Some(from_pos) = from_pos else {
2677 return Err(CompactError::MissingEndpoint(from_arg.to_string()));
2678 };
2679 let Some(to_pos) = to_pos else {
2680 return Err(CompactError::MissingEndpoint(to_arg.to_string()));
2681 };
2682
2683 if from_pos == to_pos {
2684 return Err(CompactError::IdenticalEndpoints);
2685 }
2686 if from_pos > to_pos {
2690 return Err(CompactError::NotAncestor(
2691 from_arg.to_string(),
2692 to_arg.to_string(),
2693 ));
2694 }
2695
2696 let base_hashes = &chain[..from_pos];
2699 let merged_hashes = &chain[from_pos..=to_pos];
2700 let upper_hashes = &chain[to_pos + 1..];
2701
2702 let base_oids = collect_layer_oids(pack_dir, base_hashes)?;
2703
2704 let mut ordered_idx_names: Vec<String> = Vec::new();
2707 for h in merged_hashes {
2708 for name in layer_pack_names(pack_dir, h)? {
2709 if !ordered_idx_names.contains(&name) {
2710 ordered_idx_names.push(name);
2711 }
2712 }
2713 }
2714
2715 if ordered_idx_names.is_empty() {
2716 return Err(CompactError::Other(
2717 "no packs found in compaction range".to_owned(),
2718 ));
2719 }
2720
2721 let mut indexes: Vec<PackIndex> = Vec::with_capacity(ordered_idx_names.len());
2723 for name in &ordered_idx_names {
2724 let path = pack_dir.join(name);
2725 indexes.push(crate::pack::read_pack_index_no_verify(&path)?);
2726 }
2727
2728 let preferred_idx = if write_bitmaps { Some(0usize) } else { None };
2731
2732 let exclude = if base_oids.is_empty() {
2733 None
2734 } else {
2735 Some(&base_oids)
2736 };
2737
2738 let (out, rev_sidecar_order) = build_midx_bytes_filtered(
2739 &ordered_idx_names,
2740 &indexes,
2741 preferred_idx,
2742 write_bitmaps,
2743 write_rev,
2744 version.unwrap_or(MIDX_VERSION_V2),
2745 repo_midx_hash_version(pack_dir),
2746 exclude,
2747 )?;
2748
2749 let hash_len = if repo_midx_hash_version(pack_dir) == 2 { 32 } else { 20 };
2750 let hash = &out[out.len() - hash_len..];
2751 let hash_hex = hex::encode(hash);
2752 let hash_arr: Vec<u8> = hash.to_vec();
2753
2754 let midx_d = midx_d_dir(pack_dir);
2755 fs::create_dir_all(&midx_d).map_err(Error::Io)?;
2756
2757 let layer_path = midx_d.join(format!("multi-pack-index-{hash_hex}.midx"));
2758 fs::write(&layer_path, &out).map_err(Error::Io)?;
2759
2760 let mut new_chain: Vec<String> = Vec::new();
2762 new_chain.extend(base_hashes.iter().cloned());
2763 new_chain.push(hash_hex.clone());
2764 new_chain.extend(upper_hashes.iter().cloned());
2765
2766 let mut chain_data = String::new();
2767 for h in &new_chain {
2768 chain_data.push_str(h);
2769 chain_data.push('\n');
2770 }
2771 fs::write(chain_file_path(pack_dir), chain_data.as_bytes()).map_err(Error::Io)?;
2772
2773 if write_bitmaps {
2774 fs::write(
2775 midx_d.join(format!("multi-pack-index-{hash_hex}.bitmap")),
2776 [],
2777 )
2778 .map_err(Error::Io)?;
2779 let rev_path = midx_d.join(format!("multi-pack-index-{hash_hex}.rev"));
2780 if write_rev {
2781 if let Some(order) = rev_sidecar_order.as_ref() {
2782 write_midx_rev_sidecar(&rev_path, order, &hash_arr)?;
2783 } else {
2784 fs::write(rev_path, []).map_err(Error::Io)?;
2785 }
2786 }
2787 }
2788
2789 clear_stale_split_layers(pack_dir, &new_chain)?;
2791
2792 midx_cache::evict_pack_dir(pack_dir);
2793 Ok(())
2794}
2795
2796fn scrub_root_midx_sidecars(pack_dir: &Path) -> Result<()> {
2797 scrub_root_midx_sidecars_except(pack_dir, None)
2798}
2799
2800fn scrub_root_midx_sidecars_except(pack_dir: &Path, keep_hex: Option<&str>) -> Result<()> {
2801 let Ok(rd) = fs::read_dir(pack_dir) else {
2802 return Ok(());
2803 };
2804 for ent in rd {
2805 let ent = ent.map_err(Error::Io)?;
2806 let name = ent.file_name().to_string_lossy().to_string();
2807 let Some(rest) = name.strip_prefix("multi-pack-index-") else {
2808 continue;
2809 };
2810 if !(rest.ends_with(".bitmap") || rest.ends_with(".rev")) {
2811 continue;
2812 }
2813 let hash_part = rest
2814 .strip_suffix(".bitmap")
2815 .or_else(|| rest.strip_suffix(".rev"))
2816 .unwrap_or(rest);
2817 if keep_hex.is_some_and(|k| k == hash_part) {
2821 continue;
2822 }
2823 let _ = fs::remove_file(ent.path());
2824 }
2825 Ok(())
2826}