1use std::collections::{HashMap, HashSet};
13use std::fs;
14use std::io::{BufRead, BufReader};
15use std::path::Path;
16
17use sha1::{Digest, Sha1};
18
19use crate::error::{Error, Result};
20use crate::objects::ObjectId;
21use crate::pack::{read_pack_index_no_verify, PackIndex};
22
/// File signature: the ASCII bytes `MIDX` read as a big-endian `u32`.
const MIDX_SIGNATURE: u32 = 0x4d49_4458;
/// Header version byte values accepted by the parser in this file.
const MIDX_VERSION_V1: u8 = 1;
const MIDX_VERSION_V2: u8 = 2;
/// Header hash-version byte: `1` is used for SHA-1 repositories, `2` when the
/// repository config sets `extensions.objectformat = sha256`.
const HASH_VERSION_SHA1: u8 = 1;
const HASH_VERSION_SHA256: u8 = 2;
/// Size of the fixed header: signature (4), version, hash version, chunk
/// count, one further byte, and the big-endian pack count (4).
const MIDX_HEADER_SIZE: usize = 12;
/// Size of one table-of-contents row: 4-byte chunk id + 8-byte file offset.
const CHUNK_TOC_ENTRY_SIZE: usize = 12;
/// Chunk id `PNAM`: NUL-terminated pack index names.
const MIDX_CHUNKID_PACKNAMES: u32 = 0x504e_414d;
/// Chunk id `OIDF`: 256-entry cumulative fanout table.
const MIDX_CHUNKID_OIDFANOUT: u32 = 0x4f49_4446;
/// Chunk id `OIDL`: sorted object ids.
const MIDX_CHUNKID_OIDLOOKUP: u32 = 0x4f49_444c;
/// Chunk id `OOFF`: per-object (pack id, 32-bit offset) pairs.
const MIDX_CHUNKID_OBJECTOFFSETS: u32 = 0x4f4f_4646;
/// Chunk id `LOFF`: 64-bit offsets referenced from `OOFF`.
const MIDX_CHUNKID_LARGEOFFSETS: u32 = 0x4c4f_4646;
/// Chunk id `RIDX`: object positions in pack order (reverse index).
const MIDX_CHUNKID_REVINDEX: u32 = 0x5249_4458;
/// Chunk id `BTMP`: per-pack (first position, object count) pairs.
const MIDX_CHUNKID_BITMAPPED_PACKS: u32 = 0x4254_4d50;

/// Signature of the `.rev` sidecar file (ASCII `RIDX`); same value as the
/// embedded reverse-index chunk id.
const RIDX_SIGNATURE: u32 = 0x5249_4458;
/// Version word written into the `.rev` sidecar header.
const RIDX_VERSION: u32 = 1;
/// Size of the `.rev` sidecar header: three big-endian `u32` words.
const RIDX_HEADER_SIZE: usize = 12;
/// Chunk offsets must be multiples of this many bytes.
const MIDX_CHUNK_ALIGNMENT: usize = 4;

/// High bit of an `OOFF` offset word: when set, the remaining 31 bits index
/// into the large-offsets chunk instead of holding the offset itself.
const MIDX_LARGE_OFFSET_NEEDED: u32 = 0x8000_0000;
46
/// One object selected for the multi-pack-index while merging pack indexes.
struct MidxEntry {
    /// Object id; also the sort key of the final object table.
    oid: ObjectId,
    /// Position of the owning pack in the list of indexed packs.
    pack_id: u32,
    /// Byte offset of the object inside that pack.
    offset: u64,
    /// Modification time of the owning pack, used to break ties between
    /// duplicate copies of the same object.
    pack_mtime: std::time::SystemTime,
}
53
/// Caller-supplied knobs for writing a multi-pack-index.
///
/// NOTE(review): the code consuming these options is not visible in this part
/// of the file; the field notes below are inferred from names only and should
/// be confirmed against the writer.
#[derive(Debug, Clone, Default)]
pub struct WriteMultiPackIndexOptions {
    /// Presumably the position of the preferred pack — TODO confirm.
    pub preferred_pack_idx: Option<u32>,
    /// Presumably the preferred pack given by file name — TODO confirm.
    pub preferred_pack_name: Option<String>,
    /// Presumably an explicit, ordered subset of packs to index — TODO confirm.
    pub pack_names_subset_ordered: Option<Vec<String>>,
    /// Presumably requests the bitmapped-packs placeholder data — TODO confirm.
    pub write_bitmap_placeholders: bool,
    /// Presumably selects writing an incremental (chained) layer — TODO confirm.
    pub incremental: bool,
    /// Presumably requests a `.rev` placeholder sidecar — TODO confirm.
    pub write_rev_placeholder: bool,
    /// Presumably the on-disk format version to write — TODO confirm.
    pub version: Option<u8>,
}
77
78fn normalize_pack_idx_basename(raw: &str) -> Result<String> {
79 let t = raw.trim();
80 let t = std::path::Path::new(t)
81 .file_name()
82 .and_then(|s| s.to_str())
83 .unwrap_or(t);
84 let t = t.strip_prefix("./").unwrap_or(t);
85 if t.ends_with(".idx") {
86 Ok(t.to_string())
87 } else if t.ends_with(".pack") {
88 Ok(format!("{}.idx", t.strip_suffix(".pack").unwrap_or(t)))
89 } else {
90 Ok(format!("{t}.idx"))
91 }
92}
93
94fn read_be_u32(data: &[u8], off: usize) -> Result<u32> {
100 let end = off.checked_add(4).filter(|&e| e <= data.len());
101 let Some(end) = end else {
102 return Err(Error::CorruptObject(
103 "truncated MIDX data reading u32".to_owned(),
104 ));
105 };
106 let bytes: [u8; 4] = data[off..end]
107 .try_into()
108 .map_err(|_| Error::CorruptObject("truncated MIDX data reading u32".to_owned()))?;
109 Ok(u32::from_be_bytes(bytes))
110}
111
112fn read_be_u64(data: &[u8], off: usize) -> Result<u64> {
118 let end = off.checked_add(8).filter(|&e| e <= data.len());
119 let Some(end) = end else {
120 return Err(Error::CorruptObject(
121 "truncated MIDX data reading u64".to_owned(),
122 ));
123 };
124 let bytes: [u8; 8] = data[off..end]
125 .try_into()
126 .map_err(|_| Error::CorruptObject("truncated MIDX data reading u64".to_owned()))?;
127 Ok(u64::from_be_bytes(bytes))
128}
129
/// Fields of the fixed MIDX header that callers in this file need.
struct MidxFileHeader {
    /// Number of chunks listed in the table of contents (header byte 6),
    /// not counting the terminating row.
    num_chunks: u8,
}
133
134fn parse_midx_header(data: &[u8]) -> Result<(MidxFileHeader, usize, u8)> {
135 if data.len() < MIDX_HEADER_SIZE + 20 {
136 return Err(Error::CorruptObject("midx file too small".to_owned()));
137 }
138 let sig = read_be_u32(data, 0)?;
139 if sig != MIDX_SIGNATURE {
140 return Err(Error::CorruptObject("bad MIDX signature".to_owned()));
141 }
142 let version = data[4];
143 if version != MIDX_VERSION_V1 && version != MIDX_VERSION_V2 {
144 return Err(Error::CorruptObject(format!(
145 "multi-pack-index version {version} not recognized"
146 )));
147 }
148 let object_hash_bytes = data[5];
149 let num_chunks = data[6];
150 let _num_packs = read_be_u32(data, 8)?;
151 Ok((
152 MidxFileHeader { num_chunks },
153 MIDX_HEADER_SIZE,
154 object_hash_bytes,
155 ))
156}
157
158fn parse_pack_names_blob(pn: &[u8]) -> Result<Vec<String>> {
159 let mut names = Vec::new();
160 let mut start = 0usize;
161 for (i, &b) in pn.iter().enumerate() {
162 if b == 0 && i >= start {
163 if i > start {
164 let s = std::str::from_utf8(&pn[start..i])
165 .map_err(|_| Error::CorruptObject("non-utf8 pack name in MIDX".to_owned()))?;
166 names.push(s.to_string());
167 }
168 start = i + 1;
169 }
170 }
171 Ok(names)
172}
173
/// Compares a `.idx`/`.pack` file name against an `.idx` name.
///
/// The names are compared bytewise, except that a left-hand name differing
/// only by ending in `pack` where the right-hand one ends in `idx` compares
/// as equal.
fn cmp_idx_or_pack_name(idx_or_pack_name: &str, idx_name: &str) -> std::cmp::Ordering {
    let (lhs, rhs) = (idx_or_pack_name.as_bytes(), idx_name.as_bytes());
    // Length of the common prefix.
    let shared = lhs
        .iter()
        .zip(rhs)
        .take_while(|(left, right)| left == right)
        .count();

    match (&lhs[shared..], &rhs[shared..]) {
        (b"pack", b"idx") => std::cmp::Ordering::Equal,
        (left_tail, right_tail) => left_tail.cmp(right_tail),
    }
}
190
191fn preferred_pack_index_by_mtime(pack_dir: &Path, names: &[String]) -> Result<Option<usize>> {
192 let mut best: Option<(usize, std::time::SystemTime)> = None;
193 for (i, n) in names.iter().enumerate() {
194 let meta = fs::metadata(pack_dir.join(n)).map_err(Error::Io)?;
195 let mtime = meta.modified().map_err(Error::Io)?;
196 match best {
197 None => best = Some((i, mtime)),
198 Some((_, t)) if mtime < t => best = Some((i, mtime)),
199 _ => {}
200 }
201 }
202 Ok(best.map(|(i, _)| i))
203}
204
/// Path of the `multi-pack-index.d` directory inside `pack_dir`.
fn midx_d_dir(pack_dir: &Path) -> std::path::PathBuf {
    let mut dir = pack_dir.to_path_buf();
    dir.push("multi-pack-index.d");
    dir
}
208
209fn chain_file_path(pack_dir: &Path) -> std::path::PathBuf {
210 midx_d_dir(pack_dir).join("multi-pack-index-chain")
211}
212
213fn read_chain_layer_hashes(pack_dir: &Path) -> Result<Vec<String>> {
214 let path = chain_file_path(pack_dir);
215 let f = fs::File::open(&path).map_err(Error::Io)?;
216 let mut out = Vec::new();
217 for line in BufReader::new(f).lines() {
218 let line = line.map_err(Error::Io)?;
219 let t = line.trim();
220 if t.is_empty() {
221 continue;
222 }
223 if t.len() != 40 || !t.chars().all(|c| c.is_ascii_hexdigit()) {
224 return Err(Error::CorruptObject(format!(
225 "invalid multi-pack-index chain line: {t}"
226 )));
227 }
228 out.push(t.to_ascii_lowercase());
229 }
230 Ok(out)
231}
232
233fn repo_midx_hash_version(pack_dir: &Path) -> u8 {
241 let Some(objects_dir) = pack_dir.parent() else {
243 return HASH_VERSION_SHA1;
244 };
245 repo_midx_hash_version_for_objects_dir(objects_dir)
246}
247
248fn repo_midx_hash_version_for_objects_dir(objects_dir: &Path) -> u8 {
250 let Some(gitdir) = objects_dir.parent() else {
251 return HASH_VERSION_SHA1;
252 };
253 let config_path = gitdir.join("config");
254 let Ok(text) = fs::read_to_string(&config_path) else {
255 return HASH_VERSION_SHA1;
256 };
257 let mut in_extensions = false;
261 for raw in text.lines() {
262 let line = raw.trim();
263 if line.starts_with('[') {
264 let section = line.trim_start_matches('[').trim_end_matches(']');
265 let name = section.split_whitespace().next().unwrap_or("");
266 in_extensions = name.eq_ignore_ascii_case("extensions");
267 continue;
268 }
269 if !in_extensions {
270 continue;
271 }
272 if let Some((key, value)) = line.split_once('=') {
273 if key.trim().eq_ignore_ascii_case("objectformat")
274 && value.trim().eq_ignore_ascii_case("sha256")
275 {
276 return HASH_VERSION_SHA256;
277 }
278 }
279 }
280 HASH_VERSION_SHA1
281}
282
283pub fn resolve_tip_midx_path(pack_dir: &Path) -> Option<std::path::PathBuf> {
284 let root = pack_dir.join("multi-pack-index");
285 if root.exists() {
286 return Some(root);
287 }
288 let hashes = read_chain_layer_hashes(pack_dir).ok()?;
289 let last = hashes.last()?;
290 Some(midx_d_dir(pack_dir).join(format!("multi-pack-index-{last}.midx")))
291}
292
293pub fn resolve_midx_layer_path(pack_dir: &Path, checksum: &str) -> Option<std::path::PathBuf> {
297 let checksum = checksum.to_ascii_lowercase();
298 if let Ok(hashes) = read_chain_layer_hashes(pack_dir) {
299 if hashes.contains(&checksum) {
300 return Some(midx_d_dir(pack_dir).join(format!("multi-pack-index-{checksum}.midx")));
301 }
302 }
303 let root = pack_dir.join("multi-pack-index");
304 if root.exists() {
305 if let Ok(hex) = midx_checksum_hex_from_path(&root) {
306 if hex == checksum {
307 return Some(root);
308 }
309 }
310 }
311 None
312}
313
314fn load_midx_file(path: &Path) -> Result<Vec<u8>> {
315 let data = fs::read(path).map_err(Error::Io)?;
316 let _ = parse_midx_header(&data)?;
317 Ok(data)
318}
319
320fn oids_and_packs_from_midx_data(data: &[u8]) -> Result<(HashSet<ObjectId>, Vec<String>)> {
321 let (_, hdr_end, _) = parse_midx_header(data)?;
322 let (pn_off, pn_len) = find_chunk(data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
323 let pack_names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
324 let (_ooff_off, ooff_len) = find_chunk(data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
325 let (oidl_off, oidl_len) = find_chunk(data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
326 let num_objects = ooff_len / 8;
327 if oidl_len != num_objects * 20 {
328 return Err(Error::CorruptObject(
329 "MIDX oid-lookup size mismatch".to_owned(),
330 ));
331 }
332 let mut oids = HashSet::with_capacity(num_objects);
333 for i in 0..num_objects {
334 let start = oidl_off + i * 20;
335 let oid = ObjectId::from_bytes(&data[start..start + 20])?;
336 oids.insert(oid);
337 }
338 Ok((oids, pack_names))
339}
340
341fn collect_incremental_base(pack_dir: &Path) -> Result<(HashSet<ObjectId>, HashSet<String>)> {
342 let mut oids = HashSet::new();
343 let mut packs = HashSet::new();
344 let root = pack_dir.join("multi-pack-index");
345 let chain_path = chain_file_path(pack_dir);
346 if chain_path.exists() {
347 for h in read_chain_layer_hashes(pack_dir)? {
348 let p = midx_d_dir(pack_dir).join(format!("multi-pack-index-{h}.midx"));
349 let data = load_midx_file(&p)?;
350 let (layer_oids, names) = oids_and_packs_from_midx_data(&data)?;
351 oids.extend(layer_oids);
352 for n in names {
353 packs.insert(n);
354 }
355 }
356 return Ok((oids, packs));
357 }
358 if root.exists() {
359 let data = load_midx_file(&root)?;
360 let (o, names) = oids_and_packs_from_midx_data(&data)?;
361 oids = o;
362 for n in names {
363 packs.insert(n);
364 }
365 }
366 Ok((oids, packs))
367}
368
369fn midx_checksum_hex_from_path(path: &Path) -> Result<String> {
370 let data = fs::read(path).map_err(Error::Io)?;
371 if data.len() < 20 {
372 return Err(Error::CorruptObject(
373 "midx too small for checksum".to_owned(),
374 ));
375 }
376 let hash = &data[data.len() - 20..];
377 Ok(hex::encode(hash))
378}
379
380fn hard_link_or_copy(src: &Path, dst: &Path) -> Result<()> {
381 let _ = fs::remove_file(dst);
382 if fs::hard_link(src, dst).is_ok() {
383 return Ok(());
384 }
385 fs::copy(src, dst).map_err(Error::Io)?;
386 Ok(())
387}
388
389fn link_root_midx_into_chain(pack_dir: &Path, root_checksum_hex: &str) -> Result<()> {
390 let midx_d = midx_d_dir(pack_dir);
391 fs::create_dir_all(&midx_d).map_err(Error::Io)?;
392 let dst_midx = midx_d.join(format!("multi-pack-index-{root_checksum_hex}.midx"));
393 hard_link_or_copy(&pack_dir.join("multi-pack-index"), &dst_midx)?;
394 let exts = ["bitmap", "rev"];
395 for ext in exts {
396 let src = pack_dir.join(format!("multi-pack-index-{root_checksum_hex}.{ext}"));
397 if src.exists() {
398 let dst = midx_d.join(format!("multi-pack-index-{root_checksum_hex}.{ext}"));
399 hard_link_or_copy(&src, &dst)?;
400 }
401 }
402 Ok(())
403}
404
405fn clear_stale_split_layers(pack_dir: &Path, keep: &[String]) -> Result<()> {
406 let midx_d = midx_d_dir(pack_dir);
407 if !midx_d.exists() {
408 return Ok(());
409 }
410 let keep: HashSet<&str> = keep.iter().map(|s| s.as_str()).collect();
411 for ent in fs::read_dir(&midx_d).map_err(Error::Io)? {
412 let ent = ent.map_err(Error::Io)?;
413 let name = ent.file_name().to_string_lossy().to_string();
414 let Some(rest) = name.strip_prefix("multi-pack-index-") else {
415 continue;
416 };
417 let Some((hash_part, _ext)) = rest.split_once('.') else {
418 continue;
419 };
420 if hash_part.len() == 40 && !keep.contains(hash_part) {
421 let _ = fs::remove_file(ent.path());
422 }
423 }
424 Ok(())
425}
426
427fn clear_incremental_midx_files(pack_dir: &Path) -> Result<()> {
437 let midx_d = midx_d_dir(pack_dir);
438 let _ = fs::remove_file(chain_file_path(pack_dir));
440 if !midx_d.exists() {
441 return Ok(());
442 }
443 for ent in fs::read_dir(&midx_d).map_err(Error::Io)? {
444 let ent = ent.map_err(Error::Io)?;
445 let name = ent.file_name().to_string_lossy().to_string();
446 if name.starts_with("multi-pack-index-")
447 && (name.ends_with(".midx") || name.ends_with(".bitmap") || name.ends_with(".rev"))
448 {
449 let _ = fs::remove_file(ent.path());
450 }
451 }
452 Ok(())
453}
454
455fn pack_mtime_for_midx(idx: &PackIndex) -> std::time::SystemTime {
456 fs::metadata(&idx.pack_path)
457 .and_then(|m| m.modified())
458 .unwrap_or(std::time::SystemTime::UNIX_EPOCH)
459}
460
461fn midx_pick_better_entry(
462 cur: &MidxEntry,
463 cand_pack: u32,
464 cand_offset: u64,
465 cand_mtime: std::time::SystemTime,
466 preferred_pack: Option<u32>,
467) -> bool {
468 let cur_pref = preferred_pack == Some(cur.pack_id);
469 let new_pref = preferred_pack == Some(cand_pack);
470 if new_pref && !cur_pref {
471 return true;
472 }
473 if cur_pref && !new_pref {
474 return false;
475 }
476 match cand_mtime.cmp(&cur.pack_mtime) {
477 std::cmp::Ordering::Greater => true,
478 std::cmp::Ordering::Less => false,
479 std::cmp::Ordering::Equal => {
480 if cand_pack != cur.pack_id {
481 cand_pack < cur.pack_id
482 } else {
483 cand_offset < cur.offset
484 }
485 }
486 }
487}
488
489#[allow(clippy::too_many_arguments)]
494fn build_midx_bytes_filtered(
495 idx_names: &[String],
496 indexes: &[PackIndex],
497 preferred_idx: Option<usize>,
498 write_bitmap_placeholders: bool,
499 omit_embedded_ridx_chunk: bool,
500 version: u8,
501 hash_version: u8,
502 exclude_oids: Option<&HashSet<ObjectId>>,
503) -> Result<(Vec<u8>, Option<Vec<u32>>)> {
504 let preferred_pack_idx = preferred_idx.map(|p| p as u32);
505 let pack_mtimes: Vec<std::time::SystemTime> = indexes.iter().map(pack_mtime_for_midx).collect();
506
507 let mut best: HashMap<ObjectId, MidxEntry> = HashMap::new();
508 for (pack_id, idx) in indexes.iter().enumerate() {
509 let pack_id = u32::try_from(pack_id).map_err(|_| {
510 Error::CorruptObject("too many pack files for multi-pack-index".to_owned())
511 })?;
512 let mtime = pack_mtimes[pack_id as usize];
513 for e in &idx.entries {
514 if e.oid.len() != 20 {
515 continue;
516 }
517 let Ok(oid) = ObjectId::from_bytes(&e.oid) else {
518 continue;
519 };
520 if let Some(ex) = exclude_oids {
521 if ex.contains(&oid) {
522 continue;
523 }
524 }
525 let cand = MidxEntry {
526 oid,
527 pack_id,
528 offset: e.offset,
529 pack_mtime: mtime,
530 };
531 match best.get(&oid) {
532 None => {
533 best.insert(oid, cand);
534 }
535 Some(cur) => {
536 if midx_pick_better_entry(cur, pack_id, e.offset, mtime, preferred_pack_idx) {
537 best.insert(oid, cand);
538 }
539 }
540 }
541 }
542 }
543
544 let mut entries: Vec<MidxEntry> = best.into_values().collect();
545 entries.sort_by_key(|a| a.oid);
546
547 let large_offsets_needed = entries.iter().any(|e| e.offset > u64::from(u32::MAX));
555
556 let num_packs = indexes.len() as u32;
557
558 let mut pack_names_blob = Vec::new();
559 for name in idx_names {
560 pack_names_blob.extend_from_slice(name.as_bytes());
561 pack_names_blob.push(0);
562 }
563 let pad = (MIDX_CHUNK_ALIGNMENT - (pack_names_blob.len() % MIDX_CHUNK_ALIGNMENT))
564 % MIDX_CHUNK_ALIGNMENT;
565 pack_names_blob.extend(std::iter::repeat_n(0u8, pad));
566 let chunk_pnam = pack_names_blob;
567
568 let mut chunk_oidf = vec![0u8; 256 * 4];
569 let mut j = 0usize;
570 for i in 0..256 {
571 while j < entries.len() && entries[j].oid.as_bytes()[0] <= i as u8 {
572 j += 1;
573 }
574 chunk_oidf[i * 4..(i + 1) * 4].copy_from_slice(&(j as u32).to_be_bytes());
575 }
576
577 let mut chunk_oidl = Vec::with_capacity(entries.len() * 20);
578 for e in &entries {
579 chunk_oidl.extend_from_slice(e.oid.as_bytes());
580 }
581
582 let mut large_offsets: Vec<u64> = Vec::new();
583 let mut chunk_ooff = Vec::with_capacity(entries.len() * 8);
584 for e in &entries {
585 chunk_ooff.extend_from_slice(&e.pack_id.to_be_bytes());
586 let encoded = if large_offsets_needed && e.offset >> 31 != 0 {
587 let slot = u32::try_from(large_offsets.len()).map_err(|_| {
588 Error::CorruptObject("too many large offsets in multi-pack-index".to_owned())
589 })?;
590 large_offsets.push(e.offset);
591 MIDX_LARGE_OFFSET_NEEDED | slot
592 } else {
593 e.offset as u32
597 };
598 chunk_ooff.extend_from_slice(&encoded.to_be_bytes());
599 }
600
601 let chunk_loff: Vec<u8> = if large_offsets.is_empty() {
602 Vec::new()
603 } else {
604 let mut v = Vec::with_capacity(large_offsets.len() * 8);
605 for off in &large_offsets {
606 v.extend_from_slice(&off.to_be_bytes());
607 }
608 v
609 };
610
611 let pref = preferred_pack_idx;
612 let mut order: Vec<u32> = (0..entries.len() as u32).collect();
613 order.sort_by(|&ai, &bi| {
614 let a = &entries[ai as usize];
615 let b = &entries[bi as usize];
616 let a_pref = pref == Some(a.pack_id);
617 let b_pref = pref == Some(b.pack_id);
618 b_pref
619 .cmp(&a_pref)
620 .then_with(|| a.pack_id.cmp(&b.pack_id))
621 .then_with(|| a.offset.cmp(&b.offset))
622 .then_with(|| ai.cmp(&bi))
623 });
624
625 let mut chunk_ridx = Vec::with_capacity(entries.len() * 4);
626 for oid_idx in &order {
627 chunk_ridx.extend_from_slice(&oid_idx.to_be_bytes());
628 }
629
630 let rev_sidecar_order = if omit_embedded_ridx_chunk && write_bitmap_placeholders {
633 Some(order.clone())
634 } else {
635 None
636 };
637 let chunk_btmp: Vec<u8> = if write_bitmap_placeholders {
638 let num_packs_usize = indexes.len();
643 let mut bitmap_pos = vec![u32::MAX; num_packs_usize];
644 let mut bitmap_nr = vec![0u32; num_packs_usize];
645 for (rank, &oid_idx) in order.iter().enumerate() {
646 let pack = entries[oid_idx as usize].pack_id as usize;
647 if let Some(p) = bitmap_pos.get_mut(pack) {
648 if *p == u32::MAX {
649 *p = rank as u32;
650 }
651 }
652 if let Some(n) = bitmap_nr.get_mut(pack) {
653 *n += 1;
654 }
655 }
656 let mut v = Vec::new();
657 for pack in 0..num_packs_usize {
658 let pos = if bitmap_pos[pack] == u32::MAX {
659 0
660 } else {
661 bitmap_pos[pack]
662 };
663 v.extend_from_slice(&pos.to_be_bytes());
664 v.extend_from_slice(&bitmap_nr[pack].to_be_bytes());
665 }
666 let pad = (MIDX_CHUNK_ALIGNMENT - (v.len() % MIDX_CHUNK_ALIGNMENT)) % MIDX_CHUNK_ALIGNMENT;
667 v.extend(std::iter::repeat_n(0u8, pad));
668 v
669 } else {
670 Vec::new()
671 };
672
673 let mut chunks: Vec<(u32, Vec<u8>)> = vec![
674 (MIDX_CHUNKID_PACKNAMES, chunk_pnam),
675 (MIDX_CHUNKID_OIDFANOUT, chunk_oidf),
676 (MIDX_CHUNKID_OIDLOOKUP, chunk_oidl),
677 (MIDX_CHUNKID_OBJECTOFFSETS, chunk_ooff),
678 ];
679 if !chunk_loff.is_empty() {
680 chunks.push((MIDX_CHUNKID_LARGEOFFSETS, chunk_loff));
681 }
682 if (pref.is_some() || write_bitmap_placeholders) && !omit_embedded_ridx_chunk {
683 chunks.push((MIDX_CHUNKID_REVINDEX, chunk_ridx));
684 }
685 if write_bitmap_placeholders {
686 chunks.push((MIDX_CHUNKID_BITMAPPED_PACKS, chunk_btmp));
687 }
688
689 let num_chunks: u8 = chunks
690 .len()
691 .try_into()
692 .map_err(|_| Error::CorruptObject("too many MIDX chunks".to_owned()))?;
693
694 let mut body = Vec::new();
695 let mut cur_offset =
696 MIDX_HEADER_SIZE as u64 + ((chunks.len() + 1) * CHUNK_TOC_ENTRY_SIZE) as u64;
697
698 for (id, data) in &chunks {
699 body.extend_from_slice(&id.to_be_bytes());
700 body.extend_from_slice(&cur_offset.to_be_bytes());
701 cur_offset += data.len() as u64;
702 }
703 body.extend_from_slice(&0u32.to_be_bytes());
704 body.extend_from_slice(&cur_offset.to_be_bytes());
705
706 for (_, data) in &chunks {
707 body.extend_from_slice(data);
708 }
709
710 let mut out = Vec::with_capacity(MIDX_HEADER_SIZE + body.len() + 20);
711 out.extend_from_slice(&MIDX_SIGNATURE.to_be_bytes());
712 out.push(if version == MIDX_VERSION_V1 {
713 MIDX_VERSION_V1
714 } else {
715 MIDX_VERSION_V2
716 });
717 out.push(hash_version);
718 out.push(num_chunks);
719 out.push(0);
720 out.extend_from_slice(&num_packs.to_be_bytes());
721 out.extend_from_slice(&body);
722
723 let mut hasher = Sha1::new();
724 hasher.update(&out);
725 let hash = hasher.finalize();
726 out.extend_from_slice(&hash);
727
728 Ok((out, rev_sidecar_order))
729}
730
731fn write_midx_rev_sidecar(
733 path: &Path,
734 pack_order: &[u32],
735 midx_file_hash: &[u8; 20],
736) -> Result<()> {
737 let mut body = Vec::with_capacity(RIDX_HEADER_SIZE + pack_order.len() * 4 + 20);
738 body.extend_from_slice(&RIDX_SIGNATURE.to_be_bytes());
739 body.extend_from_slice(&RIDX_VERSION.to_be_bytes());
740 body.extend_from_slice(&1u32.to_be_bytes());
741 for idx in pack_order {
742 body.extend_from_slice(&idx.to_be_bytes());
743 }
744 body.extend_from_slice(midx_file_hash);
745 fs::write(path, body).map_err(Error::Io)
746}
747
748fn find_chunk(data: &[u8], header_end: usize, chunk_id: u32) -> Result<(usize, usize)> {
749 let (hdr, _, _) = parse_midx_header(data)?;
750 let n = hdr.num_chunks as usize;
751 let pos = header_end;
752 let toc_end = pos + (n + 1) * CHUNK_TOC_ENTRY_SIZE;
753 if data.len() < toc_end + 20 {
754 return Err(Error::CorruptObject(
755 "truncated MIDX chunk table".to_owned(),
756 ));
757 }
758 for i in 0..n {
759 let base = pos + i * CHUNK_TOC_ENTRY_SIZE;
760 let id = read_be_u32(data, base)?;
761 let off = read_be_u64(data, base + 4)? as usize;
762 if id == chunk_id {
763 let next_off = if i + 1 < n {
764 let nb = pos + (i + 1) * CHUNK_TOC_ENTRY_SIZE;
765 read_be_u64(data, nb + 4)? as usize
766 } else {
767 let term = pos + n * CHUNK_TOC_ENTRY_SIZE;
768 read_be_u64(data, term + 4)? as usize
769 };
770 return Ok((off, next_off.saturating_sub(off)));
771 }
772 }
773 Err(Error::CorruptObject(format!(
774 "MIDX chunk {chunk_id:08x} not found"
775 )))
776}
777
/// Error raised while loading a multi-pack-index; wraps the message text.
#[derive(Debug, Clone)]
pub struct MidxLoadError(pub String);

impl std::fmt::Display for MidxLoadError {
    /// Writes the wrapped message verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(message) = self;
        f.write_str(message)
    }
}
789
/// One validated row of the chunk table of contents.
struct TocEntry {
    /// Four-byte chunk id, read big-endian.
    id: u32,
    /// Offset of the chunk's first byte from the start of the file.
    offset: usize,
}
795
796fn parse_midx_toc(
800 data: &[u8],
801 hash_len: usize,
802 errors: &mut Vec<String>,
803) -> std::result::Result<Vec<TocEntry>, MidxLoadError> {
804 if data.len() < MIDX_HEADER_SIZE + hash_len {
805 return Err(MidxLoadError("multi-pack-index file too small".to_owned()));
806 }
807 let num_chunks = data[6] as usize;
808 let toc_off = MIDX_HEADER_SIZE;
809 let needed = toc_off + (num_chunks + 1) * CHUNK_TOC_ENTRY_SIZE;
810 if data.len() < needed {
811 return Err(MidxLoadError(
812 "multi-pack-index chunk table is truncated".to_owned(),
813 ));
814 }
815 let file_size = data.len();
816 let mut chunks: Vec<TocEntry> = Vec::with_capacity(num_chunks);
817
818 let read_be64 = |off: usize| -> u64 {
819 let mut b = [0u8; 8];
820 b.copy_from_slice(&data[off..off + 8]);
821 u64::from_be_bytes(b)
822 };
823 let read_be32 = |off: usize| -> u32 {
824 let mut b = [0u8; 4];
825 b.copy_from_slice(&data[off..off + 4]);
826 u32::from_be_bytes(b)
827 };
828
829 for i in 0..num_chunks {
830 let entry = toc_off + i * CHUNK_TOC_ENTRY_SIZE;
831 let chunk_id = read_be32(entry);
832 let chunk_offset = read_be64(entry + 4);
833
834 if chunk_id == 0 {
835 errors.push("terminating chunk id appears earlier than expected".to_owned());
836 return Err(MidxLoadError(
837 "multi-pack-index required pack-name chunk missing or corrupted".to_owned(),
838 ));
839 }
840 if !(chunk_offset as usize).is_multiple_of(MIDX_CHUNK_ALIGNMENT) {
841 errors.push(format!(
842 "chunk id {chunk_id:x} not {MIDX_CHUNK_ALIGNMENT}-byte aligned"
843 ));
844 return Err(MidxLoadError(
845 "multi-pack-index required pack-name chunk missing or corrupted".to_owned(),
846 ));
847 }
848
849 let next_entry = toc_off + (i + 1) * CHUNK_TOC_ENTRY_SIZE;
850 let next_chunk_offset = read_be64(next_entry + 4);
851
852 if next_chunk_offset < chunk_offset
853 || next_chunk_offset > (file_size as u64).saturating_sub(hash_len as u64)
854 {
855 errors.push(format!(
856 "improper chunk offset(s) {chunk_offset:x} and {next_chunk_offset:x}"
857 ));
858 return Err(MidxLoadError(
859 "multi-pack-index required pack-name chunk missing or corrupted".to_owned(),
860 ));
861 }
862
863 if chunks.iter().any(|c| c.id == chunk_id) {
864 errors.push(format!("duplicate chunk ID {chunk_id:x} found"));
865 return Err(MidxLoadError(
866 "multi-pack-index required pack-name chunk missing or corrupted".to_owned(),
867 ));
868 }
869
870 chunks.push(TocEntry {
871 id: chunk_id,
872 offset: chunk_offset as usize,
873 });
874 }
875
876 let term_entry = toc_off + num_chunks * CHUNK_TOC_ENTRY_SIZE;
878 let final_id = read_be32(term_entry);
879 if final_id != 0 {
880 errors.push(format!("final chunk has non-zero id {final_id:x}"));
881 return Err(MidxLoadError(
882 "multi-pack-index required pack-name chunk missing or corrupted".to_owned(),
883 ));
884 }
885
886 Ok(chunks)
887}
888
889fn toc_chunk_range(chunks: &[TocEntry], data_len: usize, id: u32) -> Option<(usize, usize)> {
891 for (i, c) in chunks.iter().enumerate() {
892 if c.id == id {
893 let next = if i + 1 < chunks.len() {
894 chunks[i + 1].offset
895 } else {
896 data_len.saturating_sub(20)
897 };
898 return Some((c.offset, next.saturating_sub(c.offset)));
899 }
900 }
901 None
902}
903
904pub fn verify_midx(objects_dir: &Path) -> std::result::Result<(), Vec<String>> {
911 let pack_dir = objects_dir.join("pack");
912 let path = match resolve_tip_midx_path(&pack_dir) {
913 Some(p) => p,
914 None => return Ok(()),
915 };
916 let data = match fs::read(&path) {
917 Ok(d) => d,
918 Err(_) => return Ok(()),
919 };
920
921 let mut fatal: Vec<String> = Vec::new();
922 let mut errors: Vec<String> = Vec::new();
923
924 if data.len() < MIDX_HEADER_SIZE + 20 {
926 return Err(vec!["multi-pack-index file is too small".to_owned()]);
927 }
928 let sig = u32::from_be_bytes([data[0], data[1], data[2], data[3]]);
929 if sig != MIDX_SIGNATURE {
930 return Err(vec![format!(
931 "multi-pack-index signature 0x{sig:08x} does not match signature 0x{MIDX_SIGNATURE:08x}"
932 )]);
933 }
934 let version = data[4];
935 if version != MIDX_VERSION_V1 && version != MIDX_VERSION_V2 {
936 return Err(vec![format!(
937 "multi-pack-index version {version} not recognized"
938 )]);
939 }
940 let hash_version = data[5];
941 let expected_hash_version = repo_midx_hash_version_for_objects_dir(objects_dir);
942 if hash_version != expected_hash_version {
943 return Err(vec![format!(
944 "multi-pack-index hash version {hash_version} does not match version {expected_hash_version}"
945 )]);
946 }
947 let hash_len = 20usize;
948 let num_packs = u32::from_be_bytes([data[8], data[9], data[10], data[11]]) as usize;
949
950 let chunks = match parse_midx_toc(&data, hash_len, &mut errors) {
952 Ok(c) => c,
953 Err(e) => {
954 errors.push(e.0);
955 return Err(errors);
956 }
957 };
958
959 let Some((pn_off, pn_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_PACKNAMES)
961 else {
962 errors.push("multi-pack-index required pack-name chunk missing or corrupted".to_owned());
963 return Err(errors);
964 };
965
966 let Some((fan_off, fan_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OIDFANOUT)
968 else {
969 errors.push("multi-pack-index required OID fanout chunk missing or corrupted".to_owned());
970 return Err(errors);
971 };
972 if fan_len != 256 * 4 {
973 errors.push("multi-pack-index OID fanout is of the wrong size".to_owned());
974 errors.push("multi-pack-index required OID fanout chunk missing or corrupted".to_owned());
975 return Err(errors);
976 }
977 let fanout = |i: usize| -> u32 {
978 let b = fan_off + i * 4;
979 u32::from_be_bytes([data[b], data[b + 1], data[b + 2], data[b + 3]])
980 };
981 for i in 0..255 {
982 let f1 = fanout(i);
983 let f2 = fanout(i + 1);
984 if f1 > f2 {
985 errors.push(format!(
986 "oid fanout out of order: fanout[{i}] = {f1:x} > {f2:x} = fanout[{}]",
987 i + 1
988 ));
989 errors
990 .push("multi-pack-index required OID fanout chunk missing or corrupted".to_owned());
991 return Err(errors);
992 }
993 }
994 let num_objects = fanout(255) as usize;
995
996 let Some((oidl_off, oidl_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OIDLOOKUP)
998 else {
999 errors.push("multi-pack-index required OID lookup chunk missing or corrupted".to_owned());
1000 return Err(errors);
1001 };
1002 if oidl_len != hash_len * num_objects {
1003 errors.push("multi-pack-index OID lookup chunk is the wrong size".to_owned());
1004 errors.push("multi-pack-index required OID lookup chunk missing or corrupted".to_owned());
1005 return Err(errors);
1006 }
1007
1008 let Some((ooff_off, ooff_len)) =
1010 toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OBJECTOFFSETS)
1011 else {
1012 errors
1013 .push("multi-pack-index required object offsets chunk missing or corrupted".to_owned());
1014 return Err(errors);
1015 };
1016 if ooff_len != num_objects * 8 {
1017 errors.push("multi-pack-index object offset chunk is the wrong size".to_owned());
1018 errors
1019 .push("multi-pack-index required object offsets chunk missing or corrupted".to_owned());
1020 return Err(errors);
1021 }
1022
1023 let large_off = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_LARGEOFFSETS);
1024
1025 let names = match parse_pack_names_blob(&data[pn_off..pn_off + pn_len]) {
1027 Ok(n) => n,
1028 Err(_) => {
1029 errors.push("multi-pack-index pack-name chunk is too short".to_owned());
1030 return Err(errors);
1031 }
1032 };
1033 if version == MIDX_VERSION_V1 {
1034 for i in 1..names.len() {
1035 if names[i] <= names[i - 1] {
1036 fatal.push(format!(
1037 "multi-pack-index pack names out of order: '{}' before '{}'",
1038 names[i - 1],
1039 names[i]
1040 ));
1041 errors.extend(fatal);
1043 return Err(errors);
1044 }
1045 }
1046 }
1047
1048 if !midx_checksum_is_valid(&data) {
1050 errors.push("incorrect checksum".to_owned());
1051 }
1052
1053 let mut pack_indexes: Vec<Option<PackIndex>> = Vec::with_capacity(num_packs);
1055 for i in 0..num_packs {
1056 let loaded = match names.get(i) {
1062 Some(name) => read_pack_index_no_verify(&pack_dir.join(name)).ok(),
1063 None => None,
1064 };
1065 if loaded.is_none() {
1066 errors.push(format!("failed to load pack in position {i}"));
1067 }
1068 pack_indexes.push(loaded);
1069 }
1070
1071 if num_objects == 0 {
1072 errors.push("the midx contains no oid".to_owned());
1073 if errors.is_empty() {
1074 return Ok(());
1075 }
1076 return Err(errors);
1077 }
1078
1079 let oid_at =
1081 |i: usize| -> &[u8] { &data[oidl_off + i * hash_len..oidl_off + (i + 1) * hash_len] };
1082 for i in 0..num_objects.saturating_sub(1) {
1083 let a = oid_at(i);
1084 let b = oid_at(i + 1);
1085 if a >= b {
1086 errors.push(format!(
1087 "oid lookup out of order: oid[{i}] = {} >= {} = oid[{}]",
1088 hex::encode(a),
1089 hex::encode(b),
1090 i + 1
1091 ));
1092 }
1093 }
1094
1095 for i in 0..num_objects {
1097 let ob = ooff_off + i * 8;
1098 let pack_int_id = u32::from_be_bytes([data[ob], data[ob + 1], data[ob + 2], data[ob + 3]]);
1099 let off_raw = u32::from_be_bytes([data[ob + 4], data[ob + 5], data[ob + 6], data[ob + 7]]);
1100 let oid_hex = hex::encode(oid_at(i));
1101
1102 if pack_int_id as usize >= num_packs {
1103 errors.push(format!(
1104 "bad pack-int-id: {pack_int_id} ({num_packs} total packs)"
1105 ));
1106 errors.push(format!(
1107 "failed to load pack entry for oid[{i}] = {oid_hex}"
1108 ));
1109 continue;
1110 }
1111
1112 let m_offset: u64 = if off_raw & MIDX_LARGE_OFFSET_NEEDED != 0 {
1114 let slot = (off_raw & !MIDX_LARGE_OFFSET_NEEDED) as usize;
1115 match large_off {
1116 Some((lo_off, lo_len)) if (slot + 1) * 8 <= lo_len => {
1117 let b = lo_off + slot * 8;
1118 let mut arr = [0u8; 8];
1119 arr.copy_from_slice(&data[b..b + 8]);
1120 u64::from_be_bytes(arr)
1121 }
1122 _ => {
1123 errors.push("multi-pack-index large offset out of bounds".to_owned());
1124 continue;
1125 }
1126 }
1127 } else {
1128 u64::from(off_raw)
1129 };
1130
1131 let Some(Some(idx)) = pack_indexes.get(pack_int_id as usize) else {
1132 errors.push(format!(
1133 "failed to load pack entry for oid[{i}] = {oid_hex}"
1134 ));
1135 continue;
1136 };
1137 let Ok(oid) = ObjectId::from_bytes(oid_at(i)) else {
1138 errors.push(format!(
1139 "failed to load pack entry for oid[{i}] = {oid_hex}"
1140 ));
1141 continue;
1142 };
1143 match idx.find_offset(&oid) {
1144 Some(p_offset) => {
1145 if m_offset != p_offset {
1146 errors.push(format!(
1147 "incorrect object offset for oid[{i}] = {oid_hex}: {m_offset:x} != {p_offset:x}"
1148 ));
1149 }
1150 }
1151 None => {
1152 errors.push(format!(
1153 "failed to load pack entry for oid[{i}] = {oid_hex}"
1154 ));
1155 }
1156 }
1157 }
1158
1159 if errors.is_empty() {
1160 Ok(())
1161 } else {
1162 Err(errors)
1163 }
1164}
1165
1166fn midx_checksum_is_valid(data: &[u8]) -> bool {
1168 if data.len() < 20 {
1169 return false;
1170 }
1171 let body = &data[..data.len() - 20];
1172 let stored = &data[data.len() - 20..];
1173 let mut hasher = Sha1::new();
1174 hasher.update(body);
1175 let got = hasher.finalize();
1176 got.as_slice() == stored
1177}
1178
1179pub fn read_midx_pack_idx_names(objects_dir: &Path) -> Result<Vec<String>> {
1186 let pack_dir = objects_dir.join("pack");
1187 let path = resolve_tip_midx_path(&pack_dir)
1188 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
1189 let data = fs::read(&path).map_err(Error::Io)?;
1190 let (_, hdr_end, _) = parse_midx_header(&data)?;
1191 let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1192 parse_pack_names_blob(&data[pn_off..pn_off + pn_len])
1193}
1194
/// One object entry as produced by [`read_midx_objects`].
pub struct MidxObjectRef {
    /// Object id decoded from the 20-byte OID-lookup entry.
    pub oid: ObjectId,
    /// Pack id stored in the first four bytes of the matching object-offsets entry.
    ///
    /// `read_midx_objects` does not range-check this value against the pack-name list it
    /// returns alongside; callers presumably use it as an index into that list.
    pub pack_int_id: usize,
}
1201
1202pub fn read_midx_objects(objects_dir: &Path) -> Result<(Vec<String>, Vec<MidxObjectRef>)> {
1206 let pack_dir = objects_dir.join("pack");
1207 let path = resolve_tip_midx_path(&pack_dir)
1208 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
1209 let data = fs::read(&path).map_err(Error::Io)?;
1210 let (_, hdr_end, _) = parse_midx_header(&data)?;
1211 let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1212 let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
1213 let (oidl_off, oidl_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
1214 let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1215 if oidl_len % 20 != 0 || ooff_len % 8 != 0 {
1216 return Err(Error::CorruptObject(
1217 "bad MIDX oid-lookup / object-offsets size".to_owned(),
1218 ));
1219 }
1220 let num = oidl_len / 20;
1221 if num * 8 != ooff_len {
1222 return Err(Error::CorruptObject(
1223 "MIDX oid count does not match object-offsets".to_owned(),
1224 ));
1225 }
1226 let mut objects = Vec::with_capacity(num);
1227 for i in 0..num {
1228 let oid = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])
1229 .map_err(|e| Error::CorruptObject(e.to_string()))?;
1230 let base = ooff_off + i * 8;
1231 let pack_id = read_be_u32(&data, base)? as usize;
1232 objects.push(MidxObjectRef {
1233 oid,
1234 pack_int_id: pack_id,
1235 });
1236 }
1237 Ok((names, objects))
1238}
1239
1240pub fn midx_checksum_hex(objects_dir: &Path) -> Result<String> {
1242 let pack_dir = objects_dir.join("pack");
1243 let path = resolve_tip_midx_path(&pack_dir)
1244 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
1245 midx_checksum_hex_from_path(&path)
1246}
1247
1248fn resolve_read_midx_path(pack_dir: &Path, checksum: Option<&str>) -> Result<std::path::PathBuf> {
1253 match checksum {
1254 Some(cs) => resolve_midx_layer_path(pack_dir, cs)
1255 .ok_or_else(|| Error::CorruptObject(format!("could not find MIDX with checksum {cs}"))),
1256 None => resolve_tip_midx_path(pack_dir)
1257 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned())),
1258 }
1259}
1260
1261pub fn format_midx_show_objects(objects_dir: &Path) -> Result<String> {
1264 format_midx_show_objects_layer(objects_dir, None)
1265}
1266
1267pub fn format_midx_show_objects_layer(
1269 objects_dir: &Path,
1270 checksum: Option<&str>,
1271) -> Result<String> {
1272 let mut out = format_midx_dump_layer(objects_dir, checksum)?;
1273 let pack_dir = objects_dir.join("pack");
1274 let path = resolve_read_midx_path(&pack_dir, checksum)?;
1275 let data = fs::read(&path).map_err(Error::Io)?;
1276 let (_, hdr_end, _) = parse_midx_header(&data)?;
1277 let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1278 let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
1279 let (oidl_off, oidl_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
1280 let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1281 if oidl_len % 20 != 0 || ooff_len % 8 != 0 {
1282 return Err(Error::CorruptObject(
1283 "bad MIDX oid-lookup / object-offsets size".to_owned(),
1284 ));
1285 }
1286 let num = oidl_len / 20;
1287 if num * 8 != ooff_len {
1288 return Err(Error::CorruptObject(
1289 "MIDX oid count does not match object-offsets".to_owned(),
1290 ));
1291 }
1292 for i in 0..num {
1293 let oid = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])
1294 .map_err(|e| Error::CorruptObject(e.to_string()))?;
1295 let base = ooff_off + i * 8;
1296 let pack_id = read_be_u32(&data, base)? as usize;
1297 let offset = u64::from(read_be_u32(&data, base + 4)?);
1298 let idx_name = names
1299 .get(pack_id)
1300 .ok_or_else(|| Error::CorruptObject("pack id out of range in MIDX".to_owned()))?;
1301 let stem = idx_name.strip_suffix(".idx").unwrap_or(idx_name);
1305 let dir_disp = objects_dir.display().to_string();
1306 let dir_disp = if objects_dir.is_absolute() || dir_disp.starts_with("./") {
1307 dir_disp
1308 } else {
1309 format!("./{dir_disp}")
1310 };
1311 out.push_str(&format!(
1312 "{} {}\t{}/pack/{}.pack\n",
1313 oid.to_hex(),
1314 offset,
1315 dir_disp,
1316 stem
1317 ));
1318 }
1319 Ok(out)
1320}
1321
1322pub fn format_midx_dump(objects_dir: &Path) -> Result<String> {
1323 format_midx_dump_layer(objects_dir, None)
1324}
1325
1326pub fn format_midx_dump_layer(objects_dir: &Path, checksum: Option<&str>) -> Result<String> {
1329 let pack_dir = objects_dir.join("pack");
1330 let path = resolve_read_midx_path(&pack_dir, checksum)?;
1331 let data = fs::read(&path).map_err(Error::Io)?;
1332 let (hdr, hdr_end, _) = parse_midx_header(&data)?;
1333 let sig = read_be_u32(&data, 0)?;
1334 let version = data[4];
1335 let hash_len: u8 = match data[5] {
1338 1 => 20,
1339 2 => 32,
1340 other => other,
1341 };
1342 let num_chunks = hdr.num_chunks;
1343 let num_packs = read_be_u32(&data, 8)?;
1344
1345 let mut chunk_tags: Vec<&'static str> = Vec::new();
1346 let n = num_chunks as usize;
1347 let pos = hdr_end;
1348 let toc_end = pos + (n + 1) * CHUNK_TOC_ENTRY_SIZE;
1349 if data.len() < toc_end + 20 {
1350 return Err(Error::CorruptObject(
1351 "truncated MIDX chunk table".to_owned(),
1352 ));
1353 }
1354 for i in 0..n {
1355 let base = pos + i * CHUNK_TOC_ENTRY_SIZE;
1356 let id = read_be_u32(&data, base)?;
1357 let tag = match id {
1358 x if x == MIDX_CHUNKID_PACKNAMES => "pack-names",
1359 x if x == MIDX_CHUNKID_OIDFANOUT => "oid-fanout",
1360 x if x == MIDX_CHUNKID_OIDLOOKUP => "oid-lookup",
1361 x if x == MIDX_CHUNKID_OBJECTOFFSETS => "object-offsets",
1362 x if x == MIDX_CHUNKID_LARGEOFFSETS => "large-offsets",
1363 x if x == MIDX_CHUNKID_REVINDEX => "revindex",
1364 x if x == 0x4254_4d50 => "bitmapped-packs",
1365 _ => "unknown",
1366 };
1367 chunk_tags.push(tag);
1368 }
1369
1370 let (_ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1371 let num_objects = ooff_len / 8;
1372
1373 let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1374 let pack_names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
1375
1376 let mut out = String::new();
1377 out.push_str(&format!(
1378 "header: {:08x} {} {} {} {}\n",
1379 sig, version, hash_len, num_chunks, num_packs
1380 ));
1381 out.push_str("chunks:");
1382 for t in &chunk_tags {
1383 out.push(' ');
1384 out.push_str(t);
1385 }
1386 out.push('\n');
1387 out.push_str(&format!("num_objects: {num_objects}\n"));
1388 out.push_str("packs:\n");
1389 for n in &pack_names {
1390 out.push_str(n);
1391 out.push('\n');
1392 }
1393 out.push_str(&format!("object-dir: {}\n", objects_dir.display()));
1394 Ok(out)
1395}
1396
/// In-memory tables built by [`load_midx_reuse_tables`] from one multi-pack-index file.
///
/// All four vectors have one element per object in the file.
#[derive(Debug, Clone)]
pub struct MidxReuseTables {
    /// Object ids in the order they appear in the OID-lookup chunk.
    pub oids: Vec<ObjectId>,
    /// `(pack id, offset)` taken from the object-offsets entry at the same index as `oids`.
    pub pack_and_offset: Vec<(u32, u64)>,
    /// Entries of the reverse-index chunk in file order; each value is an index into `oids`.
    pub rid_order: Vec<u32>,
    /// Inverse of `rid_order`: for each index into `oids`, its position within `rid_order`.
    pub oid_idx_to_rank: Vec<u32>,
}
1413
1414pub fn load_midx_reuse_tables(objects_dir: &Path) -> Result<Option<MidxReuseTables>> {
1418 let pack_dir = objects_dir.join("pack");
1419 let Some(path) = resolve_tip_midx_path(&pack_dir) else {
1420 return Ok(None);
1421 };
1422 let data = fs::read(&path).map_err(Error::Io)?;
1423 let (_, hdr_end, _) = parse_midx_header(&data)?;
1424 let (oidl_off, oid_l_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
1425 let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1426 let Ok((ridx_off, ridx_len)) = find_chunk(&data, hdr_end, MIDX_CHUNKID_REVINDEX) else {
1427 return Ok(None);
1428 };
1429 if oid_l_len % 20 != 0 || ooff_len != oid_l_len / 20 * 8 {
1430 return Err(Error::CorruptObject(
1431 "MIDX OID / offset chunk size mismatch".to_owned(),
1432 ));
1433 }
1434 let num_objects = oid_l_len / 20;
1435 if ridx_len != num_objects.saturating_mul(4) {
1436 return Err(Error::CorruptObject(
1437 "MIDX reverse index length does not match object count".to_owned(),
1438 ));
1439 }
1440 if num_objects == 0 {
1441 return Ok(None);
1442 }
1443
1444 let mut oids = Vec::with_capacity(num_objects);
1445 for i in 0..num_objects {
1446 let base = oidl_off + i * 20;
1447 oids.push(ObjectId::from_bytes(&data[base..base + 20])?);
1448 }
1449
1450 let mut pack_and_offset = Vec::with_capacity(num_objects);
1451 for i in 0..num_objects {
1452 let ob = ooff_off + i * 8;
1453 let pack_id = read_be_u32(&data, ob)?;
1454 let off32 = read_be_u32(&data, ob + 4)?;
1455 pack_and_offset.push((pack_id, u64::from(off32)));
1456 }
1457
1458 let mut rid_order = Vec::with_capacity(num_objects);
1459 for i in 0..num_objects {
1460 let base = ridx_off + i * 4;
1461 rid_order.push(read_be_u32(&data, base)?);
1462 }
1463
1464 let mut oid_idx_to_rank = vec![0u32; num_objects];
1465 for (rank, &oid_idx) in rid_order.iter().enumerate() {
1466 let idx = usize::try_from(oid_idx)
1467 .map_err(|_| Error::CorruptObject("bad MIDX reverse index entry".to_owned()))?;
1468 if idx >= num_objects {
1469 return Err(Error::CorruptObject(
1470 "MIDX reverse index out of range".to_owned(),
1471 ));
1472 }
1473 oid_idx_to_rank[idx] = u32::try_from(rank)
1474 .map_err(|_| Error::CorruptObject("too many MIDX objects".to_owned()))?;
1475 }
1476
1477 Ok(Some(MidxReuseTables {
1478 oids,
1479 pack_and_offset,
1480 rid_order,
1481 oid_idx_to_rank,
1482 }))
1483}
1484
1485impl MidxReuseTables {
1486 #[must_use]
1488 pub fn global_bitmap_bit(&self, oid: &ObjectId) -> Option<u32> {
1489 let oid_idx = self.oids.binary_search(oid).ok()?;
1490 Some(self.oid_idx_to_rank[oid_idx])
1491 }
1492
1493 #[must_use]
1498 pub fn canonical_pack(&self, oid: &ObjectId) -> Option<u32> {
1499 let oid_idx = self.oids.binary_search(oid).ok()?;
1500 Some(self.pack_and_offset[oid_idx].0)
1501 }
1502}
1503
/// One 8-byte entry of the MIDX bitmapped-packs chunk, as decoded by [`read_midx_btmp_ranges`].
#[derive(Debug, Clone, Copy)]
pub struct MidxBtmpPackRange {
    /// Zero-based position of this entry within the chunk.
    pub pack_id: u32,
    /// First big-endian `u32` of the entry.
    pub bitmap_pos: u32,
    /// Second big-endian `u32` of the entry.
    pub bitmap_nr: u32,
}
1514
1515pub fn read_midx_btmp_ranges(objects_dir: &Path) -> Result<Vec<MidxBtmpPackRange>> {
1519 let pack_dir = objects_dir.join("pack");
1520 let Some(path) = resolve_tip_midx_path(&pack_dir) else {
1521 return Ok(Vec::new());
1522 };
1523 let data = fs::read(&path).map_err(Error::Io)?;
1524 let (_, hdr_end, _) = parse_midx_header(&data)?;
1525 let Ok((btmp_off, btmp_len)) = find_chunk(&data, hdr_end, MIDX_CHUNKID_BITMAPPED_PACKS) else {
1526 return Ok(Vec::new());
1527 };
1528 if btmp_len == 0 || btmp_len % 8 != 0 {
1529 return Err(Error::CorruptObject(
1530 "invalid MIDX BTMP chunk length".to_owned(),
1531 ));
1532 }
1533 let num_packs = read_be_u32(&data, 8)?;
1534 let n_entries = btmp_len / 8;
1535 if u32::try_from(n_entries).ok() != Some(num_packs) {
1536 return Err(Error::CorruptObject(
1537 "MIDX BTMP entry count does not match num_packs".to_owned(),
1538 ));
1539 }
1540 let mut out = Vec::with_capacity(n_entries);
1541 for i in 0..n_entries {
1542 let base = btmp_off + i * 8;
1543 let bitmap_pos = read_be_u32(&data, base)?;
1544 let bitmap_nr = read_be_u32(&data, base + 4)?;
1545 out.push(MidxBtmpPackRange {
1546 pack_id: u32::try_from(i)
1547 .map_err(|_| Error::CorruptObject("too many packs in MIDX BTMP".to_owned()))?,
1548 bitmap_pos,
1549 bitmap_nr,
1550 });
1551 }
1552 Ok(out)
1553}
1554
1555pub fn format_midx_bitmapped_packs(objects_dir: &Path) -> Result<String> {
1560 let pack_dir = objects_dir.join("pack");
1561 let path = resolve_tip_midx_path(&pack_dir)
1562 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
1563 let data = fs::read(&path).map_err(Error::Io)?;
1564 let (_, hdr_end, _) = parse_midx_header(&data)?;
1565 let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1566 let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
1567 let Ok((btmp_off, btmp_len)) = find_chunk(&data, hdr_end, MIDX_CHUNKID_BITMAPPED_PACKS) else {
1568 return Err(Error::CorruptObject(
1569 "MIDX does not contain the BTMP chunk".to_owned(),
1570 ));
1571 };
1572 let n_entries = btmp_len / 8;
1573 let mut out = String::new();
1574 for i in 0..n_entries {
1575 let base = btmp_off + i * 8;
1576 let bitmap_pos = read_be_u32(&data, base)?;
1577 let bitmap_nr = read_be_u32(&data, base + 4)?;
1578 let idx_name = names.get(i).ok_or_else(|| {
1579 Error::CorruptObject("BTMP entry has no corresponding pack name".to_owned())
1580 })?;
1581 let stem = idx_name.strip_suffix(".idx").unwrap_or(idx_name);
1582 out.push_str(&format!("{stem}.pack\n"));
1583 out.push_str(&format!(" bitmap_pos: {bitmap_pos}\n"));
1584 out.push_str(&format!(" bitmap_nr: {bitmap_nr}\n"));
1585 }
1586 Ok(out)
1587}
1588
1589pub fn midx_lookup_pack_and_offset(objects_dir: &Path, oid: &ObjectId) -> Result<(u32, u64)> {
1591 let pack_dir = objects_dir.join("pack");
1592 let path = resolve_tip_midx_path(&pack_dir)
1593 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
1594 let data = fs::read(&path).map_err(Error::Io)?;
1595 let (_, hdr_end, _) = parse_midx_header(&data)?;
1596 let (fanout_off, fanout_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDFANOUT)?;
1597 let (oidl_off, oid_l_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
1598 let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1599 if fanout_len != 256 * 4 || oid_l_len % 20 != 0 || ooff_len != oid_l_len / 20 * 8 {
1600 return Err(Error::CorruptObject("truncated MIDX OID chunks".to_owned()));
1601 }
1602 let num_objects = oid_l_len / 20;
1603 let first = oid.as_bytes()[0] as usize;
1604 let j0 = if first == 0 {
1605 0usize
1606 } else {
1607 read_be_u32(&data, fanout_off + (first - 1) * 4)? as usize
1608 };
1609 let j1 = read_be_u32(&data, fanout_off + first * 4)? as usize;
1610 let mut lo = j0;
1611 let mut hi = j1;
1612 while lo < hi {
1613 let mid = (lo + hi) / 2;
1614 let base = oidl_off + mid * 20;
1615 let cmp = data[base..base + 20].cmp(oid.as_bytes());
1616 if cmp == std::cmp::Ordering::Less {
1617 lo = mid + 1;
1618 } else {
1619 hi = mid;
1620 }
1621 }
1622 if lo >= num_objects {
1623 return Err(Error::CorruptObject(format!(
1624 "object {} not in multi-pack-index",
1625 oid.to_hex()
1626 )));
1627 }
1628 let base = oidl_off + lo * 20;
1629 if data[base..base + 20] != *oid.as_bytes() {
1630 return Err(Error::CorruptObject(format!(
1631 "object {} not in multi-pack-index",
1632 oid.to_hex()
1633 )));
1634 }
1635 let ob = ooff_off + lo * 8;
1636 let pack_id = read_be_u32(&data, ob)?;
1637 let off32 = read_be_u32(&data, ob + 4)?;
1638 Ok((pack_id, u64::from(off32)))
1639}
1640
1641pub fn midx_oid_listed_in_tip(objects_dir: &Path, oid: &ObjectId) -> Result<Option<bool>> {
1645 let pack_dir = objects_dir.join("pack");
1646 let Some(midx_path) = resolve_tip_midx_path(&pack_dir) else {
1647 return Ok(None);
1648 };
1649 let data = fs::read(&midx_path).map_err(Error::Io)?;
1650 let MidxReadView {
1651 oidf_off,
1652 oidl_off,
1653 num_objects,
1654 ..
1655 } = match midx_load_for_read(&data, repo_midx_hash_version_for_objects_dir(objects_dir)) {
1656 MidxLoadResult::Ok(v) => v,
1657 MidxLoadResult::Skip => return Ok(None),
1658 };
1659
1660 let first = oid.as_bytes()[0] as usize;
1661 let lo = if first == 0 {
1662 0u32
1663 } else {
1664 read_be_u32(&data, oidf_off + (first - 1) * 4)?
1665 };
1666 let hi = read_be_u32(&data, oidf_off + first * 4)?;
1667
1668 let mut i = lo as usize;
1669 while i < hi as usize && i < num_objects {
1670 let o = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])?;
1671 match o.cmp(oid) {
1672 std::cmp::Ordering::Equal => return Ok(Some(true)),
1673 std::cmp::Ordering::Greater => return Ok(Some(false)),
1674 std::cmp::Ordering::Less => i += 1,
1675 }
1676 }
1677 Ok(Some(false))
1678}
1679
/// Chunk positions and pack names extracted from a MIDX buffer by `midx_load_for_read`.
///
/// Offsets are byte positions into the same buffer that was passed to `midx_load_for_read`.
struct MidxReadView {
    /// Start of the OID fanout chunk (256 big-endian `u32` entries).
    oidf_off: usize,
    /// Start of the OID lookup chunk.
    oidl_off: usize,
    /// Start of the object-offsets chunk (8 bytes per object).
    ooff_off: usize,
    /// `(start, length)` of the large-offsets chunk, when the file has one.
    loff: Option<(usize, usize)>,
    /// Object count, taken from the last fanout entry.
    num_objects: usize,
    /// Pack names parsed from the pack-name chunk, one per pack counted in the header.
    pack_names: Vec<String>,
}
1689
/// Outcome of `midx_load_for_read`.
enum MidxLoadResult {
    /// The buffer passed the checks; the view describes where its chunks live.
    Ok(MidxReadView),
    /// The buffer should be ignored: it is too short, its hash version differs from the
    /// expected one, or its chunk table could not be parsed.
    Skip,
}
1696
/// Prints `line` to stderr the first time it is seen in this process.
///
/// Previously printed lines are remembered in a process-wide set. If that set's mutex is
/// poisoned the line is printed unconditionally.
fn midx_warn_once(line: &str) {
    use std::sync::{Mutex, OnceLock};
    static SEEN: OnceLock<Mutex<HashSet<String>>> = OnceLock::new();
    let registry = SEEN.get_or_init(|| Mutex::new(HashSet::new()));
    match registry.lock() {
        Ok(mut emitted) => {
            // `insert` returns true only for a line that was not recorded before.
            if emitted.insert(line.to_string()) {
                eprintln!("{line}");
            }
        }
        Err(_) => eprintln!("{line}"),
    }
}
1715
/// Writes `lines` to stderr and terminates the process with exit status 128.
///
/// Every line except the last is prefixed with `error: `; the last one is prefixed with
/// `fatal: `. Write failures are ignored. An empty slice prints nothing before exiting.
fn midx_die(lines: &[&str]) -> ! {
    use std::io::Write;
    let mut err = std::io::stderr().lock();
    if let Some((fatal, preceding)) = lines.split_last() {
        for l in preceding {
            let _ = writeln!(err, "error: {l}");
        }
        let _ = writeln!(err, "fatal: {fatal}");
    }
    let _ = err.flush();
    std::process::exit(128);
}
1732
/// Validates a MIDX buffer for reading and locates the chunks the readers need.
///
/// Outcomes:
/// - `MidxLoadResult::Skip` when the buffer is shorter than the header plus a 20-byte trailer,
///   when the hash-version byte differs from `expected_hash_version` (an error line is printed
///   once), or when `parse_midx_toc` fails (its collected errors are printed once each).
/// - Process termination via `midx_die` for a bad signature, an unrecognised version, a
///   missing or wrongly sized required chunk, a non-monotonic fanout, a short or non-UTF-8
///   pack-name chunk, or (version 1 only) pack names that are not strictly increasing.
/// - `MidxLoadResult::Ok` with a `MidxReadView` otherwise.
///
/// A reverse-index chunk of the wrong size only produces once-only warnings.
fn midx_load_for_read(data: &[u8], expected_hash_version: u8) -> MidxLoadResult {
    // Too short to contain the fixed header and the trailing checksum.
    if data.len() < MIDX_HEADER_SIZE + 20 {
        return MidxLoadResult::Skip;
    }
    let sig = u32::from_be_bytes([data[0], data[1], data[2], data[3]]);
    if sig != MIDX_SIGNATURE {
        midx_die(&[&format!(
            "multi-pack-index signature 0x{sig:08x} does not match signature 0x{MIDX_SIGNATURE:08x}"
        )]);
    }
    let version = data[4];
    if version != MIDX_VERSION_V1 && version != MIDX_VERSION_V2 {
        midx_die(&[&format!(
            "multi-pack-index version {version} not recognized"
        )]);
    }
    let hash_version = data[5];
    // A hash-version mismatch is not fatal: report it once and ignore the file.
    if hash_version != expected_hash_version {
        midx_warn_once(&format!(
            "error: multi-pack-index hash version {hash_version} does not match version {expected_hash_version}"
        ));
        return MidxLoadResult::Skip;
    }
    // NOTE(review): the hash length is fixed at 20 bytes even when the accepted hash version
    // is not the SHA-1 one; callers in this file also index OIDs with a stride of 20 — confirm
    // whether non-SHA-1 files are meant to be supported on this path.
    let hash_len = 20usize;
    let num_packs = u32::from_be_bytes([data[8], data[9], data[10], data[11]]) as usize;

    let mut toc_errors: Vec<String> = Vec::new();
    let chunks = match parse_midx_toc(data, hash_len, &mut toc_errors) {
        Ok(c) => c,
        Err(_) => {
            // Surface every collected table-of-contents problem, each at most once per process.
            for e in &toc_errors {
                midx_warn_once(&format!("error: {e}"));
            }
            return MidxLoadResult::Skip;
        }
    };

    let Some((pn_off, pn_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_PACKNAMES)
    else {
        midx_die(&["multi-pack-index required pack-name chunk missing or corrupted"]);
    };

    let Some((oidf_off, oidf_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OIDFANOUT)
    else {
        midx_die(&["multi-pack-index required OID fanout chunk missing or corrupted"]);
    };
    // The fanout table must hold exactly 256 four-byte counters.
    if oidf_len != 256 * 4 {
        midx_die(&[
            "multi-pack-index OID fanout is of the wrong size",
            "multi-pack-index required OID fanout chunk missing or corrupted",
        ]);
    }
    // Reads the i-th big-endian fanout counter straight from the buffer.
    let fanout = |i: usize| -> u32 {
        let b = oidf_off + i * 4;
        u32::from_be_bytes([data[b], data[b + 1], data[b + 2], data[b + 3]])
    };
    // Fanout counters are cumulative, so each one must be at least its predecessor.
    for i in 0..255 {
        let f1 = fanout(i);
        let f2 = fanout(i + 1);
        if f1 > f2 {
            midx_die(&[
                &format!(
                    "oid fanout out of order: fanout[{i}] = {f1:x} > {f2:x} = fanout[{}]",
                    i + 1
                ),
                "multi-pack-index required OID fanout chunk missing or corrupted",
            ]);
        }
    }
    // The last counter is the total number of objects.
    let num_objects = fanout(255) as usize;

    let Some((oidl_off, oidl_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OIDLOOKUP)
    else {
        midx_die(&["multi-pack-index required OID lookup chunk missing or corrupted"]);
    };
    if oidl_len != hash_len * num_objects {
        midx_die(&[
            "multi-pack-index OID lookup chunk is the wrong size",
            "multi-pack-index required OID lookup chunk missing or corrupted",
        ]);
    }

    let Some((ooff_off, ooff_len)) =
        toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OBJECTOFFSETS)
    else {
        midx_die(&["multi-pack-index required object offsets chunk missing or corrupted"]);
    };
    if ooff_len != num_objects * 8 {
        midx_die(&[
            "multi-pack-index object offset chunk is the wrong size",
            "multi-pack-index required object offsets chunk missing or corrupted",
        ]);
    }

    // Optional chunk: absent when no object needs a 64-bit offset.
    let loff = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_LARGEOFFSETS);

    if let Some((_, rlen)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_REVINDEX) {
        // A mis-sized reverse index is reported but does not prevent loading.
        if rlen != num_objects * 4 {
            midx_warn_once("error: multi-pack-index reverse-index chunk is the wrong size");
            midx_warn_once("warning: multi-pack bitmap is missing required reverse index");
        }
    }

    let mut pack_names: Vec<String> = Vec::with_capacity(num_packs);
    // The pack-name chunk is a sequence of NUL-terminated names; read exactly `num_packs`.
    let blob = &data[pn_off..pn_off + pn_len];
    let mut start = 0usize;
    for _ in 0..num_packs {
        let Some(rel) = blob[start..].iter().position(|&b| b == 0) else {
            midx_die(&["multi-pack-index pack-name chunk is too short"]);
        };
        // Non-UTF-8 names are reported with the same message as a short chunk.
        let name = match std::str::from_utf8(&blob[start..start + rel]) {
            Ok(s) => s.to_string(),
            Err(_) => midx_die(&["multi-pack-index pack-name chunk is too short"]),
        };
        // Only version 1 files are required to list pack names in strictly increasing order.
        if version == MIDX_VERSION_V1
            && !pack_names.is_empty()
            && name.as_str() <= pack_names.last().map(|s| s.as_str()).unwrap_or("")
        {
            midx_die(&[&format!(
                "multi-pack-index pack names out of order: '{}' before '{name}'",
                pack_names.last().cloned().unwrap_or_default()
            )]);
        }
        pack_names.push(name);
        // Skip past the name and its terminating NUL.
        start += rel + 1;
    }

    MidxLoadResult::Ok(MidxReadView {
        oidf_off,
        oidl_off,
        ooff_off,
        loff,
        num_objects,
        pack_names,
    })
}
1884
1885pub fn validate_midx_referenced_packs(objects_dir: &Path) {
1894 use std::sync::Mutex;
1895 use std::sync::OnceLock;
1896 static DONE: OnceLock<Mutex<HashSet<std::path::PathBuf>>> = OnceLock::new();
1897 let done = DONE.get_or_init(|| Mutex::new(HashSet::new()));
1898 if let Ok(mut set) = done.lock() {
1899 if !set.insert(objects_dir.to_path_buf()) {
1900 return;
1901 }
1902 }
1903
1904 let pack_dir = objects_dir.join("pack");
1905 let Some(midx_path) = resolve_tip_midx_path(&pack_dir) else {
1906 return;
1907 };
1908 let Ok(data) = fs::read(&midx_path) else {
1909 return;
1910 };
1911 let MidxReadView { pack_names, .. } =
1912 match midx_load_for_read(&data, repo_midx_hash_version_for_objects_dir(objects_dir)) {
1913 MidxLoadResult::Ok(v) => v,
1914 MidxLoadResult::Skip => return,
1915 };
1916 for idx_name in &pack_names {
1917 let idx_path = pack_dir.join(idx_name);
1918 if !idx_path.exists() {
1922 continue;
1923 }
1924 if crate::pack::read_pack_index_no_verify(&idx_path).is_err() {
1930 let mut pack_path = idx_path.clone();
1931 pack_path.set_extension("pack");
1932 midx_warn_once(&format!(
1933 "error: packfile {} index unavailable",
1934 pack_path.display()
1935 ));
1936 }
1937 }
1938}
1939
1940pub fn try_read_object_via_midx(
1945 objects_dir: &Path,
1946 oid: &ObjectId,
1947) -> Result<Option<crate::objects::Object>> {
1948 let pack_dir = objects_dir.join("pack");
1949 let Some(midx_path) = resolve_tip_midx_path(&pack_dir) else {
1950 return Ok(None);
1951 };
1952 let data = fs::read(&midx_path).map_err(Error::Io)?;
1953
1954 let MidxReadView {
1958 oidf_off,
1959 oidl_off,
1960 ooff_off,
1961 loff,
1962 num_objects,
1963 pack_names,
1964 } = match midx_load_for_read(&data, repo_midx_hash_version_for_objects_dir(objects_dir)) {
1965 MidxLoadResult::Ok(v) => v,
1966 MidxLoadResult::Skip => return Ok(None),
1967 };
1968
1969 let first = oid.as_bytes()[0] as usize;
1970 let lo = if first == 0 {
1971 0u32
1972 } else {
1973 read_be_u32(&data, oidf_off + (first - 1) * 4)?
1974 };
1975 let hi = read_be_u32(&data, oidf_off + first * 4)?;
1976
1977 let mut pos = None;
1978 let mut i = lo as usize;
1979 while i < hi as usize && i < num_objects {
1980 let o = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])?;
1981 let c = o.cmp(oid);
1982 if c == std::cmp::Ordering::Equal {
1983 pos = Some(i);
1984 break;
1985 }
1986 if c == std::cmp::Ordering::Greater {
1987 break;
1988 }
1989 i += 1;
1990 }
1991 let Some(pos) = pos else {
1992 return Ok(None);
1993 };
1994
1995 let obase = ooff_off + pos * 8;
1996 let pack_id = read_be_u32(&data, obase)?;
1997 let raw_off = read_be_u32(&data, obase + 4)?;
1998 let _offset = if (raw_off & MIDX_LARGE_OFFSET_NEEDED) != 0 {
1999 let idx = (raw_off & !MIDX_LARGE_OFFSET_NEEDED) as usize;
2000 let need = (idx + 1) * 8;
2001 match loff {
2002 Some((loff_off, loff_len)) if loff_len >= need => {
2003 read_be_u64(&data, loff_off + idx * 8)?
2004 }
2005 _ => {
2006 midx_die(&["multi-pack-index large offset out of bounds"]);
2008 }
2009 }
2010 } else {
2011 u64::from(raw_off)
2012 };
2013
2014 let idx_name = pack_names
2015 .get(pack_id as usize)
2016 .ok_or_else(|| Error::CorruptObject("bad pack-int-id".to_owned()))?;
2017 let idx_path = pack_dir.join(idx_name);
2018 if !idx_path.exists() {
2023 return Ok(None);
2024 }
2025 let idx = match crate::pack::read_pack_index_no_verify(&idx_path) {
2033 Ok(idx) => idx,
2034 Err(_) => {
2035 let mut pack_path = idx_path.clone();
2036 pack_path.set_extension("pack");
2037 midx_warn_once(&format!(
2038 "error: packfile {} index unavailable",
2039 pack_path.display()
2040 ));
2041 return Ok(None);
2042 }
2043 };
2044 crate::pack::read_object_from_pack(&idx, oid).map(Some)
2045}
2046
2047pub fn read_midx_preferred_idx_name(objects_dir: &Path) -> Result<String> {
2048 let pack_dir = objects_dir.join("pack");
2049 let path = resolve_tip_midx_path(&pack_dir)
2050 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
2051 let data = fs::read(&path).map_err(Error::Io)?;
2052 let (_, hdr_end, _) = parse_midx_header(&data)?;
2053 let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
2054 let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
2055 let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
2056 let (ridx_off, ridx_len) = match find_chunk(&data, hdr_end, MIDX_CHUNKID_REVINDEX) {
2062 Ok(r) => r,
2063 Err(_) => {
2064 return Err(Error::CorruptObject(
2065 "could not determine MIDX preferred pack".to_owned(),
2066 ));
2067 }
2068 };
2069
2070 if ridx_len < 4 || ooff_len < 8 {
2071 return Err(Error::CorruptObject("truncated MIDX RIDX/OOFF".to_owned()));
2072 }
2073 let first_oid_idx = read_be_u32(&data, ridx_off)? as usize;
2074 let entry_base = ooff_off + first_oid_idx * 8;
2075 if entry_base + 8 > data.len() || entry_base + 8 > ooff_off + ooff_len {
2076 return Err(Error::CorruptObject(
2077 "bad MIDX object-offsets index".to_owned(),
2078 ));
2079 }
2080 let pack_id = read_be_u32(&data, entry_base)?;
2081 let idx = usize::try_from(pack_id)
2082 .map_err(|_| Error::CorruptObject("pack id overflow in multi-pack-index".to_owned()))?;
2083 names
2084 .get(idx)
2085 .cloned()
2086 .ok_or_else(|| Error::CorruptObject("preferred pack id out of range".to_owned()))
2087}
2088
2089pub fn clear_pack_midx_state(pack_dir: &Path) -> Result<()> {
2096 let _ = fs::remove_file(pack_dir.join("multi-pack-index"));
2097 scrub_root_midx_sidecars_except(pack_dir, None)?;
2098 let midx_d = midx_d_dir(pack_dir);
2099 if midx_d.exists() {
2100 let _ = fs::remove_dir_all(&midx_d);
2101 }
2102 Ok(())
2103}
2104
2105pub fn write_multi_pack_index(pack_dir: &Path) -> Result<()> {
2106 write_multi_pack_index_with_options(pack_dir, &WriteMultiPackIndexOptions::default())
2107}
2108
2109pub fn write_multi_pack_index_with_options(
2111 pack_dir: &Path,
2112 opts: &WriteMultiPackIndexOptions,
2113) -> Result<()> {
2114 if opts.pack_names_subset_ordered.is_none() {
2117 if let Some(existing) = resolve_tip_midx_path(pack_dir) {
2118 if let Ok(bytes) = fs::read(&existing) {
2119 if midx_checksum_is_valid(&bytes) {
2120 if let Ok((_, existing_names)) = oids_and_packs_from_midx_data(&bytes) {
2124 for (i, name) in existing_names.iter().enumerate() {
2125 let stem = name.strip_suffix(".idx").unwrap_or(name);
2126 if !pack_dir.join(format!("{stem}.pack")).exists() {
2127 eprintln!("error: could not load pack {i}");
2128 return Err(Error::CorruptObject(format!(
2129 "could not load pack {i}"
2130 )));
2131 }
2132 }
2133 }
2134 } else {
2135 eprintln!("warning: ignoring existing multi-pack-index; checksum mismatch");
2136 }
2137 }
2138 }
2139 }
2140
2141 let mut idx_names: Vec<String> = fs::read_dir(pack_dir)
2146 .map(|rd| {
2147 rd.filter_map(|e| e.ok())
2148 .filter_map(|e| {
2149 let name = e.file_name().to_string_lossy().to_string();
2150 let stem = name.strip_suffix(".idx")?;
2151 if pack_dir.join(format!("{stem}.pack")).exists() {
2152 Some(name)
2153 } else {
2154 None
2155 }
2156 })
2157 .collect()
2158 })
2159 .unwrap_or_default();
2160 idx_names.sort();
2161
2162 let idx_names: Vec<String> = if let Some(sub) = &opts.pack_names_subset_ordered {
2163 let mut out = Vec::new();
2164 for line in sub {
2165 let want = normalize_pack_idx_basename(line)?;
2166 if let Some(found) = idx_names.iter().find(|n| **n == want).cloned() {
2167 if !out.contains(&found) {
2168 out.push(found);
2169 }
2170 }
2171 }
2174 out
2175 } else {
2176 idx_names
2177 };
2178
2179 let mut preferred_warned = false;
2182 if let Some(raw) = opts.preferred_pack_name.as_deref() {
2183 if opts.preferred_pack_idx.is_none()
2184 && !idx_names
2185 .iter()
2186 .any(|n| cmp_idx_or_pack_name(raw, n).is_eq())
2187 {
2188 eprintln!("warning: unknown preferred pack: '{raw}'");
2189 preferred_warned = true;
2190 }
2191 }
2192
2193 if idx_names.is_empty() {
2194 eprintln!("error: no pack files to index.");
2196 return Err(Error::CorruptObject("no pack files to index.".to_owned()));
2197 }
2198
2199 let (base_oids, base_pack_names) = if opts.incremental {
2200 collect_incremental_base(pack_dir)?
2201 } else {
2202 (HashSet::new(), HashSet::new())
2203 };
2204
2205 let layer_idx_names: Vec<String> = if opts.incremental {
2206 idx_names
2207 .iter()
2208 .filter(|n| {
2209 !base_pack_names
2210 .iter()
2211 .any(|bp| pack_names_match_layer(bp, n))
2212 })
2213 .cloned()
2214 .collect()
2215 } else {
2216 idx_names.clone()
2217 };
2218
2219 if opts.incremental && layer_idx_names.is_empty() {
2220 return Ok(());
2221 }
2222
2223 let work_names = if opts.incremental {
2224 &layer_idx_names[..]
2225 } else {
2226 &idx_names[..]
2227 };
2228
2229 let mut preferred_idx = opts.preferred_pack_idx.map(|p| p as usize);
2230 if preferred_idx.is_none() && !preferred_warned {
2231 if let Some(raw) = opts.preferred_pack_name.as_deref() {
2232 preferred_idx = work_names
2234 .iter()
2235 .position(|n| cmp_idx_or_pack_name(raw, n).is_eq());
2236 }
2237 }
2238 if preferred_idx.is_none() && opts.write_bitmap_placeholders && !work_names.is_empty() {
2239 preferred_idx = preferred_pack_index_by_mtime(pack_dir, work_names)?;
2240 }
2241 if let Some(p) = preferred_idx {
2242 if p >= work_names.len() {
2243 return Err(Error::CorruptObject(
2244 "preferred pack index out of range".to_owned(),
2245 ));
2246 }
2247 }
2248
2249 let mut indexes: Vec<PackIndex> = Vec::with_capacity(work_names.len());
2250 for name in work_names {
2251 let path = pack_dir.join(name);
2252 indexes.push(crate::pack::read_pack_index_no_verify(&path)?);
2255 }
2256
2257 if let Some(p) = preferred_idx {
2259 if indexes.get(p).map(|i| i.entries.len()).unwrap_or(0) == 0 {
2260 let name = work_names.get(p).cloned().unwrap_or_default();
2261 let pack_name = name.strip_suffix(".idx").unwrap_or(&name);
2262 eprintln!("error: cannot select preferred pack {pack_name}.pack with no objects");
2263 return Err(Error::CorruptObject(
2264 "cannot select preferred pack with no objects".to_owned(),
2265 ));
2266 }
2267 }
2268
2269 let pack_mtimes_layer: Vec<std::time::SystemTime> =
2270 indexes.iter().map(pack_mtime_for_midx).collect();
2271 let preferred_u32 = preferred_idx.map(|p| p as u32);
2272
2273 let mut best: HashMap<ObjectId, MidxEntry> = HashMap::new();
2274 for (pack_id, idx) in indexes.iter().enumerate() {
2275 let pack_id = u32::try_from(pack_id).map_err(|_| {
2276 Error::CorruptObject("too many pack files for multi-pack-index".to_owned())
2277 })?;
2278 let mtime = pack_mtimes_layer[pack_id as usize];
2279 for e in &idx.entries {
2280 if e.oid.len() != 20 {
2281 continue;
2282 }
2283 let Ok(oid) = ObjectId::from_bytes(&e.oid) else {
2284 continue;
2285 };
2286 if opts.incremental && base_oids.contains(&oid) {
2287 continue;
2288 }
2289 let cand = MidxEntry {
2290 oid,
2291 pack_id,
2292 offset: e.offset,
2293 pack_mtime: mtime,
2294 };
2295 match best.get(&oid) {
2296 None => {
2297 best.insert(oid, cand);
2298 }
2299 Some(cur) => {
2300 if midx_pick_better_entry(cur, pack_id, e.offset, mtime, preferred_u32) {
2301 best.insert(oid, cand);
2302 }
2303 }
2304 }
2305 }
2306 }
2307
2308 let bitmap_placeholders =
2309 opts.write_bitmap_placeholders && (!opts.incremental || !best.is_empty());
2310
2311 let omit_embedded_ridx = opts.write_rev_placeholder;
2312 let exclude = if opts.incremental && !base_oids.is_empty() {
2316 Some(&base_oids)
2317 } else {
2318 None
2319 };
2320 let (out, rev_sidecar_order) = build_midx_bytes_filtered(
2321 work_names,
2322 &indexes,
2323 preferred_idx,
2324 bitmap_placeholders,
2325 omit_embedded_ridx,
2326 opts.version.unwrap_or(MIDX_VERSION_V2),
2327 repo_midx_hash_version(pack_dir),
2328 exclude,
2329 )?;
2330
2331 let hash = &out[out.len() - 20..];
2332 let hash_hex = hex::encode(hash);
2333 let hash_arr: [u8; 20] = hash
2334 .try_into()
2335 .map_err(|_| Error::CorruptObject("midx hash length mismatch".to_owned()))?;
2336
2337 if opts.incremental {
2338 let root_midx = pack_dir.join("multi-pack-index");
2339 let chain_path = chain_file_path(pack_dir);
2340 let chain_existed = chain_path.exists();
2341
2342 let mut chain = if root_midx.exists() && !chain_existed {
2343 let root_hex = midx_checksum_hex_from_path(&root_midx)?;
2344 link_root_midx_into_chain(pack_dir, &root_hex)?;
2345 vec![root_hex]
2346 } else {
2347 read_chain_layer_hashes(pack_dir).unwrap_or_default()
2348 };
2349
2350 chain.push(hash_hex.clone());
2351
2352 let midx_d = midx_d_dir(pack_dir);
2353 fs::create_dir_all(&midx_d).map_err(Error::Io)?;
2354
2355 let layer_path = midx_d.join(format!("multi-pack-index-{hash_hex}.midx"));
2356 fs::write(&layer_path, &out).map_err(Error::Io)?;
2357
2358 let mut chain_data = String::new();
2359 for h in &chain {
2360 chain_data.push_str(h);
2361 chain_data.push('\n');
2362 }
2363 fs::write(chain_file_path(pack_dir), chain_data.as_bytes()).map_err(Error::Io)?;
2364
2365 clear_stale_split_layers(pack_dir, &chain)?;
2366
2367 let _ = fs::remove_file(pack_dir.join("multi-pack-index"));
2368 scrub_root_midx_sidecars(pack_dir)?;
2369 if bitmap_placeholders {
2370 let full = hex::encode(hash);
2371 fs::write(midx_d.join(format!("multi-pack-index-{full}.bitmap")), [])
2372 .map_err(Error::Io)?;
2373 if opts.write_rev_placeholder {
2374 let rev_path = midx_d.join(format!("multi-pack-index-{full}.rev"));
2375 if let Some(order) = rev_sidecar_order.as_ref() {
2376 write_midx_rev_sidecar(&rev_path, order, &hash_arr)?;
2377 } else {
2378 fs::write(rev_path, []).map_err(Error::Io)?;
2379 }
2380 }
2381 }
2382 } else {
2383 let dest = pack_dir.join("multi-pack-index");
2389
2390 let bitmap_path = pack_dir.join(format!("multi-pack-index-{hash_hex}.bitmap"));
2394 let bitmap_ok = !opts.write_bitmap_placeholders || bitmap_path.exists();
2395 if bitmap_ok && !chain_file_path(pack_dir).exists() {
2399 if let Ok(existing) = fs::read(&dest) {
2400 if existing == out {
2401 return Ok(());
2402 }
2403 }
2404 }
2405
2406 clear_incremental_midx_files(pack_dir)?;
2407
2408 fs::write(&dest, &out).map_err(Error::Io)?;
2409
2410 scrub_root_midx_sidecars_except(pack_dir, Some(&hash_hex))?;
2411
2412 if opts.write_bitmap_placeholders {
2413 fs::write(
2414 pack_dir.join(format!("multi-pack-index-{hash_hex}.bitmap")),
2415 [],
2416 )
2417 .map_err(Error::Io)?;
2418 if opts.write_rev_placeholder {
2419 let rev_path = pack_dir.join(format!("multi-pack-index-{hash_hex}.rev"));
2420 if let Some(order) = rev_sidecar_order.as_ref() {
2421 write_midx_rev_sidecar(&rev_path, order, &hash_arr)?;
2422 } else {
2423 fs::write(rev_path, []).map_err(Error::Io)?;
2424 }
2425 }
2426 }
2427 }
2428
2429 Ok(())
2430}
2431
2432fn pack_names_match_layer(base_name: &str, disk_idx: &str) -> bool {
2433 if base_name == disk_idx {
2434 return true;
2435 }
2436 cmp_idx_or_pack_name(disk_idx, base_name).is_eq()
2437}
2438
/// Failure modes reported by [`compact_multi_pack_index`].
///
/// Derives `Clone`, `PartialEq` and `Eq` in addition to `Debug` so callers
/// and tests can compare or duplicate errors directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CompactError {
    /// The chain file could not be read, or it lists no layers.
    NoChain,
    /// The named endpoint (as supplied by the caller) is not part of the chain.
    MissingEndpoint(String),
    /// Both endpoints resolve to the same layer.
    IdenticalEndpoints,
    /// The first layer (`from`) appears after the second (`to`) in the chain.
    NotAncestor(String, String),
    /// Compaction was requested with the v1 MIDX format.
    V1Format,
    /// Any other failure, carried as its display text.
    Other(String),
}
2458
2459impl std::fmt::Display for CompactError {
2460 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
2461 match self {
2462 CompactError::NoChain => write!(f, "no multi-pack-index chain to compact"),
2463 CompactError::MissingEndpoint(s) => write!(f, "could not find MIDX: {s}"),
2464 CompactError::IdenticalEndpoints => {
2465 write!(f, "MIDX compaction endpoints must be unique")
2466 }
2467 CompactError::NotAncestor(from, to) => {
2468 write!(f, "MIDX {from} must be an ancestor of {to}")
2469 }
2470 CompactError::V1Format => write!(f, "cannot perform MIDX compaction with v1 format"),
2471 CompactError::Other(s) => write!(f, "{s}"),
2472 }
2473 }
2474}
2475
2476impl From<Error> for CompactError {
2477 fn from(e: Error) -> Self {
2478 CompactError::Other(e.to_string())
2479 }
2480}
2481
2482fn collect_layer_oids(pack_dir: &Path, hashes: &[String]) -> Result<HashSet<ObjectId>> {
2485 let mut oids = HashSet::new();
2486 for h in hashes {
2487 let p = midx_d_dir(pack_dir).join(format!("multi-pack-index-{h}.midx"));
2488 let data = load_midx_file(&p)?;
2489 let (layer_oids, _) = oids_and_packs_from_midx_data(&data)?;
2490 oids.extend(layer_oids);
2491 }
2492 Ok(oids)
2493}
2494
2495fn layer_pack_names(pack_dir: &Path, hash: &str) -> Result<Vec<String>> {
2497 let p = midx_d_dir(pack_dir).join(format!("multi-pack-index-{hash}.midx"));
2498 let data = load_midx_file(&p)?;
2499 let (_, hdr_end, _) = parse_midx_header(&data)?;
2500 let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
2501 parse_pack_names_blob(&data[pn_off..pn_off + pn_len])
2502}
2503
2504pub fn compact_multi_pack_index(
2513 pack_dir: &Path,
2514 from_arg: &str,
2515 to_arg: &str,
2516 write_bitmaps: bool,
2517 write_rev: bool,
2518 version: Option<u8>,
2519) -> std::result::Result<(), CompactError> {
2520 if version == Some(MIDX_VERSION_V1) {
2521 return Err(CompactError::V1Format);
2522 }
2523
2524 let chain = read_chain_layer_hashes(pack_dir).map_err(|_| CompactError::NoChain)?;
2525 if chain.is_empty() {
2526 return Err(CompactError::NoChain);
2527 }
2528
2529 let from_hex = from_arg.to_ascii_lowercase();
2530 let to_hex = to_arg.to_ascii_lowercase();
2531
2532 let from_pos = chain.iter().position(|h| *h == from_hex);
2533 let to_pos = chain.iter().position(|h| *h == to_hex);
2534
2535 let Some(from_pos) = from_pos else {
2537 return Err(CompactError::MissingEndpoint(from_arg.to_string()));
2538 };
2539 let Some(to_pos) = to_pos else {
2540 return Err(CompactError::MissingEndpoint(to_arg.to_string()));
2541 };
2542
2543 if from_pos == to_pos {
2544 return Err(CompactError::IdenticalEndpoints);
2545 }
2546 if from_pos > to_pos {
2550 return Err(CompactError::NotAncestor(
2551 from_arg.to_string(),
2552 to_arg.to_string(),
2553 ));
2554 }
2555
2556 let base_hashes = &chain[..from_pos];
2559 let merged_hashes = &chain[from_pos..=to_pos];
2560 let upper_hashes = &chain[to_pos + 1..];
2561
2562 let base_oids = collect_layer_oids(pack_dir, base_hashes)?;
2563
2564 let mut ordered_idx_names: Vec<String> = Vec::new();
2567 for h in merged_hashes {
2568 for name in layer_pack_names(pack_dir, h)? {
2569 if !ordered_idx_names.contains(&name) {
2570 ordered_idx_names.push(name);
2571 }
2572 }
2573 }
2574
2575 if ordered_idx_names.is_empty() {
2576 return Err(CompactError::Other(
2577 "no packs found in compaction range".to_owned(),
2578 ));
2579 }
2580
2581 let mut indexes: Vec<PackIndex> = Vec::with_capacity(ordered_idx_names.len());
2583 for name in &ordered_idx_names {
2584 let path = pack_dir.join(name);
2585 indexes.push(crate::pack::read_pack_index_no_verify(&path)?);
2586 }
2587
2588 let preferred_idx = if write_bitmaps { Some(0usize) } else { None };
2591
2592 let exclude = if base_oids.is_empty() {
2593 None
2594 } else {
2595 Some(&base_oids)
2596 };
2597
2598 let (out, rev_sidecar_order) = build_midx_bytes_filtered(
2599 &ordered_idx_names,
2600 &indexes,
2601 preferred_idx,
2602 write_bitmaps,
2603 write_rev,
2604 version.unwrap_or(MIDX_VERSION_V2),
2605 repo_midx_hash_version(pack_dir),
2606 exclude,
2607 )?;
2608
2609 let hash = &out[out.len() - 20..];
2610 let hash_hex = hex::encode(hash);
2611 let hash_arr: [u8; 20] = hash
2612 .try_into()
2613 .map_err(|_| CompactError::Other("midx hash length mismatch".to_owned()))?;
2614
2615 let midx_d = midx_d_dir(pack_dir);
2616 fs::create_dir_all(&midx_d).map_err(Error::Io)?;
2617
2618 let layer_path = midx_d.join(format!("multi-pack-index-{hash_hex}.midx"));
2619 fs::write(&layer_path, &out).map_err(Error::Io)?;
2620
2621 let mut new_chain: Vec<String> = Vec::new();
2623 new_chain.extend(base_hashes.iter().cloned());
2624 new_chain.push(hash_hex.clone());
2625 new_chain.extend(upper_hashes.iter().cloned());
2626
2627 let mut chain_data = String::new();
2628 for h in &new_chain {
2629 chain_data.push_str(h);
2630 chain_data.push('\n');
2631 }
2632 fs::write(chain_file_path(pack_dir), chain_data.as_bytes()).map_err(Error::Io)?;
2633
2634 if write_bitmaps {
2635 fs::write(
2636 midx_d.join(format!("multi-pack-index-{hash_hex}.bitmap")),
2637 [],
2638 )
2639 .map_err(Error::Io)?;
2640 let rev_path = midx_d.join(format!("multi-pack-index-{hash_hex}.rev"));
2641 if write_rev {
2642 if let Some(order) = rev_sidecar_order.as_ref() {
2643 write_midx_rev_sidecar(&rev_path, order, &hash_arr)?;
2644 } else {
2645 fs::write(rev_path, []).map_err(Error::Io)?;
2646 }
2647 }
2648 }
2649
2650 clear_stale_split_layers(pack_dir, &new_chain)?;
2652
2653 Ok(())
2654}
2655
2656fn scrub_root_midx_sidecars(pack_dir: &Path) -> Result<()> {
2657 scrub_root_midx_sidecars_except(pack_dir, None)
2658}
2659
2660fn scrub_root_midx_sidecars_except(pack_dir: &Path, keep_hex: Option<&str>) -> Result<()> {
2661 let Ok(rd) = fs::read_dir(pack_dir) else {
2662 return Ok(());
2663 };
2664 for ent in rd {
2665 let ent = ent.map_err(Error::Io)?;
2666 let name = ent.file_name().to_string_lossy().to_string();
2667 let Some(rest) = name.strip_prefix("multi-pack-index-") else {
2668 continue;
2669 };
2670 if !(rest.ends_with(".bitmap") || rest.ends_with(".rev")) {
2671 continue;
2672 }
2673 let hash_part = rest
2674 .strip_suffix(".bitmap")
2675 .or_else(|| rest.strip_suffix(".rev"))
2676 .unwrap_or(rest);
2677 if keep_hex.is_some_and(|k| k == hash_part) {
2681 continue;
2682 }
2683 let _ = fs::remove_file(ent.path());
2684 }
2685 Ok(())
2686}