1use std::collections::{HashMap, HashSet};
13use std::fs;
14use std::io::{BufRead, BufReader};
15use std::path::Path;
16
17use sha1::{Digest, Sha1};
18
19use crate::error::{Error, Result};
20use crate::objects::ObjectId;
21use crate::pack::{read_pack_index, PackIndex};
22
/// Magic number at byte 0 of a multi-pack-index file: the ASCII bytes `MIDX`.
const MIDX_SIGNATURE: u32 = 0x4d49_4458;
/// The only format version `parse_midx_header` accepts.
const MIDX_VERSION_V1: u8 = 1;
/// Hash-version byte written into the header by `build_midx_bytes`.
const HASH_VERSION_SHA1: u8 = 1;
/// Fixed header size: signature (4) + version (1) + hash version (1) +
/// chunk count (1) + one zero byte (1) + pack count (4).
const MIDX_HEADER_SIZE: usize = 12;
/// Size of one table-of-contents entry: 4-byte chunk id + 8-byte file offset.
const CHUNK_TOC_ENTRY_SIZE: usize = 12;
/// Chunk id `PNAM`: NUL-terminated pack index names.
const MIDX_CHUNKID_PACKNAMES: u32 = 0x504e_414d;
/// Chunk id `OIDF`: 256-entry cumulative fanout table.
const MIDX_CHUNKID_OIDFANOUT: u32 = 0x4f49_4446;
/// Chunk id `OIDL`: sorted 20-byte object ids.
const MIDX_CHUNKID_OIDLOOKUP: u32 = 0x4f49_444c;
/// Chunk id `OOFF`: per-object (pack id, 32-bit offset word) pairs.
const MIDX_CHUNKID_OBJECTOFFSETS: u32 = 0x4f4f_4646;
/// Chunk id `LOFF`: 64-bit offsets referenced from `OOFF` entries.
const MIDX_CHUNKID_LARGEOFFSETS: u32 = 0x4c4f_4646;
/// Chunk id `RIDX`: embedded reverse index (object indices in pack order).
const MIDX_CHUNKID_REVINDEX: u32 = 0x5249_4458;
/// Chunk id `BTMP`: per-pack (bitmap position, object count) pairs.
const MIDX_CHUNKID_BITMAPPED_PACKS: u32 = 0x4254_4d50;

/// Magic number of the reverse-index sidecar file: the ASCII bytes `RIDX`.
/// Numerically identical to `MIDX_CHUNKID_REVINDEX`.
const RIDX_SIGNATURE: u32 = 0x5249_4458;
/// Version word written into the reverse-index sidecar header.
const RIDX_VERSION: u32 = 1;
/// Sidecar header size: three big-endian `u32` words.
const RIDX_HEADER_SIZE: usize = 12;
/// Byte alignment to which the pack-names and BTMP chunks are zero-padded.
const MIDX_CHUNK_ALIGNMENT: usize = 4;

/// High bit of an `OOFF` offset word; when set, the low 31 bits are an index
/// into the `LOFF` chunk instead of a direct offset.
const MIDX_LARGE_OFFSET_NEEDED: u32 = 0x8000_0000;
44
/// One object selected for the multi-pack-index while it is being built.
struct MidxEntry {
    /// Object id of the entry.
    oid: ObjectId,
    /// Position of the owning pack in the list of indexes being merged.
    pack_id: u32,
    /// Offset of the object as reported by that pack's index entry.
    offset: u64,
    /// Modification time of the owning pack, used to break ties between
    /// duplicate copies of the same object.
    pack_mtime: std::time::SystemTime,
}
51
/// Options controlling how a multi-pack-index is written.
///
/// NOTE(review): none of these fields is consumed in the part of the file
/// visible here; the per-field notes below are inferred from the names and
/// should be confirmed against the writer.
#[derive(Debug, Clone, Default)]
pub struct WriteMultiPackIndexOptions {
    /// Presumably the index of the preferred pack, if chosen by position.
    pub preferred_pack_idx: Option<u32>,
    /// Presumably the name of the preferred pack, if chosen by name.
    pub preferred_pack_name: Option<String>,
    /// Presumably an ordered subset of pack names to include — TODO confirm.
    pub pack_names_subset_ordered: Option<Vec<String>>,
    /// Presumably requests the bitmap-related placeholder chunks.
    pub write_bitmap_placeholders: bool,
    /// Presumably requests an incremental (chained) layer.
    pub incremental: bool,
    /// Presumably requests a placeholder reverse-index file.
    pub write_rev_placeholder: bool,
}
72
73fn normalize_pack_idx_basename(raw: &str) -> Result<String> {
74 let t = raw.trim();
75 let t = std::path::Path::new(t)
76 .file_name()
77 .and_then(|s| s.to_str())
78 .unwrap_or(t);
79 let t = t.strip_prefix("./").unwrap_or(t);
80 if t.ends_with(".idx") {
81 Ok(t.to_string())
82 } else if t.ends_with(".pack") {
83 Ok(format!("{}.idx", t.strip_suffix(".pack").unwrap_or(t)))
84 } else {
85 Ok(format!("{t}.idx"))
86 }
87}
88
89fn read_be_u32(data: &[u8], off: usize) -> Result<u32> {
95 let end = off.checked_add(4).filter(|&e| e <= data.len());
96 let Some(end) = end else {
97 return Err(Error::CorruptObject(
98 "truncated MIDX data reading u32".to_owned(),
99 ));
100 };
101 let bytes: [u8; 4] = data[off..end]
102 .try_into()
103 .map_err(|_| Error::CorruptObject("truncated MIDX data reading u32".to_owned()))?;
104 Ok(u32::from_be_bytes(bytes))
105}
106
107fn read_be_u64(data: &[u8], off: usize) -> Result<u64> {
113 let end = off.checked_add(8).filter(|&e| e <= data.len());
114 let Some(end) = end else {
115 return Err(Error::CorruptObject(
116 "truncated MIDX data reading u64".to_owned(),
117 ));
118 };
119 let bytes: [u8; 8] = data[off..end]
120 .try_into()
121 .map_err(|_| Error::CorruptObject("truncated MIDX data reading u64".to_owned()))?;
122 Ok(u64::from_be_bytes(bytes))
123}
124
/// The subset of the fixed MIDX header that later parsing steps need.
struct MidxFileHeader {
    /// Number of chunks listed in the table of contents (header byte 6).
    num_chunks: u8,
}
128
129fn parse_midx_header(data: &[u8]) -> Result<(MidxFileHeader, usize, u8)> {
130 if data.len() < MIDX_HEADER_SIZE + 20 {
131 return Err(Error::CorruptObject("midx file too small".to_owned()));
132 }
133 let sig = read_be_u32(data, 0)?;
134 if sig != MIDX_SIGNATURE {
135 return Err(Error::CorruptObject("bad MIDX signature".to_owned()));
136 }
137 let version = data[4];
138 if version != MIDX_VERSION_V1 {
139 return Err(Error::CorruptObject(format!(
140 "unsupported MIDX version {version}"
141 )));
142 }
143 let object_hash_bytes = data[5];
144 let num_chunks = data[6];
145 let _num_packs = read_be_u32(data, 8)?;
146 Ok((
147 MidxFileHeader { num_chunks },
148 MIDX_HEADER_SIZE,
149 object_hash_bytes,
150 ))
151}
152
153fn parse_pack_names_blob(pn: &[u8]) -> Result<Vec<String>> {
154 let mut names = Vec::new();
155 let mut start = 0usize;
156 for (i, &b) in pn.iter().enumerate() {
157 if b == 0 && i >= start {
158 if i > start {
159 let s = std::str::from_utf8(&pn[start..i])
160 .map_err(|_| Error::CorruptObject("non-utf8 pack name in MIDX".to_owned()))?;
161 names.push(s.to_string());
162 }
163 start = i + 1;
164 }
165 }
166 Ok(names)
167}
168
/// Compares a `.idx` or `.pack` file name against a `.idx` file name.
///
/// After skipping the common prefix, a remaining `pack` on the left and `idx`
/// on the right counts as equal, so `pack-X.pack` matches `pack-X.idx`.
/// Otherwise the remaining bytes are compared lexicographically.
fn cmp_idx_or_pack_name(idx_or_pack_name: &str, idx_name: &str) -> std::cmp::Ordering {
    let lhs = idx_or_pack_name.as_bytes();
    let rhs = idx_name.as_bytes();

    // Length of the shared leading run of bytes.
    let shared = lhs
        .iter()
        .zip(rhs.iter())
        .take_while(|(left, right)| left == right)
        .count();
    let (lhs_tail, rhs_tail) = (&lhs[shared..], &rhs[shared..]);

    if lhs_tail == b"pack" && rhs_tail == b"idx" {
        std::cmp::Ordering::Equal
    } else {
        lhs_tail.cmp(rhs_tail)
    }
}
185
186fn preferred_pack_index_by_mtime(pack_dir: &Path, names: &[String]) -> Result<Option<usize>> {
187 let mut best: Option<(usize, std::time::SystemTime)> = None;
188 for (i, n) in names.iter().enumerate() {
189 let meta = fs::metadata(pack_dir.join(n)).map_err(Error::Io)?;
190 let mtime = meta.modified().map_err(Error::Io)?;
191 match best {
192 None => best = Some((i, mtime)),
193 Some((_, t)) if mtime < t => best = Some((i, mtime)),
194 _ => {}
195 }
196 }
197 Ok(best.map(|(i, _)| i))
198}
199
/// Path of the `multi-pack-index.d` directory inside `pack_dir`.
fn midx_d_dir(pack_dir: &Path) -> std::path::PathBuf {
    let mut layer_dir = pack_dir.to_path_buf();
    layer_dir.push("multi-pack-index.d");
    layer_dir
}
203
204fn chain_file_path(pack_dir: &Path) -> std::path::PathBuf {
205 midx_d_dir(pack_dir).join("multi-pack-index-chain")
206}
207
208fn read_chain_layer_hashes(pack_dir: &Path) -> Result<Vec<String>> {
209 let path = chain_file_path(pack_dir);
210 let f = fs::File::open(&path).map_err(Error::Io)?;
211 let mut out = Vec::new();
212 for line in BufReader::new(f).lines() {
213 let line = line.map_err(Error::Io)?;
214 let t = line.trim();
215 if t.is_empty() {
216 continue;
217 }
218 if t.len() != 40 || !t.chars().all(|c| c.is_ascii_hexdigit()) {
219 return Err(Error::CorruptObject(format!(
220 "invalid multi-pack-index chain line: {t}"
221 )));
222 }
223 out.push(t.to_ascii_lowercase());
224 }
225 Ok(out)
226}
227
228pub fn resolve_tip_midx_path(pack_dir: &Path) -> Option<std::path::PathBuf> {
230 let root = pack_dir.join("multi-pack-index");
231 if root.exists() {
232 return Some(root);
233 }
234 let hashes = read_chain_layer_hashes(pack_dir).ok()?;
235 let last = hashes.last()?;
236 Some(midx_d_dir(pack_dir).join(format!("multi-pack-index-{last}.midx")))
237}
238
239fn load_midx_file(path: &Path) -> Result<Vec<u8>> {
240 let data = fs::read(path).map_err(Error::Io)?;
241 let _ = parse_midx_header(&data)?;
242 Ok(data)
243}
244
245fn oids_and_packs_from_midx_data(data: &[u8]) -> Result<(HashSet<ObjectId>, Vec<String>)> {
246 let (_, hdr_end, _) = parse_midx_header(data)?;
247 let (pn_off, pn_len) = find_chunk(data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
248 let pack_names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
249 let (_ooff_off, ooff_len) = find_chunk(data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
250 let (oidl_off, oidl_len) = find_chunk(data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
251 let num_objects = ooff_len / 8;
252 if oidl_len != num_objects * 20 {
253 return Err(Error::CorruptObject(
254 "MIDX oid-lookup size mismatch".to_owned(),
255 ));
256 }
257 let mut oids = HashSet::with_capacity(num_objects);
258 for i in 0..num_objects {
259 let start = oidl_off + i * 20;
260 let oid = ObjectId::from_bytes(&data[start..start + 20])?;
261 oids.insert(oid);
262 }
263 Ok((oids, pack_names))
264}
265
266fn collect_incremental_base(pack_dir: &Path) -> Result<(HashSet<ObjectId>, HashSet<String>)> {
267 let mut oids = HashSet::new();
268 let mut packs = HashSet::new();
269 let root = pack_dir.join("multi-pack-index");
270 let chain_path = chain_file_path(pack_dir);
271 if chain_path.exists() {
272 for h in read_chain_layer_hashes(pack_dir)? {
273 let p = midx_d_dir(pack_dir).join(format!("multi-pack-index-{h}.midx"));
274 let data = load_midx_file(&p)?;
275 let (layer_oids, names) = oids_and_packs_from_midx_data(&data)?;
276 oids.extend(layer_oids);
277 for n in names {
278 packs.insert(n);
279 }
280 }
281 return Ok((oids, packs));
282 }
283 if root.exists() {
284 let data = load_midx_file(&root)?;
285 let (o, names) = oids_and_packs_from_midx_data(&data)?;
286 oids = o;
287 for n in names {
288 packs.insert(n);
289 }
290 }
291 Ok((oids, packs))
292}
293
294fn midx_checksum_hex_from_path(path: &Path) -> Result<String> {
295 let data = fs::read(path).map_err(Error::Io)?;
296 if data.len() < 20 {
297 return Err(Error::CorruptObject(
298 "midx too small for checksum".to_owned(),
299 ));
300 }
301 let hash = &data[data.len() - 20..];
302 Ok(hex::encode(hash))
303}
304
305fn hard_link_or_copy(src: &Path, dst: &Path) -> Result<()> {
306 let _ = fs::remove_file(dst);
307 if fs::hard_link(src, dst).is_ok() {
308 return Ok(());
309 }
310 fs::copy(src, dst).map_err(Error::Io)?;
311 Ok(())
312}
313
314fn link_root_midx_into_chain(pack_dir: &Path, root_checksum_hex: &str) -> Result<()> {
315 let midx_d = midx_d_dir(pack_dir);
316 fs::create_dir_all(&midx_d).map_err(Error::Io)?;
317 let dst_midx = midx_d.join(format!("multi-pack-index-{root_checksum_hex}.midx"));
318 hard_link_or_copy(&pack_dir.join("multi-pack-index"), &dst_midx)?;
319 let exts = ["bitmap", "rev"];
320 for ext in exts {
321 let src = pack_dir.join(format!("multi-pack-index-{root_checksum_hex}.{ext}"));
322 if src.exists() {
323 let dst = midx_d.join(format!("multi-pack-index-{root_checksum_hex}.{ext}"));
324 hard_link_or_copy(&src, &dst)?;
325 }
326 }
327 Ok(())
328}
329
330fn clear_stale_split_layers(pack_dir: &Path, keep: &[String]) -> Result<()> {
331 let midx_d = midx_d_dir(pack_dir);
332 if !midx_d.exists() {
333 return Ok(());
334 }
335 let keep: HashSet<&str> = keep.iter().map(|s| s.as_str()).collect();
336 for ent in fs::read_dir(&midx_d).map_err(Error::Io)? {
337 let ent = ent.map_err(Error::Io)?;
338 let name = ent.file_name().to_string_lossy().to_string();
339 let Some(rest) = name.strip_prefix("multi-pack-index-") else {
340 continue;
341 };
342 let Some((hash_part, _ext)) = rest.split_once('.') else {
343 continue;
344 };
345 if hash_part.len() == 40 && !keep.contains(hash_part) {
346 let _ = fs::remove_file(ent.path());
347 }
348 }
349 Ok(())
350}
351
352fn pack_mtime_for_midx(idx: &PackIndex) -> std::time::SystemTime {
353 fs::metadata(&idx.pack_path)
354 .and_then(|m| m.modified())
355 .unwrap_or(std::time::SystemTime::UNIX_EPOCH)
356}
357
358fn midx_pick_better_entry(
359 cur: &MidxEntry,
360 cand_pack: u32,
361 cand_offset: u64,
362 cand_mtime: std::time::SystemTime,
363 preferred_pack: Option<u32>,
364) -> bool {
365 let cur_pref = preferred_pack == Some(cur.pack_id);
366 let new_pref = preferred_pack == Some(cand_pack);
367 if new_pref && !cur_pref {
368 return true;
369 }
370 if cur_pref && !new_pref {
371 return false;
372 }
373 match cand_mtime.cmp(&cur.pack_mtime) {
374 std::cmp::Ordering::Greater => true,
375 std::cmp::Ordering::Less => false,
376 std::cmp::Ordering::Equal => {
377 if cand_pack != cur.pack_id {
378 cand_pack < cur.pack_id
379 } else {
380 cand_offset < cur.offset
381 }
382 }
383 }
384}
385
/// Serializes a complete multi-pack-index file for the given pack indexes.
///
/// * `idx_names` — names written, NUL-terminated, into the pack-names chunk.
/// * `indexes` — pack indexes to merge; an index's position is its pack id.
///   Presumably parallel to `idx_names` — nothing here checks that.
/// * `preferred_idx` — position of the preferred pack, if any; it wins
///   duplicate resolution and sorts first in the reverse-index order.
/// * `write_bitmap_placeholders` — adds the BTMP chunk (and enables RIDX).
/// * `omit_embedded_ridx_chunk` — suppresses the embedded RIDX chunk.
///
/// Returns the file bytes (header, table of contents, chunks, SHA-1 trailer)
/// and, only when both `omit_embedded_ridx_chunk` and
/// `write_bitmap_placeholders` are set, the reverse-index order.
///
/// # Errors
/// Returns `Error::CorruptObject` when a count or offset does not fit the
/// on-disk field it is written to.
fn build_midx_bytes(
    idx_names: &[String],
    indexes: &[PackIndex],
    preferred_idx: Option<usize>,
    write_bitmap_placeholders: bool,
    omit_embedded_ridx_chunk: bool,
) -> Result<(Vec<u8>, Option<Vec<u32>>)> {
    let preferred_pack_idx = preferred_idx.map(|p| p as u32);
    let pack_mtimes: Vec<std::time::SystemTime> = indexes.iter().map(pack_mtime_for_midx).collect();

    // Pick one copy of every object across all packs.
    let mut best: HashMap<ObjectId, MidxEntry> = HashMap::new();
    for (pack_id, idx) in indexes.iter().enumerate() {
        let pack_id = u32::try_from(pack_id).map_err(|_| {
            Error::CorruptObject("too many pack files for multi-pack-index".to_owned())
        })?;
        let mtime = pack_mtimes[pack_id as usize];
        for e in &idx.entries {
            // Entries whose id is not 20 bytes, or fails to parse, are skipped.
            if e.oid.len() != 20 {
                continue;
            }
            let Ok(oid) = ObjectId::from_bytes(&e.oid) else {
                continue;
            };
            let cand = MidxEntry {
                oid,
                pack_id,
                offset: e.offset,
                pack_mtime: mtime,
            };
            match best.get(&oid) {
                None => {
                    best.insert(oid, cand);
                }
                Some(cur) => {
                    if midx_pick_better_entry(cur, pack_id, e.offset, mtime, preferred_pack_idx) {
                        best.insert(oid, cand);
                    }
                }
            }
        }
    }

    // The lookup chunk requires entries in ascending object-id order.
    let mut entries: Vec<MidxEntry> = best.into_values().collect();
    entries.sort_by_key(|a| a.oid);

    let mut large_offsets: Vec<u64> = Vec::new();
    // NOTE(review): this rejects every offset above u32::MAX, although the
    // large-offsets path below stores full 64-bit values. As written, only
    // offsets in 0x8000_0000..=0xFFFF_FFFF ever reach the LOFF chunk —
    // confirm whether this limit is intentional.
    for e in &entries {
        if e.offset > u64::from(u32::MAX) {
            return Err(Error::CorruptObject(
                "object offset does not fit in multi-pack-index".to_owned(),
            ));
        }
    }

    let num_packs = indexes.len() as u32;

    // PNAM: NUL-terminated names, zero-padded to the chunk alignment.
    let mut pack_names_blob = Vec::new();
    for name in idx_names {
        pack_names_blob.extend_from_slice(name.as_bytes());
        pack_names_blob.push(0);
    }
    let pad = (MIDX_CHUNK_ALIGNMENT - (pack_names_blob.len() % MIDX_CHUNK_ALIGNMENT))
        % MIDX_CHUNK_ALIGNMENT;
    pack_names_blob.extend(std::iter::repeat_n(0u8, pad));
    let chunk_pnam = pack_names_blob;

    // OIDF: slot i holds the number of entries whose first id byte is <= i.
    let mut chunk_oidf = vec![0u8; 256 * 4];
    let mut j = 0usize;
    for i in 0..256 {
        while j < entries.len() && entries[j].oid.as_bytes()[0] <= i as u8 {
            j += 1;
        }
        chunk_oidf[i * 4..(i + 1) * 4].copy_from_slice(&(j as u32).to_be_bytes());
    }

    // OIDL: the sorted object ids, back to back.
    let mut chunk_oidl = Vec::with_capacity(entries.len() * 20);
    for e in &entries {
        chunk_oidl.extend_from_slice(e.oid.as_bytes());
    }

    // OOFF: per entry, the pack id followed by a 32-bit offset word.
    let mut chunk_ooff = Vec::with_capacity(entries.len() * 8);
    for e in &entries {
        chunk_ooff.extend_from_slice(&e.pack_id.to_be_bytes());
        let needs_large = e.offset >= u64::from(MIDX_LARGE_OFFSET_NEEDED);
        let encoded = if needs_large {
            // High bit set: the low bits index into the LOFF chunk.
            let slot = u32::try_from(large_offsets.len()).map_err(|_| {
                Error::CorruptObject("too many large offsets in multi-pack-index".to_owned())
            })?;
            large_offsets.push(e.offset);
            MIDX_LARGE_OFFSET_NEEDED | slot
        } else {
            u32::try_from(e.offset).map_err(|_| {
                Error::CorruptObject("object offset overflow in multi-pack-index".to_owned())
            })?
        };
        chunk_ooff.extend_from_slice(&encoded.to_be_bytes());
    }

    // LOFF: 64-bit offsets in the order their slots were handed out.
    let chunk_loff: Vec<u8> = if large_offsets.is_empty() {
        Vec::new()
    } else {
        let mut v = Vec::with_capacity(large_offsets.len() * 8);
        for off in &large_offsets {
            v.extend_from_slice(&off.to_be_bytes());
        }
        v
    };

    // Reverse-index order: preferred pack first, then by pack id, then by
    // offset, with the entry index as the final tie-break.
    let pref = preferred_pack_idx;
    let mut order: Vec<u32> = (0..entries.len() as u32).collect();
    order.sort_by(|&ai, &bi| {
        let a = &entries[ai as usize];
        let b = &entries[bi as usize];
        let a_pref = pref == Some(a.pack_id);
        let b_pref = pref == Some(b.pack_id);
        b_pref
            .cmp(&a_pref)
            .then_with(|| a.pack_id.cmp(&b.pack_id))
            .then_with(|| a.offset.cmp(&b.offset))
            .then_with(|| ai.cmp(&bi))
    });

    let mut chunk_ridx = Vec::with_capacity(entries.len() * 4);
    for oid_idx in &order {
        chunk_ridx.extend_from_slice(&oid_idx.to_be_bytes());
    }

    // Hand the order back to the caller only when it is not embedded and
    // bitmap placeholders were requested.
    let rev_sidecar_order = if omit_embedded_ridx_chunk && write_bitmap_placeholders {
        Some(order.clone())
    } else {
        None
    };
    // BTMP: per pack, the running start position and that pack's index entry
    // count (all entries of the index, not only those selected above).
    let chunk_btmp: Vec<u8> = if write_bitmap_placeholders {
        let mut v = Vec::new();
        let mut cumulative = 0u32;
        for idx in indexes {
            let n = u32::try_from(idx.entries.len()).map_err(|_| {
                Error::CorruptObject("too many objects in pack for MIDX BTMP".to_owned())
            })?;
            v.extend_from_slice(&cumulative.to_be_bytes());
            v.extend_from_slice(&n.to_be_bytes());
            cumulative = cumulative.saturating_add(n);
        }
        let pad = (MIDX_CHUNK_ALIGNMENT - (v.len() % MIDX_CHUNK_ALIGNMENT)) % MIDX_CHUNK_ALIGNMENT;
        v.extend(std::iter::repeat_n(0u8, pad));
        v
    } else {
        Vec::new()
    };

    // Chunk order here is the order on disk.
    let mut chunks: Vec<(u32, Vec<u8>)> = vec![
        (MIDX_CHUNKID_PACKNAMES, chunk_pnam),
        (MIDX_CHUNKID_OIDFANOUT, chunk_oidf),
        (MIDX_CHUNKID_OIDLOOKUP, chunk_oidl),
        (MIDX_CHUNKID_OBJECTOFFSETS, chunk_ooff),
    ];
    if !chunk_loff.is_empty() {
        chunks.push((MIDX_CHUNKID_LARGEOFFSETS, chunk_loff));
    }
    if (pref.is_some() || write_bitmap_placeholders) && !omit_embedded_ridx_chunk {
        chunks.push((MIDX_CHUNKID_REVINDEX, chunk_ridx));
    }
    if write_bitmap_placeholders {
        chunks.push((MIDX_CHUNKID_BITMAPPED_PACKS, chunk_btmp));
    }

    let num_chunks: u8 = chunks
        .len()
        .try_into()
        .map_err(|_| Error::CorruptObject("too many MIDX chunks".to_owned()))?;

    // Table of contents: one entry per chunk plus a terminator. The first
    // chunk starts right after the header and the table itself.
    let mut body = Vec::new();
    let mut cur_offset =
        MIDX_HEADER_SIZE as u64 + ((chunks.len() + 1) * CHUNK_TOC_ENTRY_SIZE) as u64;

    for (id, data) in &chunks {
        body.extend_from_slice(&id.to_be_bytes());
        body.extend_from_slice(&cur_offset.to_be_bytes());
        cur_offset += data.len() as u64;
    }
    // Terminator: id 0 with the offset where the last chunk ends.
    body.extend_from_slice(&0u32.to_be_bytes());
    body.extend_from_slice(&cur_offset.to_be_bytes());

    for (_, data) in &chunks {
        body.extend_from_slice(data);
    }

    // Header: signature, version, hash version, chunk count, one zero byte,
    // pack count.
    let mut out = Vec::with_capacity(MIDX_HEADER_SIZE + body.len() + 20);
    out.extend_from_slice(&MIDX_SIGNATURE.to_be_bytes());
    out.push(MIDX_VERSION_V1);
    out.push(HASH_VERSION_SHA1);
    out.push(num_chunks);
    out.push(0);
    out.extend_from_slice(&num_packs.to_be_bytes());
    out.extend_from_slice(&body);

    // Trailer: SHA-1 over everything written so far.
    let mut hasher = Sha1::new();
    hasher.update(&out);
    let hash = hasher.finalize();
    out.extend_from_slice(&hash);

    Ok((out, rev_sidecar_order))
}
591
592fn write_midx_rev_sidecar(
594 path: &Path,
595 pack_order: &[u32],
596 midx_file_hash: &[u8; 20],
597) -> Result<()> {
598 let mut body = Vec::with_capacity(RIDX_HEADER_SIZE + pack_order.len() * 4 + 20);
599 body.extend_from_slice(&RIDX_SIGNATURE.to_be_bytes());
600 body.extend_from_slice(&RIDX_VERSION.to_be_bytes());
601 body.extend_from_slice(&1u32.to_be_bytes());
602 for idx in pack_order {
603 body.extend_from_slice(&idx.to_be_bytes());
604 }
605 body.extend_from_slice(midx_file_hash);
606 fs::write(path, body).map_err(Error::Io)
607}
608
609fn find_chunk(data: &[u8], header_end: usize, chunk_id: u32) -> Result<(usize, usize)> {
610 let (hdr, _, _) = parse_midx_header(data)?;
611 let n = hdr.num_chunks as usize;
612 let pos = header_end;
613 let toc_end = pos + (n + 1) * CHUNK_TOC_ENTRY_SIZE;
614 if data.len() < toc_end + 20 {
615 return Err(Error::CorruptObject(
616 "truncated MIDX chunk table".to_owned(),
617 ));
618 }
619 for i in 0..n {
620 let base = pos + i * CHUNK_TOC_ENTRY_SIZE;
621 let id = read_be_u32(data, base)?;
622 let off = read_be_u64(data, base + 4)? as usize;
623 if id == chunk_id {
624 let next_off = if i + 1 < n {
625 let nb = pos + (i + 1) * CHUNK_TOC_ENTRY_SIZE;
626 read_be_u64(data, nb + 4)? as usize
627 } else {
628 let term = pos + n * CHUNK_TOC_ENTRY_SIZE;
629 read_be_u64(data, term + 4)? as usize
630 };
631 return Ok((off, next_off.saturating_sub(off)));
632 }
633 }
634 Err(Error::CorruptObject(format!(
635 "MIDX chunk {chunk_id:08x} not found"
636 )))
637}
638
639pub fn read_midx_pack_idx_names(objects_dir: &Path) -> Result<Vec<String>> {
646 let pack_dir = objects_dir.join("pack");
647 let path = resolve_tip_midx_path(&pack_dir)
648 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
649 let data = fs::read(&path).map_err(Error::Io)?;
650 let (_, hdr_end, _) = parse_midx_header(&data)?;
651 let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
652 parse_pack_names_blob(&data[pn_off..pn_off + pn_len])
653}
654
655pub struct MidxObjectRef {
657 pub oid: ObjectId,
658 pub pack_int_id: usize,
660}
661
662pub fn read_midx_objects(objects_dir: &Path) -> Result<(Vec<String>, Vec<MidxObjectRef>)> {
666 let pack_dir = objects_dir.join("pack");
667 let path = resolve_tip_midx_path(&pack_dir)
668 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
669 let data = fs::read(&path).map_err(Error::Io)?;
670 let (_, hdr_end, _) = parse_midx_header(&data)?;
671 let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
672 let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
673 let (oidl_off, oidl_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
674 let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
675 if oidl_len % 20 != 0 || ooff_len % 8 != 0 {
676 return Err(Error::CorruptObject(
677 "bad MIDX oid-lookup / object-offsets size".to_owned(),
678 ));
679 }
680 let num = oidl_len / 20;
681 if num * 8 != ooff_len {
682 return Err(Error::CorruptObject(
683 "MIDX oid count does not match object-offsets".to_owned(),
684 ));
685 }
686 let mut objects = Vec::with_capacity(num);
687 for i in 0..num {
688 let oid = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])
689 .map_err(|e| Error::CorruptObject(e.to_string()))?;
690 let base = ooff_off + i * 8;
691 let pack_id = read_be_u32(&data, base)? as usize;
692 objects.push(MidxObjectRef {
693 oid,
694 pack_int_id: pack_id,
695 });
696 }
697 Ok((names, objects))
698}
699
700pub fn midx_checksum_hex(objects_dir: &Path) -> Result<String> {
702 let pack_dir = objects_dir.join("pack");
703 let path = resolve_tip_midx_path(&pack_dir)
704 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
705 midx_checksum_hex_from_path(&path)
706}
707
708pub fn format_midx_show_objects(objects_dir: &Path) -> Result<String> {
711 let mut out = format_midx_dump(objects_dir)?;
712 let pack_dir = objects_dir.join("pack");
713 let path = resolve_tip_midx_path(&pack_dir)
714 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
715 let data = fs::read(&path).map_err(Error::Io)?;
716 let (_, hdr_end, _) = parse_midx_header(&data)?;
717 let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
718 let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
719 let (oidl_off, oidl_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
720 let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
721 if oidl_len % 20 != 0 || ooff_len % 8 != 0 {
722 return Err(Error::CorruptObject(
723 "bad MIDX oid-lookup / object-offsets size".to_owned(),
724 ));
725 }
726 let num = oidl_len / 20;
727 if num * 8 != ooff_len {
728 return Err(Error::CorruptObject(
729 "MIDX oid count does not match object-offsets".to_owned(),
730 ));
731 }
732 for i in 0..num {
733 let oid = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])
734 .map_err(|e| Error::CorruptObject(e.to_string()))?;
735 let base = ooff_off + i * 8;
736 let pack_id = read_be_u32(&data, base)? as usize;
737 let offset = u64::from(read_be_u32(&data, base + 4)?);
738 let pack_name = names
739 .get(pack_id)
740 .ok_or_else(|| Error::CorruptObject("pack id out of range in MIDX".to_owned()))?;
741 out.push_str(&format!("{} {}\t{}\n", oid.to_hex(), offset, pack_name));
742 }
743 Ok(out)
744}
745
746pub fn format_midx_dump(objects_dir: &Path) -> Result<String> {
747 let pack_dir = objects_dir.join("pack");
748 let path = resolve_tip_midx_path(&pack_dir)
749 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
750 let data = fs::read(&path).map_err(Error::Io)?;
751 let (hdr, hdr_end, _) = parse_midx_header(&data)?;
752 let sig = read_be_u32(&data, 0)?;
753 let version = data[4];
754 let hash_len = data[5];
755 let num_chunks = hdr.num_chunks;
756 let num_packs = read_be_u32(&data, 8)?;
757
758 let mut chunk_tags: Vec<&'static str> = Vec::new();
759 let n = num_chunks as usize;
760 let pos = hdr_end;
761 let toc_end = pos + (n + 1) * CHUNK_TOC_ENTRY_SIZE;
762 if data.len() < toc_end + 20 {
763 return Err(Error::CorruptObject(
764 "truncated MIDX chunk table".to_owned(),
765 ));
766 }
767 for i in 0..n {
768 let base = pos + i * CHUNK_TOC_ENTRY_SIZE;
769 let id = read_be_u32(&data, base)?;
770 let tag = match id {
771 x if x == MIDX_CHUNKID_PACKNAMES => "pack-names",
772 x if x == MIDX_CHUNKID_OIDFANOUT => "oid-fanout",
773 x if x == MIDX_CHUNKID_OIDLOOKUP => "oid-lookup",
774 x if x == MIDX_CHUNKID_OBJECTOFFSETS => "object-offsets",
775 x if x == MIDX_CHUNKID_REVINDEX => "revindex",
776 x if x == 0x4254_4d50 => "bitmapped-packs",
777 _ => "unknown",
778 };
779 chunk_tags.push(tag);
780 }
781
782 let (_ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
783 let num_objects = ooff_len / 8;
784
785 let pack_names = read_midx_pack_idx_names(objects_dir)?;
786
787 let mut out = String::new();
788 out.push_str(&format!(
789 "header: {:08x} {} {} {} {}\n",
790 sig, version, hash_len, num_chunks, num_packs
791 ));
792 out.push_str("chunks:");
793 for t in &chunk_tags {
794 out.push(' ');
795 out.push_str(t);
796 }
797 out.push('\n');
798 out.push_str(&format!("num_objects: {num_objects}\n"));
799 out.push_str("packs:\n");
800 for n in &pack_names {
801 out.push_str(n);
802 out.push('\n');
803 }
804 out.push_str(&format!("object-dir: {}\n", objects_dir.display()));
805 Ok(out)
806}
807
/// In-memory tables loaded from a multi-pack-index that carries an embedded
/// reverse-index chunk; produced by `load_midx_reuse_tables`.
#[derive(Debug, Clone)]
pub struct MidxReuseTables {
    /// Object ids in the order of the oid-lookup chunk.
    pub oids: Vec<ObjectId>,
    /// For each entry of `oids`, the (pack id, offset) pair read from the
    /// object-offsets chunk.
    pub pack_and_offset: Vec<(u32, u64)>,
    /// Contents of the reverse-index chunk: at each rank, an index into `oids`.
    pub rid_order: Vec<u32>,
    /// Inverse of `rid_order`: for each index into `oids`, its rank.
    pub oid_idx_to_rank: Vec<u32>,
}
824
825pub fn load_midx_reuse_tables(objects_dir: &Path) -> Result<Option<MidxReuseTables>> {
829 let pack_dir = objects_dir.join("pack");
830 let Some(path) = resolve_tip_midx_path(&pack_dir) else {
831 return Ok(None);
832 };
833 let data = fs::read(&path).map_err(Error::Io)?;
834 let (_, hdr_end, _) = parse_midx_header(&data)?;
835 let (oidl_off, oid_l_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
836 let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
837 let Ok((ridx_off, ridx_len)) = find_chunk(&data, hdr_end, MIDX_CHUNKID_REVINDEX) else {
838 return Ok(None);
839 };
840 if oid_l_len % 20 != 0 || ooff_len != oid_l_len / 20 * 8 {
841 return Err(Error::CorruptObject(
842 "MIDX OID / offset chunk size mismatch".to_owned(),
843 ));
844 }
845 let num_objects = oid_l_len / 20;
846 if ridx_len != num_objects.saturating_mul(4) {
847 return Err(Error::CorruptObject(
848 "MIDX reverse index length does not match object count".to_owned(),
849 ));
850 }
851 if num_objects == 0 {
852 return Ok(None);
853 }
854
855 let mut oids = Vec::with_capacity(num_objects);
856 for i in 0..num_objects {
857 let base = oidl_off + i * 20;
858 oids.push(ObjectId::from_bytes(&data[base..base + 20])?);
859 }
860
861 let mut pack_and_offset = Vec::with_capacity(num_objects);
862 for i in 0..num_objects {
863 let ob = ooff_off + i * 8;
864 let pack_id = read_be_u32(&data, ob)?;
865 let off32 = read_be_u32(&data, ob + 4)?;
866 pack_and_offset.push((pack_id, u64::from(off32)));
867 }
868
869 let mut rid_order = Vec::with_capacity(num_objects);
870 for i in 0..num_objects {
871 let base = ridx_off + i * 4;
872 rid_order.push(read_be_u32(&data, base)?);
873 }
874
875 let mut oid_idx_to_rank = vec![0u32; num_objects];
876 for (rank, &oid_idx) in rid_order.iter().enumerate() {
877 let idx = usize::try_from(oid_idx)
878 .map_err(|_| Error::CorruptObject("bad MIDX reverse index entry".to_owned()))?;
879 if idx >= num_objects {
880 return Err(Error::CorruptObject(
881 "MIDX reverse index out of range".to_owned(),
882 ));
883 }
884 oid_idx_to_rank[idx] = u32::try_from(rank)
885 .map_err(|_| Error::CorruptObject("too many MIDX objects".to_owned()))?;
886 }
887
888 Ok(Some(MidxReuseTables {
889 oids,
890 pack_and_offset,
891 rid_order,
892 oid_idx_to_rank,
893 }))
894}
895
896impl MidxReuseTables {
897 #[must_use]
899 pub fn global_bitmap_bit(&self, oid: &ObjectId) -> Option<u32> {
900 let oid_idx = self.oids.binary_search(oid).ok()?;
901 Some(self.oid_idx_to_rank[oid_idx])
902 }
903}
904
/// One entry of the bitmapped-packs (BTMP) chunk, as returned by
/// `read_midx_btmp_ranges`.
#[derive(Debug, Clone, Copy)]
pub struct MidxBtmpPackRange {
    /// Position of the entry within the chunk, used as the pack id.
    pub pack_id: u32,
    /// First 32-bit word of the entry.
    pub bitmap_pos: u32,
    /// Second 32-bit word of the entry.
    pub bitmap_nr: u32,
}
915
916pub fn read_midx_btmp_ranges(objects_dir: &Path) -> Result<Vec<MidxBtmpPackRange>> {
920 let pack_dir = objects_dir.join("pack");
921 let Some(path) = resolve_tip_midx_path(&pack_dir) else {
922 return Ok(Vec::new());
923 };
924 let data = fs::read(&path).map_err(Error::Io)?;
925 let (_, hdr_end, _) = parse_midx_header(&data)?;
926 let Ok((btmp_off, btmp_len)) = find_chunk(&data, hdr_end, MIDX_CHUNKID_BITMAPPED_PACKS) else {
927 return Ok(Vec::new());
928 };
929 if btmp_len == 0 || btmp_len % 8 != 0 {
930 return Err(Error::CorruptObject(
931 "invalid MIDX BTMP chunk length".to_owned(),
932 ));
933 }
934 let num_packs = read_be_u32(&data, 8)?;
935 let n_entries = btmp_len / 8;
936 if u32::try_from(n_entries).ok() != Some(num_packs) {
937 return Err(Error::CorruptObject(
938 "MIDX BTMP entry count does not match num_packs".to_owned(),
939 ));
940 }
941 let mut out = Vec::with_capacity(n_entries);
942 for i in 0..n_entries {
943 let base = btmp_off + i * 8;
944 let bitmap_pos = read_be_u32(&data, base)?;
945 let bitmap_nr = read_be_u32(&data, base + 4)?;
946 out.push(MidxBtmpPackRange {
947 pack_id: u32::try_from(i)
948 .map_err(|_| Error::CorruptObject("too many packs in MIDX BTMP".to_owned()))?,
949 bitmap_pos,
950 bitmap_nr,
951 });
952 }
953 Ok(out)
954}
955
956pub fn midx_lookup_pack_and_offset(objects_dir: &Path, oid: &ObjectId) -> Result<(u32, u64)> {
958 let pack_dir = objects_dir.join("pack");
959 let path = resolve_tip_midx_path(&pack_dir)
960 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
961 let data = fs::read(&path).map_err(Error::Io)?;
962 let (_, hdr_end, _) = parse_midx_header(&data)?;
963 let (fanout_off, fanout_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDFANOUT)?;
964 let (oidl_off, oid_l_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
965 let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
966 if fanout_len != 256 * 4 || oid_l_len % 20 != 0 || ooff_len != oid_l_len / 20 * 8 {
967 return Err(Error::CorruptObject("truncated MIDX OID chunks".to_owned()));
968 }
969 let num_objects = oid_l_len / 20;
970 let first = oid.as_bytes()[0] as usize;
971 let j0 = if first == 0 {
972 0usize
973 } else {
974 read_be_u32(&data, fanout_off + (first - 1) * 4)? as usize
975 };
976 let j1 = read_be_u32(&data, fanout_off + first * 4)? as usize;
977 let mut lo = j0;
978 let mut hi = j1;
979 while lo < hi {
980 let mid = (lo + hi) / 2;
981 let base = oidl_off + mid * 20;
982 let cmp = data[base..base + 20].cmp(oid.as_bytes());
983 if cmp == std::cmp::Ordering::Less {
984 lo = mid + 1;
985 } else {
986 hi = mid;
987 }
988 }
989 if lo >= num_objects {
990 return Err(Error::CorruptObject(format!(
991 "object {} not in multi-pack-index",
992 oid.to_hex()
993 )));
994 }
995 let base = oidl_off + lo * 20;
996 if data[base..base + 20] != *oid.as_bytes() {
997 return Err(Error::CorruptObject(format!(
998 "object {} not in multi-pack-index",
999 oid.to_hex()
1000 )));
1001 }
1002 let ob = ooff_off + lo * 8;
1003 let pack_id = read_be_u32(&data, ob)?;
1004 let off32 = read_be_u32(&data, ob + 4)?;
1005 Ok((pack_id, u64::from(off32)))
1006}
1007
1008pub fn midx_oid_listed_in_tip(objects_dir: &Path, oid: &ObjectId) -> Result<Option<bool>> {
1012 let pack_dir = objects_dir.join("pack");
1013 let Some(midx_path) = resolve_tip_midx_path(&pack_dir) else {
1014 return Ok(None);
1015 };
1016 let data = fs::read(&midx_path).map_err(Error::Io)?;
1017 let (_, hdr_end, hash_bytes) = parse_midx_header(&data)?;
1018 if hash_bytes != 1 {
1019 eprintln!(
1020 "error: multi-pack-index hash version {} does not match version 1",
1021 hash_bytes
1022 );
1023 return Err(Error::CorruptObject(
1024 "multi-pack-index hash version mismatch".to_owned(),
1025 ));
1026 }
1027 let (oidf_off, oidf_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDFANOUT)?;
1028 if oidf_len != 256 * 4 {
1029 eprintln!("error: multi-pack-index OID fanout is of the wrong size");
1030 return Err(Error::CorruptObject(
1031 "multi-pack-index OID fanout is of the wrong size".to_owned(),
1032 ));
1033 }
1034 let (oidl_off, oidl_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
1035 let (_ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1036 let num_objects = ooff_len / 8;
1037 if oidl_len != num_objects * 20 || ooff_len != num_objects * 8 {
1038 if oidl_len != num_objects * 20 {
1039 eprintln!("error: multi-pack-index OID lookup chunk is the wrong size");
1040 } else {
1041 eprintln!("error: multi-pack-index object offset chunk is the wrong size");
1042 }
1043 return Err(Error::CorruptObject("midx chunk size mismatch".to_owned()));
1044 }
1045
1046 let first = oid.as_bytes()[0] as usize;
1047 let lo = if first == 0 {
1048 0u32
1049 } else {
1050 read_be_u32(&data, oidf_off + (first - 1) * 4)?
1051 };
1052 let hi = read_be_u32(&data, oidf_off + first * 4)?;
1053 if lo > hi || hi as usize > num_objects {
1054 eprintln!(
1055 "error: oid fanout out of order: fanout[{}] = {:08x} > {:08x} = fanout[{}]",
1056 first.saturating_sub(1),
1057 lo,
1058 hi,
1059 first
1060 );
1061 return Err(Error::CorruptObject("oid fanout out of order".to_owned()));
1062 }
1063
1064 let mut i = lo as usize;
1065 while i < hi as usize {
1066 let o = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])?;
1067 match o.cmp(oid) {
1068 std::cmp::Ordering::Equal => return Ok(Some(true)),
1069 std::cmp::Ordering::Greater => return Ok(Some(false)),
1070 std::cmp::Ordering::Less => i += 1,
1071 }
1072 }
1073 Ok(Some(false))
1074}
1075
1076pub fn try_read_object_via_midx(
1081 objects_dir: &Path,
1082 oid: &ObjectId,
1083) -> Result<Option<crate::objects::Object>> {
1084 let pack_dir = objects_dir.join("pack");
1085 let Some(midx_path) = resolve_tip_midx_path(&pack_dir) else {
1086 return Ok(None);
1087 };
1088 let data = fs::read(&midx_path).map_err(Error::Io)?;
1089 let (_, hdr_end, hash_bytes) = parse_midx_header(&data)?;
1090 let num_packs_hdr = read_be_u32(&data, 8)?;
1091 if hash_bytes != 1 {
1092 eprintln!(
1093 "error: multi-pack-index hash version {} does not match version 1",
1094 hash_bytes
1095 );
1096 return Err(Error::CorruptObject(
1097 "multi-pack-index hash version mismatch".to_owned(),
1098 ));
1099 }
1100 let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1101 let pack_names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
1102 if pack_names.len() != num_packs_hdr as usize {
1103 return Err(Error::CorruptObject(
1104 "multi-pack-index pack-name chunk is too short".to_owned(),
1105 ));
1106 }
1107 let (oidf_off, oidf_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDFANOUT)?;
1108 if oidf_len != 256 * 4 {
1109 eprintln!("error: multi-pack-index OID fanout is of the wrong size");
1110 return Err(Error::CorruptObject(
1111 "multi-pack-index OID fanout is of the wrong size".to_owned(),
1112 ));
1113 }
1114 let (oidl_off, oidl_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
1115 let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1116 let num_objects = ooff_len / 8;
1117 if oidl_len != num_objects * 20 {
1118 eprintln!("error: multi-pack-index OID lookup chunk is the wrong size");
1119 return Err(Error::CorruptObject(
1120 "multi-pack-index OID lookup chunk is the wrong size".to_owned(),
1121 ));
1122 }
1123 if ooff_len != num_objects * 8 {
1124 eprintln!("error: multi-pack-index object offset chunk is the wrong size");
1125 return Err(Error::CorruptObject(
1126 "multi-pack-index object offset chunk is the wrong size".to_owned(),
1127 ));
1128 }
1129 let loff = find_chunk(&data, hdr_end, MIDX_CHUNKID_LARGEOFFSETS).ok();
1130 let ridx = find_chunk(&data, hdr_end, MIDX_CHUNKID_REVINDEX).ok();
1131
1132 if let Some((_, rlen)) = ridx {
1133 if rlen != num_objects * 4 {
1134 eprintln!("error: multi-pack-index reverse-index chunk is the wrong size");
1135 eprintln!("warning: multi-pack bitmap is missing required reverse index");
1136 }
1137 }
1138
1139 let first = oid.as_bytes()[0] as usize;
1140 let lo = if first == 0 {
1141 0u32
1142 } else {
1143 read_be_u32(&data, oidf_off + (first - 1) * 4)?
1144 };
1145 let hi = read_be_u32(&data, oidf_off + first * 4)?;
1146 if lo > hi || hi as usize > num_objects {
1147 eprintln!(
1148 "error: oid fanout out of order: fanout[{}] = {:08x} > {:08x} = fanout[{}]",
1149 first.saturating_sub(1),
1150 lo,
1151 hi,
1152 first
1153 );
1154 return Err(Error::CorruptObject("oid fanout out of order".to_owned()));
1155 }
1156
1157 let mut pos = None;
1158 let mut i = lo as usize;
1159 while i < hi as usize {
1160 let o = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])?;
1161 let c = o.cmp(oid);
1162 if c == std::cmp::Ordering::Equal {
1163 pos = Some(i);
1164 break;
1165 }
1166 if c == std::cmp::Ordering::Greater {
1167 break;
1168 }
1169 i += 1;
1170 }
1171 let Some(pos) = pos else {
1172 return Ok(None);
1173 };
1174
1175 let obase = ooff_off + pos * 8;
1176 let pack_id = read_be_u32(&data, obase)?;
1177 let raw_off = read_be_u32(&data, obase + 4)?;
1178 let _offset = if (raw_off & MIDX_LARGE_OFFSET_NEEDED) != 0 {
1179 let Some((loff_off, loff_len)) = loff else {
1180 return Err(Error::CorruptObject(
1181 "multi-pack-index large offset missing LOFF chunk".to_owned(),
1182 ));
1183 };
1184 let idx = (raw_off & !MIDX_LARGE_OFFSET_NEEDED) as usize;
1185 let need = (idx + 1) * 8;
1186 if loff_len < need {
1187 return Err(Error::CorruptObject(
1188 "multi-pack-index large offset out of bounds".to_owned(),
1189 ));
1190 }
1191 read_be_u64(&data, loff_off + idx * 8)?
1192 } else {
1193 u64::from(raw_off)
1194 };
1195
1196 let idx_name = pack_names
1197 .get(pack_id as usize)
1198 .ok_or_else(|| Error::CorruptObject("bad pack-int-id".to_owned()))?;
1199 let idx_path = pack_dir.join(idx_name);
1200 if !idx_path.exists() {
1205 return Ok(None);
1206 }
1207 let idx = crate::pack::read_pack_index(&idx_path)?;
1208 crate::pack::read_object_from_pack(&idx, oid).map(Some)
1209}
1210
1211pub fn read_midx_preferred_idx_name(objects_dir: &Path) -> Result<String> {
1212 let pack_dir = objects_dir.join("pack");
1213 let path = resolve_tip_midx_path(&pack_dir)
1214 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
1215 let data = fs::read(&path).map_err(Error::Io)?;
1216 let (_, hdr_end, _) = parse_midx_header(&data)?;
1217 let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1218 let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
1219 let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1220 let (ridx_off, ridx_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_REVINDEX)?;
1221
1222 if ridx_len < 4 || ooff_len < 8 {
1223 return Err(Error::CorruptObject("truncated MIDX RIDX/OOFF".to_owned()));
1224 }
1225 let first_oid_idx = read_be_u32(&data, ridx_off)? as usize;
1226 let entry_base = ooff_off + first_oid_idx * 8;
1227 if entry_base + 8 > data.len() || entry_base + 8 > ooff_off + ooff_len {
1228 return Err(Error::CorruptObject(
1229 "bad MIDX object-offsets index".to_owned(),
1230 ));
1231 }
1232 let pack_id = read_be_u32(&data, entry_base)?;
1233 let idx = usize::try_from(pack_id)
1234 .map_err(|_| Error::CorruptObject("pack id overflow in multi-pack-index".to_owned()))?;
1235 names
1236 .get(idx)
1237 .cloned()
1238 .ok_or_else(|| Error::CorruptObject("preferred pack id out of range".to_owned()))
1239}
1240
1241pub fn clear_pack_midx_state(pack_dir: &Path) -> Result<()> {
1248 let _ = fs::remove_file(pack_dir.join("multi-pack-index"));
1249 scrub_root_midx_sidecars_except(pack_dir, None)?;
1250 let midx_d = midx_d_dir(pack_dir);
1251 if midx_d.exists() {
1252 let _ = fs::remove_dir_all(&midx_d);
1253 }
1254 Ok(())
1255}
1256
1257pub fn write_multi_pack_index(pack_dir: &Path) -> Result<()> {
1258 write_multi_pack_index_with_options(pack_dir, &WriteMultiPackIndexOptions::default())
1259}
1260
/// Builds a multi-pack-index from the packs in `pack_dir` and writes it to disk.
///
/// Steps, in order:
///
/// 1. Collect every `*.idx` in `pack_dir` that has a sibling `.pack`, sorted by name.
/// 2. Optionally narrow/reorder that list via `opts.pack_names_subset_ordered`.
/// 3. In incremental mode, drop packs already covered by the existing base layers.
/// 4. Resolve the preferred pack (explicit index, then name, then mtime when bitmap
///    placeholders are requested).
/// 5. Read the pack indexes, serialize the MIDX with `build_midx_bytes`, and write it
///    either as a new chain layer (incremental) or as the root `multi-pack-index`.
/// 6. Optionally write empty `.bitmap` and `.rev` sidecar files named after the
///    MIDX checksum.
///
/// In incremental mode the function returns `Ok(())` without writing anything when
/// no pack is left after step 3.
///
/// # Errors
///
/// Returns [`Error::CorruptObject`] when no usable pack index exists, when a requested
/// subset entry is not on disk, when the subset is empty, or when the preferred pack
/// cannot be resolved or is out of range. Filesystem failures that are not explicitly
/// ignored are returned as [`Error::Io`]; errors from helpers are propagated.
pub fn write_multi_pack_index_with_options(
    pack_dir: &Path,
    opts: &WriteMultiPackIndexOptions,
) -> Result<()> {
    // Candidate packs: `*.idx` files whose matching `.pack` exists. Directory entries
    // that fail to read are skipped silently.
    let mut idx_names: Vec<String> = fs::read_dir(pack_dir)
        .map_err(Error::Io)?
        .filter_map(|e| e.ok())
        .filter_map(|e| {
            let name = e.file_name().to_string_lossy().to_string();
            let stem = name.strip_suffix(".idx")?;
            if pack_dir.join(format!("{stem}.pack")).exists() {
                Some(name)
            } else {
                None
            }
        })
        .collect();
    idx_names.sort();

    if idx_names.is_empty() {
        return Err(Error::CorruptObject(
            "no pack-*.idx files found in pack directory".to_owned(),
        ));
    }

    // With an explicit subset, keep the caller's order, require every entry to exist
    // on disk, and drop duplicates while preserving first occurrence.
    let idx_names: Vec<String> = if let Some(sub) = &opts.pack_names_subset_ordered {
        let mut out = Vec::new();
        for line in sub {
            let want = normalize_pack_idx_basename(line)?;
            let found = idx_names
                .iter()
                .find(|n| **n == want)
                .cloned()
                .ok_or_else(|| {
                    Error::CorruptObject(format!("pack index not in repository: {want}"))
                })?;
            if !out.contains(&found) {
                out.push(found);
            }
        }
        if out.is_empty() {
            return Err(Error::CorruptObject(
                "stdin-packs list produced empty pack set".to_owned(),
            ));
        }
        out
    } else {
        idx_names
    };

    // Incremental writes need to know what the existing layers already cover.
    let (base_oids, base_pack_names) = if opts.incremental {
        collect_incremental_base(pack_dir)?
    } else {
        (HashSet::new(), HashSet::new())
    };

    // Packs for the new layer: everything not matching a base-layer pack name.
    let layer_idx_names: Vec<String> = if opts.incremental {
        idx_names
            .iter()
            .filter(|n| {
                !base_pack_names
                    .iter()
                    .any(|bp| pack_names_match_layer(bp, n))
            })
            .cloned()
            .collect()
    } else {
        idx_names.clone()
    };

    // Nothing new to add: leave the existing chain untouched.
    if opts.incremental && layer_idx_names.is_empty() {
        return Ok(());
    }

    let work_names = if opts.incremental {
        &layer_idx_names[..]
    } else {
        &idx_names[..]
    };

    // Preferred pack resolution: explicit index wins, then an explicit name, then
    // (only when bitmap placeholders are requested) a choice based on pack mtime.
    let mut preferred_idx = opts.preferred_pack_idx.map(|p| p as usize);
    if preferred_idx.is_none() {
        if let Some(raw) = opts.preferred_pack_name.as_deref() {
            let pos = work_names
                .iter()
                .position(|n| cmp_idx_or_pack_name(raw, n).is_eq())
                .ok_or_else(|| {
                    Error::CorruptObject(format!(
                        "preferred pack '{raw}' not found in multi-pack-index input"
                    ))
                })?;
            preferred_idx = Some(pos);
        }
    }
    if preferred_idx.is_none() && opts.write_bitmap_placeholders && !work_names.is_empty() {
        preferred_idx = preferred_pack_index_by_mtime(pack_dir, work_names)?;
    }
    if let Some(p) = preferred_idx {
        if p >= work_names.len() {
            return Err(Error::CorruptObject(
                "preferred pack index out of range".to_owned(),
            ));
        }
    }

    let mut indexes: Vec<PackIndex> = Vec::with_capacity(work_names.len());
    for name in work_names {
        let path = pack_dir.join(name);
        indexes.push(read_pack_index(&path)?);
    }

    let pack_mtimes_layer: Vec<std::time::SystemTime> =
        indexes.iter().map(pack_mtime_for_midx).collect();
    let preferred_u32 = preferred_idx.map(|p| p as u32);

    // Pick one winning entry per object across all packs. Within this function the
    // resulting map is only consulted for emptiness (see `bitmap_placeholders`
    // below); the serialized MIDX is built from `indexes` by `build_midx_bytes`.
    let mut best: HashMap<ObjectId, MidxEntry> = HashMap::new();
    for (pack_id, idx) in indexes.iter().enumerate() {
        let pack_id = u32::try_from(pack_id).map_err(|_| {
            Error::CorruptObject("too many pack files for multi-pack-index".to_owned())
        })?;
        let mtime = pack_mtimes_layer[pack_id as usize];
        for e in &idx.entries {
            // Only 20-byte object ids are considered; anything else is skipped.
            if e.oid.len() != 20 {
                continue;
            }
            let Ok(oid) = ObjectId::from_bytes(&e.oid) else {
                continue;
            };
            // Objects already present in a base layer do not belong to the new layer.
            if opts.incremental && base_oids.contains(&oid) {
                continue;
            }
            let cand = MidxEntry {
                oid,
                pack_id,
                offset: e.offset,
                pack_mtime: mtime,
            };
            match best.get(&oid) {
                None => {
                    best.insert(oid, cand);
                }
                Some(cur) => {
                    if midx_pick_better_entry(cur, pack_id, e.offset, mtime, preferred_u32) {
                        best.insert(oid, cand);
                    }
                }
            }
        }
    }

    // An incremental layer that contributes no new objects gets no bitmap sidecars.
    let bitmap_placeholders =
        opts.write_bitmap_placeholders && (!opts.incremental || !best.is_empty());

    let omit_embedded_ridx = opts.write_rev_placeholder;
    let (out, rev_sidecar_order) = build_midx_bytes(
        work_names,
        &indexes,
        preferred_idx,
        bitmap_placeholders,
        omit_embedded_ridx,
    )?;

    // The last 20 bytes of the serialized MIDX are used as its checksum (presumably
    // the SHA-1 trailer written by `build_midx_bytes`); its hex form names the layer
    // and sidecar files.
    let hash = &out[out.len() - 20..];
    let hash_hex = hex::encode(hash);
    let hash_arr: [u8; 20] = hash
        .try_into()
        .map_err(|_| Error::CorruptObject("midx hash length mismatch".to_owned()))?;

    if opts.incremental {
        let root_midx = pack_dir.join("multi-pack-index");
        let chain_path = chain_file_path(pack_dir);
        let chain_existed = chain_path.exists();

        // First incremental write on top of a plain root MIDX: the root is linked
        // into the chain and becomes its first layer. Otherwise continue the existing
        // chain; an unreadable chain is treated as empty.
        let mut chain = if root_midx.exists() && !chain_existed {
            let root_hex = midx_checksum_hex_from_path(&root_midx)?;
            link_root_midx_into_chain(pack_dir, &root_hex)?;
            vec![root_hex]
        } else {
            read_chain_layer_hashes(pack_dir).unwrap_or_default()
        };

        chain.push(hash_hex.clone());

        let midx_d = midx_d_dir(pack_dir);
        fs::create_dir_all(&midx_d).map_err(Error::Io)?;

        // Write the new layer before the chain file that references it.
        let layer_path = midx_d.join(format!("multi-pack-index-{hash_hex}.midx"));
        fs::write(&layer_path, &out).map_err(Error::Io)?;

        // Chain file: one layer checksum per line, in chain order.
        let mut chain_data = String::new();
        for h in &chain {
            chain_data.push_str(h);
            chain_data.push('\n');
        }
        fs::write(chain_file_path(pack_dir), chain_data.as_bytes()).map_err(Error::Io)?;

        clear_stale_split_layers(pack_dir, &chain)?;

        // The chain now supersedes any root-level MIDX and its sidecars.
        let _ = fs::remove_file(pack_dir.join("multi-pack-index"));
        scrub_root_midx_sidecars(pack_dir)?;
        if bitmap_placeholders {
            let full = hex::encode(hash);
            fs::write(midx_d.join(format!("multi-pack-index-{full}.bitmap")), [])
                .map_err(Error::Io)?;
            if opts.write_rev_placeholder {
                let rev_path = midx_d.join(format!("multi-pack-index-{full}.rev"));
                if let Some(order) = rev_sidecar_order.as_ref() {
                    write_midx_rev_sidecar(&rev_path, order, &hash_arr)?;
                } else {
                    fs::write(rev_path, []).map_err(Error::Io)?;
                }
            }
        }
    } else {
        // Non-incremental write: empty out the layer directory (individual removal
        // failures are ignored) and recreate it, then write the root MIDX.
        let midx_d = midx_d_dir(pack_dir);
        if midx_d.exists() {
            for ent in fs::read_dir(&midx_d).map_err(Error::Io)? {
                let ent = ent.map_err(Error::Io)?;
                let _ = if ent.file_type().map_err(Error::Io)?.is_dir() {
                    fs::remove_dir_all(ent.path())
                } else {
                    fs::remove_file(ent.path())
                };
            }
        }
        fs::create_dir_all(&midx_d).map_err(Error::Io)?;

        let dest = pack_dir.join("multi-pack-index");
        fs::write(&dest, &out).map_err(Error::Io)?;

        // Remove sidecars of previous MIDX files, keeping any that match the new hash.
        scrub_root_midx_sidecars_except(pack_dir, Some(&hash_hex))?;

        if opts.write_bitmap_placeholders {
            fs::write(
                pack_dir.join(format!("multi-pack-index-{hash_hex}.bitmap")),
                [],
            )
            .map_err(Error::Io)?;
            if opts.write_rev_placeholder {
                let rev_path = pack_dir.join(format!("multi-pack-index-{hash_hex}.rev"));
                if let Some(order) = rev_sidecar_order.as_ref() {
                    write_midx_rev_sidecar(&rev_path, order, &hash_arr)?;
                } else {
                    fs::write(rev_path, []).map_err(Error::Io)?;
                }
            }
        }
    }

    Ok(())
}
1517
1518fn pack_names_match_layer(base_name: &str, disk_idx: &str) -> bool {
1519 if base_name == disk_idx {
1520 return true;
1521 }
1522 cmp_idx_or_pack_name(disk_idx, base_name).is_eq()
1523}
1524
1525fn scrub_root_midx_sidecars(pack_dir: &Path) -> Result<()> {
1526 scrub_root_midx_sidecars_except(pack_dir, None)
1527}
1528
1529fn scrub_root_midx_sidecars_except(pack_dir: &Path, keep_hex: Option<&str>) -> Result<()> {
1530 let Ok(rd) = fs::read_dir(pack_dir) else {
1531 return Ok(());
1532 };
1533 for ent in rd {
1534 let ent = ent.map_err(Error::Io)?;
1535 let name = ent.file_name().to_string_lossy().to_string();
1536 let Some(rest) = name.strip_prefix("multi-pack-index-") else {
1537 continue;
1538 };
1539 if !(rest.ends_with(".bitmap") || rest.ends_with(".rev")) {
1540 continue;
1541 }
1542 let hash_part = rest
1543 .strip_suffix(".bitmap")
1544 .or_else(|| rest.strip_suffix(".rev"))
1545 .unwrap_or(rest);
1546 if hash_part.len() != 40 {
1547 continue;
1548 }
1549 if keep_hex.is_some_and(|k| k == hash_part) {
1550 continue;
1551 }
1552 let _ = fs::remove_file(ent.path());
1553 }
1554 Ok(())
1555}