1use std::collections::{HashMap, HashSet};
13use std::fs;
14use std::io::{BufRead, BufReader};
15use std::path::Path;
16
17use sha1::{Digest, Sha1};
18
19use crate::error::{Error, Result};
20use crate::objects::ObjectId;
21use crate::pack::{read_pack_index, PackIndex};
22
/// Magic number at the start of a multi-pack-index file: ASCII `MIDX`.
const MIDX_SIGNATURE: u32 = u32::from_be_bytes(*b"MIDX");
/// The only file-format version accepted by `parse_midx_header`.
const MIDX_VERSION_V1: u8 = 1;
/// Hash identifier written into header byte 5 by `build_midx_bytes`.
const HASH_VERSION_SHA1: u8 = 1;
/// Fixed header length: signature (4), version, hash id, chunk count, one zero byte, pack count (4).
const MIDX_HEADER_SIZE: usize = 4 + 1 + 1 + 1 + 1 + 4;
/// One table-of-contents entry: 4-byte chunk id followed by an 8-byte file offset.
const CHUNK_TOC_ENTRY_SIZE: usize = 4 + 8;
/// Chunk id `PNAM`: NUL-terminated pack index names.
const MIDX_CHUNKID_PACKNAMES: u32 = u32::from_be_bytes(*b"PNAM");
/// Chunk id `OIDF`: 256-entry fanout table over the first OID byte.
const MIDX_CHUNKID_OIDFANOUT: u32 = u32::from_be_bytes(*b"OIDF");
/// Chunk id `OIDL`: sorted 20-byte object ids.
const MIDX_CHUNKID_OIDLOOKUP: u32 = u32::from_be_bytes(*b"OIDL");
/// Chunk id `OOFF`: per object, a 4-byte pack id and a 4-byte (possibly flagged) offset.
const MIDX_CHUNKID_OBJECTOFFSETS: u32 = u32::from_be_bytes(*b"OOFF");
/// Chunk id `LOFF`: 8-byte offsets referenced from flagged `OOFF` entries.
const MIDX_CHUNKID_LARGEOFFSETS: u32 = u32::from_be_bytes(*b"LOFF");
/// Chunk id `RIDX`: embedded reverse index (OID-table positions in pack order).
const MIDX_CHUNKID_REVINDEX: u32 = u32::from_be_bytes(*b"RIDX");
/// Chunk id `BTMP`: per-pack bitmap position / count pairs.
const MIDX_CHUNKID_BITMAPPED_PACKS: u32 = u32::from_be_bytes(*b"BTMP");

/// Magic number of the stand-alone reverse-index sidecar file: ASCII `RIDX`.
const RIDX_SIGNATURE: u32 = u32::from_be_bytes(*b"RIDX");
/// Version word written into the sidecar header.
const RIDX_VERSION: u32 = 1;
/// Sidecar header length: signature, version and hash id, 4 bytes each.
const RIDX_HEADER_SIZE: usize = 3 * 4;
/// Chunks written by `build_midx_bytes` are zero-padded to a multiple of this many bytes.
const MIDX_CHUNK_ALIGNMENT: usize = 4;

/// High bit of an `OOFF` offset word; when set, the low 31 bits index the `LOFF` chunk.
const MIDX_LARGE_OFFSET_NEEDED: u32 = 1 << 31;
44
/// One object chosen for the multi-pack-index, together with where its selected copy lives.
struct MidxEntry {
    /// Object id of the entry.
    oid: ObjectId,
    /// Position of the containing pack in the index list handed to `build_midx_bytes`.
    pack_id: u32,
    /// Offset taken from the pack index entry for this object.
    offset: u64,
    /// Modification time of the containing pack; consulted by `midx_pick_better_entry`
    /// when the same object appears in several packs.
    pack_mtime: std::time::SystemTime,
}
51
/// Options controlling how a multi-pack-index is written.
///
/// NOTE(review): none of these fields is consumed in the part of the file visible here,
/// so the per-field notes below are inferred from the names and should be confirmed
/// against the writer entry point.
#[derive(Debug, Clone, Default)]
pub struct WriteMultiPackIndexOptions {
    /// Presumably the position of the preferred pack in the pack list — TODO confirm.
    pub preferred_pack_idx: Option<u32>,
    /// Presumably the preferred pack given by file name instead of position — TODO confirm.
    pub preferred_pack_name: Option<String>,
    /// Presumably restricts (and orders) the packs to include — TODO confirm.
    pub pack_names_subset_ordered: Option<Vec<String>>,
    /// Presumably forwarded to `build_midx_bytes` as `write_bitmap_placeholders` — TODO confirm.
    pub write_bitmap_placeholders: bool,
    /// Presumably selects writing a new chain layer instead of a single file — TODO confirm.
    pub incremental: bool,
    /// Presumably requests a reverse-index sidecar file — TODO confirm.
    pub write_rev_placeholder: bool,
}
72
73fn normalize_pack_idx_basename(raw: &str) -> Result<String> {
74 let t = raw.trim();
75 let t = std::path::Path::new(t)
76 .file_name()
77 .and_then(|s| s.to_str())
78 .unwrap_or(t);
79 let t = t.strip_prefix("./").unwrap_or(t);
80 if t.ends_with(".idx") {
81 Ok(t.to_string())
82 } else if t.ends_with(".pack") {
83 Ok(format!("{}.idx", t.strip_suffix(".pack").unwrap_or(t)))
84 } else {
85 Ok(format!("{t}.idx"))
86 }
87}
88
/// Fields of the fixed MIDX header that later parsing steps need.
struct MidxFileHeader {
    /// Number of chunks listed in the table of contents (header byte 6).
    num_chunks: u8,
}
92
93fn parse_midx_header(data: &[u8]) -> Result<(MidxFileHeader, usize, u8)> {
94 if data.len() < MIDX_HEADER_SIZE + 20 {
95 return Err(Error::CorruptObject("midx file too small".to_owned()));
96 }
97 let sig = u32::from_be_bytes(data[0..4].try_into().unwrap());
98 if sig != MIDX_SIGNATURE {
99 return Err(Error::CorruptObject("bad MIDX signature".to_owned()));
100 }
101 let version = data[4];
102 if version != MIDX_VERSION_V1 {
103 return Err(Error::CorruptObject(format!(
104 "unsupported MIDX version {version}"
105 )));
106 }
107 let object_hash_bytes = data[5];
108 let num_chunks = data[6];
109 let _num_packs = u32::from_be_bytes(data[8..12].try_into().unwrap());
110 Ok((
111 MidxFileHeader { num_chunks },
112 MIDX_HEADER_SIZE,
113 object_hash_bytes,
114 ))
115}
116
117fn parse_pack_names_blob(pn: &[u8]) -> Result<Vec<String>> {
118 let mut names = Vec::new();
119 let mut start = 0usize;
120 for (i, &b) in pn.iter().enumerate() {
121 if b == 0 && i >= start {
122 if i > start {
123 let s = std::str::from_utf8(&pn[start..i])
124 .map_err(|_| Error::CorruptObject("non-utf8 pack name in MIDX".to_owned()))?;
125 names.push(s.to_string());
126 }
127 start = i + 1;
128 }
129 }
130 Ok(names)
131}
132
/// Orders `idx_or_pack_name` against `idx_name`, treating a trailing `pack`
/// on the left as equal to a trailing `idx` on the right.
///
/// After skipping the common prefix, the remainders are compared bytewise,
/// except that the exact pair (`pack`, `idx`) counts as `Equal`.
fn cmp_idx_or_pack_name(idx_or_pack_name: &str, idx_name: &str) -> std::cmp::Ordering {
    let lhs = idx_or_pack_name.as_bytes();
    let rhs = idx_name.as_bytes();

    let shared = lhs
        .iter()
        .zip(rhs.iter())
        .take_while(|(x, y)| x == y)
        .count();
    let (tail_l, tail_r) = (&lhs[shared..], &rhs[shared..]);

    if tail_l == b"pack" && tail_r == b"idx" {
        std::cmp::Ordering::Equal
    } else {
        tail_l.cmp(tail_r)
    }
}
149
150fn preferred_pack_index_by_mtime(pack_dir: &Path, names: &[String]) -> Result<Option<usize>> {
151 let mut best: Option<(usize, std::time::SystemTime)> = None;
152 for (i, n) in names.iter().enumerate() {
153 let meta = fs::metadata(pack_dir.join(n)).map_err(Error::Io)?;
154 let mtime = meta.modified().map_err(Error::Io)?;
155 match best {
156 None => best = Some((i, mtime)),
157 Some((_, t)) if mtime < t => best = Some((i, mtime)),
158 _ => {}
159 }
160 }
161 Ok(best.map(|(i, _)| i))
162}
163
/// Path of the `multi-pack-index.d` directory inside `pack_dir`.
fn midx_d_dir(pack_dir: &Path) -> std::path::PathBuf {
    let mut dir = pack_dir.to_path_buf();
    dir.push("multi-pack-index.d");
    dir
}
167
168fn chain_file_path(pack_dir: &Path) -> std::path::PathBuf {
169 midx_d_dir(pack_dir).join("multi-pack-index-chain")
170}
171
172fn read_chain_layer_hashes(pack_dir: &Path) -> Result<Vec<String>> {
173 let path = chain_file_path(pack_dir);
174 let f = fs::File::open(&path).map_err(Error::Io)?;
175 let mut out = Vec::new();
176 for line in BufReader::new(f).lines() {
177 let line = line.map_err(Error::Io)?;
178 let t = line.trim();
179 if t.is_empty() {
180 continue;
181 }
182 if t.len() != 40 || !t.chars().all(|c| c.is_ascii_hexdigit()) {
183 return Err(Error::CorruptObject(format!(
184 "invalid multi-pack-index chain line: {t}"
185 )));
186 }
187 out.push(t.to_ascii_lowercase());
188 }
189 Ok(out)
190}
191
192pub fn resolve_tip_midx_path(pack_dir: &Path) -> Option<std::path::PathBuf> {
194 let root = pack_dir.join("multi-pack-index");
195 if root.exists() {
196 return Some(root);
197 }
198 let hashes = read_chain_layer_hashes(pack_dir).ok()?;
199 let last = hashes.last()?;
200 Some(midx_d_dir(pack_dir).join(format!("multi-pack-index-{last}.midx")))
201}
202
203fn load_midx_file(path: &Path) -> Result<Vec<u8>> {
204 let data = fs::read(path).map_err(Error::Io)?;
205 let _ = parse_midx_header(&data)?;
206 Ok(data)
207}
208
209fn oids_and_packs_from_midx_data(data: &[u8]) -> Result<(HashSet<ObjectId>, Vec<String>)> {
210 let (_, hdr_end, _) = parse_midx_header(data)?;
211 let (pn_off, pn_len) = find_chunk(data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
212 let pack_names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
213 let (_ooff_off, ooff_len) = find_chunk(data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
214 let (oidl_off, oidl_len) = find_chunk(data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
215 let num_objects = ooff_len / 8;
216 if oidl_len != num_objects * 20 {
217 return Err(Error::CorruptObject(
218 "MIDX oid-lookup size mismatch".to_owned(),
219 ));
220 }
221 let mut oids = HashSet::with_capacity(num_objects);
222 for i in 0..num_objects {
223 let start = oidl_off + i * 20;
224 let oid = ObjectId::from_bytes(&data[start..start + 20])?;
225 oids.insert(oid);
226 }
227 Ok((oids, pack_names))
228}
229
230fn collect_incremental_base(pack_dir: &Path) -> Result<(HashSet<ObjectId>, HashSet<String>)> {
231 let mut oids = HashSet::new();
232 let mut packs = HashSet::new();
233 let root = pack_dir.join("multi-pack-index");
234 let chain_path = chain_file_path(pack_dir);
235 if chain_path.exists() {
236 for h in read_chain_layer_hashes(pack_dir)? {
237 let p = midx_d_dir(pack_dir).join(format!("multi-pack-index-{h}.midx"));
238 let data = load_midx_file(&p)?;
239 let (layer_oids, names) = oids_and_packs_from_midx_data(&data)?;
240 oids.extend(layer_oids);
241 for n in names {
242 packs.insert(n);
243 }
244 }
245 return Ok((oids, packs));
246 }
247 if root.exists() {
248 let data = load_midx_file(&root)?;
249 let (o, names) = oids_and_packs_from_midx_data(&data)?;
250 oids = o;
251 for n in names {
252 packs.insert(n);
253 }
254 }
255 Ok((oids, packs))
256}
257
258fn midx_checksum_hex_from_path(path: &Path) -> Result<String> {
259 let data = fs::read(path).map_err(Error::Io)?;
260 if data.len() < 20 {
261 return Err(Error::CorruptObject(
262 "midx too small for checksum".to_owned(),
263 ));
264 }
265 let hash = &data[data.len() - 20..];
266 Ok(hex::encode(hash))
267}
268
269fn hard_link_or_copy(src: &Path, dst: &Path) -> Result<()> {
270 let _ = fs::remove_file(dst);
271 if fs::hard_link(src, dst).is_ok() {
272 return Ok(());
273 }
274 fs::copy(src, dst).map_err(Error::Io)?;
275 Ok(())
276}
277
278fn link_root_midx_into_chain(pack_dir: &Path, root_checksum_hex: &str) -> Result<()> {
279 let midx_d = midx_d_dir(pack_dir);
280 fs::create_dir_all(&midx_d).map_err(Error::Io)?;
281 let dst_midx = midx_d.join(format!("multi-pack-index-{root_checksum_hex}.midx"));
282 hard_link_or_copy(&pack_dir.join("multi-pack-index"), &dst_midx)?;
283 let exts = ["bitmap", "rev"];
284 for ext in exts {
285 let src = pack_dir.join(format!("multi-pack-index-{root_checksum_hex}.{ext}"));
286 if src.exists() {
287 let dst = midx_d.join(format!("multi-pack-index-{root_checksum_hex}.{ext}"));
288 hard_link_or_copy(&src, &dst)?;
289 }
290 }
291 Ok(())
292}
293
294fn clear_stale_split_layers(pack_dir: &Path, keep: &[String]) -> Result<()> {
295 let midx_d = midx_d_dir(pack_dir);
296 if !midx_d.exists() {
297 return Ok(());
298 }
299 let keep: HashSet<&str> = keep.iter().map(|s| s.as_str()).collect();
300 for ent in fs::read_dir(&midx_d).map_err(Error::Io)? {
301 let ent = ent.map_err(Error::Io)?;
302 let name = ent.file_name().to_string_lossy().to_string();
303 let Some(rest) = name.strip_prefix("multi-pack-index-") else {
304 continue;
305 };
306 let Some((hash_part, _ext)) = rest.split_once('.') else {
307 continue;
308 };
309 if hash_part.len() == 40 && !keep.contains(hash_part) {
310 let _ = fs::remove_file(ent.path());
311 }
312 }
313 Ok(())
314}
315
316fn pack_mtime_for_midx(idx: &PackIndex) -> std::time::SystemTime {
317 fs::metadata(&idx.pack_path)
318 .and_then(|m| m.modified())
319 .unwrap_or(std::time::SystemTime::UNIX_EPOCH)
320}
321
322fn midx_pick_better_entry(
323 cur: &MidxEntry,
324 cand_pack: u32,
325 cand_offset: u64,
326 cand_mtime: std::time::SystemTime,
327 preferred_pack: Option<u32>,
328) -> bool {
329 let cur_pref = preferred_pack == Some(cur.pack_id);
330 let new_pref = preferred_pack == Some(cand_pack);
331 if new_pref && !cur_pref {
332 return true;
333 }
334 if cur_pref && !new_pref {
335 return false;
336 }
337 match cand_mtime.cmp(&cur.pack_mtime) {
338 std::cmp::Ordering::Greater => true,
339 std::cmp::Ordering::Less => false,
340 std::cmp::Ordering::Equal => {
341 if cand_pack != cur.pack_id {
342 cand_pack < cur.pack_id
343 } else {
344 cand_offset < cur.offset
345 }
346 }
347 }
348}
349
350fn build_midx_bytes(
351 idx_names: &[String],
352 indexes: &[PackIndex],
353 preferred_idx: Option<usize>,
354 write_bitmap_placeholders: bool,
355 omit_embedded_ridx_chunk: bool,
356) -> Result<(Vec<u8>, Option<Vec<u32>>)> {
357 let preferred_pack_idx = preferred_idx.map(|p| p as u32);
358 let pack_mtimes: Vec<std::time::SystemTime> = indexes.iter().map(pack_mtime_for_midx).collect();
359
360 let mut best: HashMap<ObjectId, MidxEntry> = HashMap::new();
361 for (pack_id, idx) in indexes.iter().enumerate() {
362 let pack_id = u32::try_from(pack_id).map_err(|_| {
363 Error::CorruptObject("too many pack files for multi-pack-index".to_owned())
364 })?;
365 let mtime = pack_mtimes[pack_id as usize];
366 for e in &idx.entries {
367 if e.oid.len() != 20 {
368 continue;
369 }
370 let Ok(oid) = ObjectId::from_bytes(&e.oid) else {
371 continue;
372 };
373 let cand = MidxEntry {
374 oid,
375 pack_id,
376 offset: e.offset,
377 pack_mtime: mtime,
378 };
379 match best.get(&oid) {
380 None => {
381 best.insert(oid, cand);
382 }
383 Some(cur) => {
384 if midx_pick_better_entry(cur, pack_id, e.offset, mtime, preferred_pack_idx) {
385 best.insert(oid, cand);
386 }
387 }
388 }
389 }
390 }
391
392 let mut entries: Vec<MidxEntry> = best.into_values().collect();
393 entries.sort_by(|a, b| a.oid.cmp(&b.oid));
394
395 let mut large_offsets: Vec<u64> = Vec::new();
396 for e in &entries {
397 if e.offset > u64::from(u32::MAX) {
398 return Err(Error::CorruptObject(
399 "object offset does not fit in multi-pack-index".to_owned(),
400 ));
401 }
402 }
403
404 let num_packs = indexes.len() as u32;
405
406 let mut pack_names_blob = Vec::new();
407 for name in idx_names {
408 pack_names_blob.extend_from_slice(name.as_bytes());
409 pack_names_blob.push(0);
410 }
411 let pad = (MIDX_CHUNK_ALIGNMENT - (pack_names_blob.len() % MIDX_CHUNK_ALIGNMENT))
412 % MIDX_CHUNK_ALIGNMENT;
413 pack_names_blob.extend(std::iter::repeat_n(0u8, pad));
414 let chunk_pnam = pack_names_blob;
415
416 let mut chunk_oidf = vec![0u8; 256 * 4];
417 let mut j = 0usize;
418 for i in 0..256 {
419 while j < entries.len() && entries[j].oid.as_bytes()[0] <= i as u8 {
420 j += 1;
421 }
422 chunk_oidf[i * 4..(i + 1) * 4].copy_from_slice(&(j as u32).to_be_bytes());
423 }
424
425 let mut chunk_oidl = Vec::with_capacity(entries.len() * 20);
426 for e in &entries {
427 chunk_oidl.extend_from_slice(e.oid.as_bytes());
428 }
429
430 let mut chunk_ooff = Vec::with_capacity(entries.len() * 8);
431 for e in &entries {
432 chunk_ooff.extend_from_slice(&e.pack_id.to_be_bytes());
433 let needs_large = e.offset >= u64::from(MIDX_LARGE_OFFSET_NEEDED);
434 let encoded = if needs_large {
435 let slot = u32::try_from(large_offsets.len()).map_err(|_| {
436 Error::CorruptObject("too many large offsets in multi-pack-index".to_owned())
437 })?;
438 large_offsets.push(e.offset);
439 MIDX_LARGE_OFFSET_NEEDED | slot
440 } else {
441 u32::try_from(e.offset).map_err(|_| {
442 Error::CorruptObject("object offset overflow in multi-pack-index".to_owned())
443 })?
444 };
445 chunk_ooff.extend_from_slice(&encoded.to_be_bytes());
446 }
447
448 let chunk_loff: Vec<u8> = if large_offsets.is_empty() {
449 Vec::new()
450 } else {
451 let mut v = Vec::with_capacity(large_offsets.len() * 8);
452 for off in &large_offsets {
453 v.extend_from_slice(&off.to_be_bytes());
454 }
455 v
456 };
457
458 let pref = preferred_pack_idx;
459 let mut order: Vec<u32> = (0..entries.len() as u32).collect();
460 order.sort_by(|&ai, &bi| {
461 let a = &entries[ai as usize];
462 let b = &entries[bi as usize];
463 let a_pref = pref == Some(a.pack_id);
464 let b_pref = pref == Some(b.pack_id);
465 b_pref
466 .cmp(&a_pref)
467 .then_with(|| a.pack_id.cmp(&b.pack_id))
468 .then_with(|| a.offset.cmp(&b.offset))
469 .then_with(|| ai.cmp(&bi))
470 });
471
472 let mut chunk_ridx = Vec::with_capacity(entries.len() * 4);
473 for oid_idx in &order {
474 chunk_ridx.extend_from_slice(&oid_idx.to_be_bytes());
475 }
476
477 let rev_sidecar_order = if omit_embedded_ridx_chunk && write_bitmap_placeholders {
480 Some(order.clone())
481 } else {
482 None
483 };
484 let chunk_btmp: Vec<u8> = if write_bitmap_placeholders {
485 let mut v = Vec::new();
486 let mut cumulative = 0u32;
487 for idx in indexes {
488 let n = u32::try_from(idx.entries.len()).map_err(|_| {
489 Error::CorruptObject("too many objects in pack for MIDX BTMP".to_owned())
490 })?;
491 v.extend_from_slice(&cumulative.to_be_bytes());
492 v.extend_from_slice(&n.to_be_bytes());
493 cumulative = cumulative.saturating_add(n);
494 }
495 let pad = (MIDX_CHUNK_ALIGNMENT - (v.len() % MIDX_CHUNK_ALIGNMENT)) % MIDX_CHUNK_ALIGNMENT;
496 v.extend(std::iter::repeat_n(0u8, pad));
497 v
498 } else {
499 Vec::new()
500 };
501
502 let mut chunks: Vec<(u32, Vec<u8>)> = vec![
503 (MIDX_CHUNKID_PACKNAMES, chunk_pnam),
504 (MIDX_CHUNKID_OIDFANOUT, chunk_oidf),
505 (MIDX_CHUNKID_OIDLOOKUP, chunk_oidl),
506 (MIDX_CHUNKID_OBJECTOFFSETS, chunk_ooff),
507 ];
508 if !chunk_loff.is_empty() {
509 chunks.push((MIDX_CHUNKID_LARGEOFFSETS, chunk_loff));
510 }
511 if (pref.is_some() || write_bitmap_placeholders) && !omit_embedded_ridx_chunk {
512 chunks.push((MIDX_CHUNKID_REVINDEX, chunk_ridx));
513 }
514 if write_bitmap_placeholders {
515 chunks.push((MIDX_CHUNKID_BITMAPPED_PACKS, chunk_btmp));
516 }
517
518 let num_chunks: u8 = chunks
519 .len()
520 .try_into()
521 .map_err(|_| Error::CorruptObject("too many MIDX chunks".to_owned()))?;
522
523 let mut body = Vec::new();
524 let mut cur_offset =
525 MIDX_HEADER_SIZE as u64 + ((chunks.len() + 1) * CHUNK_TOC_ENTRY_SIZE) as u64;
526
527 for (id, data) in &chunks {
528 body.extend_from_slice(&id.to_be_bytes());
529 body.extend_from_slice(&cur_offset.to_be_bytes());
530 cur_offset += data.len() as u64;
531 }
532 body.extend_from_slice(&0u32.to_be_bytes());
533 body.extend_from_slice(&cur_offset.to_be_bytes());
534
535 for (_, data) in &chunks {
536 body.extend_from_slice(data);
537 }
538
539 let mut out = Vec::with_capacity(MIDX_HEADER_SIZE + body.len() + 20);
540 out.extend_from_slice(&MIDX_SIGNATURE.to_be_bytes());
541 out.push(MIDX_VERSION_V1);
542 out.push(HASH_VERSION_SHA1);
543 out.push(num_chunks);
544 out.push(0);
545 out.extend_from_slice(&num_packs.to_be_bytes());
546 out.extend_from_slice(&body);
547
548 let mut hasher = Sha1::new();
549 hasher.update(&out);
550 let hash = hasher.finalize();
551 out.extend_from_slice(&hash);
552
553 Ok((out, rev_sidecar_order))
554}
555
556fn write_midx_rev_sidecar(
558 path: &Path,
559 pack_order: &[u32],
560 midx_file_hash: &[u8; 20],
561) -> Result<()> {
562 let mut body = Vec::with_capacity(RIDX_HEADER_SIZE + pack_order.len() * 4 + 20);
563 body.extend_from_slice(&RIDX_SIGNATURE.to_be_bytes());
564 body.extend_from_slice(&RIDX_VERSION.to_be_bytes());
565 body.extend_from_slice(&1u32.to_be_bytes());
566 for idx in pack_order {
567 body.extend_from_slice(&idx.to_be_bytes());
568 }
569 body.extend_from_slice(midx_file_hash);
570 fs::write(path, body).map_err(Error::Io)
571}
572
573fn find_chunk(data: &[u8], header_end: usize, chunk_id: u32) -> Result<(usize, usize)> {
574 let (hdr, _, _) = parse_midx_header(data)?;
575 let n = hdr.num_chunks as usize;
576 let pos = header_end;
577 let toc_end = pos + (n + 1) * CHUNK_TOC_ENTRY_SIZE;
578 if data.len() < toc_end + 20 {
579 return Err(Error::CorruptObject(
580 "truncated MIDX chunk table".to_owned(),
581 ));
582 }
583 for i in 0..n {
584 let base = pos + i * CHUNK_TOC_ENTRY_SIZE;
585 let id = u32::from_be_bytes(data[base..base + 4].try_into().unwrap());
586 let off = u64::from_be_bytes(data[base + 4..base + 12].try_into().unwrap()) as usize;
587 if id == chunk_id {
588 let next_off = if i + 1 < n {
589 let nb = pos + (i + 1) * CHUNK_TOC_ENTRY_SIZE;
590 u64::from_be_bytes(data[nb + 4..nb + 12].try_into().unwrap()) as usize
591 } else {
592 let term = pos + n * CHUNK_TOC_ENTRY_SIZE;
593 u64::from_be_bytes(data[term + 4..term + 12].try_into().unwrap()) as usize
594 };
595 return Ok((off, next_off.saturating_sub(off)));
596 }
597 }
598 Err(Error::CorruptObject(format!(
599 "MIDX chunk {chunk_id:08x} not found"
600 )))
601}
602
603pub fn read_midx_pack_idx_names(objects_dir: &Path) -> Result<Vec<String>> {
610 let pack_dir = objects_dir.join("pack");
611 let path = resolve_tip_midx_path(&pack_dir)
612 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
613 let data = fs::read(&path).map_err(Error::Io)?;
614 let (_, hdr_end, _) = parse_midx_header(&data)?;
615 let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
616 parse_pack_names_blob(&data[pn_off..pn_off + pn_len])
617}
618
619pub fn midx_checksum_hex(objects_dir: &Path) -> Result<String> {
621 let pack_dir = objects_dir.join("pack");
622 let path = resolve_tip_midx_path(&pack_dir)
623 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
624 midx_checksum_hex_from_path(&path)
625}
626
627pub fn format_midx_show_objects(objects_dir: &Path) -> Result<String> {
630 let mut out = format_midx_dump(objects_dir)?;
631 let pack_dir = objects_dir.join("pack");
632 let path = resolve_tip_midx_path(&pack_dir)
633 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
634 let data = fs::read(&path).map_err(Error::Io)?;
635 let (_, hdr_end, _) = parse_midx_header(&data)?;
636 let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
637 let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
638 let (oidl_off, oidl_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
639 let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
640 if oidl_len % 20 != 0 || ooff_len % 8 != 0 {
641 return Err(Error::CorruptObject(
642 "bad MIDX oid-lookup / object-offsets size".to_owned(),
643 ));
644 }
645 let num = oidl_len / 20;
646 if num * 8 != ooff_len {
647 return Err(Error::CorruptObject(
648 "MIDX oid count does not match object-offsets".to_owned(),
649 ));
650 }
651 for i in 0..num {
652 let oid = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])
653 .map_err(|e| Error::CorruptObject(e.to_string()))?;
654 let base = ooff_off + i * 8;
655 let pack_id = u32::from_be_bytes(data[base..base + 4].try_into().unwrap()) as usize;
656 let offset = u32::from_be_bytes(data[base + 4..base + 8].try_into().unwrap()) as u64;
657 let pack_name = names
658 .get(pack_id)
659 .ok_or_else(|| Error::CorruptObject("pack id out of range in MIDX".to_owned()))?;
660 out.push_str(&format!("{} {}\t{}\n", oid.to_hex(), offset, pack_name));
661 }
662 Ok(out)
663}
664
665pub fn format_midx_dump(objects_dir: &Path) -> Result<String> {
666 let pack_dir = objects_dir.join("pack");
667 let path = resolve_tip_midx_path(&pack_dir)
668 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
669 let data = fs::read(&path).map_err(Error::Io)?;
670 let (hdr, hdr_end, _) = parse_midx_header(&data)?;
671 let sig = u32::from_be_bytes(data[0..4].try_into().unwrap());
672 let version = data[4];
673 let hash_len = data[5];
674 let num_chunks = hdr.num_chunks;
675 let num_packs = u32::from_be_bytes(data[8..12].try_into().unwrap());
676
677 let mut chunk_tags: Vec<&'static str> = Vec::new();
678 let n = num_chunks as usize;
679 let pos = hdr_end;
680 let toc_end = pos + (n + 1) * CHUNK_TOC_ENTRY_SIZE;
681 if data.len() < toc_end + 20 {
682 return Err(Error::CorruptObject(
683 "truncated MIDX chunk table".to_owned(),
684 ));
685 }
686 for i in 0..n {
687 let base = pos + i * CHUNK_TOC_ENTRY_SIZE;
688 let id = u32::from_be_bytes(data[base..base + 4].try_into().unwrap());
689 let tag = match id {
690 x if x == MIDX_CHUNKID_PACKNAMES => "pack-names",
691 x if x == MIDX_CHUNKID_OIDFANOUT => "oid-fanout",
692 x if x == MIDX_CHUNKID_OIDLOOKUP => "oid-lookup",
693 x if x == MIDX_CHUNKID_OBJECTOFFSETS => "object-offsets",
694 x if x == MIDX_CHUNKID_REVINDEX => "revindex",
695 x if x == 0x4254_4d50 => "bitmapped-packs",
696 _ => "unknown",
697 };
698 chunk_tags.push(tag);
699 }
700
701 let (_ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
702 let num_objects = ooff_len / 8;
703
704 let pack_names = read_midx_pack_idx_names(objects_dir)?;
705
706 let mut out = String::new();
707 out.push_str(&format!(
708 "header: {:08x} {} {} {} {}\n",
709 sig, version, hash_len, num_chunks, num_packs
710 ));
711 out.push_str("chunks:");
712 for t in &chunk_tags {
713 out.push(' ');
714 out.push_str(t);
715 }
716 out.push('\n');
717 out.push_str(&format!("num_objects: {num_objects}\n"));
718 out.push_str("packs:\n");
719 for n in &pack_names {
720 out.push_str(n);
721 out.push('\n');
722 }
723 out.push_str(&format!("object-dir: {}\n", objects_dir.display()));
724 Ok(out)
725}
726
/// In-memory tables loaded from a multi-pack-index by `load_midx_reuse_tables`.
#[derive(Debug, Clone)]
pub struct MidxReuseTables {
    /// Object ids in the order of the oid-lookup chunk; `global_bitmap_bit`
    /// binary-searches this vector, so it is expected to be sorted.
    pub oids: Vec<ObjectId>,
    /// Per object (same indexing as `oids`): pack id and offset read from the
    /// object-offsets chunk.
    pub pack_and_offset: Vec<(u32, u64)>,
    /// Reverse-index chunk contents: for each rank, the index into `oids`.
    pub rid_order: Vec<u32>,
    /// Inverse of `rid_order`: for each index into `oids`, its rank.
    pub oid_idx_to_rank: Vec<u32>,
}
743
744pub fn load_midx_reuse_tables(objects_dir: &Path) -> Result<Option<MidxReuseTables>> {
748 let pack_dir = objects_dir.join("pack");
749 let Some(path) = resolve_tip_midx_path(&pack_dir) else {
750 return Ok(None);
751 };
752 let data = fs::read(&path).map_err(Error::Io)?;
753 let (_, hdr_end, _) = parse_midx_header(&data)?;
754 let (oidl_off, oid_l_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
755 let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
756 let Ok((ridx_off, ridx_len)) = find_chunk(&data, hdr_end, MIDX_CHUNKID_REVINDEX) else {
757 return Ok(None);
758 };
759 if oid_l_len % 20 != 0 || ooff_len != oid_l_len / 20 * 8 {
760 return Err(Error::CorruptObject(
761 "MIDX OID / offset chunk size mismatch".to_owned(),
762 ));
763 }
764 let num_objects = oid_l_len / 20;
765 if ridx_len != num_objects.saturating_mul(4) {
766 return Err(Error::CorruptObject(
767 "MIDX reverse index length does not match object count".to_owned(),
768 ));
769 }
770 if num_objects == 0 {
771 return Ok(None);
772 }
773
774 let mut oids = Vec::with_capacity(num_objects);
775 for i in 0..num_objects {
776 let base = oidl_off + i * 20;
777 oids.push(ObjectId::from_bytes(&data[base..base + 20])?);
778 }
779
780 let mut pack_and_offset = Vec::with_capacity(num_objects);
781 for i in 0..num_objects {
782 let ob = ooff_off + i * 8;
783 let pack_id = u32::from_be_bytes(data[ob..ob + 4].try_into().unwrap());
784 let off32 = u32::from_be_bytes(data[ob + 4..ob + 8].try_into().unwrap());
785 pack_and_offset.push((pack_id, u64::from(off32)));
786 }
787
788 let mut rid_order = Vec::with_capacity(num_objects);
789 for i in 0..num_objects {
790 let base = ridx_off + i * 4;
791 rid_order.push(u32::from_be_bytes(data[base..base + 4].try_into().unwrap()));
792 }
793
794 let mut oid_idx_to_rank = vec![0u32; num_objects];
795 for (rank, &oid_idx) in rid_order.iter().enumerate() {
796 let idx = usize::try_from(oid_idx)
797 .map_err(|_| Error::CorruptObject("bad MIDX reverse index entry".to_owned()))?;
798 if idx >= num_objects {
799 return Err(Error::CorruptObject(
800 "MIDX reverse index out of range".to_owned(),
801 ));
802 }
803 oid_idx_to_rank[idx] = u32::try_from(rank)
804 .map_err(|_| Error::CorruptObject("too many MIDX objects".to_owned()))?;
805 }
806
807 Ok(Some(MidxReuseTables {
808 oids,
809 pack_and_offset,
810 rid_order,
811 oid_idx_to_rank,
812 }))
813}
814
815impl MidxReuseTables {
816 #[must_use]
818 pub fn global_bitmap_bit(&self, oid: &ObjectId) -> Option<u32> {
819 let oid_idx = self.oids.binary_search(oid).ok()?;
820 Some(self.oid_idx_to_rank[oid_idx])
821 }
822}
823
/// One record of the bitmapped-packs (`BTMP`) chunk, as returned by `read_midx_btmp_ranges`.
#[derive(Debug, Clone, Copy)]
pub struct MidxBtmpPackRange {
    /// Position of the record within the chunk, used as the pack id.
    pub pack_id: u32,
    /// First 4-byte word of the record.
    /// NOTE(review): presumably the pack's first bit position in the bitmap order — confirm.
    pub bitmap_pos: u32,
    /// Second 4-byte word of the record.
    /// NOTE(review): presumably the number of bitmap positions belonging to the pack — confirm.
    pub bitmap_nr: u32,
}
834
835pub fn read_midx_btmp_ranges(objects_dir: &Path) -> Result<Vec<MidxBtmpPackRange>> {
839 let pack_dir = objects_dir.join("pack");
840 let Some(path) = resolve_tip_midx_path(&pack_dir) else {
841 return Ok(Vec::new());
842 };
843 let data = fs::read(&path).map_err(Error::Io)?;
844 let (_, hdr_end, _) = parse_midx_header(&data)?;
845 let Ok((btmp_off, btmp_len)) = find_chunk(&data, hdr_end, MIDX_CHUNKID_BITMAPPED_PACKS) else {
846 return Ok(Vec::new());
847 };
848 if btmp_len == 0 || btmp_len % 8 != 0 {
849 return Err(Error::CorruptObject(
850 "invalid MIDX BTMP chunk length".to_owned(),
851 ));
852 }
853 let num_packs = u32::from_be_bytes(data[8..12].try_into().unwrap());
854 let n_entries = btmp_len / 8;
855 if u32::try_from(n_entries).ok() != Some(num_packs) {
856 return Err(Error::CorruptObject(
857 "MIDX BTMP entry count does not match num_packs".to_owned(),
858 ));
859 }
860 let mut out = Vec::with_capacity(n_entries);
861 for i in 0..n_entries {
862 let base = btmp_off + i * 8;
863 let bitmap_pos = u32::from_be_bytes(data[base..base + 4].try_into().unwrap());
864 let bitmap_nr = u32::from_be_bytes(data[base + 4..base + 8].try_into().unwrap());
865 out.push(MidxBtmpPackRange {
866 pack_id: u32::try_from(i)
867 .map_err(|_| Error::CorruptObject("too many packs in MIDX BTMP".to_owned()))?,
868 bitmap_pos,
869 bitmap_nr,
870 });
871 }
872 Ok(out)
873}
874
875pub fn midx_lookup_pack_and_offset(objects_dir: &Path, oid: &ObjectId) -> Result<(u32, u64)> {
877 let pack_dir = objects_dir.join("pack");
878 let path = resolve_tip_midx_path(&pack_dir)
879 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
880 let data = fs::read(&path).map_err(Error::Io)?;
881 let (_, hdr_end, _) = parse_midx_header(&data)?;
882 let (fanout_off, fanout_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDFANOUT)?;
883 let (oidl_off, oid_l_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
884 let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
885 if fanout_len != 256 * 4 || oid_l_len % 20 != 0 || ooff_len != oid_l_len / 20 * 8 {
886 return Err(Error::CorruptObject("truncated MIDX OID chunks".to_owned()));
887 }
888 let num_objects = oid_l_len / 20;
889 let first = oid.as_bytes()[0] as usize;
890 let j0 = if first == 0 {
891 0usize
892 } else {
893 u32::from_be_bytes(
894 data[fanout_off + (first - 1) * 4..fanout_off + first * 4]
895 .try_into()
896 .unwrap(),
897 ) as usize
898 };
899 let j1 = u32::from_be_bytes(
900 data[fanout_off + first * 4..fanout_off + (first + 1) * 4]
901 .try_into()
902 .unwrap(),
903 ) as usize;
904 let mut lo = j0;
905 let mut hi = j1;
906 while lo < hi {
907 let mid = (lo + hi) / 2;
908 let base = oidl_off + mid * 20;
909 let cmp = data[base..base + 20].cmp(oid.as_bytes());
910 if cmp == std::cmp::Ordering::Less {
911 lo = mid + 1;
912 } else {
913 hi = mid;
914 }
915 }
916 if lo >= num_objects {
917 return Err(Error::CorruptObject(format!(
918 "object {} not in multi-pack-index",
919 oid.to_hex()
920 )));
921 }
922 let base = oidl_off + lo * 20;
923 if data[base..base + 20] != *oid.as_bytes() {
924 return Err(Error::CorruptObject(format!(
925 "object {} not in multi-pack-index",
926 oid.to_hex()
927 )));
928 }
929 let ob = ooff_off + lo * 8;
930 let pack_id = u32::from_be_bytes(data[ob..ob + 4].try_into().unwrap());
931 let off32 = u32::from_be_bytes(data[ob + 4..ob + 8].try_into().unwrap());
932 Ok((pack_id, u64::from(off32)))
933}
934
935pub fn midx_oid_listed_in_tip(objects_dir: &Path, oid: &ObjectId) -> Result<Option<bool>> {
939 let pack_dir = objects_dir.join("pack");
940 let Some(midx_path) = resolve_tip_midx_path(&pack_dir) else {
941 return Ok(None);
942 };
943 let data = fs::read(&midx_path).map_err(Error::Io)?;
944 let (_, hdr_end, hash_bytes) = parse_midx_header(&data)?;
945 if hash_bytes != 1 {
946 eprintln!(
947 "error: multi-pack-index hash version {} does not match version 1",
948 hash_bytes
949 );
950 return Err(Error::CorruptObject(
951 "multi-pack-index hash version mismatch".to_owned(),
952 ));
953 }
954 let (oidf_off, oidf_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDFANOUT)?;
955 if oidf_len != 256 * 4 {
956 eprintln!("error: multi-pack-index OID fanout is of the wrong size");
957 return Err(Error::CorruptObject(
958 "multi-pack-index OID fanout is of the wrong size".to_owned(),
959 ));
960 }
961 let (oidl_off, oidl_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
962 let (_ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
963 let num_objects = ooff_len / 8;
964 if oidl_len != num_objects * 20 || ooff_len != num_objects * 8 {
965 if oidl_len != num_objects * 20 {
966 eprintln!("error: multi-pack-index OID lookup chunk is the wrong size");
967 } else {
968 eprintln!("error: multi-pack-index object offset chunk is the wrong size");
969 }
970 return Err(Error::CorruptObject("midx chunk size mismatch".to_owned()));
971 }
972
973 let first = oid.as_bytes()[0] as usize;
974 let lo = if first == 0 {
975 0u32
976 } else {
977 u32::from_be_bytes(
978 data[oidf_off + (first - 1) * 4..oidf_off + first * 4]
979 .try_into()
980 .unwrap(),
981 )
982 };
983 let hi = u32::from_be_bytes(
984 data[oidf_off + first * 4..oidf_off + (first + 1) * 4]
985 .try_into()
986 .unwrap(),
987 );
988 if lo > hi || hi as usize > num_objects {
989 eprintln!(
990 "error: oid fanout out of order: fanout[{}] = {:08x} > {:08x} = fanout[{}]",
991 first.saturating_sub(1),
992 lo,
993 hi,
994 first
995 );
996 return Err(Error::CorruptObject("oid fanout out of order".to_owned()));
997 }
998
999 let mut i = lo as usize;
1000 while i < hi as usize {
1001 let o = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])?;
1002 match o.cmp(oid) {
1003 std::cmp::Ordering::Equal => return Ok(Some(true)),
1004 std::cmp::Ordering::Greater => return Ok(Some(false)),
1005 std::cmp::Ordering::Less => i += 1,
1006 }
1007 }
1008 Ok(Some(false))
1009}
1010
1011pub fn try_read_object_via_midx(
1016 objects_dir: &Path,
1017 oid: &ObjectId,
1018) -> Result<Option<crate::objects::Object>> {
1019 let pack_dir = objects_dir.join("pack");
1020 let Some(midx_path) = resolve_tip_midx_path(&pack_dir) else {
1021 return Ok(None);
1022 };
1023 let data = fs::read(&midx_path).map_err(Error::Io)?;
1024 let (_, hdr_end, hash_bytes) = parse_midx_header(&data)?;
1025 let num_packs_hdr = u32::from_be_bytes(data[8..12].try_into().unwrap());
1026 if hash_bytes != 1 {
1027 eprintln!(
1028 "error: multi-pack-index hash version {} does not match version 1",
1029 hash_bytes
1030 );
1031 return Err(Error::CorruptObject(
1032 "multi-pack-index hash version mismatch".to_owned(),
1033 ));
1034 }
1035 let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1036 let pack_names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
1037 if pack_names.len() != num_packs_hdr as usize {
1038 return Err(Error::CorruptObject(
1039 "multi-pack-index pack-name chunk is too short".to_owned(),
1040 ));
1041 }
1042 let (oidf_off, oidf_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDFANOUT)?;
1043 if oidf_len != 256 * 4 {
1044 eprintln!("error: multi-pack-index OID fanout is of the wrong size");
1045 return Err(Error::CorruptObject(
1046 "multi-pack-index OID fanout is of the wrong size".to_owned(),
1047 ));
1048 }
1049 let (oidl_off, oidl_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
1050 let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1051 let num_objects = ooff_len / 8;
1052 if oidl_len != num_objects * 20 {
1053 eprintln!("error: multi-pack-index OID lookup chunk is the wrong size");
1054 return Err(Error::CorruptObject(
1055 "multi-pack-index OID lookup chunk is the wrong size".to_owned(),
1056 ));
1057 }
1058 if ooff_len != num_objects * 8 {
1059 eprintln!("error: multi-pack-index object offset chunk is the wrong size");
1060 return Err(Error::CorruptObject(
1061 "multi-pack-index object offset chunk is the wrong size".to_owned(),
1062 ));
1063 }
1064 let loff = find_chunk(&data, hdr_end, MIDX_CHUNKID_LARGEOFFSETS).ok();
1065 let ridx = find_chunk(&data, hdr_end, MIDX_CHUNKID_REVINDEX).ok();
1066
1067 if let Some((_, rlen)) = ridx {
1068 if rlen != num_objects * 4 {
1069 eprintln!("error: multi-pack-index reverse-index chunk is the wrong size");
1070 eprintln!("warning: multi-pack bitmap is missing required reverse index");
1071 }
1072 }
1073
1074 let first = oid.as_bytes()[0] as usize;
1075 let lo = if first == 0 {
1076 0u32
1077 } else {
1078 u32::from_be_bytes(
1079 data[oidf_off + (first - 1) * 4..oidf_off + first * 4]
1080 .try_into()
1081 .unwrap(),
1082 )
1083 };
1084 let hi = u32::from_be_bytes(
1085 data[oidf_off + first * 4..oidf_off + (first + 1) * 4]
1086 .try_into()
1087 .unwrap(),
1088 );
1089 if lo > hi || hi as usize > num_objects {
1090 eprintln!(
1091 "error: oid fanout out of order: fanout[{}] = {:08x} > {:08x} = fanout[{}]",
1092 first.saturating_sub(1),
1093 lo,
1094 hi,
1095 first
1096 );
1097 return Err(Error::CorruptObject("oid fanout out of order".to_owned()));
1098 }
1099
1100 let mut pos = None;
1101 let mut i = lo as usize;
1102 while i < hi as usize {
1103 let o = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])?;
1104 let c = o.cmp(oid);
1105 if c == std::cmp::Ordering::Equal {
1106 pos = Some(i);
1107 break;
1108 }
1109 if c == std::cmp::Ordering::Greater {
1110 break;
1111 }
1112 i += 1;
1113 }
1114 let Some(pos) = pos else {
1115 return Ok(None);
1116 };
1117
1118 let obase = ooff_off + pos * 8;
1119 let pack_id = u32::from_be_bytes(data[obase..obase + 4].try_into().unwrap());
1120 let raw_off = u32::from_be_bytes(data[obase + 4..obase + 8].try_into().unwrap());
1121 let _offset = if (raw_off & MIDX_LARGE_OFFSET_NEEDED) != 0 {
1122 let Some((loff_off, loff_len)) = loff else {
1123 return Err(Error::CorruptObject(
1124 "multi-pack-index large offset missing LOFF chunk".to_owned(),
1125 ));
1126 };
1127 let idx = (raw_off & !MIDX_LARGE_OFFSET_NEEDED) as usize;
1128 let need = (idx + 1) * 8;
1129 if loff_len < need {
1130 return Err(Error::CorruptObject(
1131 "multi-pack-index large offset out of bounds".to_owned(),
1132 ));
1133 }
1134 u64::from_be_bytes(
1135 data[loff_off + idx * 8..loff_off + (idx + 1) * 8]
1136 .try_into()
1137 .unwrap(),
1138 )
1139 } else {
1140 raw_off as u64
1141 };
1142
1143 let idx_name = pack_names
1144 .get(pack_id as usize)
1145 .ok_or_else(|| Error::CorruptObject("bad pack-int-id".to_owned()))?;
1146 let idx_path = pack_dir.join(idx_name);
1147 let idx = crate::pack::read_pack_index(&idx_path)?;
1148 crate::pack::read_object_from_pack(&idx, oid).map(Some)
1149}
1150
1151pub fn read_midx_preferred_idx_name(objects_dir: &Path) -> Result<String> {
1152 let pack_dir = objects_dir.join("pack");
1153 let path = resolve_tip_midx_path(&pack_dir)
1154 .ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
1155 let data = fs::read(&path).map_err(Error::Io)?;
1156 let (_, hdr_end, _) = parse_midx_header(&data)?;
1157 let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
1158 let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
1159 let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
1160 let (ridx_off, ridx_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_REVINDEX)?;
1161
1162 if ridx_len < 4 || ooff_len < 8 {
1163 return Err(Error::CorruptObject("truncated MIDX RIDX/OOFF".to_owned()));
1164 }
1165 let first_oid_idx =
1166 u32::from_be_bytes(data[ridx_off..ridx_off + 4].try_into().unwrap()) as usize;
1167 let entry_base = ooff_off + first_oid_idx * 8;
1168 if entry_base + 8 > data.len() || entry_base + 8 > ooff_off + ooff_len {
1169 return Err(Error::CorruptObject(
1170 "bad MIDX object-offsets index".to_owned(),
1171 ));
1172 }
1173 let pack_id = u32::from_be_bytes(data[entry_base..entry_base + 4].try_into().unwrap());
1174 let idx = usize::try_from(pack_id)
1175 .map_err(|_| Error::CorruptObject("pack id overflow in multi-pack-index".to_owned()))?;
1176 names
1177 .get(idx)
1178 .cloned()
1179 .ok_or_else(|| Error::CorruptObject("preferred pack id out of range".to_owned()))
1180}
1181
1182pub fn clear_pack_midx_state(pack_dir: &Path) -> Result<()> {
1189 let _ = fs::remove_file(pack_dir.join("multi-pack-index"));
1190 scrub_root_midx_sidecars_except(pack_dir, None)?;
1191 let midx_d = midx_d_dir(pack_dir);
1192 if midx_d.exists() {
1193 let _ = fs::remove_dir_all(&midx_d);
1194 }
1195 Ok(())
1196}
1197
1198pub fn write_multi_pack_index(pack_dir: &Path) -> Result<()> {
1199 write_multi_pack_index_with_options(pack_dir, &WriteMultiPackIndexOptions::default())
1200}
1201
1202pub fn write_multi_pack_index_with_options(
1204 pack_dir: &Path,
1205 opts: &WriteMultiPackIndexOptions,
1206) -> Result<()> {
1207 let mut idx_names: Vec<String> = fs::read_dir(pack_dir)
1208 .map_err(Error::Io)?
1209 .filter_map(|e| e.ok())
1210 .filter_map(|e| {
1211 let name = e.file_name().to_string_lossy().to_string();
1212 if name.ends_with(".idx") && name.starts_with("pack-") {
1213 Some(name)
1214 } else {
1215 None
1216 }
1217 })
1218 .collect();
1219 idx_names.sort();
1220
1221 if idx_names.is_empty() {
1222 return Err(Error::CorruptObject(
1223 "no pack-*.idx files found in pack directory".to_owned(),
1224 ));
1225 }
1226
1227 let idx_names: Vec<String> = if let Some(sub) = &opts.pack_names_subset_ordered {
1228 let mut out = Vec::new();
1229 for line in sub {
1230 let want = normalize_pack_idx_basename(line)?;
1231 let found = idx_names
1232 .iter()
1233 .find(|n| **n == want)
1234 .cloned()
1235 .ok_or_else(|| {
1236 Error::CorruptObject(format!("pack index not in repository: {want}"))
1237 })?;
1238 if !out.contains(&found) {
1239 out.push(found);
1240 }
1241 }
1242 if out.is_empty() {
1243 return Err(Error::CorruptObject(
1244 "stdin-packs list produced empty pack set".to_owned(),
1245 ));
1246 }
1247 out
1248 } else {
1249 idx_names
1250 };
1251
1252 let (base_oids, base_pack_names) = if opts.incremental {
1253 collect_incremental_base(pack_dir)?
1254 } else {
1255 (HashSet::new(), HashSet::new())
1256 };
1257
1258 let layer_idx_names: Vec<String> = if opts.incremental {
1259 idx_names
1260 .iter()
1261 .filter(|n| {
1262 !base_pack_names
1263 .iter()
1264 .any(|bp| pack_names_match_layer(bp, n))
1265 })
1266 .cloned()
1267 .collect()
1268 } else {
1269 idx_names.clone()
1270 };
1271
1272 if opts.incremental && layer_idx_names.is_empty() {
1273 return Ok(());
1274 }
1275
1276 let work_names = if opts.incremental {
1277 &layer_idx_names[..]
1278 } else {
1279 &idx_names[..]
1280 };
1281
1282 let mut preferred_idx = opts.preferred_pack_idx.map(|p| p as usize);
1283 if preferred_idx.is_none() {
1284 if let Some(raw) = opts.preferred_pack_name.as_deref() {
1285 let pos = work_names
1286 .iter()
1287 .position(|n| cmp_idx_or_pack_name(raw, n).is_eq())
1288 .ok_or_else(|| {
1289 Error::CorruptObject(format!(
1290 "preferred pack '{raw}' not found in multi-pack-index input"
1291 ))
1292 })?;
1293 preferred_idx = Some(pos);
1294 }
1295 }
1296 if preferred_idx.is_none() && opts.write_bitmap_placeholders && !work_names.is_empty() {
1297 preferred_idx = preferred_pack_index_by_mtime(pack_dir, work_names)?;
1298 }
1299 if let Some(p) = preferred_idx {
1300 if p >= work_names.len() {
1301 return Err(Error::CorruptObject(
1302 "preferred pack index out of range".to_owned(),
1303 ));
1304 }
1305 }
1306
1307 let mut indexes: Vec<PackIndex> = Vec::with_capacity(work_names.len());
1308 for name in work_names {
1309 let path = pack_dir.join(name);
1310 indexes.push(read_pack_index(&path)?);
1311 }
1312
1313 let pack_mtimes_layer: Vec<std::time::SystemTime> =
1314 indexes.iter().map(pack_mtime_for_midx).collect();
1315 let preferred_u32 = preferred_idx.map(|p| p as u32);
1316
1317 let mut best: HashMap<ObjectId, MidxEntry> = HashMap::new();
1318 for (pack_id, idx) in indexes.iter().enumerate() {
1319 let pack_id = u32::try_from(pack_id).map_err(|_| {
1320 Error::CorruptObject("too many pack files for multi-pack-index".to_owned())
1321 })?;
1322 let mtime = pack_mtimes_layer[pack_id as usize];
1323 for e in &idx.entries {
1324 if e.oid.len() != 20 {
1325 continue;
1326 }
1327 let Ok(oid) = ObjectId::from_bytes(&e.oid) else {
1328 continue;
1329 };
1330 if opts.incremental && base_oids.contains(&oid) {
1331 continue;
1332 }
1333 let cand = MidxEntry {
1334 oid,
1335 pack_id,
1336 offset: e.offset,
1337 pack_mtime: mtime,
1338 };
1339 match best.get(&oid) {
1340 None => {
1341 best.insert(oid, cand);
1342 }
1343 Some(cur) => {
1344 if midx_pick_better_entry(cur, pack_id, e.offset, mtime, preferred_u32) {
1345 best.insert(oid, cand);
1346 }
1347 }
1348 }
1349 }
1350 }
1351
1352 let bitmap_placeholders =
1353 opts.write_bitmap_placeholders && (!opts.incremental || !best.is_empty());
1354
1355 let omit_embedded_ridx = opts.write_rev_placeholder;
1356 let (out, rev_sidecar_order) = build_midx_bytes(
1357 work_names,
1358 &indexes,
1359 preferred_idx,
1360 bitmap_placeholders,
1361 omit_embedded_ridx,
1362 )?;
1363
1364 let hash = &out[out.len() - 20..];
1365 let hash_hex = hex::encode(hash);
1366 let hash_arr: [u8; 20] = hash
1367 .try_into()
1368 .map_err(|_| Error::CorruptObject("midx hash length mismatch".to_owned()))?;
1369
1370 if opts.incremental {
1371 let root_midx = pack_dir.join("multi-pack-index");
1372 let chain_path = chain_file_path(pack_dir);
1373 let chain_existed = chain_path.exists();
1374
1375 let mut chain = if root_midx.exists() && !chain_existed {
1376 let root_hex = midx_checksum_hex_from_path(&root_midx)?;
1377 link_root_midx_into_chain(pack_dir, &root_hex)?;
1378 vec![root_hex]
1379 } else {
1380 read_chain_layer_hashes(pack_dir).unwrap_or_default()
1381 };
1382
1383 chain.push(hash_hex.clone());
1384
1385 let midx_d = midx_d_dir(pack_dir);
1386 fs::create_dir_all(&midx_d).map_err(Error::Io)?;
1387
1388 let layer_path = midx_d.join(format!("multi-pack-index-{hash_hex}.midx"));
1389 fs::write(&layer_path, &out).map_err(Error::Io)?;
1390
1391 let mut chain_data = String::new();
1392 for h in &chain {
1393 chain_data.push_str(h);
1394 chain_data.push('\n');
1395 }
1396 fs::write(chain_file_path(pack_dir), chain_data.as_bytes()).map_err(Error::Io)?;
1397
1398 clear_stale_split_layers(pack_dir, &chain)?;
1399
1400 let _ = fs::remove_file(pack_dir.join("multi-pack-index"));
1401 scrub_root_midx_sidecars(pack_dir)?;
1402 if bitmap_placeholders {
1403 let full = hex::encode(hash);
1404 fs::write(midx_d.join(format!("multi-pack-index-{full}.bitmap")), [])
1405 .map_err(Error::Io)?;
1406 if opts.write_rev_placeholder {
1407 let rev_path = midx_d.join(format!("multi-pack-index-{full}.rev"));
1408 if let Some(order) = rev_sidecar_order.as_ref() {
1409 write_midx_rev_sidecar(&rev_path, order, &hash_arr)?;
1410 } else {
1411 fs::write(rev_path, []).map_err(Error::Io)?;
1412 }
1413 }
1414 }
1415 } else {
1416 let midx_d = midx_d_dir(pack_dir);
1417 if midx_d.exists() {
1418 for ent in fs::read_dir(&midx_d).map_err(Error::Io)? {
1419 let ent = ent.map_err(Error::Io)?;
1420 let _ = if ent.file_type().map_err(Error::Io)?.is_dir() {
1421 fs::remove_dir_all(ent.path())
1422 } else {
1423 fs::remove_file(ent.path())
1424 };
1425 }
1426 }
1427 fs::create_dir_all(&midx_d).map_err(Error::Io)?;
1428
1429 let dest = pack_dir.join("multi-pack-index");
1430 fs::write(&dest, &out).map_err(Error::Io)?;
1431
1432 scrub_root_midx_sidecars_except(pack_dir, Some(&hash_hex))?;
1433
1434 if opts.write_bitmap_placeholders {
1435 fs::write(
1436 pack_dir.join(format!("multi-pack-index-{hash_hex}.bitmap")),
1437 [],
1438 )
1439 .map_err(Error::Io)?;
1440 if opts.write_rev_placeholder {
1441 let rev_path = pack_dir.join(format!("multi-pack-index-{hash_hex}.rev"));
1442 if let Some(order) = rev_sidecar_order.as_ref() {
1443 write_midx_rev_sidecar(&rev_path, order, &hash_arr)?;
1444 } else {
1445 fs::write(rev_path, []).map_err(Error::Io)?;
1446 }
1447 }
1448 }
1449 }
1450
1451 Ok(())
1452}
1453
1454fn pack_names_match_layer(base_name: &str, disk_idx: &str) -> bool {
1455 if base_name == disk_idx {
1456 return true;
1457 }
1458 cmp_idx_or_pack_name(disk_idx, base_name).is_eq()
1459}
1460
1461fn scrub_root_midx_sidecars(pack_dir: &Path) -> Result<()> {
1462 scrub_root_midx_sidecars_except(pack_dir, None)
1463}
1464
1465fn scrub_root_midx_sidecars_except(pack_dir: &Path, keep_hex: Option<&str>) -> Result<()> {
1466 let Ok(rd) = fs::read_dir(pack_dir) else {
1467 return Ok(());
1468 };
1469 for ent in rd {
1470 let ent = ent.map_err(Error::Io)?;
1471 let name = ent.file_name().to_string_lossy().to_string();
1472 let Some(rest) = name.strip_prefix("multi-pack-index-") else {
1473 continue;
1474 };
1475 if !(rest.ends_with(".bitmap") || rest.ends_with(".rev")) {
1476 continue;
1477 }
1478 let hash_part = rest
1479 .strip_suffix(".bitmap")
1480 .or_else(|| rest.strip_suffix(".rev"))
1481 .unwrap_or(rest);
1482 if hash_part.len() != 40 {
1483 continue;
1484 }
1485 if keep_hex.is_some_and(|k| k == hash_part) {
1486 continue;
1487 }
1488 let _ = fs::remove_file(ent.path());
1489 }
1490 Ok(())
1491}