use std::collections::{HashMap, HashSet};
use std::fs;
use std::io::{BufRead, BufReader};
use std::path::Path;
use sha1::{Digest, Sha1};
use crate::error::{Error, Result};
use crate::objects::ObjectId;
use crate::pack::{read_pack_index_no_verify, PackIndex};
// On-disk constants for the multi-pack-index (MIDX) file and its reverse-index sidecar.
// The 32-bit tags below are four ASCII characters packed big-endian.

/// File signature: ASCII "MIDX".
const MIDX_SIGNATURE: u32 = 0x4d49_4458;
/// Header version bytes accepted by the readers in this file.
const MIDX_VERSION_V1: u8 = 1;
const MIDX_VERSION_V2: u8 = 2;
/// Values of the header's hash-version byte.
const HASH_VERSION_SHA1: u8 = 1;
const HASH_VERSION_SHA256: u8 = 2;
/// Size in bytes of the fixed header that precedes the chunk table of contents.
const MIDX_HEADER_SIZE: usize = 12;
/// Size in bytes of one table-of-contents entry: a 4-byte chunk id plus an 8-byte offset.
const CHUNK_TOC_ENTRY_SIZE: usize = 12;
/// Chunk id "PNAM": NUL-terminated pack index names.
const MIDX_CHUNKID_PACKNAMES: u32 = 0x504e_414d;
/// Chunk id "OIDF": 256-entry fanout table.
const MIDX_CHUNKID_OIDFANOUT: u32 = 0x4f49_4446;
/// Chunk id "OIDL": sorted object ids.
const MIDX_CHUNKID_OIDLOOKUP: u32 = 0x4f49_444c;
/// Chunk id "OOFF": per-object pack id and 32-bit offset field.
const MIDX_CHUNKID_OBJECTOFFSETS: u32 = 0x4f4f_4646;
/// Chunk id "LOFF": 64-bit offsets referenced from the object-offsets chunk.
const MIDX_CHUNKID_LARGEOFFSETS: u32 = 0x4c4f_4646;
/// Chunk id "RIDX": embedded reverse index.
const MIDX_CHUNKID_REVINDEX: u32 = 0x5249_4458;
/// Chunk id "BTMP": per-pack bitmap position and object count.
const MIDX_CHUNKID_BITMAPPED_PACKS: u32 = 0x4254_4d50;
/// Signature of the reverse-index sidecar file: ASCII "RIDX".
const RIDX_SIGNATURE: u32 = 0x5249_4458;
/// Version word written into the reverse-index sidecar header.
const RIDX_VERSION: u32 = 1;
/// Size in bytes of the reverse-index sidecar header (three 32-bit words).
const RIDX_HEADER_SIZE: usize = 12;
/// Chunks are padded so that their offsets are multiples of this many bytes.
const MIDX_CHUNK_ALIGNMENT: usize = 4;
/// High bit of the 32-bit offset field; when set, the low 31 bits index the large-offsets chunk.
const MIDX_LARGE_OFFSET_NEEDED: u32 = 0x8000_0000;
/// One object location chosen (or considered) while building a multi-pack-index.
struct MidxEntry {
/// Object id of the entry.
oid: ObjectId,
/// Position of the owning pack in the pack list handed to the writer.
pack_id: u32,
/// Offset of the object as recorded in that pack's index.
offset: u64,
/// Modification time of the owning pack; used to break ties between duplicate objects.
pack_mtime: std::time::SystemTime,
}
/// Options controlling how a multi-pack-index is written.
///
/// NOTE(review): the code that consumes these options is outside this view, so the
/// per-field notes below are inferred from names and types — confirm against the writer.
#[derive(Debug, Clone, Default)]
pub struct WriteMultiPackIndexOptions {
/// Presumably the position of the preferred pack in the pack list — TODO confirm.
pub preferred_pack_idx: Option<u32>,
/// Presumably the preferred pack given by name instead of position — TODO confirm.
pub preferred_pack_name: Option<String>,
/// Presumably restricts the write to these packs, in this order — TODO confirm.
pub pack_names_subset_ordered: Option<Vec<String>>,
/// Presumably requests placeholder bitmap data alongside the index — TODO confirm.
pub write_bitmap_placeholders: bool,
/// Presumably requests an incremental (chained) layer rather than a single file — TODO confirm.
pub incremental: bool,
/// Presumably requests a placeholder reverse-index file — TODO confirm.
pub write_rev_placeholder: bool,
/// Presumably the MIDX format version to write; `None` leaves the choice to the writer — TODO confirm.
pub version: Option<u8>,
}
/// Converts a user-supplied pack or index name into an `.idx` basename.
///
/// Surrounding whitespace and any leading directories are dropped. A name that
/// already ends in `.idx` is kept, a `.pack` suffix is swapped for `.idx`, and
/// any other name gets `.idx` appended. This function never returns `Err`.
fn normalize_pack_idx_basename(raw: &str) -> Result<String> {
    let trimmed = raw.trim();
    // Keep only the last path component; fall back to the whole string when there is none.
    let base = match std::path::Path::new(trimmed)
        .file_name()
        .and_then(|component| component.to_str())
    {
        Some(component) => component,
        None => trimmed,
    };
    let base = base.strip_prefix("./").unwrap_or(base);
    let normalized = if base.ends_with(".idx") {
        base.to_string()
    } else if let Some(stem) = base.strip_suffix(".pack") {
        format!("{stem}.idx")
    } else {
        format!("{base}.idx")
    };
    Ok(normalized)
}
fn read_be_u32(data: &[u8], off: usize) -> Result<u32> {
let end = off.checked_add(4).filter(|&e| e <= data.len());
let Some(end) = end else {
return Err(Error::CorruptObject(
"truncated MIDX data reading u32".to_owned(),
));
};
let bytes: [u8; 4] = data[off..end]
.try_into()
.map_err(|_| Error::CorruptObject("truncated MIDX data reading u32".to_owned()))?;
Ok(u32::from_be_bytes(bytes))
}
fn read_be_u64(data: &[u8], off: usize) -> Result<u64> {
let end = off.checked_add(8).filter(|&e| e <= data.len());
let Some(end) = end else {
return Err(Error::CorruptObject(
"truncated MIDX data reading u64".to_owned(),
));
};
let bytes: [u8; 8] = data[off..end]
.try_into()
.map_err(|_| Error::CorruptObject("truncated MIDX data reading u64".to_owned()))?;
Ok(u64::from_be_bytes(bytes))
}
/// The part of the fixed MIDX header that callers need after validation.
struct MidxFileHeader {
/// Number of chunks listed in the table of contents (header byte 6).
num_chunks: u8,
}
fn parse_midx_header(data: &[u8]) -> Result<(MidxFileHeader, usize, u8)> {
if data.len() < MIDX_HEADER_SIZE + 20 {
return Err(Error::CorruptObject("midx file too small".to_owned()));
}
let sig = read_be_u32(data, 0)?;
if sig != MIDX_SIGNATURE {
return Err(Error::CorruptObject("bad MIDX signature".to_owned()));
}
let version = data[4];
if version != MIDX_VERSION_V1 && version != MIDX_VERSION_V2 {
return Err(Error::CorruptObject(format!(
"multi-pack-index version {version} not recognized"
)));
}
let object_hash_bytes = data[5];
let num_chunks = data[6];
let _num_packs = read_be_u32(data, 8)?;
Ok((
MidxFileHeader { num_chunks },
MIDX_HEADER_SIZE,
object_hash_bytes,
))
}
fn parse_pack_names_blob(pn: &[u8]) -> Result<Vec<String>> {
let mut names = Vec::new();
let mut start = 0usize;
for (i, &b) in pn.iter().enumerate() {
if b == 0 && i >= start {
if i > start {
let s = std::str::from_utf8(&pn[start..i])
.map_err(|_| Error::CorruptObject("non-utf8 pack name in MIDX".to_owned()))?;
names.push(s.to_string());
}
start = i + 1;
}
}
Ok(names)
}
/// Compares a pack or index file name against an index file name.
///
/// The two names are compared bytewise, except that a name differing from
/// `idx_name` only by ending in `pack` where `idx_name` ends in `idx` is
/// treated as equal.
fn cmp_idx_or_pack_name(idx_or_pack_name: &str, idx_name: &str) -> std::cmp::Ordering {
    let lhs = idx_or_pack_name.as_bytes();
    let rhs = idx_name.as_bytes();
    // Length of the shared prefix.
    let shared = lhs
        .iter()
        .zip(rhs.iter())
        .take_while(|(l, r)| l == r)
        .count();
    let (lhs_tail, rhs_tail) = (&lhs[shared..], &rhs[shared..]);
    let pack_matches_idx = lhs_tail == b"pack" && rhs_tail == b"idx";
    if pack_matches_idx {
        std::cmp::Ordering::Equal
    } else {
        lhs_tail.cmp(rhs_tail)
    }
}
fn preferred_pack_index_by_mtime(pack_dir: &Path, names: &[String]) -> Result<Option<usize>> {
let mut best: Option<(usize, std::time::SystemTime)> = None;
for (i, n) in names.iter().enumerate() {
let meta = fs::metadata(pack_dir.join(n)).map_err(Error::Io)?;
let mtime = meta.modified().map_err(Error::Io)?;
match best {
None => best = Some((i, mtime)),
Some((_, t)) if mtime < t => best = Some((i, mtime)),
_ => {}
}
}
Ok(best.map(|(i, _)| i))
}
/// Path of the `multi-pack-index.d` directory inside `pack_dir`.
fn midx_d_dir(pack_dir: &Path) -> std::path::PathBuf {
    let mut dir = pack_dir.to_path_buf();
    dir.push("multi-pack-index.d");
    dir
}
/// Path of the `multi-pack-index-chain` file inside the `multi-pack-index.d` directory.
fn chain_file_path(pack_dir: &Path) -> std::path::PathBuf {
    let mut chain = midx_d_dir(pack_dir);
    chain.push("multi-pack-index-chain");
    chain
}
fn read_chain_layer_hashes(pack_dir: &Path) -> Result<Vec<String>> {
let path = chain_file_path(pack_dir);
let f = fs::File::open(&path).map_err(Error::Io)?;
let mut out = Vec::new();
for line in BufReader::new(f).lines() {
let line = line.map_err(Error::Io)?;
let t = line.trim();
if t.is_empty() {
continue;
}
if t.len() != 40 || !t.chars().all(|c| c.is_ascii_hexdigit()) {
return Err(Error::CorruptObject(format!(
"invalid multi-pack-index chain line: {t}"
)));
}
out.push(t.to_ascii_lowercase());
}
Ok(out)
}
/// Hash-version byte for the repository that owns `pack_dir`.
///
/// Treats the parent of `pack_dir` as the objects directory; falls back to the
/// SHA-1 value when `pack_dir` has no parent.
fn repo_midx_hash_version(pack_dir: &Path) -> u8 {
    match pack_dir.parent() {
        Some(objects_dir) => repo_midx_hash_version_for_objects_dir(objects_dir),
        None => HASH_VERSION_SHA1,
    }
}
/// Hash-version byte for the repository whose objects directory is `objects_dir`.
///
/// Scans the `config` file in the parent of `objects_dir` for an `objectformat`
/// key with value `sha256` inside an `extensions` section (all matched
/// case-insensitively). Every other outcome, including a missing or unreadable
/// config file, yields the SHA-1 value.
fn repo_midx_hash_version_for_objects_dir(objects_dir: &Path) -> u8 {
    let config_text = objects_dir
        .parent()
        .and_then(|gitdir| fs::read_to_string(gitdir.join("config")).ok());
    let Some(config_text) = config_text else {
        return HASH_VERSION_SHA1;
    };
    let mut inside_extensions = false;
    for line in config_text.lines().map(str::trim) {
        if line.starts_with('[') {
            // Section header: only the first word inside the brackets names the section.
            let header = line.trim_start_matches('[').trim_end_matches(']');
            inside_extensions = header
                .split_whitespace()
                .next()
                .is_some_and(|name| name.eq_ignore_ascii_case("extensions"));
            continue;
        }
        if !inside_extensions {
            continue;
        }
        let Some((key, value)) = line.split_once('=') else {
            continue;
        };
        let selects_sha256 = key.trim().eq_ignore_ascii_case("objectformat")
            && value.trim().eq_ignore_ascii_case("sha256");
        if selects_sha256 {
            return HASH_VERSION_SHA256;
        }
    }
    HASH_VERSION_SHA1
}
/// Locates the multi-pack-index file that should be read by default.
///
/// Prefers `multi-pack-index` directly inside `pack_dir`; otherwise returns the
/// file for the last checksum listed in the chain file. Returns `None` when
/// neither exists or the chain file cannot be read.
pub fn resolve_tip_midx_path(pack_dir: &Path) -> Option<std::path::PathBuf> {
    let single_file = pack_dir.join("multi-pack-index");
    if single_file.exists() {
        return Some(single_file);
    }
    let layers = read_chain_layer_hashes(pack_dir).ok()?;
    layers
        .last()
        .map(|last| midx_d_dir(pack_dir).join(format!("multi-pack-index-{last}.midx")))
}
/// Locates the multi-pack-index file whose checksum is `checksum` (case-insensitive).
///
/// A checksum listed in the chain file resolves to that layer's file. Otherwise
/// the single `multi-pack-index` file is returned if its trailing checksum
/// matches. Returns `None` when nothing matches.
pub fn resolve_midx_layer_path(pack_dir: &Path, checksum: &str) -> Option<std::path::PathBuf> {
    let checksum = checksum.to_ascii_lowercase();
    let listed_in_chain = read_chain_layer_hashes(pack_dir)
        .map(|layers| layers.contains(&checksum))
        .unwrap_or(false);
    if listed_in_chain {
        return Some(midx_d_dir(pack_dir).join(format!("multi-pack-index-{checksum}.midx")));
    }
    let single_file = pack_dir.join("multi-pack-index");
    let single_matches = single_file.exists()
        && midx_checksum_hex_from_path(&single_file).is_ok_and(|hex| hex == checksum);
    single_matches.then_some(single_file)
}
fn load_midx_file(path: &Path) -> Result<Vec<u8>> {
let data = fs::read(path).map_err(Error::Io)?;
let _ = parse_midx_header(&data)?;
Ok(data)
}
/// Extracts the set of object ids and the list of pack names from MIDX bytes.
///
/// The object count is taken from the object-offsets chunk (8 bytes per object)
/// and the oid-lookup chunk must hold exactly 20 bytes per object.
///
/// # Errors
/// Propagates header, chunk-lookup and object-id parsing errors, and returns
/// `Error::CorruptObject` when the two chunk sizes disagree.
fn oids_and_packs_from_midx_data(data: &[u8]) -> Result<(HashSet<ObjectId>, Vec<String>)> {
    let (_, toc_start, _) = parse_midx_header(data)?;
    let (names_at, names_len) = find_chunk(data, toc_start, MIDX_CHUNKID_PACKNAMES)?;
    let pack_names = parse_pack_names_blob(&data[names_at..names_at + names_len])?;
    let (_, offsets_len) = find_chunk(data, toc_start, MIDX_CHUNKID_OBJECTOFFSETS)?;
    let (lookup_at, lookup_len) = find_chunk(data, toc_start, MIDX_CHUNKID_OIDLOOKUP)?;
    let object_count = offsets_len / 8;
    if lookup_len != object_count * 20 {
        return Err(Error::CorruptObject(
            "MIDX oid-lookup size mismatch".to_owned(),
        ));
    }
    let mut oids = HashSet::with_capacity(object_count);
    for slot in 0..object_count {
        let begin = lookup_at + slot * 20;
        oids.insert(ObjectId::from_bytes(&data[begin..begin + 20])?);
    }
    Ok((oids, pack_names))
}
/// Collects every object id and pack name already covered by existing MIDX files.
///
/// When the chain file exists, every layer it lists is read. Otherwise the
/// single `multi-pack-index` file is read if present. With neither, both sets
/// are empty.
///
/// # Errors
/// Propagates errors from reading the chain file or from loading and parsing a layer.
fn collect_incremental_base(pack_dir: &Path) -> Result<(HashSet<ObjectId>, HashSet<String>)> {
    // Decide which files to read first, then fold them all the same way.
    let sources: Vec<std::path::PathBuf> = if chain_file_path(pack_dir).exists() {
        read_chain_layer_hashes(pack_dir)?
            .into_iter()
            .map(|h| midx_d_dir(pack_dir).join(format!("multi-pack-index-{h}.midx")))
            .collect()
    } else {
        let single_file = pack_dir.join("multi-pack-index");
        if single_file.exists() {
            vec![single_file]
        } else {
            Vec::new()
        }
    };
    let mut known_oids = HashSet::new();
    let mut known_packs = HashSet::new();
    for source in &sources {
        let bytes = load_midx_file(source)?;
        let (layer_oids, layer_packs) = oids_and_packs_from_midx_data(&bytes)?;
        known_oids.extend(layer_oids);
        known_packs.extend(layer_packs);
    }
    Ok((known_oids, known_packs))
}
fn midx_checksum_hex_from_path(path: &Path) -> Result<String> {
let data = fs::read(path).map_err(Error::Io)?;
if data.len() < 20 {
return Err(Error::CorruptObject(
"midx too small for checksum".to_owned(),
));
}
let hash = &data[data.len() - 20..];
Ok(hex::encode(hash))
}
fn hard_link_or_copy(src: &Path, dst: &Path) -> Result<()> {
let _ = fs::remove_file(dst);
if fs::hard_link(src, dst).is_ok() {
return Ok(());
}
fs::copy(src, dst).map_err(Error::Io)?;
Ok(())
}
fn link_root_midx_into_chain(pack_dir: &Path, root_checksum_hex: &str) -> Result<()> {
let midx_d = midx_d_dir(pack_dir);
fs::create_dir_all(&midx_d).map_err(Error::Io)?;
let dst_midx = midx_d.join(format!("multi-pack-index-{root_checksum_hex}.midx"));
hard_link_or_copy(&pack_dir.join("multi-pack-index"), &dst_midx)?;
let exts = ["bitmap", "rev"];
for ext in exts {
let src = pack_dir.join(format!("multi-pack-index-{root_checksum_hex}.{ext}"));
if src.exists() {
let dst = midx_d.join(format!("multi-pack-index-{root_checksum_hex}.{ext}"));
hard_link_or_copy(&src, &dst)?;
}
}
Ok(())
}
fn clear_stale_split_layers(pack_dir: &Path, keep: &[String]) -> Result<()> {
let midx_d = midx_d_dir(pack_dir);
if !midx_d.exists() {
return Ok(());
}
let keep: HashSet<&str> = keep.iter().map(|s| s.as_str()).collect();
for ent in fs::read_dir(&midx_d).map_err(Error::Io)? {
let ent = ent.map_err(Error::Io)?;
let name = ent.file_name().to_string_lossy().to_string();
let Some(rest) = name.strip_prefix("multi-pack-index-") else {
continue;
};
let Some((hash_part, _ext)) = rest.split_once('.') else {
continue;
};
if hash_part.len() == 40 && !keep.contains(hash_part) {
let _ = fs::remove_file(ent.path());
}
}
Ok(())
}
fn clear_incremental_midx_files(pack_dir: &Path) -> Result<()> {
let midx_d = midx_d_dir(pack_dir);
let _ = fs::remove_file(chain_file_path(pack_dir));
if !midx_d.exists() {
return Ok(());
}
for ent in fs::read_dir(&midx_d).map_err(Error::Io)? {
let ent = ent.map_err(Error::Io)?;
let name = ent.file_name().to_string_lossy().to_string();
if name.starts_with("multi-pack-index-")
&& (name.ends_with(".midx") || name.ends_with(".bitmap") || name.ends_with(".rev"))
{
let _ = fs::remove_file(ent.path());
}
}
Ok(())
}
/// Modification time of the pack file behind `idx`, or the Unix epoch when it
/// cannot be determined.
fn pack_mtime_for_midx(idx: &PackIndex) -> std::time::SystemTime {
    let Ok(meta) = fs::metadata(&idx.pack_path) else {
        return std::time::SystemTime::UNIX_EPOCH;
    };
    meta.modified()
        .unwrap_or(std::time::SystemTime::UNIX_EPOCH)
}
/// Decides whether a candidate location should replace the current one for the
/// same object.
///
/// Returns `true` when the candidate wins. Order of precedence: an entry from
/// the preferred pack, then the pack with the newer modification time, then
/// the lower pack id, then the lower offset. A candidate that ties on
/// everything does not replace the current entry.
fn midx_pick_better_entry(
    cur: &MidxEntry,
    cand_pack: u32,
    cand_offset: u64,
    cand_mtime: std::time::SystemTime,
    preferred_pack: Option<u32>,
) -> bool {
    let cur_is_preferred = preferred_pack == Some(cur.pack_id);
    let cand_is_preferred = preferred_pack == Some(cand_pack);
    if cur_is_preferred != cand_is_preferred {
        return cand_is_preferred;
    }
    // Build one ranking in which `Less` means "candidate wins":
    // newer mtime first (hence the reversal), then lower pack id, then lower offset.
    let ranking = cand_mtime
        .cmp(&cur.pack_mtime)
        .reverse()
        .then_with(|| cand_pack.cmp(&cur.pack_id))
        .then_with(|| cand_offset.cmp(&cur.offset));
    ranking == std::cmp::Ordering::Less
}
/// Serializes a complete multi-pack-index file for the given pack indexes.
///
/// `idx_names` supplies the names written to the pack-names chunk and `indexes`
/// supplies the objects; a pack's id is its position in `indexes`.
/// Objects listed in `exclude_oids` are left out. When an object appears in
/// several packs, `midx_pick_better_entry` chooses which location is kept.
///
/// Returns the file bytes (header, table of contents, chunks, trailing SHA-1)
/// and, only when `omit_embedded_ridx_chunk` and `write_bitmap_placeholders`
/// are both set, the pack-order permutation for a separate reverse-index file.
///
/// # Errors
/// Returns `Error::CorruptObject` when a pack position, large-offset slot or the
/// chunk count does not fit its on-disk field.
// The argument list mirrors the independent knobs of the on-disk format.
#[allow(clippy::too_many_arguments)]
fn build_midx_bytes_filtered(
idx_names: &[String],
indexes: &[PackIndex],
preferred_idx: Option<usize>,
write_bitmap_placeholders: bool,
omit_embedded_ridx_chunk: bool,
version: u8,
hash_version: u8,
exclude_oids: Option<&HashSet<ObjectId>>,
) -> Result<(Vec<u8>, Option<Vec<u32>>)> {
let preferred_pack_idx = preferred_idx.map(|p| p as u32);
let pack_mtimes: Vec<std::time::SystemTime> = indexes.iter().map(pack_mtime_for_midx).collect();
// Pass 1: keep exactly one location per object id.
let mut best: HashMap<ObjectId, MidxEntry> = HashMap::new();
for (pack_id, idx) in indexes.iter().enumerate() {
let pack_id = u32::try_from(pack_id).map_err(|_| {
Error::CorruptObject("too many pack files for multi-pack-index".to_owned())
})?;
let mtime = pack_mtimes[pack_id as usize];
for e in &idx.entries {
// Entries whose id is not 20 bytes, or that fail to parse, are silently skipped.
if e.oid.len() != 20 {
continue;
}
let Ok(oid) = ObjectId::from_bytes(&e.oid) else {
continue;
};
if let Some(ex) = exclude_oids {
if ex.contains(&oid) {
continue;
}
}
let cand = MidxEntry {
oid,
pack_id,
offset: e.offset,
pack_mtime: mtime,
};
match best.get(&oid) {
None => {
best.insert(oid, cand);
}
Some(cur) => {
if midx_pick_better_entry(cur, pack_id, e.offset, mtime, preferred_pack_idx) {
best.insert(oid, cand);
}
}
}
}
}
// Pass 2: the lookup, fanout and offset chunks all use object-id order.
let mut entries: Vec<MidxEntry> = best.into_values().collect();
entries.sort_by_key(|a| a.oid);
// The large-offsets chunk is written only if some offset does not fit in 32 bits.
let large_offsets_needed = entries.iter().any(|e| e.offset > u64::from(u32::MAX));
let num_packs = indexes.len() as u32;
// Pack-names chunk: NUL-terminated names, zero-padded to the chunk alignment.
let mut pack_names_blob = Vec::new();
for name in idx_names {
pack_names_blob.extend_from_slice(name.as_bytes());
pack_names_blob.push(0);
}
let pad = (MIDX_CHUNK_ALIGNMENT - (pack_names_blob.len() % MIDX_CHUNK_ALIGNMENT))
% MIDX_CHUNK_ALIGNMENT;
pack_names_blob.extend(std::iter::repeat_n(0u8, pad));
let chunk_pnam = pack_names_blob;
// Fanout chunk: entry i holds the number of objects whose first id byte is <= i.
let mut chunk_oidf = vec![0u8; 256 * 4];
let mut j = 0usize;
for i in 0..256 {
while j < entries.len() && entries[j].oid.as_bytes()[0] <= i as u8 {
j += 1;
}
chunk_oidf[i * 4..(i + 1) * 4].copy_from_slice(&(j as u32).to_be_bytes());
}
// OID-lookup chunk: the sorted object ids back to back.
let mut chunk_oidl = Vec::with_capacity(entries.len() * 20);
for e in &entries {
chunk_oidl.extend_from_slice(e.oid.as_bytes());
}
// Object-offsets chunk: pack id followed by a 32-bit offset field per object.
let mut large_offsets: Vec<u64> = Vec::new();
let mut chunk_ooff = Vec::with_capacity(entries.len() * 8);
for e in &entries {
chunk_ooff.extend_from_slice(&e.pack_id.to_be_bytes());
// Once large offsets are in use, every offset with bit 31 or above set goes
// to the large-offsets table and the field stores the marker bit plus its slot.
// Otherwise the offset is stored directly as 32 bits.
let encoded = if large_offsets_needed && e.offset >> 31 != 0 {
let slot = u32::try_from(large_offsets.len()).map_err(|_| {
Error::CorruptObject("too many large offsets in multi-pack-index".to_owned())
})?;
large_offsets.push(e.offset);
MIDX_LARGE_OFFSET_NEEDED | slot
} else {
e.offset as u32
};
chunk_ooff.extend_from_slice(&encoded.to_be_bytes());
}
let chunk_loff: Vec<u8> = if large_offsets.is_empty() {
Vec::new()
} else {
let mut v = Vec::with_capacity(large_offsets.len() * 8);
for off in &large_offsets {
v.extend_from_slice(&off.to_be_bytes());
}
v
};
// Pack order: preferred-pack objects first, then by pack id, offset and
// finally position in the sorted object list.
let pref = preferred_pack_idx;
let mut order: Vec<u32> = (0..entries.len() as u32).collect();
order.sort_by(|&ai, &bi| {
let a = &entries[ai as usize];
let b = &entries[bi as usize];
let a_pref = pref == Some(a.pack_id);
let b_pref = pref == Some(b.pack_id);
b_pref
.cmp(&a_pref)
.then_with(|| a.pack_id.cmp(&b.pack_id))
.then_with(|| a.offset.cmp(&b.offset))
.then_with(|| ai.cmp(&bi))
});
// The reverse-index bytes are always built; whether they are emitted is decided below.
let mut chunk_ridx = Vec::with_capacity(entries.len() * 4);
for oid_idx in &order {
chunk_ridx.extend_from_slice(&oid_idx.to_be_bytes());
}
let rev_sidecar_order = if omit_embedded_ridx_chunk && write_bitmap_placeholders {
Some(order.clone())
} else {
None
};
// Bitmapped-packs chunk: for each pack, the rank of its first object in pack
// order and how many objects it contributes.
let chunk_btmp: Vec<u8> = if write_bitmap_placeholders {
let num_packs_usize = indexes.len();
let mut bitmap_pos = vec![u32::MAX; num_packs_usize];
let mut bitmap_nr = vec![0u32; num_packs_usize];
for (rank, &oid_idx) in order.iter().enumerate() {
let pack = entries[oid_idx as usize].pack_id as usize;
if let Some(p) = bitmap_pos.get_mut(pack) {
if *p == u32::MAX {
*p = rank as u32;
}
}
if let Some(n) = bitmap_nr.get_mut(pack) {
*n += 1;
}
}
let mut v = Vec::new();
for pack in 0..num_packs_usize {
// A pack that contributes no objects is written with position 0.
let pos = if bitmap_pos[pack] == u32::MAX {
0
} else {
bitmap_pos[pack]
};
v.extend_from_slice(&pos.to_be_bytes());
v.extend_from_slice(&bitmap_nr[pack].to_be_bytes());
}
let pad = (MIDX_CHUNK_ALIGNMENT - (v.len() % MIDX_CHUNK_ALIGNMENT)) % MIDX_CHUNK_ALIGNMENT;
v.extend(std::iter::repeat_n(0u8, pad));
v
} else {
Vec::new()
};
// Chunks are written in this order; the optional ones are appended when enabled.
let mut chunks: Vec<(u32, Vec<u8>)> = vec![
(MIDX_CHUNKID_PACKNAMES, chunk_pnam),
(MIDX_CHUNKID_OIDFANOUT, chunk_oidf),
(MIDX_CHUNKID_OIDLOOKUP, chunk_oidl),
(MIDX_CHUNKID_OBJECTOFFSETS, chunk_ooff),
];
if !chunk_loff.is_empty() {
chunks.push((MIDX_CHUNKID_LARGEOFFSETS, chunk_loff));
}
if (pref.is_some() || write_bitmap_placeholders) && !omit_embedded_ridx_chunk {
chunks.push((MIDX_CHUNKID_REVINDEX, chunk_ridx));
}
if write_bitmap_placeholders {
chunks.push((MIDX_CHUNKID_BITMAPPED_PACKS, chunk_btmp));
}
let num_chunks: u8 = chunks
.len()
.try_into()
.map_err(|_| Error::CorruptObject("too many MIDX chunks".to_owned()))?;
// Table of contents: one (id, absolute offset) entry per chunk plus a
// terminating entry with id 0 whose offset marks the end of the last chunk.
let mut body = Vec::new();
let mut cur_offset =
MIDX_HEADER_SIZE as u64 + ((chunks.len() + 1) * CHUNK_TOC_ENTRY_SIZE) as u64;
for (id, data) in &chunks {
body.extend_from_slice(&id.to_be_bytes());
body.extend_from_slice(&cur_offset.to_be_bytes());
cur_offset += data.len() as u64;
}
body.extend_from_slice(&0u32.to_be_bytes());
body.extend_from_slice(&cur_offset.to_be_bytes());
for (_, data) in &chunks {
body.extend_from_slice(data);
}
// Header: signature, version, hash version, chunk count, one zero byte, pack count.
let mut out = Vec::with_capacity(MIDX_HEADER_SIZE + body.len() + 20);
out.extend_from_slice(&MIDX_SIGNATURE.to_be_bytes());
// Any version value other than 1 is written as version 2.
out.push(if version == MIDX_VERSION_V1 {
MIDX_VERSION_V1
} else {
MIDX_VERSION_V2
});
out.push(hash_version);
out.push(num_chunks);
out.push(0);
out.extend_from_slice(&num_packs.to_be_bytes());
out.extend_from_slice(&body);
// Trailer: SHA-1 over everything written so far.
let mut hasher = Sha1::new();
hasher.update(&out);
let hash = hasher.finalize();
out.extend_from_slice(&hash);
Ok((out, rev_sidecar_order))
}
fn write_midx_rev_sidecar(
path: &Path,
pack_order: &[u32],
midx_file_hash: &[u8; 20],
) -> Result<()> {
let mut body = Vec::with_capacity(RIDX_HEADER_SIZE + pack_order.len() * 4 + 20);
body.extend_from_slice(&RIDX_SIGNATURE.to_be_bytes());
body.extend_from_slice(&RIDX_VERSION.to_be_bytes());
body.extend_from_slice(&1u32.to_be_bytes());
for idx in pack_order {
body.extend_from_slice(&idx.to_be_bytes());
}
body.extend_from_slice(midx_file_hash);
fs::write(path, body).map_err(Error::Io)
}
fn find_chunk(data: &[u8], header_end: usize, chunk_id: u32) -> Result<(usize, usize)> {
let (hdr, _, _) = parse_midx_header(data)?;
let n = hdr.num_chunks as usize;
let pos = header_end;
let toc_end = pos + (n + 1) * CHUNK_TOC_ENTRY_SIZE;
if data.len() < toc_end + 20 {
return Err(Error::CorruptObject(
"truncated MIDX chunk table".to_owned(),
));
}
for i in 0..n {
let base = pos + i * CHUNK_TOC_ENTRY_SIZE;
let id = read_be_u32(data, base)?;
let off = read_be_u64(data, base + 4)? as usize;
if id == chunk_id {
let next_off = if i + 1 < n {
let nb = pos + (i + 1) * CHUNK_TOC_ENTRY_SIZE;
read_be_u64(data, nb + 4)? as usize
} else {
let term = pos + n * CHUNK_TOC_ENTRY_SIZE;
read_be_u64(data, term + 4)? as usize
};
return Ok((off, next_off.saturating_sub(off)));
}
}
Err(Error::CorruptObject(format!(
"MIDX chunk {chunk_id:08x} not found"
)))
}
/// Error raised while loading a multi-pack-index; wraps a human-readable message.
#[derive(Debug, Clone)]
pub struct MidxLoadError(pub String);

impl std::fmt::Display for MidxLoadError {
    /// Writes the wrapped message verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(message) = self;
        f.write_str(message)
    }
}
/// One validated entry from the MIDX table of contents.
struct TocEntry {
/// Four-byte chunk identifier.
id: u32,
/// Byte offset of the chunk from the start of the file.
offset: usize,
}
/// Parses and validates the MIDX table of contents.
///
/// Returns the chunk entries in file order. Each entry must have a non-zero id,
/// an aligned offset, an offset no greater than the next entry's, a next offset
/// that stays before the trailing hash, and an id not seen before. The final
/// table entry must have id 0.
///
/// On a validation failure a detailed message is appended to `errors` and the
/// returned `MidxLoadError` carries the generic "missing or corrupted" message.
/// Size failures return their own message without touching `errors`.
fn parse_midx_toc(
    data: &[u8],
    hash_len: usize,
    errors: &mut Vec<String>,
) -> std::result::Result<Vec<TocEntry>, MidxLoadError> {
    if data.len() < MIDX_HEADER_SIZE + hash_len {
        return Err(MidxLoadError("multi-pack-index file too small".to_owned()));
    }
    let chunk_count = usize::from(data[6]);
    let table_len = (chunk_count + 1) * CHUNK_TOC_ENTRY_SIZE;
    if data.len() < MIDX_HEADER_SIZE + table_len {
        return Err(MidxLoadError(
            "multi-pack-index chunk table is truncated".to_owned(),
        ));
    }
    // Every slot in 0..=chunk_count is in bounds thanks to the size check above.
    let toc_slot = |slot: usize| -> (u32, u64) {
        let at = MIDX_HEADER_SIZE + slot * CHUNK_TOC_ENTRY_SIZE;
        let id = u32::from_be_bytes([data[at], data[at + 1], data[at + 2], data[at + 3]]);
        let mut raw_offset = [0u8; 8];
        raw_offset.copy_from_slice(&data[at + 4..at + 12]);
        (id, u64::from_be_bytes(raw_offset))
    };
    let corrupted = || {
        MidxLoadError("multi-pack-index required pack-name chunk missing or corrupted".to_owned())
    };
    // Chunks may not extend into the trailing hash.
    let payload_limit = (data.len() as u64).saturating_sub(hash_len as u64);
    let mut parsed: Vec<TocEntry> = Vec::with_capacity(chunk_count);
    for slot in 0..chunk_count {
        let (chunk_id, chunk_offset) = toc_slot(slot);
        if chunk_id == 0 {
            errors.push("terminating chunk id appears earlier than expected".to_owned());
            return Err(corrupted());
        }
        if !(chunk_offset as usize).is_multiple_of(MIDX_CHUNK_ALIGNMENT) {
            errors.push(format!(
                "chunk id {chunk_id:x} not {MIDX_CHUNK_ALIGNMENT}-byte aligned"
            ));
            return Err(corrupted());
        }
        let (_, next_chunk_offset) = toc_slot(slot + 1);
        let offsets_ok = next_chunk_offset >= chunk_offset && next_chunk_offset <= payload_limit;
        if !offsets_ok {
            errors.push(format!(
                "improper chunk offset(s) {chunk_offset:x} and {next_chunk_offset:x}"
            ));
            return Err(corrupted());
        }
        let already_seen = parsed.iter().any(|earlier| earlier.id == chunk_id);
        if already_seen {
            errors.push(format!("duplicate chunk ID {chunk_id:x} found"));
            return Err(corrupted());
        }
        parsed.push(TocEntry {
            id: chunk_id,
            offset: chunk_offset as usize,
        });
    }
    let (final_id, _) = toc_slot(chunk_count);
    if final_id != 0 {
        errors.push(format!("final chunk has non-zero id {final_id:x}"));
        return Err(corrupted());
    }
    Ok(parsed)
}
/// Returns the `(offset, length)` of the first chunk with the given id.
///
/// The length runs to the next chunk's offset, or for the last chunk to
/// `data_len` minus the 20-byte trailer. Returns `None` when no chunk has that id.
fn toc_chunk_range(chunks: &[TocEntry], data_len: usize, id: u32) -> Option<(usize, usize)> {
    let found = chunks.iter().position(|chunk| chunk.id == id)?;
    let start = chunks[found].offset;
    let end = match chunks.get(found + 1) {
        Some(following) => following.offset,
        None => data_len.saturating_sub(20),
    };
    Some((start, end.saturating_sub(start)))
}
/// Verifies the multi-pack-index under `objects_dir/pack`.
///
/// Returns `Ok(())` when no multi-pack-index exists, when it cannot be read, or
/// when every check passes. Otherwise returns the list of problems found.
///
/// Structural problems (bad header, broken table of contents, missing or
/// mis-sized required chunks, unordered fanout, unordered pack names in a
/// version 1 file) stop verification immediately. After that the checksum, pack
/// loading, object-id ordering and per-object offsets are all checked and their
/// problems accumulated.
///
/// The high bit of an object's 32-bit offset field is treated as a
/// large-offset marker only when the file has a large-offsets chunk. Without
/// that chunk the field is a plain offset, which is how the writer in this file
/// stores offsets between 2 GiB and 4 GiB when no offset exceeds 32 bits.
pub fn verify_midx(objects_dir: &Path) -> std::result::Result<(), Vec<String>> {
    let pack_dir = objects_dir.join("pack");
    let path = match resolve_tip_midx_path(&pack_dir) {
        Some(p) => p,
        None => return Ok(()),
    };
    let data = match fs::read(&path) {
        Ok(d) => d,
        Err(_) => return Ok(()),
    };
    let mut errors: Vec<String> = Vec::new();

    // --- Fixed header -------------------------------------------------------
    if data.len() < MIDX_HEADER_SIZE + 20 {
        return Err(vec!["multi-pack-index file is too small".to_owned()]);
    }
    let sig = u32::from_be_bytes([data[0], data[1], data[2], data[3]]);
    if sig != MIDX_SIGNATURE {
        return Err(vec![format!(
            "multi-pack-index signature 0x{sig:08x} does not match signature 0x{MIDX_SIGNATURE:08x}"
        )]);
    }
    let version = data[4];
    if version != MIDX_VERSION_V1 && version != MIDX_VERSION_V2 {
        return Err(vec![format!(
            "multi-pack-index version {version} not recognized"
        )]);
    }
    let hash_version = data[5];
    let expected_hash_version = repo_midx_hash_version_for_objects_dir(objects_dir);
    if hash_version != expected_hash_version {
        return Err(vec![format!(
            "multi-pack-index hash version {hash_version} does not match version {expected_hash_version}"
        )]);
    }
    let hash_len = 20usize;
    let num_packs = u32::from_be_bytes([data[8], data[9], data[10], data[11]]) as usize;

    // --- Table of contents and required chunks ------------------------------
    let chunks = match parse_midx_toc(&data, hash_len, &mut errors) {
        Ok(c) => c,
        Err(e) => {
            errors.push(e.0);
            return Err(errors);
        }
    };
    let Some((pn_off, pn_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_PACKNAMES)
    else {
        errors.push("multi-pack-index required pack-name chunk missing or corrupted".to_owned());
        return Err(errors);
    };
    let Some((fan_off, fan_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OIDFANOUT)
    else {
        errors.push("multi-pack-index required OID fanout chunk missing or corrupted".to_owned());
        return Err(errors);
    };
    if fan_len != 256 * 4 {
        errors.push("multi-pack-index OID fanout is of the wrong size".to_owned());
        errors.push("multi-pack-index required OID fanout chunk missing or corrupted".to_owned());
        return Err(errors);
    }
    let fanout = |i: usize| -> u32 {
        let b = fan_off + i * 4;
        u32::from_be_bytes([data[b], data[b + 1], data[b + 2], data[b + 3]])
    };
    for i in 0..255 {
        let f1 = fanout(i);
        let f2 = fanout(i + 1);
        if f1 > f2 {
            errors.push(format!(
                "oid fanout out of order: fanout[{i}] = {f1:x} > {f2:x} = fanout[{}]",
                i + 1
            ));
            errors
                .push("multi-pack-index required OID fanout chunk missing or corrupted".to_owned());
            return Err(errors);
        }
    }
    // The last fanout entry is the total object count.
    let num_objects = fanout(255) as usize;
    let Some((oidl_off, oidl_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OIDLOOKUP)
    else {
        errors.push("multi-pack-index required OID lookup chunk missing or corrupted".to_owned());
        return Err(errors);
    };
    if oidl_len != hash_len * num_objects {
        errors.push("multi-pack-index OID lookup chunk is the wrong size".to_owned());
        errors.push("multi-pack-index required OID lookup chunk missing or corrupted".to_owned());
        return Err(errors);
    }
    let Some((ooff_off, ooff_len)) =
        toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OBJECTOFFSETS)
    else {
        errors
            .push("multi-pack-index required object offsets chunk missing or corrupted".to_owned());
        return Err(errors);
    };
    if ooff_len != num_objects * 8 {
        errors.push("multi-pack-index object offset chunk is the wrong size".to_owned());
        errors
            .push("multi-pack-index required object offsets chunk missing or corrupted".to_owned());
        return Err(errors);
    }
    // The large-offsets chunk is optional.
    let large_off = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_LARGEOFFSETS);
    let names = match parse_pack_names_blob(&data[pn_off..pn_off + pn_len]) {
        Ok(n) => n,
        Err(_) => {
            errors.push("multi-pack-index pack-name chunk is too short".to_owned());
            return Err(errors);
        }
    };
    // Only version 1 files are required to list pack names in strictly increasing order.
    if version == MIDX_VERSION_V1 {
        for i in 1..names.len() {
            if names[i] <= names[i - 1] {
                errors.push(format!(
                    "multi-pack-index pack names out of order: '{}' before '{}'",
                    names[i - 1],
                    names[i]
                ));
                return Err(errors);
            }
        }
    }

    // --- Accumulating checks -------------------------------------------------
    if !midx_checksum_is_valid(&data) {
        errors.push("incorrect checksum".to_owned());
    }
    let mut pack_indexes: Vec<Option<PackIndex>> = Vec::with_capacity(num_packs);
    for i in 0..num_packs {
        let loaded = match names.get(i) {
            Some(name) => read_pack_index_no_verify(&pack_dir.join(name)).ok(),
            None => None,
        };
        if loaded.is_none() {
            errors.push(format!("failed to load pack in position {i}"));
        }
        pack_indexes.push(loaded);
    }
    if num_objects == 0 {
        // An empty index is itself reported as a problem.
        errors.push("the midx contains no oid".to_owned());
        return Err(errors);
    }
    let oid_at =
        |i: usize| -> &[u8] { &data[oidl_off + i * hash_len..oidl_off + (i + 1) * hash_len] };
    for i in 0..num_objects.saturating_sub(1) {
        let a = oid_at(i);
        let b = oid_at(i + 1);
        if a >= b {
            errors.push(format!(
                "oid lookup out of order: oid[{i}] = {} >= {} = oid[{}]",
                hex::encode(a),
                hex::encode(b),
                i + 1
            ));
        }
    }
    for i in 0..num_objects {
        let ob = ooff_off + i * 8;
        let pack_int_id = u32::from_be_bytes([data[ob], data[ob + 1], data[ob + 2], data[ob + 3]]);
        let off_raw = u32::from_be_bytes([data[ob + 4], data[ob + 5], data[ob + 6], data[ob + 7]]);
        let oid_hex = hex::encode(oid_at(i));
        if pack_int_id as usize >= num_packs {
            errors.push(format!(
                "bad pack-int-id: {pack_int_id} ({num_packs} total packs)"
            ));
            errors.push(format!(
                "failed to load pack entry for oid[{i}] = {oid_hex}"
            ));
            continue;
        }
        let m_offset: u64 = match large_off {
            // The marker bit only has meaning when a large-offsets chunk exists.
            Some((lo_off, lo_len)) if off_raw & MIDX_LARGE_OFFSET_NEEDED != 0 => {
                let slot = (off_raw & !MIDX_LARGE_OFFSET_NEEDED) as usize;
                if (slot + 1) * 8 > lo_len {
                    errors.push("multi-pack-index large offset out of bounds".to_owned());
                    continue;
                }
                let b = lo_off + slot * 8;
                let mut arr = [0u8; 8];
                arr.copy_from_slice(&data[b..b + 8]);
                u64::from_be_bytes(arr)
            }
            _ => u64::from(off_raw),
        };
        let Some(Some(idx)) = pack_indexes.get(pack_int_id as usize) else {
            errors.push(format!(
                "failed to load pack entry for oid[{i}] = {oid_hex}"
            ));
            continue;
        };
        let Ok(oid) = ObjectId::from_bytes(oid_at(i)) else {
            errors.push(format!(
                "failed to load pack entry for oid[{i}] = {oid_hex}"
            ));
            continue;
        };
        match idx.find_offset(&oid) {
            Some(p_offset) => {
                if m_offset != p_offset {
                    errors.push(format!(
                        "incorrect object offset for oid[{i}] = {oid_hex}: {m_offset:x} != {p_offset:x}"
                    ));
                }
            }
            None => {
                errors.push(format!(
                    "failed to load pack entry for oid[{i}] = {oid_hex}"
                ));
            }
        }
    }
    if errors.is_empty() {
        Ok(())
    } else {
        Err(errors)
    }
}
/// Checks that the last 20 bytes of `data` are the SHA-1 of everything before them.
///
/// Returns `false` when `data` is shorter than 20 bytes.
fn midx_checksum_is_valid(data: &[u8]) -> bool {
    let Some(trailer_start) = data.len().checked_sub(20) else {
        return false;
    };
    let (covered, stored) = data.split_at(trailer_start);
    let mut sha = Sha1::new();
    sha.update(covered);
    sha.finalize().as_slice() == stored
}
pub fn read_midx_pack_idx_names(objects_dir: &Path) -> Result<Vec<String>> {
let pack_dir = objects_dir.join("pack");
let path = resolve_tip_midx_path(&pack_dir)
.ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
let data = fs::read(&path).map_err(Error::Io)?;
let (_, hdr_end, _) = parse_midx_header(&data)?;
let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
parse_pack_names_blob(&data[pn_off..pn_off + pn_len])
}
/// An object listed in a multi-pack-index together with the pack that holds it.
pub struct MidxObjectRef {
/// Object id read from the oid-lookup chunk.
pub oid: ObjectId,
/// Pack id read from the object-offsets chunk; indexes the pack-names list.
pub pack_int_id: usize,
}
pub fn read_midx_objects(objects_dir: &Path) -> Result<(Vec<String>, Vec<MidxObjectRef>)> {
let pack_dir = objects_dir.join("pack");
let path = resolve_tip_midx_path(&pack_dir)
.ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
let data = fs::read(&path).map_err(Error::Io)?;
let (_, hdr_end, _) = parse_midx_header(&data)?;
let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
let (oidl_off, oidl_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
if oidl_len % 20 != 0 || ooff_len % 8 != 0 {
return Err(Error::CorruptObject(
"bad MIDX oid-lookup / object-offsets size".to_owned(),
));
}
let num = oidl_len / 20;
if num * 8 != ooff_len {
return Err(Error::CorruptObject(
"MIDX oid count does not match object-offsets".to_owned(),
));
}
let mut objects = Vec::with_capacity(num);
for i in 0..num {
let oid = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])
.map_err(|e| Error::CorruptObject(e.to_string()))?;
let base = ooff_off + i * 8;
let pack_id = read_be_u32(&data, base)? as usize;
objects.push(MidxObjectRef {
oid,
pack_int_id: pack_id,
});
}
Ok((names, objects))
}
pub fn midx_checksum_hex(objects_dir: &Path) -> Result<String> {
let pack_dir = objects_dir.join("pack");
let path = resolve_tip_midx_path(&pack_dir)
.ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
midx_checksum_hex_from_path(&path)
}
fn resolve_read_midx_path(pack_dir: &Path, checksum: Option<&str>) -> Result<std::path::PathBuf> {
match checksum {
Some(cs) => resolve_midx_layer_path(pack_dir, cs)
.ok_or_else(|| Error::CorruptObject(format!("could not find MIDX with checksum {cs}"))),
None => resolve_tip_midx_path(pack_dir)
.ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned())),
}
}
/// Formats the dump and per-object listing of the default multi-pack-index.
///
/// Equivalent to [`format_midx_show_objects_layer`] without a checksum.
pub fn format_midx_show_objects(objects_dir: &Path) -> Result<String> {
    let any_layer: Option<&str> = None;
    format_midx_show_objects_layer(objects_dir, any_layer)
}
pub fn format_midx_show_objects_layer(
objects_dir: &Path,
checksum: Option<&str>,
) -> Result<String> {
let mut out = format_midx_dump_layer(objects_dir, checksum)?;
let pack_dir = objects_dir.join("pack");
let path = resolve_read_midx_path(&pack_dir, checksum)?;
let data = fs::read(&path).map_err(Error::Io)?;
let (_, hdr_end, _) = parse_midx_header(&data)?;
let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
let (oidl_off, oidl_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
if oidl_len % 20 != 0 || ooff_len % 8 != 0 {
return Err(Error::CorruptObject(
"bad MIDX oid-lookup / object-offsets size".to_owned(),
));
}
let num = oidl_len / 20;
if num * 8 != ooff_len {
return Err(Error::CorruptObject(
"MIDX oid count does not match object-offsets".to_owned(),
));
}
for i in 0..num {
let oid = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])
.map_err(|e| Error::CorruptObject(e.to_string()))?;
let base = ooff_off + i * 8;
let pack_id = read_be_u32(&data, base)? as usize;
let offset = u64::from(read_be_u32(&data, base + 4)?);
let idx_name = names
.get(pack_id)
.ok_or_else(|| Error::CorruptObject("pack id out of range in MIDX".to_owned()))?;
let stem = idx_name.strip_suffix(".idx").unwrap_or(idx_name);
let dir_disp = objects_dir.display().to_string();
let dir_disp = if objects_dir.is_absolute() || dir_disp.starts_with("./") {
dir_disp
} else {
format!("./{dir_disp}")
};
out.push_str(&format!(
"{} {}\t{}/pack/{}.pack\n",
oid.to_hex(),
offset,
dir_disp,
stem
));
}
Ok(out)
}
/// Formats a textual summary of the tip multi-pack-index of `objects_dir`.
///
/// Shorthand for [`format_midx_dump_layer`] with no layer checksum.
pub fn format_midx_dump(objects_dir: &Path) -> Result<String> {
format_midx_dump_layer(objects_dir, None)
}
pub fn format_midx_dump_layer(objects_dir: &Path, checksum: Option<&str>) -> Result<String> {
let pack_dir = objects_dir.join("pack");
let path = resolve_read_midx_path(&pack_dir, checksum)?;
let data = fs::read(&path).map_err(Error::Io)?;
let (hdr, hdr_end, _) = parse_midx_header(&data)?;
let sig = read_be_u32(&data, 0)?;
let version = data[4];
let hash_len: u8 = match data[5] {
1 => 20,
2 => 32,
other => other,
};
let num_chunks = hdr.num_chunks;
let num_packs = read_be_u32(&data, 8)?;
let mut chunk_tags: Vec<&'static str> = Vec::new();
let n = num_chunks as usize;
let pos = hdr_end;
let toc_end = pos + (n + 1) * CHUNK_TOC_ENTRY_SIZE;
if data.len() < toc_end + 20 {
return Err(Error::CorruptObject(
"truncated MIDX chunk table".to_owned(),
));
}
for i in 0..n {
let base = pos + i * CHUNK_TOC_ENTRY_SIZE;
let id = read_be_u32(&data, base)?;
let tag = match id {
x if x == MIDX_CHUNKID_PACKNAMES => "pack-names",
x if x == MIDX_CHUNKID_OIDFANOUT => "oid-fanout",
x if x == MIDX_CHUNKID_OIDLOOKUP => "oid-lookup",
x if x == MIDX_CHUNKID_OBJECTOFFSETS => "object-offsets",
x if x == MIDX_CHUNKID_LARGEOFFSETS => "large-offsets",
x if x == MIDX_CHUNKID_REVINDEX => "revindex",
x if x == 0x4254_4d50 => "bitmapped-packs",
_ => "unknown",
};
chunk_tags.push(tag);
}
let (_ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
let num_objects = ooff_len / 8;
let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
let pack_names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
let mut out = String::new();
out.push_str(&format!(
"header: {:08x} {} {} {} {}\n",
sig, version, hash_len, num_chunks, num_packs
));
out.push_str("chunks:");
for t in &chunk_tags {
out.push(' ');
out.push_str(t);
}
out.push('\n');
out.push_str(&format!("num_objects: {num_objects}\n"));
out.push_str("packs:\n");
for n in &pack_names {
out.push_str(n);
out.push('\n');
}
out.push_str(&format!("object-dir: {}\n", objects_dir.display()));
Ok(out)
}
/// Lookup tables read from the tip multi-pack-index by [`load_midx_reuse_tables`].
///
/// All four vectors have one entry per object in the MIDX.
#[derive(Debug, Clone)]
pub struct MidxReuseTables {
/// Object ids in the order they are stored in the oid-lookup chunk.
/// The lookup methods binary-search this vector, so it is presumably sorted
/// ascending — TODO confirm against the writer.
pub oids: Vec<ObjectId>,
/// `(pack id, offset)` per object from the object-offsets chunk, index-aligned with `oids`.
pub pack_and_offset: Vec<(u32, u64)>,
/// Entries of the reverse-index chunk in file order; each value is an index into `oids`.
pub rid_order: Vec<u32>,
/// For each index into `oids`, the position at which that index occurs in `rid_order`.
pub oid_idx_to_rank: Vec<u32>,
}
pub fn load_midx_reuse_tables(objects_dir: &Path) -> Result<Option<MidxReuseTables>> {
let pack_dir = objects_dir.join("pack");
let Some(path) = resolve_tip_midx_path(&pack_dir) else {
return Ok(None);
};
let data = fs::read(&path).map_err(Error::Io)?;
let (_, hdr_end, _) = parse_midx_header(&data)?;
let (oidl_off, oid_l_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
let Ok((ridx_off, ridx_len)) = find_chunk(&data, hdr_end, MIDX_CHUNKID_REVINDEX) else {
return Ok(None);
};
if oid_l_len % 20 != 0 || ooff_len != oid_l_len / 20 * 8 {
return Err(Error::CorruptObject(
"MIDX OID / offset chunk size mismatch".to_owned(),
));
}
let num_objects = oid_l_len / 20;
if ridx_len != num_objects.saturating_mul(4) {
return Err(Error::CorruptObject(
"MIDX reverse index length does not match object count".to_owned(),
));
}
if num_objects == 0 {
return Ok(None);
}
let mut oids = Vec::with_capacity(num_objects);
for i in 0..num_objects {
let base = oidl_off + i * 20;
oids.push(ObjectId::from_bytes(&data[base..base + 20])?);
}
let mut pack_and_offset = Vec::with_capacity(num_objects);
for i in 0..num_objects {
let ob = ooff_off + i * 8;
let pack_id = read_be_u32(&data, ob)?;
let off32 = read_be_u32(&data, ob + 4)?;
pack_and_offset.push((pack_id, u64::from(off32)));
}
let mut rid_order = Vec::with_capacity(num_objects);
for i in 0..num_objects {
let base = ridx_off + i * 4;
rid_order.push(read_be_u32(&data, base)?);
}
let mut oid_idx_to_rank = vec![0u32; num_objects];
for (rank, &oid_idx) in rid_order.iter().enumerate() {
let idx = usize::try_from(oid_idx)
.map_err(|_| Error::CorruptObject("bad MIDX reverse index entry".to_owned()))?;
if idx >= num_objects {
return Err(Error::CorruptObject(
"MIDX reverse index out of range".to_owned(),
));
}
oid_idx_to_rank[idx] = u32::try_from(rank)
.map_err(|_| Error::CorruptObject("too many MIDX objects".to_owned()))?;
}
Ok(Some(MidxReuseTables {
oids,
pack_and_offset,
rid_order,
oid_idx_to_rank,
}))
}
impl MidxReuseTables {
    /// Returns the `oid_idx_to_rank` entry for `oid`, or `None` when a binary
    /// search of `oids` does not find it.
    #[must_use]
    pub fn global_bitmap_bit(&self, oid: &ObjectId) -> Option<u32> {
        self.oids
            .binary_search(oid)
            .ok()
            .map(|slot| self.oid_idx_to_rank[slot])
    }

    /// Returns the pack id recorded for `oid` in `pack_and_offset`, or `None`
    /// when a binary search of `oids` does not find it.
    #[must_use]
    pub fn canonical_pack(&self, oid: &ObjectId) -> Option<u32> {
        match self.oids.binary_search(oid) {
            Ok(slot) => Some(self.pack_and_offset[slot].0),
            Err(_) => None,
        }
    }
}
/// One entry of the MIDX bitmapped-packs (BTMP) chunk, as read by
/// `read_midx_btmp_ranges`.
#[derive(Debug, Clone, Copy)]
pub struct MidxBtmpPackRange {
/// Position of the entry within the BTMP chunk (0-based); the reader uses it as the pack id.
pub pack_id: u32,
/// First big-endian u32 of the entry.
/// Presumably the first bitmap bit belonging to this pack — TODO confirm.
pub bitmap_pos: u32,
/// Second big-endian u32 of the entry.
/// Presumably the number of bitmap bits belonging to this pack — TODO confirm.
pub bitmap_nr: u32,
}
pub fn read_midx_btmp_ranges(objects_dir: &Path) -> Result<Vec<MidxBtmpPackRange>> {
let pack_dir = objects_dir.join("pack");
let Some(path) = resolve_tip_midx_path(&pack_dir) else {
return Ok(Vec::new());
};
let data = fs::read(&path).map_err(Error::Io)?;
let (_, hdr_end, _) = parse_midx_header(&data)?;
let Ok((btmp_off, btmp_len)) = find_chunk(&data, hdr_end, MIDX_CHUNKID_BITMAPPED_PACKS) else {
return Ok(Vec::new());
};
if btmp_len == 0 || btmp_len % 8 != 0 {
return Err(Error::CorruptObject(
"invalid MIDX BTMP chunk length".to_owned(),
));
}
let num_packs = read_be_u32(&data, 8)?;
let n_entries = btmp_len / 8;
if u32::try_from(n_entries).ok() != Some(num_packs) {
return Err(Error::CorruptObject(
"MIDX BTMP entry count does not match num_packs".to_owned(),
));
}
let mut out = Vec::with_capacity(n_entries);
for i in 0..n_entries {
let base = btmp_off + i * 8;
let bitmap_pos = read_be_u32(&data, base)?;
let bitmap_nr = read_be_u32(&data, base + 4)?;
out.push(MidxBtmpPackRange {
pack_id: u32::try_from(i)
.map_err(|_| Error::CorruptObject("too many packs in MIDX BTMP".to_owned()))?,
bitmap_pos,
bitmap_nr,
});
}
Ok(out)
}
pub fn format_midx_bitmapped_packs(objects_dir: &Path) -> Result<String> {
let pack_dir = objects_dir.join("pack");
let path = resolve_tip_midx_path(&pack_dir)
.ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
let data = fs::read(&path).map_err(Error::Io)?;
let (_, hdr_end, _) = parse_midx_header(&data)?;
let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
let Ok((btmp_off, btmp_len)) = find_chunk(&data, hdr_end, MIDX_CHUNKID_BITMAPPED_PACKS) else {
return Err(Error::CorruptObject(
"MIDX does not contain the BTMP chunk".to_owned(),
));
};
let n_entries = btmp_len / 8;
let mut out = String::new();
for i in 0..n_entries {
let base = btmp_off + i * 8;
let bitmap_pos = read_be_u32(&data, base)?;
let bitmap_nr = read_be_u32(&data, base + 4)?;
let idx_name = names.get(i).ok_or_else(|| {
Error::CorruptObject("BTMP entry has no corresponding pack name".to_owned())
})?;
let stem = idx_name.strip_suffix(".idx").unwrap_or(idx_name);
out.push_str(&format!("{stem}.pack\n"));
out.push_str(&format!(" bitmap_pos: {bitmap_pos}\n"));
out.push_str(&format!(" bitmap_nr: {bitmap_nr}\n"));
}
Ok(out)
}
pub fn midx_lookup_pack_and_offset(objects_dir: &Path, oid: &ObjectId) -> Result<(u32, u64)> {
let pack_dir = objects_dir.join("pack");
let path = resolve_tip_midx_path(&pack_dir)
.ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
let data = fs::read(&path).map_err(Error::Io)?;
let (_, hdr_end, _) = parse_midx_header(&data)?;
let (fanout_off, fanout_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDFANOUT)?;
let (oidl_off, oid_l_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OIDLOOKUP)?;
let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
if fanout_len != 256 * 4 || oid_l_len % 20 != 0 || ooff_len != oid_l_len / 20 * 8 {
return Err(Error::CorruptObject("truncated MIDX OID chunks".to_owned()));
}
let num_objects = oid_l_len / 20;
let first = oid.as_bytes()[0] as usize;
let j0 = if first == 0 {
0usize
} else {
read_be_u32(&data, fanout_off + (first - 1) * 4)? as usize
};
let j1 = read_be_u32(&data, fanout_off + first * 4)? as usize;
let mut lo = j0;
let mut hi = j1;
while lo < hi {
let mid = (lo + hi) / 2;
let base = oidl_off + mid * 20;
let cmp = data[base..base + 20].cmp(oid.as_bytes());
if cmp == std::cmp::Ordering::Less {
lo = mid + 1;
} else {
hi = mid;
}
}
if lo >= num_objects {
return Err(Error::CorruptObject(format!(
"object {} not in multi-pack-index",
oid.to_hex()
)));
}
let base = oidl_off + lo * 20;
if data[base..base + 20] != *oid.as_bytes() {
return Err(Error::CorruptObject(format!(
"object {} not in multi-pack-index",
oid.to_hex()
)));
}
let ob = ooff_off + lo * 8;
let pack_id = read_be_u32(&data, ob)?;
let off32 = read_be_u32(&data, ob + 4)?;
Ok((pack_id, u64::from(off32)))
}
/// Reports whether `oid` is listed in the tip MIDX of `objects_dir`.
///
/// Returns `Ok(None)` when there is no tip MIDX or when `midx_load_for_read`
/// decides the file should be skipped; otherwise `Ok(Some(listed))`.
///
/// The fanout bucket for the first OID byte is scanned linearly; the scan
/// stops at the first stored id greater than `oid`.
///
/// # Errors
/// `Error::Io` when the file cannot be read; errors from reading fanout values
/// or decoding stored ids are passed through.
pub fn midx_oid_listed_in_tip(objects_dir: &Path, oid: &ObjectId) -> Result<Option<bool>> {
    let Some(midx_path) = resolve_tip_midx_path(&objects_dir.join("pack")) else {
        return Ok(None);
    };
    let data = fs::read(&midx_path).map_err(Error::Io)?;
    let expected_hash = repo_midx_hash_version_for_objects_dir(objects_dir);
    let view = match midx_load_for_read(&data, expected_hash) {
        MidxLoadResult::Ok(view) => view,
        MidxLoadResult::Skip => return Ok(None),
    };
    let (oidf_off, oidl_off, num_objects) = (view.oidf_off, view.oidl_off, view.num_objects);

    let first = oid.as_bytes()[0] as usize;
    let bucket_start = if first == 0 {
        0u32
    } else {
        read_be_u32(&data, oidf_off + (first - 1) * 4)?
    };
    let bucket_end = read_be_u32(&data, oidf_off + first * 4)?;

    let scan_end = (bucket_end as usize).min(num_objects);
    for i in (bucket_start as usize)..scan_end {
        let stored = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])?;
        match stored.cmp(oid) {
            std::cmp::Ordering::Less => {}
            std::cmp::Ordering::Equal => return Ok(Some(true)),
            std::cmp::Ordering::Greater => return Ok(Some(false)),
        }
    }
    Ok(Some(false))
}
/// Chunk locations and metadata extracted from a MIDX buffer by `midx_load_for_read`.
///
/// Offsets are byte offsets into the same buffer that was passed to the loader.
struct MidxReadView {
// Offset of the OID fanout chunk (256 big-endian u32 values).
oidf_off: usize,
// Offset of the OID lookup chunk.
oidl_off: usize,
// Offset of the object-offsets chunk (8 bytes per object).
ooff_off: usize,
// `(offset, length)` of the large-offsets chunk, when present.
loff: Option<(usize, usize)>,
// Object count, taken from the last fanout entry.
num_objects: usize,
// Pack names parsed from the pack-names chunk, in stored order.
pack_names: Vec<String>,
}
/// Outcome of `midx_load_for_read`.
enum MidxLoadResult {
// The buffer was accepted; the view describes where its chunks live.
Ok(MidxReadView),
// The buffer should be ignored by the caller (too short, hash-version
// mismatch, or an unreadable chunk table).
Skip,
}
/// Prints `line` to stderr the first time it is seen in this process.
///
/// Previously printed lines are remembered in a process-wide set. If the set's
/// mutex is poisoned the line is printed unconditionally.
fn midx_warn_once(line: &str) {
    use std::sync::{Mutex, OnceLock};
    static SEEN: OnceLock<Mutex<HashSet<String>>> = OnceLock::new();

    let should_print = match SEEN.get_or_init(|| Mutex::new(HashSet::new())).lock() {
        // `insert` returns true only when the line was not recorded before.
        Ok(mut printed) => printed.insert(line.to_string()),
        Err(_) => true,
    };
    if should_print {
        eprintln!("{line}");
    }
}
/// Writes `lines` to stderr and terminates the process with exit code 128.
///
/// Every line except the last is prefixed with `error: `; the last line is
/// prefixed with `fatal: `. Write failures are ignored.
fn midx_die(lines: &[&str]) -> ! {
    use std::io::Write;
    let mut stderr = std::io::stderr().lock();
    if let Some((last, earlier)) = lines.split_last() {
        for msg in earlier {
            let _ = writeln!(stderr, "error: {msg}");
        }
        let _ = writeln!(stderr, "fatal: {last}");
    }
    let _ = stderr.flush();
    std::process::exit(128);
}
/// Validates a MIDX buffer for reading and locates its chunks.
///
/// Returns `MidxLoadResult::Skip` when the buffer is shorter than the header
/// plus 20 bytes, when its hash version differs from `expected_hash_version`
/// (a warning is printed once), or when the chunk table cannot be parsed (each
/// collected error is printed once).
///
/// Any other structural problem is fatal: a wrong signature or version, a
/// missing or mis-sized required chunk, a non-monotonic fanout, or a malformed
/// pack-name chunk terminates the process through `midx_die`.
///
/// A reverse-index chunk of the wrong size only produces warnings.
fn midx_load_for_read(data: &[u8], expected_hash_version: u8) -> MidxLoadResult {
if data.len() < MIDX_HEADER_SIZE + 20 {
return MidxLoadResult::Skip;
}
// Header layout used here: bytes 0..4 signature, byte 4 version,
// byte 5 hash version, bytes 8..12 pack count.
let sig = u32::from_be_bytes([data[0], data[1], data[2], data[3]]);
if sig != MIDX_SIGNATURE {
midx_die(&[&format!(
"multi-pack-index signature 0x{sig:08x} does not match signature 0x{MIDX_SIGNATURE:08x}"
)]);
}
let version = data[4];
if version != MIDX_VERSION_V1 && version != MIDX_VERSION_V2 {
midx_die(&[&format!(
"multi-pack-index version {version} not recognized"
)]);
}
let hash_version = data[5];
if hash_version != expected_hash_version {
midx_warn_once(&format!(
"error: multi-pack-index hash version {hash_version} does not match version {expected_hash_version}"
));
return MidxLoadResult::Skip;
}
// NOTE(review): the hash length is fixed at 20 regardless of `hash_version`;
// confirm whether 32-byte hashes are meant to be supported on this path.
let hash_len = 20usize;
let num_packs = u32::from_be_bytes([data[8], data[9], data[10], data[11]]) as usize;
let mut toc_errors: Vec<String> = Vec::new();
let chunks = match parse_midx_toc(data, hash_len, &mut toc_errors) {
Ok(c) => c,
Err(_) => {
for e in &toc_errors {
midx_warn_once(&format!("error: {e}"));
}
return MidxLoadResult::Skip;
}
};
let Some((pn_off, pn_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_PACKNAMES)
else {
midx_die(&["multi-pack-index required pack-name chunk missing or corrupted"]);
};
let Some((oidf_off, oidf_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OIDFANOUT)
else {
midx_die(&["multi-pack-index required OID fanout chunk missing or corrupted"]);
};
if oidf_len != 256 * 4 {
midx_die(&[
"multi-pack-index OID fanout is of the wrong size",
"multi-pack-index required OID fanout chunk missing or corrupted",
]);
}
// Reads fanout entry `i` as a big-endian u32.
let fanout = |i: usize| -> u32 {
let b = oidf_off + i * 4;
u32::from_be_bytes([data[b], data[b + 1], data[b + 2], data[b + 3]])
};
// The fanout table must be non-decreasing.
for i in 0..255 {
let f1 = fanout(i);
let f2 = fanout(i + 1);
if f1 > f2 {
midx_die(&[
&format!(
"oid fanout out of order: fanout[{i}] = {f1:x} > {f2:x} = fanout[{}]",
i + 1
),
"multi-pack-index required OID fanout chunk missing or corrupted",
]);
}
}
// The last fanout entry is used as the object count; the lookup and
// offsets chunks below must match it exactly.
let num_objects = fanout(255) as usize;
let Some((oidl_off, oidl_len)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OIDLOOKUP)
else {
midx_die(&["multi-pack-index required OID lookup chunk missing or corrupted"]);
};
if oidl_len != hash_len * num_objects {
midx_die(&[
"multi-pack-index OID lookup chunk is the wrong size",
"multi-pack-index required OID lookup chunk missing or corrupted",
]);
}
let Some((ooff_off, ooff_len)) =
toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_OBJECTOFFSETS)
else {
midx_die(&["multi-pack-index required object offsets chunk missing or corrupted"]);
};
if ooff_len != num_objects * 8 {
midx_die(&[
"multi-pack-index object offset chunk is the wrong size",
"multi-pack-index required object offsets chunk missing or corrupted",
]);
}
// Optional chunks: large offsets are recorded, the reverse index is only
// size-checked (warnings, not fatal).
let loff = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_LARGEOFFSETS);
if let Some((_, rlen)) = toc_chunk_range(&chunks, data.len(), MIDX_CHUNKID_REVINDEX) {
if rlen != num_objects * 4 {
midx_warn_once("error: multi-pack-index reverse-index chunk is the wrong size");
midx_warn_once("warning: multi-pack bitmap is missing required reverse index");
}
}
// Pack names are NUL-terminated strings stored back to back; exactly
// `num_packs` of them are read.
let mut pack_names: Vec<String> = Vec::with_capacity(num_packs);
let blob = &data[pn_off..pn_off + pn_len];
let mut start = 0usize;
for _ in 0..num_packs {
let Some(rel) = blob[start..].iter().position(|&b| b == 0) else {
midx_die(&["multi-pack-index pack-name chunk is too short"]);
};
let name = match std::str::from_utf8(&blob[start..start + rel]) {
Ok(s) => s.to_string(),
Err(_) => midx_die(&["multi-pack-index pack-name chunk is too short"]),
};
// Version 1 files must list pack names in strictly increasing order.
if version == MIDX_VERSION_V1
&& !pack_names.is_empty()
&& name.as_str() <= pack_names.last().map(|s| s.as_str()).unwrap_or("")
{
midx_die(&[&format!(
"multi-pack-index pack names out of order: '{}' before '{name}'",
pack_names.last().cloned().unwrap_or_default()
)]);
}
pack_names.push(name);
start += rel + 1;
}
MidxLoadResult::Ok(MidxReadView {
oidf_off,
oidl_off,
ooff_off,
loff,
num_objects,
pack_names,
})
}
pub fn validate_midx_referenced_packs(objects_dir: &Path) {
use std::sync::Mutex;
use std::sync::OnceLock;
static DONE: OnceLock<Mutex<HashSet<std::path::PathBuf>>> = OnceLock::new();
let done = DONE.get_or_init(|| Mutex::new(HashSet::new()));
if let Ok(mut set) = done.lock() {
if !set.insert(objects_dir.to_path_buf()) {
return;
}
}
let pack_dir = objects_dir.join("pack");
let Some(midx_path) = resolve_tip_midx_path(&pack_dir) else {
return;
};
let Ok(data) = fs::read(&midx_path) else {
return;
};
let MidxReadView { pack_names, .. } =
match midx_load_for_read(&data, repo_midx_hash_version_for_objects_dir(objects_dir)) {
MidxLoadResult::Ok(v) => v,
MidxLoadResult::Skip => return,
};
for idx_name in &pack_names {
let idx_path = pack_dir.join(idx_name);
if !idx_path.exists() {
continue;
}
if crate::pack::read_pack_index_no_verify(&idx_path).is_err() {
let mut pack_path = idx_path.clone();
pack_path.set_extension("pack");
midx_warn_once(&format!(
"error: packfile {} index unavailable",
pack_path.display()
));
}
}
}
pub fn try_read_object_via_midx(
objects_dir: &Path,
oid: &ObjectId,
) -> Result<Option<crate::objects::Object>> {
let pack_dir = objects_dir.join("pack");
let Some(midx_path) = resolve_tip_midx_path(&pack_dir) else {
return Ok(None);
};
let data = fs::read(&midx_path).map_err(Error::Io)?;
let MidxReadView {
oidf_off,
oidl_off,
ooff_off,
loff,
num_objects,
pack_names,
} = match midx_load_for_read(&data, repo_midx_hash_version_for_objects_dir(objects_dir)) {
MidxLoadResult::Ok(v) => v,
MidxLoadResult::Skip => return Ok(None),
};
let first = oid.as_bytes()[0] as usize;
let lo = if first == 0 {
0u32
} else {
read_be_u32(&data, oidf_off + (first - 1) * 4)?
};
let hi = read_be_u32(&data, oidf_off + first * 4)?;
let mut pos = None;
let mut i = lo as usize;
while i < hi as usize && i < num_objects {
let o = ObjectId::from_bytes(&data[oidl_off + i * 20..oidl_off + (i + 1) * 20])?;
let c = o.cmp(oid);
if c == std::cmp::Ordering::Equal {
pos = Some(i);
break;
}
if c == std::cmp::Ordering::Greater {
break;
}
i += 1;
}
let Some(pos) = pos else {
return Ok(None);
};
let obase = ooff_off + pos * 8;
let pack_id = read_be_u32(&data, obase)?;
let raw_off = read_be_u32(&data, obase + 4)?;
let _offset = if (raw_off & MIDX_LARGE_OFFSET_NEEDED) != 0 {
let idx = (raw_off & !MIDX_LARGE_OFFSET_NEEDED) as usize;
let need = (idx + 1) * 8;
match loff {
Some((loff_off, loff_len)) if loff_len >= need => {
read_be_u64(&data, loff_off + idx * 8)?
}
_ => {
midx_die(&["multi-pack-index large offset out of bounds"]);
}
}
} else {
u64::from(raw_off)
};
let idx_name = pack_names
.get(pack_id as usize)
.ok_or_else(|| Error::CorruptObject("bad pack-int-id".to_owned()))?;
let idx_path = pack_dir.join(idx_name);
if !idx_path.exists() {
return Ok(None);
}
let idx = match crate::pack::read_pack_index_no_verify(&idx_path) {
Ok(idx) => idx,
Err(_) => {
let mut pack_path = idx_path.clone();
pack_path.set_extension("pack");
midx_warn_once(&format!(
"error: packfile {} index unavailable",
pack_path.display()
));
return Ok(None);
}
};
crate::pack::read_object_from_pack(&idx, oid).map(Some)
}
pub fn read_midx_preferred_idx_name(objects_dir: &Path) -> Result<String> {
let pack_dir = objects_dir.join("pack");
let path = resolve_tip_midx_path(&pack_dir)
.ok_or_else(|| Error::CorruptObject("no multi-pack-index found".to_owned()))?;
let data = fs::read(&path).map_err(Error::Io)?;
let (_, hdr_end, _) = parse_midx_header(&data)?;
let (pn_off, pn_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
let names = parse_pack_names_blob(&data[pn_off..pn_off + pn_len])?;
let (ooff_off, ooff_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_OBJECTOFFSETS)?;
let (ridx_off, ridx_len) = match find_chunk(&data, hdr_end, MIDX_CHUNKID_REVINDEX) {
Ok(r) => r,
Err(_) => {
return Err(Error::CorruptObject(
"could not determine MIDX preferred pack".to_owned(),
));
}
};
if ridx_len < 4 || ooff_len < 8 {
return Err(Error::CorruptObject("truncated MIDX RIDX/OOFF".to_owned()));
}
let first_oid_idx = read_be_u32(&data, ridx_off)? as usize;
let entry_base = ooff_off + first_oid_idx * 8;
if entry_base + 8 > data.len() || entry_base + 8 > ooff_off + ooff_len {
return Err(Error::CorruptObject(
"bad MIDX object-offsets index".to_owned(),
));
}
let pack_id = read_be_u32(&data, entry_base)?;
let idx = usize::try_from(pack_id)
.map_err(|_| Error::CorruptObject("pack id overflow in multi-pack-index".to_owned()))?;
names
.get(idx)
.cloned()
.ok_or_else(|| Error::CorruptObject("preferred pack id out of range".to_owned()))
}
/// Removes all multi-pack-index state from `pack_dir`.
///
/// Deletes the root `multi-pack-index` file, every root-level
/// `multi-pack-index-*.bitmap` / `.rev` sidecar, and the directory returned by
/// `midx_d_dir` if it exists. Failures to delete the root file or the
/// directory are ignored.
///
/// # Errors
/// Only errors reported while scrubbing the sidecars are returned.
pub fn clear_pack_midx_state(pack_dir: &Path) -> Result<()> {
    let root_midx = pack_dir.join("multi-pack-index");
    let _ = fs::remove_file(root_midx);

    // No hash is exempted: every root sidecar goes.
    scrub_root_midx_sidecars_except(pack_dir, None)?;

    let layer_dir = midx_d_dir(pack_dir);
    if layer_dir.exists() {
        let _ = fs::remove_dir_all(&layer_dir);
    }
    Ok(())
}
/// Writes a multi-pack-index for `pack_dir` using default options.
///
/// Shorthand for [`write_multi_pack_index_with_options`] with
/// `WriteMultiPackIndexOptions::default()`.
pub fn write_multi_pack_index(pack_dir: &Path) -> Result<()> {
write_multi_pack_index_with_options(pack_dir, &WriteMultiPackIndexOptions::default())
}
/// Writes a multi-pack-index for the packs in `pack_dir` according to `opts`.
///
/// Outline of what the body does:
///
/// 1. Unless an explicit pack subset was requested, the existing tip MIDX (if
///    any) is checked: with a valid checksum, every pack it names must still
///    have a `.pack` file; with an invalid checksum a warning is printed and
///    the file is ignored.
/// 2. `*.idx` files that have a sibling `.pack` are collected and sorted, then
///    optionally narrowed and reordered by `opts.pack_names_subset_ordered`.
/// 3. In incremental mode, packs already named by base layers are dropped and
///    objects already in base layers are excluded; nothing is written when no
///    new pack remains.
/// 4. The preferred pack is resolved from `preferred_pack_idx`, then
///    `preferred_pack_name`, then (when bitmap placeholders are requested) by
///    `preferred_pack_index_by_mtime`.
/// 5. The MIDX bytes are built and written either as a new layer plus chain
///    file (incremental) or as the root `multi-pack-index` file, together with
///    optional `.bitmap` / `.rev` sidecars.
///
/// # Errors
/// `Error::CorruptObject` when a pack named by the existing MIDX is missing,
/// there are no packs to index, or the preferred pack is out of range or
/// empty; `Error::Io` for filesystem failures; errors from the helpers are
/// passed through.
pub fn write_multi_pack_index_with_options(
pack_dir: &Path,
opts: &WriteMultiPackIndexOptions,
) -> Result<()> {
// Step 1: sanity-check the existing tip MIDX against the packs on disk.
if opts.pack_names_subset_ordered.is_none() {
if let Some(existing) = resolve_tip_midx_path(pack_dir) {
if let Ok(bytes) = fs::read(&existing) {
if midx_checksum_is_valid(&bytes) {
if let Ok((_, existing_names)) = oids_and_packs_from_midx_data(&bytes) {
for (i, name) in existing_names.iter().enumerate() {
let stem = name.strip_suffix(".idx").unwrap_or(name);
if !pack_dir.join(format!("{stem}.pack")).exists() {
eprintln!("error: could not load pack {i}");
return Err(Error::CorruptObject(format!(
"could not load pack {i}"
)));
}
}
}
} else {
eprintln!("warning: ignoring existing multi-pack-index; checksum mismatch");
}
}
}
}
// Step 2: list `.idx` files that have a matching `.pack`; an unreadable
// directory yields an empty list.
let mut idx_names: Vec<String> = fs::read_dir(pack_dir)
.map(|rd| {
rd.filter_map(|e| e.ok())
.filter_map(|e| {
let name = e.file_name().to_string_lossy().to_string();
let stem = name.strip_suffix(".idx")?;
if pack_dir.join(format!("{stem}.pack")).exists() {
Some(name)
} else {
None
}
})
.collect()
})
.unwrap_or_default();
idx_names.sort();
// An explicit subset keeps the caller's order, drops unknown names and
// removes duplicates.
let idx_names: Vec<String> = if let Some(sub) = &opts.pack_names_subset_ordered {
let mut out = Vec::new();
for line in sub {
let want = normalize_pack_idx_basename(line)?;
if let Some(found) = idx_names.iter().find(|n| **n == want).cloned() {
if !out.contains(&found) {
out.push(found);
}
}
}
out
} else {
idx_names
};
// A preferred pack name that matches nothing is only warned about; the
// flag prevents it from being used for selection further down.
let mut preferred_warned = false;
if let Some(raw) = opts.preferred_pack_name.as_deref() {
if opts.preferred_pack_idx.is_none()
&& !idx_names
.iter()
.any(|n| cmp_idx_or_pack_name(raw, n).is_eq())
{
eprintln!("warning: unknown preferred pack: '{raw}'");
preferred_warned = true;
}
}
if idx_names.is_empty() {
eprintln!("error: no pack files to index.");
return Err(Error::CorruptObject("no pack files to index.".to_owned()));
}
// Step 3: incremental mode only indexes packs not covered by base layers.
let (base_oids, base_pack_names) = if opts.incremental {
collect_incremental_base(pack_dir)?
} else {
(HashSet::new(), HashSet::new())
};
let layer_idx_names: Vec<String> = if opts.incremental {
idx_names
.iter()
.filter(|n| {
!base_pack_names
.iter()
.any(|bp| pack_names_match_layer(bp, n))
})
.cloned()
.collect()
} else {
idx_names.clone()
};
if opts.incremental && layer_idx_names.is_empty() {
return Ok(());
}
let work_names = if opts.incremental {
&layer_idx_names[..]
} else {
&idx_names[..]
};
// Step 4: resolve the preferred pack as an index into `work_names`.
let mut preferred_idx = opts.preferred_pack_idx.map(|p| p as usize);
if preferred_idx.is_none() && !preferred_warned {
if let Some(raw) = opts.preferred_pack_name.as_deref() {
preferred_idx = work_names
.iter()
.position(|n| cmp_idx_or_pack_name(raw, n).is_eq());
}
}
if preferred_idx.is_none() && opts.write_bitmap_placeholders && !work_names.is_empty() {
preferred_idx = preferred_pack_index_by_mtime(pack_dir, work_names)?;
}
if let Some(p) = preferred_idx {
if p >= work_names.len() {
return Err(Error::CorruptObject(
"preferred pack index out of range".to_owned(),
));
}
}
let mut indexes: Vec<PackIndex> = Vec::with_capacity(work_names.len());
for name in work_names {
let path = pack_dir.join(name);
indexes.push(crate::pack::read_pack_index_no_verify(&path)?);
}
// A preferred pack without objects is rejected.
if let Some(p) = preferred_idx {
if indexes.get(p).map(|i| i.entries.len()).unwrap_or(0) == 0 {
let name = work_names.get(p).cloned().unwrap_or_default();
let pack_name = name.strip_suffix(".idx").unwrap_or(&name);
eprintln!("error: cannot select preferred pack {pack_name}.pack with no objects");
return Err(Error::CorruptObject(
"cannot select preferred pack with no objects".to_owned(),
));
}
}
let pack_mtimes_layer: Vec<std::time::SystemTime> =
indexes.iter().map(pack_mtime_for_midx).collect();
let preferred_u32 = preferred_idx.map(|p| p as u32);
// Pick one winning entry per object id across all packs.
// NOTE(review): within this function `best` is only consulted for
// emptiness when deciding on bitmap placeholders.
let mut best: HashMap<ObjectId, MidxEntry> = HashMap::new();
for (pack_id, idx) in indexes.iter().enumerate() {
let pack_id = u32::try_from(pack_id).map_err(|_| {
Error::CorruptObject("too many pack files for multi-pack-index".to_owned())
})?;
let mtime = pack_mtimes_layer[pack_id as usize];
for e in &idx.entries {
// Only 20-byte object ids are considered.
if e.oid.len() != 20 {
continue;
}
let Ok(oid) = ObjectId::from_bytes(&e.oid) else {
continue;
};
if opts.incremental && base_oids.contains(&oid) {
continue;
}
let cand = MidxEntry {
oid,
pack_id,
offset: e.offset,
pack_mtime: mtime,
};
match best.get(&oid) {
None => {
best.insert(oid, cand);
}
Some(cur) => {
if midx_pick_better_entry(cur, pack_id, e.offset, mtime, preferred_u32) {
best.insert(oid, cand);
}
}
}
}
}
// Incremental layers without any new object get no bitmap placeholder.
let bitmap_placeholders =
opts.write_bitmap_placeholders && (!opts.incremental || !best.is_empty());
let omit_embedded_ridx = opts.write_rev_placeholder;
let exclude = if opts.incremental && !base_oids.is_empty() {
Some(&base_oids)
} else {
None
};
// Step 5: build the file contents.
let (out, rev_sidecar_order) = build_midx_bytes_filtered(
work_names,
&indexes,
preferred_idx,
bitmap_placeholders,
omit_embedded_ridx,
opts.version.unwrap_or(MIDX_VERSION_V2),
repo_midx_hash_version(pack_dir),
exclude,
)?;
// The trailing 20 bytes of the output are used as the file's checksum and
// name its layer and sidecar files.
let hash = &out[out.len() - 20..];
let hash_hex = hex::encode(hash);
let hash_arr: [u8; 20] = hash
.try_into()
.map_err(|_| Error::CorruptObject("midx hash length mismatch".to_owned()))?;
if opts.incremental {
let root_midx = pack_dir.join("multi-pack-index");
let chain_path = chain_file_path(pack_dir);
let chain_existed = chain_path.exists();
// A root MIDX without a chain becomes the first layer of the new chain.
let mut chain = if root_midx.exists() && !chain_existed {
let root_hex = midx_checksum_hex_from_path(&root_midx)?;
link_root_midx_into_chain(pack_dir, &root_hex)?;
vec![root_hex]
} else {
read_chain_layer_hashes(pack_dir).unwrap_or_default()
};
chain.push(hash_hex.clone());
let midx_d = midx_d_dir(pack_dir);
fs::create_dir_all(&midx_d).map_err(Error::Io)?;
let layer_path = midx_d.join(format!("multi-pack-index-{hash_hex}.midx"));
fs::write(&layer_path, &out).map_err(Error::Io)?;
// The chain file lists one layer hash per line.
let mut chain_data = String::new();
for h in &chain {
chain_data.push_str(h);
chain_data.push('\n');
}
fs::write(chain_file_path(pack_dir), chain_data.as_bytes()).map_err(Error::Io)?;
clear_stale_split_layers(pack_dir, &chain)?;
let _ = fs::remove_file(pack_dir.join("multi-pack-index"));
scrub_root_midx_sidecars(pack_dir)?;
if bitmap_placeholders {
let full = hex::encode(hash);
// The bitmap sidecar is written as an empty file.
fs::write(midx_d.join(format!("multi-pack-index-{full}.bitmap")), [])
.map_err(Error::Io)?;
if opts.write_rev_placeholder {
let rev_path = midx_d.join(format!("multi-pack-index-{full}.rev"));
if let Some(order) = rev_sidecar_order.as_ref() {
write_midx_rev_sidecar(&rev_path, order, &hash_arr)?;
} else {
fs::write(rev_path, []).map_err(Error::Io)?;
}
}
}
} else {
let dest = pack_dir.join("multi-pack-index");
let bitmap_path = pack_dir.join(format!("multi-pack-index-{hash_hex}.bitmap"));
let bitmap_ok = !opts.write_bitmap_placeholders || bitmap_path.exists();
// Nothing to do when the root file already holds exactly these bytes,
// no chain exists and any requested bitmap sidecar is present.
if bitmap_ok && !chain_file_path(pack_dir).exists() {
if let Ok(existing) = fs::read(&dest) {
if existing == out {
return Ok(());
}
}
}
clear_incremental_midx_files(pack_dir)?;
fs::write(&dest, &out).map_err(Error::Io)?;
scrub_root_midx_sidecars_except(pack_dir, Some(&hash_hex))?;
if opts.write_bitmap_placeholders {
fs::write(
pack_dir.join(format!("multi-pack-index-{hash_hex}.bitmap")),
[],
)
.map_err(Error::Io)?;
if opts.write_rev_placeholder {
let rev_path = pack_dir.join(format!("multi-pack-index-{hash_hex}.rev"));
if let Some(order) = rev_sidecar_order.as_ref() {
write_midx_rev_sidecar(&rev_path, order, &hash_arr)?;
} else {
fs::write(rev_path, []).map_err(Error::Io)?;
}
}
}
}
Ok(())
}
/// Tells whether a pack name recorded in a base layer refers to the same pack
/// as an on-disk index name.
///
/// Exact string equality is accepted immediately; otherwise the decision is
/// left to `cmp_idx_or_pack_name`.
fn pack_names_match_layer(base_name: &str, disk_idx: &str) -> bool {
    base_name == disk_idx || cmp_idx_or_pack_name(disk_idx, base_name).is_eq()
}
/// Failure modes of `compact_multi_pack_index`.
#[derive(Debug)]
pub enum CompactError {
/// The chain file could not be read or lists no layers.
NoChain,
/// The given endpoint (as passed by the caller) is not a layer of the chain.
MissingEndpoint(String),
/// Both endpoints name the same layer.
IdenticalEndpoints,
/// The first endpoint (`from`) comes after the second (`to`) in the chain.
NotAncestor(String, String),
/// Compaction was requested with the version 1 format.
V1Format,
/// Any other failure, carried as its message text.
Other(String),
}
impl std::fmt::Display for CompactError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
CompactError::NoChain => write!(f, "no multi-pack-index chain to compact"),
CompactError::MissingEndpoint(s) => write!(f, "could not find MIDX: {s}"),
CompactError::IdenticalEndpoints => {
write!(f, "MIDX compaction endpoints must be unique")
}
CompactError::NotAncestor(from, to) => {
write!(f, "MIDX {from} must be an ancestor of {to}")
}
CompactError::V1Format => write!(f, "cannot perform MIDX compaction with v1 format"),
CompactError::Other(s) => write!(f, "{s}"),
}
}
}
impl From<Error> for CompactError {
fn from(e: Error) -> Self {
CompactError::Other(e.to_string())
}
}
/// Collects the object ids of every listed MIDX layer into one set.
///
/// Each hash in `hashes` names the file `multi-pack-index-<hash>.midx` inside
/// the directory returned by `midx_d_dir`.
///
/// # Errors
/// Stops at, and returns, the first error from loading or parsing a layer.
fn collect_layer_oids(pack_dir: &Path, hashes: &[String]) -> Result<HashSet<ObjectId>> {
    hashes.iter().try_fold(
        HashSet::new(),
        |mut gathered: HashSet<ObjectId>, hash| -> Result<HashSet<ObjectId>> {
            let layer_path = midx_d_dir(pack_dir).join(format!("multi-pack-index-{hash}.midx"));
            let data = load_midx_file(&layer_path)?;
            let (layer_oids, _) = oids_and_packs_from_midx_data(&data)?;
            gathered.extend(layer_oids);
            Ok(gathered)
        },
    )
}
/// Reads the pack names stored in the MIDX layer identified by `hash`.
///
/// The layer file is `multi-pack-index-<hash>.midx` inside the directory
/// returned by `midx_d_dir`.
///
/// # Errors
/// Errors from loading the file, parsing its header, locating the pack-names
/// chunk, or parsing the names are passed through.
fn layer_pack_names(pack_dir: &Path, hash: &str) -> Result<Vec<String>> {
    let layer_file = format!("multi-pack-index-{hash}.midx");
    let data = load_midx_file(&midx_d_dir(pack_dir).join(layer_file))?;
    let (_, hdr_end, _) = parse_midx_header(&data)?;
    let (names_off, names_len) = find_chunk(&data, hdr_end, MIDX_CHUNKID_PACKNAMES)?;
    let names_blob = &data[names_off..names_off + names_len];
    parse_pack_names_blob(names_blob)
}
/// Replaces a contiguous range of MIDX chain layers with one merged layer.
///
/// `from_arg` and `to_arg` are layer hashes (compared case-insensitively
/// against the chain); `from` must come strictly before `to`. Layers before
/// `from` are kept as the base, layers after `to` are kept on top, and the
/// layers `from..=to` are merged.
///
/// The merged layer indexes the packs named by the merged layers (first
/// occurrence wins for ordering) and excludes objects already present in base
/// layers. When `write_bitmaps` is set, an empty `.bitmap` sidecar is written
/// and, if `write_rev` is also set, a `.rev` sidecar.
///
/// # Errors
/// See [`CompactError`]: version 1 requested, no chain, unknown or identical
/// endpoints, wrong endpoint order, or any underlying failure wrapped as
/// `CompactError::Other`.
pub fn compact_multi_pack_index(
pack_dir: &Path,
from_arg: &str,
to_arg: &str,
write_bitmaps: bool,
write_rev: bool,
version: Option<u8>,
) -> std::result::Result<(), CompactError> {
if version == Some(MIDX_VERSION_V1) {
return Err(CompactError::V1Format);
}
let chain = read_chain_layer_hashes(pack_dir).map_err(|_| CompactError::NoChain)?;
if chain.is_empty() {
return Err(CompactError::NoChain);
}
// Endpoints are matched after lowercasing; error messages keep the
// caller's original spelling.
let from_hex = from_arg.to_ascii_lowercase();
let to_hex = to_arg.to_ascii_lowercase();
let from_pos = chain.iter().position(|h| *h == from_hex);
let to_pos = chain.iter().position(|h| *h == to_hex);
let Some(from_pos) = from_pos else {
return Err(CompactError::MissingEndpoint(from_arg.to_string()));
};
let Some(to_pos) = to_pos else {
return Err(CompactError::MissingEndpoint(to_arg.to_string()));
};
if from_pos == to_pos {
return Err(CompactError::IdenticalEndpoints);
}
if from_pos > to_pos {
return Err(CompactError::NotAncestor(
from_arg.to_string(),
to_arg.to_string(),
));
}
// Split the chain into untouched base, merged range and untouched top.
let base_hashes = &chain[..from_pos];
let merged_hashes = &chain[from_pos..=to_pos];
let upper_hashes = &chain[to_pos + 1..];
let base_oids = collect_layer_oids(pack_dir, base_hashes)?;
// Pack names of the merged layers, de-duplicated, in first-seen order.
let mut ordered_idx_names: Vec<String> = Vec::new();
for h in merged_hashes {
for name in layer_pack_names(pack_dir, h)? {
if !ordered_idx_names.contains(&name) {
ordered_idx_names.push(name);
}
}
}
if ordered_idx_names.is_empty() {
return Err(CompactError::Other(
"no packs found in compaction range".to_owned(),
));
}
let mut indexes: Vec<PackIndex> = Vec::with_capacity(ordered_idx_names.len());
for name in &ordered_idx_names {
let path = pack_dir.join(name);
indexes.push(crate::pack::read_pack_index_no_verify(&path)?);
}
// With bitmaps, the first pack of the merged range is the preferred pack.
let preferred_idx = if write_bitmaps { Some(0usize) } else { None };
let exclude = if base_oids.is_empty() {
None
} else {
Some(&base_oids)
};
// NOTE(review): `write_rev` is passed in the argument position that
// `write_multi_pack_index_with_options` fills with `omit_embedded_ridx`.
let (out, rev_sidecar_order) = build_midx_bytes_filtered(
&ordered_idx_names,
&indexes,
preferred_idx,
write_bitmaps,
write_rev,
version.unwrap_or(MIDX_VERSION_V2),
repo_midx_hash_version(pack_dir),
exclude,
)?;
// The trailing 20 bytes of the output name the new layer.
let hash = &out[out.len() - 20..];
let hash_hex = hex::encode(hash);
let hash_arr: [u8; 20] = hash
.try_into()
.map_err(|_| CompactError::Other("midx hash length mismatch".to_owned()))?;
let midx_d = midx_d_dir(pack_dir);
fs::create_dir_all(&midx_d).map_err(Error::Io)?;
let layer_path = midx_d.join(format!("multi-pack-index-{hash_hex}.midx"));
fs::write(&layer_path, &out).map_err(Error::Io)?;
// New chain: base layers, the merged layer, then the untouched top layers.
let mut new_chain: Vec<String> = Vec::new();
new_chain.extend(base_hashes.iter().cloned());
new_chain.push(hash_hex.clone());
new_chain.extend(upper_hashes.iter().cloned());
let mut chain_data = String::new();
for h in &new_chain {
chain_data.push_str(h);
chain_data.push('\n');
}
fs::write(chain_file_path(pack_dir), chain_data.as_bytes()).map_err(Error::Io)?;
if write_bitmaps {
// The bitmap sidecar is written as an empty file.
fs::write(
midx_d.join(format!("multi-pack-index-{hash_hex}.bitmap")),
[],
)
.map_err(Error::Io)?;
let rev_path = midx_d.join(format!("multi-pack-index-{hash_hex}.rev"));
if write_rev {
if let Some(order) = rev_sidecar_order.as_ref() {
write_midx_rev_sidecar(&rev_path, order, &hash_arr)?;
} else {
fs::write(rev_path, []).map_err(Error::Io)?;
}
}
}
clear_stale_split_layers(pack_dir, &new_chain)?;
Ok(())
}
/// Removes every root-level `multi-pack-index-*.bitmap` / `.rev` sidecar in `pack_dir`.
///
/// Shorthand for `scrub_root_midx_sidecars_except` with nothing exempted.
fn scrub_root_midx_sidecars(pack_dir: &Path) -> Result<()> {
scrub_root_midx_sidecars_except(pack_dir, None)
}
fn scrub_root_midx_sidecars_except(pack_dir: &Path, keep_hex: Option<&str>) -> Result<()> {
let Ok(rd) = fs::read_dir(pack_dir) else {
return Ok(());
};
for ent in rd {
let ent = ent.map_err(Error::Io)?;
let name = ent.file_name().to_string_lossy().to_string();
let Some(rest) = name.strip_prefix("multi-pack-index-") else {
continue;
};
if !(rest.ends_with(".bitmap") || rest.ends_with(".rev")) {
continue;
}
let hash_part = rest
.strip_suffix(".bitmap")
.or_else(|| rest.strip_suffix(".rev"))
.unwrap_or(rest);
if keep_hex.is_some_and(|k| k == hash_part) {
continue;
}
let _ = fs::remove_file(ent.path());
}
Ok(())
}