use crate::error::{Error, Result};
use crate::objects::{Object, ObjectId, ObjectKind};
use crate::unpack_objects::apply_delta;
use flate2::read::ZlibDecoder;
use sha1::{Digest, Sha1};
use sha2::Sha256;
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fs;
use std::io;
use std::io::Read;
use std::path::{Path, PathBuf};
use std::sync::Arc;
#[derive(Debug, Clone)]
pub struct PackIndexEntry {
pub oid: Vec<u8>,
pub offset: u64,
}
#[derive(Debug, Clone)]
pub struct PackIndex {
pub idx_path: PathBuf,
pub pack_path: PathBuf,
pub hash_bytes: usize,
pub entries: Vec<PackIndexEntry>,
pub fanout: [u32; 256],
}
impl PackIndex {
#[must_use]
pub fn find_offset(&self, oid: &ObjectId) -> Option<u64> {
if self.hash_bytes != 20 {
return None;
}
let needle = oid.as_bytes();
let first_byte = needle[0] as usize;
let lo = if first_byte == 0 {
0
} else {
self.fanout[first_byte - 1] as usize
};
let hi = self.fanout[first_byte] as usize;
if lo >= hi || hi > self.entries.len() {
return None;
}
let slice = &self.entries[lo..hi];
slice
.binary_search_by(|e| e.oid.as_slice().cmp(needle.as_slice()))
.ok()
.map(|idx| slice[idx].offset)
}
#[must_use]
pub fn contains(&self, oid: &ObjectId) -> bool {
self.find_offset(oid).is_some()
}
}
#[derive(Debug, Clone)]
pub struct ShowIndexEntry {
pub oid: Vec<u8>,
pub offset: u64,
pub crc32: Option<u32>,
}
pub fn show_index_entries(reader: &mut dyn Read, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
let mut buf = Vec::new();
reader.read_to_end(&mut buf).map_err(Error::Io)?;
if buf.len() < 8 {
return Err(Error::CorruptObject(
"unable to read header: index file too small".to_owned(),
));
}
let mut pos = 0usize;
let first_u32 = read_u32_be(&buf, &mut pos)?;
const PACK_IDX_SIGNATURE: u32 = 0xff74_4f63;
if first_u32 == PACK_IDX_SIGNATURE {
let version = read_u32_be(&buf, &mut pos)?;
if version != 2 {
return Err(Error::CorruptObject(format!(
"unknown index version: {version}"
)));
}
show_index_v2(&buf, &mut pos, hash_size)
} else {
pos = 0;
show_index_v1(&buf, &mut pos, hash_size)
}
}
fn show_index_v1(buf: &[u8], pos: &mut usize, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
if buf.len() < 256 * 4 {
return Err(Error::CorruptObject(
"unable to read index: v1 fanout too short".to_owned(),
));
}
let mut fanout = [0u32; 256];
for slot in &mut fanout {
*slot = read_u32_be(buf, pos)?;
}
let object_count = fanout[255] as usize;
let mut entries = Vec::with_capacity(object_count);
for i in 0..object_count {
if *pos + 4 + hash_size > buf.len() {
return Err(Error::CorruptObject(format!(
"unable to read entry {i}/{object_count}: truncated"
)));
}
let offset = read_u32_be(buf, pos)? as u64;
let oid = buf[*pos..*pos + hash_size].to_vec();
*pos += hash_size;
entries.push(ShowIndexEntry {
oid,
offset,
crc32: None,
});
}
Ok(entries)
}
fn show_index_v2(buf: &[u8], pos: &mut usize, hash_size: usize) -> Result<Vec<ShowIndexEntry>> {
if buf.len() < *pos + 256 * 4 {
return Err(Error::CorruptObject(
"unable to read index: v2 fanout too short".to_owned(),
));
}
let mut fanout = [0u32; 256];
for slot in &mut fanout {
*slot = read_u32_be(buf, pos)?;
}
let object_count = fanout[255] as usize;
let mut oids: Vec<Vec<u8>> = Vec::with_capacity(object_count);
for i in 0..object_count {
if *pos + hash_size > buf.len() {
return Err(Error::CorruptObject(format!(
"unable to read oid {i}/{object_count}: truncated"
)));
}
let oid = buf[*pos..*pos + hash_size].to_vec();
*pos += hash_size;
oids.push(oid);
}
let mut crcs = Vec::with_capacity(object_count);
for i in 0..object_count {
if *pos + 4 > buf.len() {
return Err(Error::CorruptObject(format!(
"unable to read crc {i}/{object_count}: truncated"
)));
}
crcs.push(read_u32_be(buf, pos)?);
}
let mut offsets32 = Vec::with_capacity(object_count);
let mut large_count = 0usize;
for i in 0..object_count {
if *pos + 4 > buf.len() {
return Err(Error::CorruptObject(format!(
"unable to read 32b offset {i}/{object_count}: truncated"
)));
}
let v = read_u32_be(buf, pos)?;
if (v & 0x8000_0000) != 0 {
large_count += 1;
}
offsets32.push(v);
}
let mut large_offsets = Vec::with_capacity(large_count);
for i in 0..large_count {
if *pos + 8 > buf.len() {
return Err(Error::CorruptObject(format!(
"unable to read 64b offset {i}: truncated"
)));
}
large_offsets.push(read_u64_be(buf, pos)?);
}
let mut next_large = 0usize;
let mut entries = Vec::with_capacity(object_count);
for (i, oid) in oids.iter().enumerate() {
let raw = offsets32[i];
let offset = if (raw & 0x8000_0000) == 0 {
raw as u64
} else {
let idx = (raw & 0x7fff_ffff) as usize;
if idx != next_large {
return Err(Error::CorruptObject(format!(
"inconsistent 64b offset index at entry {i}"
)));
}
let off = large_offsets.get(next_large).copied().ok_or_else(|| {
Error::CorruptObject(format!("missing large offset entry {next_large}"))
})?;
next_large += 1;
off
};
entries.push(ShowIndexEntry {
oid: oid.clone(),
offset,
crc32: Some(crcs[i]),
});
}
Ok(entries)
}
#[derive(Debug, Clone, Default)]
pub struct LocalPackInfo {
pub pack_count: usize,
pub object_count: usize,
pub size_bytes: u64,
pub object_ids: HashSet<ObjectId>,
}
pub fn read_local_pack_indexes(objects_dir: &Path) -> Result<Vec<PackIndex>> {
let pack_dir = objects_dir.join("pack");
let rd = match fs::read_dir(&pack_dir) {
Ok(rd) => rd,
Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Vec::new()),
Err(err) => return Err(Error::Io(err)),
};
let mut out = Vec::new();
for entry in rd {
let entry = entry.map_err(Error::Io)?;
let path = entry.path();
if path.extension().and_then(|s| s.to_str()) != Some("idx") {
continue;
}
if let Ok(idx) = read_pack_index(&path) {
if !idx.pack_path.is_file() {
continue;
}
out.push(idx);
}
}
Ok(out)
}
mod pack_cache {
use super::{read_pack_index_no_verify, Error, PackIndex, Result};
use std::collections::HashMap;
use std::fs;
use std::io;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex, OnceLock};
use std::time::SystemTime;
struct CachedDir {
dir_mtime: SystemTime,
indexes: Vec<Arc<PackIndex>>,
}
struct CachedIdx {
mtime: SystemTime,
size: u64,
idx: Arc<PackIndex>,
}
struct CachedPack {
mtime: SystemTime,
size: u64,
bytes: Arc<Vec<u8>>,
}
#[derive(Default)]
struct State {
by_dir: HashMap<PathBuf, CachedDir>,
by_idx: HashMap<PathBuf, CachedIdx>,
by_pack: HashMap<PathBuf, CachedPack>,
}
static CACHE: OnceLock<Mutex<State>> = OnceLock::new();
fn lock() -> std::sync::MutexGuard<'static, State> {
CACHE
.get_or_init(|| Mutex::new(State::default()))
.lock()
.unwrap_or_else(|p| p.into_inner())
}
fn dir_mtime(path: &Path) -> SystemTime {
fs::metadata(path)
.and_then(|m| m.modified())
.unwrap_or(SystemTime::UNIX_EPOCH)
}
fn file_signature(path: &Path) -> Option<(SystemTime, u64)> {
let m = fs::metadata(path).ok()?;
let mtime = m.modified().unwrap_or(SystemTime::UNIX_EPOCH);
Some((mtime, m.len()))
}
pub fn get_index(idx_path: &Path) -> Result<Arc<PackIndex>> {
let sig = file_signature(idx_path);
if let Some((mtime, size)) = sig {
{
let g = lock();
if let Some(c) = g.by_idx.get(idx_path) {
if c.mtime == mtime && c.size == size {
return Ok(Arc::clone(&c.idx));
}
}
}
let parsed = Arc::new(read_pack_index_no_verify(idx_path)?);
let mut g = lock();
g.by_idx.insert(
idx_path.to_path_buf(),
CachedIdx {
mtime,
size,
idx: Arc::clone(&parsed),
},
);
Ok(parsed)
} else {
Err(Error::Io(io::Error::new(
io::ErrorKind::NotFound,
format!("idx not found: {}", idx_path.display()),
)))
}
}
pub fn get_dir_indexes(objects_dir: &Path) -> Result<Vec<Arc<PackIndex>>> {
let pack_dir = objects_dir.join("pack");
let dir_mt = dir_mtime(&pack_dir);
{
let g = lock();
if let Some(c) = g.by_dir.get(&pack_dir) {
if c.dir_mtime == dir_mt {
return Ok(c.indexes.clone());
}
}
}
let rd = match fs::read_dir(&pack_dir) {
Ok(rd) => rd,
Err(err) if err.kind() == io::ErrorKind::NotFound => {
let mut g = lock();
g.by_dir.insert(
pack_dir.clone(),
CachedDir {
dir_mtime: dir_mt,
indexes: Vec::new(),
},
);
return Ok(Vec::new());
}
Err(err) => return Err(Error::Io(err)),
};
let mut out = Vec::new();
for entry in rd {
let entry = entry.map_err(Error::Io)?;
let path = entry.path();
if path.extension().and_then(|s| s.to_str()) != Some("idx") {
continue;
}
let Ok(idx) = get_index(&path) else { continue };
if !idx.pack_path.is_file() {
continue;
}
out.push(idx);
}
let mut g = lock();
g.by_dir.insert(
pack_dir,
CachedDir {
dir_mtime: dir_mt,
indexes: out.clone(),
},
);
Ok(out)
}
pub fn get_pack_bytes(pack_path: &Path) -> Result<Arc<Vec<u8>>> {
let sig = file_signature(pack_path);
if let Some((mtime, size)) = sig {
{
let g = lock();
if let Some(c) = g.by_pack.get(pack_path) {
if c.mtime == mtime && c.size == size {
return Ok(Arc::clone(&c.bytes));
}
}
}
let bytes = Arc::new(fs::read(pack_path).map_err(Error::Io)?);
let mut g = lock();
g.by_pack.insert(
pack_path.to_path_buf(),
CachedPack {
mtime,
size,
bytes: Arc::clone(&bytes),
},
);
Ok(bytes)
} else {
Err(Error::Io(io::Error::new(
io::ErrorKind::NotFound,
format!("pack not found: {}", pack_path.display()),
)))
}
}
pub fn clear() {
let mut g = lock();
g.by_dir.clear();
g.by_idx.clear();
g.by_pack.clear();
}
}
pub fn read_local_pack_indexes_cached(objects_dir: &Path) -> Result<Vec<Arc<PackIndex>>> {
pack_cache::get_dir_indexes(objects_dir)
}
pub fn read_pack_index_cached(idx_path: &Path) -> Result<Arc<PackIndex>> {
pack_cache::get_index(idx_path)
}
pub fn read_pack_bytes_cached(pack_path: &Path) -> Result<Arc<Vec<u8>>> {
pack_cache::get_pack_bytes(pack_path)
}
pub fn clear_pack_cache() {
pack_cache::clear();
}
pub fn collect_local_pack_info(objects_dir: &Path) -> Result<LocalPackInfo> {
let indexes = read_local_pack_indexes(objects_dir)?;
let mut info = LocalPackInfo::default();
for idx in indexes {
let pack_meta = fs::metadata(&idx.pack_path).map_err(Error::Io)?;
let idx_meta = fs::metadata(&idx.idx_path).map_err(Error::Io)?;
info.pack_count += 1;
info.object_count += idx.entries.len();
info.size_bytes += pack_meta.len() + idx_meta.len();
for entry in idx.entries {
if entry.oid.len() == 20 {
if let Ok(oid) = ObjectId::from_bytes(&entry.oid) {
info.object_ids.insert(oid);
}
}
}
}
Ok(info)
}
fn verify_idx_trailing_checksum(idx_path: &Path, bytes: &[u8]) -> Result<()> {
if bytes.len() < 20 {
return Err(Error::CorruptObject(format!(
"index file {} missing checksum",
idx_path.display()
)));
}
let idx_body_end = bytes.len() - 20;
let mut h = Sha1::new();
h.update(&bytes[..idx_body_end]);
let digest = h.finalize();
if digest.as_slice() != &bytes[idx_body_end..] {
return Err(Error::CorruptObject(format!(
"index checksum mismatch for {}",
idx_path.display()
)));
}
Ok(())
}
fn read_pack_index_v1(idx_path: &Path, bytes: &[u8], verify: bool) -> Result<PackIndex> {
let mut pos = 0usize;
if bytes.len() < 256 * 4 + 20 {
return Err(Error::CorruptObject(format!(
"index file {} is too small",
idx_path.display()
)));
}
let mut fanout = [0u32; 256];
for slot in &mut fanout {
*slot = read_u32_be(bytes, &mut pos)?;
}
let object_count = fanout[255] as usize;
let need = pos
.saturating_add(object_count.saturating_mul(24))
.saturating_add(20);
if bytes.len() < need {
return Err(Error::CorruptObject(format!(
"truncated idx file {}",
idx_path.display()
)));
}
let mut entries: Vec<PackIndexEntry> = Vec::with_capacity(object_count);
for i in 0..object_count {
let offset = read_u32_be(bytes, &mut pos)? as u64;
let oid = bytes[pos..pos + 20].to_vec();
pos += 20;
if i > 0 && entries[i - 1].oid.cmp(&oid) != std::cmp::Ordering::Less {
return Err(Error::CorruptObject(format!(
"oid lookup out of order in {}",
idx_path.display()
)));
}
entries.push(PackIndexEntry { oid, offset });
}
if verify {
verify_idx_trailing_checksum(idx_path, bytes)?;
}
let mut pack_path = idx_path.to_path_buf();
pack_path.set_extension("pack");
let fanout = compute_fanout_from_entries(&entries);
Ok(PackIndex {
idx_path: idx_path.to_path_buf(),
pack_path,
hash_bytes: 20,
entries,
fanout,
})
}
fn compute_fanout_from_entries(entries: &[PackIndexEntry]) -> [u32; 256] {
let mut fanout = [0u32; 256];
let mut idx = 0usize;
for byte in 0u32..256 {
let needle = byte as u8;
while idx < entries.len() && entries[idx].oid.first().copied().unwrap_or(0) <= needle {
idx += 1;
}
fanout[byte as usize] = u32::try_from(idx).unwrap_or(u32::MAX);
}
fanout
}
fn read_pack_index_v2(idx_path: &Path, bytes: &[u8], verify: bool) -> Result<PackIndex> {
if bytes.len() < 8 + 256 * 4 + 40 {
return Err(Error::CorruptObject(format!(
"index file {} is too small",
idx_path.display()
)));
}
let mut pos = 0usize;
pos += 4;
let version = read_u32_be(bytes, &mut pos)?;
if version != 2 {
return Err(Error::CorruptObject(format!(
"unsupported idx version {} in {}",
version,
idx_path.display()
)));
}
let mut fanout = [0u32; 256];
for slot in &mut fanout {
*slot = read_u32_be(bytes, &mut pos)?;
}
let object_count = fanout[255] as usize;
let idx_file_len = bytes.len();
let hash_bytes = detect_idx_hash_bytes_v2(idx_file_len, pos, object_count, idx_path)?;
let need = pos
.saturating_add(object_count * hash_bytes)
.saturating_add(object_count * 4)
.saturating_add(object_count * 4)
.saturating_add(40);
if bytes.len() < need {
return Err(Error::CorruptObject(format!(
"truncated idx file {}",
idx_path.display()
)));
}
let mut oids: Vec<Vec<u8>> = Vec::with_capacity(object_count);
for _ in 0..object_count {
let slice = &bytes[pos..pos + hash_bytes];
pos += hash_bytes;
oids.push(slice.to_vec());
}
pos += object_count * 4;
let mut offsets32 = Vec::with_capacity(object_count);
let mut large_count = 0usize;
for _ in 0..object_count {
let v = read_u32_be(bytes, &mut pos)?;
if (v & 0x8000_0000) != 0 {
large_count += 1;
}
offsets32.push(v);
}
if bytes.len() < pos + large_count * 8 + 40 {
return Err(Error::CorruptObject(format!(
"truncated large offset table in {}",
idx_path.display()
)));
}
let mut large_offsets = Vec::with_capacity(large_count);
for _ in 0..large_count {
large_offsets.push(read_u64_be(bytes, &mut pos)?);
}
let mut next_large = 0usize;
let mut entries = Vec::with_capacity(object_count);
for (i, oid) in oids.into_iter().enumerate() {
let raw = offsets32[i];
let offset = if (raw & 0x8000_0000) == 0 {
raw as u64
} else {
let off = large_offsets.get(next_large).copied().ok_or_else(|| {
Error::CorruptObject(format!("bad large offset index in {}", idx_path.display()))
})?;
next_large += 1;
off
};
entries.push(PackIndexEntry { oid, offset });
}
let mut pack_path = idx_path.to_path_buf();
pack_path.set_extension("pack");
if verify {
verify_idx_trailing_checksum(idx_path, bytes)?;
}
Ok(PackIndex {
idx_path: idx_path.to_path_buf(),
pack_path,
hash_bytes,
entries,
fanout,
})
}
fn detect_idx_hash_bytes_v2(
idx_file_len: usize,
fanout_end: usize,
object_count: usize,
idx_path: &Path,
) -> Result<usize> {
if object_count == 0 {
return Ok(20);
}
if idx_file_len < 20 {
return Err(Error::CorruptObject(format!(
"index file {} missing checksum",
idx_path.display()
)));
}
let body_without_checksum = idx_file_len.saturating_sub(20);
for &hb in &[20usize, 32] {
let min_body = fanout_end
.saturating_add(object_count.saturating_mul(hb + 4 + 4))
.saturating_add(hb);
if body_without_checksum < min_body {
continue;
}
let mut max_body = min_body;
if object_count > 0 {
max_body = max_body.saturating_add((object_count - 1).saturating_mul(8));
}
if body_without_checksum > max_body {
continue;
}
let extra = body_without_checksum.saturating_sub(min_body);
if extra % 8 != 0 {
continue;
}
return Ok(hb);
}
Err(Error::CorruptObject(format!(
"wrong index v2 file size in {}",
idx_path.display()
)))
}
#[must_use]
pub fn oid_bytes_to_hex(oid: &[u8]) -> String {
hex::encode(oid)
}
#[must_use]
pub fn pack_index_entry_matches_sha1_oid(entry: &PackIndexEntry, oid: &ObjectId) -> bool {
entry.oid.len() == 20 && entry.oid.as_slice() == oid.as_bytes().as_slice()
}
pub fn hash_object_bytes(kind: ObjectKind, data: &[u8], hash_bytes: usize) -> Result<Vec<u8>> {
let header = format!("{} {}\0", kind, data.len());
match hash_bytes {
20 => {
let mut hasher = Sha1::new();
hasher.update(header.as_bytes());
hasher.update(data);
Ok(hasher.finalize().to_vec())
}
32 => {
use sha2::Digest as _;
let mut hasher = Sha256::new();
hasher.update(header.as_bytes());
hasher.update(data);
Ok(hasher.finalize().to_vec())
}
other => Err(Error::CorruptObject(format!(
"unsupported object hash width: {other}"
))),
}
}
pub fn read_pack_index(idx_path: &Path) -> Result<PackIndex> {
let bytes = fs::read(idx_path).map_err(Error::Io)?;
parse_pack_index_bytes(idx_path, &bytes, true)
}
fn read_pack_index_no_verify(idx_path: &Path) -> Result<PackIndex> {
let bytes = fs::read(idx_path).map_err(Error::Io)?;
parse_pack_index_bytes(idx_path, &bytes, false)
}
fn parse_pack_index_bytes(idx_path: &Path, bytes: &[u8], verify: bool) -> Result<PackIndex> {
if bytes.len() < 8 {
return Err(Error::CorruptObject(format!(
"index file {} is too small",
idx_path.display()
)));
}
let magic = &bytes[0..4];
if magic == [0xff, b't', b'O', b'c'] {
read_pack_index_v2(idx_path, bytes, verify)
} else {
read_pack_index_v1(idx_path, bytes, verify)
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackedType {
Commit,
Tree,
Blob,
Tag,
OfsDelta,
RefDelta,
}
impl PackedType {
#[must_use]
pub fn as_str(self) -> &'static str {
match self {
Self::Commit => "commit",
Self::Tree => "tree",
Self::Blob => "blob",
Self::Tag => "tag",
Self::OfsDelta => "ofs-delta",
Self::RefDelta => "ref-delta",
}
}
}
#[derive(Debug, Clone)]
pub struct VerifyObjectRecord {
pub oid: Vec<u8>,
pub packed_type: PackedType,
pub size: u64,
pub size_in_pack: u64,
pub offset: u64,
pub depth: Option<u64>,
pub base_oid: Option<Vec<u8>>,
}
pub fn verify_pack_and_collect(idx_path: &Path) -> Result<Vec<VerifyObjectRecord>> {
let idx = read_pack_index(idx_path)?;
let idx_file_bytes = fs::read(idx_path).map_err(Error::Io)?;
let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
let hb = idx.hash_bytes;
if pack_bytes.len() < 12 + hb {
return Err(Error::CorruptObject(format!(
"pack file {} is too small",
idx.pack_path.display()
)));
}
let pack_end = pack_bytes.len() - hb;
match hb {
20 => {
let mut h = Sha1::new();
h.update(&pack_bytes[..pack_end]);
let digest = h.finalize();
if digest.as_slice() != &pack_bytes[pack_end..] {
return Err(Error::CorruptObject(format!(
"pack trailing checksum mismatch for {}",
idx.pack_path.display()
)));
}
}
32 => {
use sha2::Digest as _;
let mut h = Sha256::new();
h.update(&pack_bytes[..pack_end]);
let digest = h.finalize();
if digest.as_slice() != &pack_bytes[pack_end..] {
return Err(Error::CorruptObject(format!(
"pack trailing checksum mismatch for {}",
idx.pack_path.display()
)));
}
}
_ => {
return Err(Error::CorruptObject(format!(
"unsupported OID width {} for pack {}",
hb,
idx.pack_path.display()
)));
}
}
if idx_file_bytes.len() >= hb + 20 {
let embedded = &idx_file_bytes[idx_file_bytes.len() - (hb + 20)..idx_file_bytes.len() - 20];
if embedded != &pack_bytes[pack_end..] {
return Err(Error::CorruptObject(format!(
"pack checksum in index does not match {}",
idx.pack_path.display()
)));
}
}
if &pack_bytes[0..4] != b"PACK" {
return Err(Error::CorruptObject(format!(
"pack file {} has invalid signature",
idx.pack_path.display()
)));
}
let version = u32::from_be_bytes(pack_bytes[4..8].try_into().unwrap_or([0, 0, 0, 0]));
if version != 2 && version != 3 {
return Err(Error::CorruptObject(format!(
"unsupported pack version {} in {}",
version,
idx.pack_path.display()
)));
}
let count = u32::from_be_bytes(pack_bytes[8..12].try_into().unwrap_or([0, 0, 0, 0])) as usize;
if count != idx.entries.len() {
return Err(Error::CorruptObject(format!(
"pack/index object count mismatch for {}",
idx.pack_path.display()
)));
}
let mut by_offset: BTreeMap<u64, Vec<u8>> = BTreeMap::new();
for entry in &idx.entries {
by_offset.insert(entry.offset, entry.oid.clone());
}
let offsets: Vec<u64> = by_offset.keys().copied().collect();
if offsets.is_empty() {
return Ok(Vec::new());
}
let mut by_oid: HashMap<Vec<u8>, usize> = HashMap::new();
let mut records: Vec<VerifyObjectRecord> = Vec::with_capacity(offsets.len());
for (i, offset) in offsets.iter().copied().enumerate() {
let oid = by_offset.get(&offset).cloned().ok_or_else(|| {
Error::CorruptObject(format!("missing object id for offset {}", offset))
})?;
let next_off = offsets
.get(i + 1)
.copied()
.unwrap_or((pack_bytes.len() - hb) as u64);
if next_off <= offset || next_off > (pack_bytes.len() - hb) as u64 {
return Err(Error::CorruptObject(format!(
"invalid object boundaries at offset {} in {}",
offset,
idx.pack_path.display()
)));
}
let mut p = offset as usize;
let (packed_type, size) = parse_pack_object_header(&pack_bytes, &mut p)?;
let mut base_oid: Option<Vec<u8>> = None;
let mut depth = None;
match packed_type {
PackedType::RefDelta => {
if p + hb > pack_bytes.len() {
return Err(Error::CorruptObject(format!(
"truncated ref-delta base at offset {}",
offset
)));
}
base_oid = Some(pack_bytes[p..p + hb].to_vec());
}
PackedType::OfsDelta => {
let base_offset = parse_ofs_delta_base(&pack_bytes, &mut p, offset)?;
let base_depth = records
.iter()
.find(|r| r.offset == base_offset)
.and_then(|r| r.depth)
.unwrap_or(0);
depth = Some(base_depth + 1);
}
PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {}
}
let size_in_pack = next_off - offset;
records.push(VerifyObjectRecord {
oid: oid.clone(),
packed_type,
size,
size_in_pack,
offset,
depth,
base_oid,
});
by_oid.insert(oid, i);
}
for i in 0..records.len() {
if records[i].packed_type != PackedType::RefDelta {
continue;
}
let base = records[i]
.base_oid
.as_ref()
.ok_or_else(|| Error::CorruptObject("ref-delta missing base oid".to_owned()))?;
let base_depth = by_oid
.get(base)
.and_then(|ix| records.get(*ix))
.and_then(|r| r.depth)
.unwrap_or(0);
records[i].depth = Some(base_depth + 1);
}
for entry in &idx.entries {
let obj = read_object_from_pack_bytes(&pack_bytes, &idx, &entry.oid)?;
let computed = hash_object_bytes(obj.kind, &obj.data, hb)?;
if computed.as_slice() != entry.oid.as_slice() {
return Err(Error::CorruptObject(format!(
"pack object hash mismatch at offset {} (index says {})",
entry.offset,
oid_bytes_to_hex(&entry.oid)
)));
}
}
Ok(records)
}
pub fn read_alternates_recursive(objects_dir: &Path) -> Result<Vec<PathBuf>> {
let mut visited = HashSet::new();
let mut out = Vec::new();
read_alternates_inner(objects_dir, &mut visited, &mut out, 0)?;
Ok(out)
}
const MAX_ALTERNATE_DEPTH: usize = 5;
fn read_alternates_inner(
objects_dir: &Path,
visited: &mut HashSet<PathBuf>,
out: &mut Vec<PathBuf>,
depth: usize,
) -> Result<()> {
if depth > MAX_ALTERNATE_DEPTH {
return Ok(());
}
let canonical = canonical_or_self(objects_dir);
let alt_file = canonical.join("info").join("alternates");
let text = match fs::read_to_string(&alt_file) {
Ok(text) => text,
Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(()),
Err(err) => return Err(Error::Io(err)),
};
for raw in text.lines() {
let line = raw.trim();
if line.is_empty() {
continue;
}
let candidate = if Path::new(line).is_absolute() {
PathBuf::from(line)
} else {
canonical.join(line)
};
let candidate = canonical_or_self(&candidate);
if visited.insert(candidate.clone()) {
out.push(candidate.clone());
read_alternates_inner(&candidate, visited, out, depth + 1)?;
}
}
Ok(())
}
fn canonical_or_self(path: &Path) -> PathBuf {
fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf())
}
fn packed_type_to_kind(pt: PackedType) -> Result<ObjectKind> {
match pt {
PackedType::Commit => Ok(ObjectKind::Commit),
PackedType::Tree => Ok(ObjectKind::Tree),
PackedType::Blob => Ok(ObjectKind::Blob),
PackedType::Tag => Ok(ObjectKind::Tag),
PackedType::OfsDelta | PackedType::RefDelta => Err(Error::CorruptObject(
"cannot convert delta type to object kind directly".to_owned(),
)),
}
}
fn decompress_pack_data(bytes: &[u8], pos: &mut usize, expected_size: u64) -> Result<Vec<u8>> {
let slice = &bytes[*pos..];
let mut decoder = ZlibDecoder::new(slice);
let mut out = Vec::with_capacity(expected_size as usize);
decoder
.read_to_end(&mut out)
.map_err(|e| Error::Zlib(e.to_string()))?;
*pos += decoder.total_in() as usize;
Ok(out)
}
fn read_pack_object_at(
pack_bytes: &[u8],
offset: u64,
idx: &PackIndex,
depth: usize,
) -> Result<(ObjectKind, Vec<u8>)> {
if depth > 50 {
return Err(Error::CorruptObject(
"delta chain too deep (>50)".to_owned(),
));
}
let mut pos = offset as usize;
let (packed_type, size) = parse_pack_object_header(pack_bytes, &mut pos)?;
match packed_type {
PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {
let data = decompress_pack_data(pack_bytes, &mut pos, size)?;
let kind = packed_type_to_kind(packed_type)?;
Ok((kind, data))
}
PackedType::OfsDelta => {
let base_offset = parse_ofs_delta_base(pack_bytes, &mut pos, offset)?;
let delta_data = decompress_pack_data(pack_bytes, &mut pos, size)?;
let (base_kind, base_data) =
read_pack_object_at(pack_bytes, base_offset, idx, depth + 1)?;
let result = apply_delta(&base_data, &delta_data)?;
Ok((base_kind, result))
}
PackedType::RefDelta => {
let hb = idx.hash_bytes;
if pos + hb > pack_bytes.len() {
return Err(Error::CorruptObject(
"truncated ref-delta base OID".to_owned(),
));
}
let base_raw = pack_bytes[pos..pos + hb].to_vec();
pos += hb;
let delta_data = decompress_pack_data(pack_bytes, &mut pos, size)?;
let base_entry = idx
.entries
.iter()
.find(|e| e.oid == base_raw)
.ok_or_else(|| {
Error::CorruptObject(format!(
"ref-delta base {} not found in pack",
oid_bytes_to_hex(&base_raw)
))
})?;
let (base_kind, base_data) =
read_pack_object_at(pack_bytes, base_entry.offset, idx, depth + 1)?;
let result = apply_delta(&base_data, &delta_data)?;
Ok((base_kind, result))
}
}
}
pub fn read_object_from_pack(idx: &PackIndex, oid: &ObjectId) -> Result<Object> {
if idx.find_offset(oid).is_none() {
return Err(Error::ObjectNotFound(oid.to_hex()));
}
let pack_bytes = read_pack_bytes_cached(&idx.pack_path)?;
read_object_from_pack_bytes(&pack_bytes, idx, oid.as_bytes().as_slice())
}
pub fn read_object_from_pack_bytes(
pack_bytes: &[u8],
idx: &PackIndex,
oid: &[u8],
) -> Result<Object> {
let entry = idx
.entries
.iter()
.find(|e| e.oid.as_slice() == oid)
.ok_or_else(|| Error::ObjectNotFound(oid_bytes_to_hex(oid)))?;
let (kind, data) = read_pack_object_at(pack_bytes, entry.offset, idx, 0)?;
Ok(Object::new(kind, data))
}
pub fn read_object_from_packs(objects_dir: &Path, oid: &ObjectId) -> Result<Object> {
let indexes = read_local_pack_indexes_cached(objects_dir)?;
for idx in &indexes {
if idx.find_offset(oid).is_some() {
return read_object_from_pack(idx, oid);
}
}
Err(Error::ObjectNotFound(oid.to_hex()))
}
pub fn packed_ref_delta_reuse_slice(
objects_dir: &Path,
oid: &ObjectId,
packed_set: &HashSet<ObjectId>,
) -> Result<Option<(ObjectId, Vec<u8>)>> {
let mut indexes = read_local_pack_indexes(objects_dir)?;
sort_pack_indexes_oldest_first(&mut indexes);
for idx in indexes {
let Some(entry) = idx
.entries
.iter()
.find(|e| e.oid.len() == 20 && e.oid.as_slice() == oid.as_bytes().as_slice())
else {
continue;
};
let hb = idx.hash_bytes;
if hb != 20 {
continue;
}
let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
let mut p = entry.offset as usize;
let (packed_type, _size) = parse_pack_object_header(&pack_bytes, &mut p)?;
let base = match packed_type {
PackedType::RefDelta => {
if p + hb > pack_bytes.len() {
return Err(Error::CorruptObject(
"truncated ref-delta base oid while scanning for reuse".to_owned(),
));
}
let bo = ObjectId::from_bytes(&pack_bytes[p..p + hb])?;
p += hb;
bo
}
PackedType::OfsDelta => {
let base_off = parse_ofs_delta_base(&pack_bytes, &mut p, entry.offset)?;
let Some(base_entry) = idx.entries.iter().find(|e| e.offset == base_off) else {
continue;
};
if base_entry.oid.len() != 20 {
continue;
}
ObjectId::from_bytes(base_entry.oid.as_slice())?
}
_ => {
continue;
}
};
if !packed_set.contains(&base) {
continue;
}
let zlib_start = p;
let mut end_pos = zlib_start;
if skip_one_pack_object(&pack_bytes, &mut end_pos, entry.offset, hb).is_err() {
continue;
}
let compressed = &pack_bytes[zlib_start..end_pos];
let mut dec = ZlibDecoder::new(compressed);
let mut delta = Vec::new();
if dec.read_to_end(&mut delta).is_err() {
continue;
}
return Ok(Some((base, delta)));
}
Ok(None)
}
fn sort_pack_indexes_oldest_first(indexes: &mut [PackIndex]) {
indexes.sort_by(|a, b| {
let ta = fs::metadata(&a.pack_path)
.and_then(|m| m.modified())
.unwrap_or(std::time::SystemTime::UNIX_EPOCH);
let tb = fs::metadata(&b.pack_path)
.and_then(|m| m.modified())
.unwrap_or(std::time::SystemTime::UNIX_EPOCH);
ta.cmp(&tb).then_with(|| a.pack_path.cmp(&b.pack_path))
});
}
fn sort_pack_indexes_newest_first(indexes: &mut [PackIndex]) {
indexes.sort_by(|a, b| {
let ta = fs::metadata(&a.pack_path)
.and_then(|m| m.modified())
.unwrap_or(std::time::SystemTime::UNIX_EPOCH);
let tb = fs::metadata(&b.pack_path)
.and_then(|m| m.modified())
.unwrap_or(std::time::SystemTime::UNIX_EPOCH);
tb.cmp(&ta).then_with(|| b.pack_path.cmp(&a.pack_path))
});
}
pub fn packed_delta_base_oid(objects_dir: &Path, oid: &ObjectId) -> Result<Option<ObjectId>> {
let mut indexes = read_local_pack_indexes(objects_dir)?;
sort_pack_indexes_newest_first(&mut indexes);
for idx in &indexes {
if idx.hash_bytes != 20 {
continue;
}
let Some(entry) = idx
.entries
.iter()
.find(|e| e.oid.len() == 20 && e.oid.as_slice() == oid.as_bytes().as_slice())
else {
continue;
};
let pack_bytes = fs::read(&idx.pack_path).map_err(Error::Io)?;
let mut p = entry.offset as usize;
let (packed_type, _) = parse_pack_object_header(&pack_bytes, &mut p)?;
match packed_type {
PackedType::RefDelta => {
let hb = idx.hash_bytes;
if p + hb > pack_bytes.len() {
return Err(Error::CorruptObject("truncated ref-delta base".to_owned()));
}
return Ok(Some(ObjectId::from_bytes(&pack_bytes[p..p + hb])?));
}
PackedType::OfsDelta => {
let base_off = parse_ofs_delta_base(&pack_bytes, &mut p, entry.offset)?;
return Ok(idx
.entries
.iter()
.find(|e| e.offset == base_off)
.and_then(|e| ObjectId::from_bytes(e.oid.as_slice()).ok()));
}
_ => continue,
}
}
Ok(None)
}
fn parse_pack_object_header(bytes: &[u8], pos: &mut usize) -> Result<(PackedType, u64)> {
let first = *bytes.get(*pos).ok_or_else(|| {
Error::CorruptObject("unexpected end of pack header while decoding object".to_owned())
})?;
*pos += 1;
let type_code = (first >> 4) & 0x7;
let mut size = (first & 0x0f) as u64;
let mut shift = 4u32;
let mut c = first;
while (c & 0x80) != 0 {
c = *bytes.get(*pos).ok_or_else(|| {
Error::CorruptObject("unexpected end of variable size header".to_owned())
})?;
*pos += 1;
size |= ((c & 0x7f) as u64) << shift;
shift += 7;
}
let packed_type = match type_code {
1 => PackedType::Commit,
2 => PackedType::Tree,
3 => PackedType::Blob,
4 => PackedType::Tag,
6 => PackedType::OfsDelta,
7 => PackedType::RefDelta,
_ => {
return Err(Error::CorruptObject(format!(
"unsupported packed object type {}",
type_code
)))
}
};
Ok((packed_type, size))
}
#[derive(Debug, Clone, Copy)]
pub enum PackedDeltaDependency {
OfsBase {
base_offset: u64,
},
RefBase {
base_oid: ObjectId,
},
}
pub fn read_packed_delta_dependency(
pack_bytes: &[u8],
object_offset: u64,
) -> Result<Option<PackedDeltaDependency>> {
let mut pos = object_offset as usize;
let (ty, _) = parse_pack_object_header(pack_bytes, &mut pos)?;
match ty {
PackedType::OfsDelta => {
let base = parse_ofs_delta_base(pack_bytes, &mut pos, object_offset)?;
Ok(Some(PackedDeltaDependency::OfsBase { base_offset: base }))
}
PackedType::RefDelta => {
if pos + 20 > pack_bytes.len() {
return Err(Error::CorruptObject("truncated ref-delta base oid".into()));
}
let base_oid = ObjectId::from_bytes(&pack_bytes[pos..pos + 20])?;
Ok(Some(PackedDeltaDependency::RefBase { base_oid }))
}
_ => Ok(None),
}
}
fn parse_ofs_delta_base(bytes: &[u8], pos: &mut usize, this_offset: u64) -> Result<u64> {
let mut c = *bytes
.get(*pos)
.ok_or_else(|| Error::CorruptObject("truncated ofs-delta header".to_owned()))?;
*pos += 1;
let mut value = (c & 0x7f) as u64;
while (c & 0x80) != 0 {
c = *bytes
.get(*pos)
.ok_or_else(|| Error::CorruptObject("truncated ofs-delta header".to_owned()))?;
*pos += 1;
value = ((value + 1) << 7) | (c & 0x7f) as u64;
}
this_offset
.checked_sub(value)
.ok_or_else(|| Error::CorruptObject("invalid ofs-delta base offset".to_owned()))
}
#[must_use]
pub fn slice_one_pack_object(
bytes: &[u8],
object_start_offset: u64,
hash_bytes: usize,
) -> Result<&[u8]> {
let start = object_start_offset as usize;
let mut pos = start;
skip_one_pack_object(bytes, &mut pos, object_start_offset, hash_bytes)?;
Ok(&bytes[start..pos])
}
pub fn skip_one_pack_object(
bytes: &[u8],
pos: &mut usize,
object_start_offset: u64,
hash_bytes: usize,
) -> Result<()> {
let (packed_type, size) = parse_pack_object_header(bytes, pos)?;
match packed_type {
PackedType::Commit | PackedType::Tree | PackedType::Blob | PackedType::Tag => {
let mut dec = ZlibDecoder::new(&bytes[*pos..]);
let mut tmp = Vec::with_capacity(size as usize);
dec.read_to_end(&mut tmp)
.map_err(|e| Error::Zlib(e.to_string()))?;
*pos += dec.total_in() as usize;
}
PackedType::RefDelta => {
if *pos + hash_bytes > bytes.len() {
return Err(Error::CorruptObject("truncated ref-delta base oid".into()));
}
*pos += hash_bytes;
let mut dec = ZlibDecoder::new(&bytes[*pos..]);
let mut tmp = Vec::with_capacity(size as usize);
dec.read_to_end(&mut tmp)
.map_err(|e| Error::Zlib(e.to_string()))?;
*pos += dec.total_in() as usize;
}
PackedType::OfsDelta => {
let _base_off = parse_ofs_delta_base(bytes, pos, object_start_offset)?;
let mut dec = ZlibDecoder::new(&bytes[*pos..]);
let mut tmp = Vec::with_capacity(size as usize);
dec.read_to_end(&mut tmp)
.map_err(|e| Error::Zlib(e.to_string()))?;
*pos += dec.total_in() as usize;
}
}
Ok(())
}
fn read_u32_be(bytes: &[u8], pos: &mut usize) -> Result<u32> {
if bytes.len() < *pos + 4 {
return Err(Error::CorruptObject(
"unexpected end of idx while reading u32".to_owned(),
));
}
let v = u32::from_be_bytes(
bytes[*pos..*pos + 4]
.try_into()
.map_err(|_| Error::CorruptObject("failed to parse u32".to_owned()))?,
);
*pos += 4;
Ok(v)
}
fn read_u64_be(bytes: &[u8], pos: &mut usize) -> Result<u64> {
if bytes.len() < *pos + 8 {
return Err(Error::CorruptObject(
"unexpected end of idx while reading u64".to_owned(),
));
}
let v = u64::from_be_bytes(
bytes[*pos..*pos + 8]
.try_into()
.map_err(|_| Error::CorruptObject("failed to parse u64".to_owned()))?,
);
*pos += 8;
Ok(v)
}
pub fn read_idx_object_ids(idx_path: &Path) -> Result<Vec<ObjectId>> {
let index = read_pack_index(idx_path)?;
let mut out = Vec::new();
for e in index.entries {
if e.oid.len() == 20 {
out.push(ObjectId::from_bytes(&e.oid)?);
}
}
Ok(out)
}