use std::path::Path;
use super::error::GitError;
use super::oid::Oid;
pub struct PackIndex {
data: Vec<u8>,
object_count: u32,
}
const HEADER_SIZE: usize = 8;
const FANOUT_SIZE: usize = 256 * 4;
const FANOUT_OFFSET: usize = HEADER_SIZE;
const OID_TABLE_OFFSET: usize = HEADER_SIZE + FANOUT_SIZE;
impl PackIndex {
pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, GitError> {
let data = std::fs::read(path.as_ref()).map_err(GitError::Io)?;
if data.len() < HEADER_SIZE + FANOUT_SIZE {
return Err(GitError::InvalidPack("index file too small".into()));
}
let magic = read_u32_be(&data, 0);
let version = read_u32_be(&data, 4);
if magic != 0xff74_4f63 {
return Err(GitError::InvalidPack(format!(
"bad pack index magic: {magic:#010x}"
)));
}
if version != 2 {
return Err(GitError::InvalidPack(format!(
"unsupported pack index version: {version}"
)));
}
let object_count = read_u32_be(&data, FANOUT_OFFSET + 255 * 4);
Ok(Self { data, object_count })
}
pub fn object_count(&self) -> u32 {
self.object_count
}
pub fn find(&self, oid: &Oid) -> Option<u64> {
let bytes = oid.as_bytes();
let first_byte = bytes[0] as usize;
let lo = if first_byte == 0 {
0u32
} else {
read_u32_be(&self.data, FANOUT_OFFSET + (first_byte - 1) * 4)
};
let hi = read_u32_be(&self.data, FANOUT_OFFSET + first_byte * 4);
if lo >= hi {
return None;
}
let mut low = lo;
let mut high = hi;
while low < high {
let mid = low + (high - low) / 2;
let oid_offset = OID_TABLE_OFFSET + mid as usize * 20;
let entry = &self.data[oid_offset..oid_offset + 20];
match entry.cmp(bytes.as_slice()) {
std::cmp::Ordering::Equal => {
return Some(self.read_offset(mid));
}
std::cmp::Ordering::Less => {
low = mid + 1;
}
std::cmp::Ordering::Greater => {
high = mid;
}
}
}
None
}
pub fn oid_at(&self, index: u32) -> Option<Oid> {
if index >= self.object_count {
return None;
}
let offset = OID_TABLE_OFFSET + index as usize * 20;
Oid::from_slice(&self.data[offset..offset + 20]).ok()
}
fn read_offset(&self, index: u32) -> u64 {
let n = self.object_count as usize;
let offset_table_start = OID_TABLE_OFFSET + n * 20 + n * 4;
let raw = read_u32_be(&self.data, offset_table_start + index as usize * 4);
if raw & 0x8000_0000 != 0 {
let large_idx = (raw & 0x7FFF_FFFF) as usize;
let large_table_start = offset_table_start + n * 4;
read_u64_be(&self.data, large_table_start + large_idx * 8)
} else {
raw as u64
}
}
}
fn read_u32_be(data: &[u8], offset: usize) -> u32 {
u32::from_be_bytes([
data[offset],
data[offset + 1],
data[offset + 2],
data[offset + 3],
])
}
fn read_u64_be(data: &[u8], offset: usize) -> u64 {
u64::from_be_bytes([
data[offset],
data[offset + 1],
data[offset + 2],
data[offset + 3],
data[offset + 4],
data[offset + 5],
data[offset + 6],
data[offset + 7],
])
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_read_real_pack_index() {
let git_dir = crate::git::raw::tests::find_repo_git_dir();
let pack_dir = git_dir.join("objects/pack");
if !pack_dir.exists() {
return;
}
for entry in std::fs::read_dir(&pack_dir).unwrap().flatten() {
let name = entry.file_name().to_string_lossy().to_string();
if name.ends_with(".idx") {
let idx = PackIndex::open(entry.path()).unwrap();
assert!(idx.object_count() > 0);
let first_oid = idx.oid_at(0).unwrap();
let offset = idx.find(&first_oid);
assert!(offset.is_some(), "first OID not found in index");
return;
}
}
}
#[test]
fn test_binary_search_correctness() {
let git_dir = crate::git::raw::tests::find_repo_git_dir();
let pack_dir = git_dir.join("objects/pack");
if !pack_dir.exists() {
return;
}
for entry in std::fs::read_dir(&pack_dir).unwrap().flatten() {
let name = entry.file_name().to_string_lossy().to_string();
if name.ends_with(".idx") {
let idx = PackIndex::open(entry.path()).unwrap();
let count = idx.object_count();
for i in [0, count / 4, count / 2, count - 1].iter().copied() {
if i >= count {
continue;
}
let oid = idx.oid_at(i).unwrap();
assert!(idx.find(&oid).is_some(), "OID at index {i} not found");
}
assert!(idx.find(&Oid::ZERO).is_none());
return;
}
}
}
}