use std::path::Path;
use super::deflate::inflate_zlib;
use super::error::GitError;
use super::object::ObjectType;
use super::pack_index::PackIndex;
/// Deepest delta-chain recursion `Packfile::read_object_recursive` will follow.
/// Once the recursion depth exceeds this value the read fails with
/// `GitError::DeltaChainTooDeep` instead of recursing further.
const MAX_DELTA_CHAIN: usize = 50;
/// A pack file held entirely in memory.
///
/// Instances are created through `Packfile::open` or `Packfile::from_bytes`,
/// both of which validate the 12-byte pack header before constructing the value.
pub struct Packfile {
// Complete contents of the pack, header included; object offsets passed to
// `read_object_at` index directly into this buffer.
data: Vec<u8>,
}
impl Packfile {
pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, GitError> {
let data = std::fs::read(path.as_ref()).map_err(GitError::Io)?;
Self::validate_header(&data)?;
Ok(Self { data })
}
pub fn from_bytes(data: &[u8]) -> Result<Self, GitError> {
Self::validate_header(data)?;
Ok(Self {
data: data.to_vec(),
})
}
fn validate_header(data: &[u8]) -> Result<(), GitError> {
if data.len() < 12 {
return Err(GitError::InvalidPack("pack file too small".into()));
}
if &data[..4] != b"PACK" {
return Err(GitError::InvalidPack("bad pack magic".into()));
}
let version = u32::from_be_bytes([data[4], data[5], data[6], data[7]]);
if version != 2 {
return Err(GitError::InvalidPack(format!(
"unsupported pack version: {version}"
)));
}
Ok(())
}
pub fn read_object_at(
&self,
offset: u64,
idx: &PackIndex,
) -> Result<(ObjectType, Vec<u8>), GitError> {
self.read_object_recursive(offset, idx, 0)
}
fn read_object_recursive(
&self,
offset: u64,
idx: &PackIndex,
depth: usize,
) -> Result<(ObjectType, Vec<u8>), GitError> {
if depth > MAX_DELTA_CHAIN {
return Err(GitError::DeltaChainTooDeep(MAX_DELTA_CHAIN));
}
let pos = offset as usize;
let (type_id, _size, header_len) = parse_object_header(&self.data[pos..]);
let data_start = pos + header_len;
match type_id {
1..=4 => {
let obj_type = ObjectType::from_type_id(type_id)?;
let content = inflate_zlib(&self.data[data_start..])?;
Ok((obj_type, content))
}
6 => {
let (neg_offset, ofs_len) = read_ofs_delta_offset(&self.data[data_start..]);
let base_offset = offset
.checked_sub(neg_offset)
.ok_or_else(|| GitError::InvalidPack("OFS_DELTA underflow".into()))?;
let delta_data_start = data_start + ofs_len;
let delta_bytes = inflate_zlib(&self.data[delta_data_start..])?;
let (base_type, base_data) =
self.read_object_recursive(base_offset, idx, depth + 1)?;
let result = apply_delta(&base_data, &delta_bytes)?;
Ok((base_type, result))
}
7 => {
let ref_oid_bytes: [u8; 20] = self.data[data_start..data_start + 20]
.try_into()
.map_err(|_| GitError::InvalidPack("REF_DELTA OID too short".into()))?;
let ref_oid = super::oid::Oid::from_bytes(ref_oid_bytes);
let delta_data_start = data_start + 20;
let delta_bytes = inflate_zlib(&self.data[delta_data_start..])?;
let base_offset = idx
.find(&ref_oid)
.ok_or(GitError::ObjectNotFound(ref_oid))?;
let (base_type, base_data) =
self.read_object_recursive(base_offset, idx, depth + 1)?;
let result = apply_delta(&base_data, &delta_bytes)?;
Ok((base_type, result))
}
_ => Err(GitError::InvalidPack(format!(
"unknown pack object type: {type_id}"
))),
}
}
}
/// Decodes the variable-length header that starts a pack object entry.
///
/// The first byte carries a continuation flag (bit 7), the 3-bit type id
/// (bits 4-6) and the low 4 bits of the size. While the continuation flag is
/// set, each following byte contributes 7 more size bits, least significant
/// group first.
///
/// Returns `(type_id, size, bytes_consumed)`.
///
/// Size bits that would land beyond bit 63 are discarded, so an over-long
/// header yields the low 64 bits of the size instead of overflowing the shift.
///
/// # Panics
/// Panics if `data` is empty or ends before a byte with the continuation flag
/// clear is reached.
pub fn parse_object_header(data: &[u8]) -> (u8, u64, usize) {
    let first = data[0];
    let type_id = (first >> 4) & 0x07;
    let mut size = u64::from(first & 0x0F);
    let mut shift = 4u32;
    let mut consumed = 1;
    let mut more = first & 0x80 != 0;
    while more {
        let byte = data[consumed];
        consumed += 1;
        // `checked_shl` yields `None` once the shift reaches 64 bits; a plain
        // `<<` would panic in debug builds and wrap the shift amount in release.
        if let Some(bits) = u64::from(byte & 0x7F).checked_shl(shift) {
            size |= bits;
        }
        shift = shift.saturating_add(7);
        more = byte & 0x80 != 0;
    }
    (type_id, size, consumed)
}
/// Decodes the backwards distance stored at the start of an OFS_DELTA entry.
///
/// Each byte supplies 7 bits, most significant group first, and bit 7 marks
/// that another byte follows. Before every continuation byte is merged the
/// accumulated value is incremented by one and shifted left by 7.
///
/// Returns `(distance, bytes_consumed)`.
///
/// # Panics
/// Panics if `data` is empty or ends before a byte with bit 7 clear is reached.
fn read_ofs_delta_offset(data: &[u8]) -> (u64, usize) {
    let first = data[0];
    let mut distance = (first & 0x7F) as u64;
    let mut consumed = 1;
    let mut more = first & 0x80 != 0;
    while more {
        let next = data[consumed];
        consumed += 1;
        distance = ((distance + 1) << 7) | (next & 0x7F) as u64;
        more = next & 0x80 != 0;
    }
    (distance, consumed)
}
fn apply_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>, GitError> {
let mut pos = 0;
let (_base_size, consumed) = read_size_vlq(&delta[pos..]);
pos += consumed;
let (result_size, consumed) = read_size_vlq(&delta[pos..]);
pos += consumed;
let mut output = Vec::with_capacity(result_size as usize);
while pos < delta.len() {
let cmd = delta[pos];
pos += 1;
if cmd & 0x80 != 0 {
let mut offset = 0u32;
let mut size = 0u32;
if cmd & 0x01 != 0 {
offset |= delta[pos] as u32;
pos += 1;
}
if cmd & 0x02 != 0 {
offset |= (delta[pos] as u32) << 8;
pos += 1;
}
if cmd & 0x04 != 0 {
offset |= (delta[pos] as u32) << 16;
pos += 1;
}
if cmd & 0x08 != 0 {
offset |= (delta[pos] as u32) << 24;
pos += 1;
}
if cmd & 0x10 != 0 {
size |= delta[pos] as u32;
pos += 1;
}
if cmd & 0x20 != 0 {
size |= (delta[pos] as u32) << 8;
pos += 1;
}
if cmd & 0x40 != 0 {
size |= (delta[pos] as u32) << 16;
pos += 1;
}
if size == 0 {
size = 0x10000;
}
let start = offset as usize;
let end = start + size as usize;
if end > base.len() {
return Err(GitError::InvalidPack(format!(
"delta copy out of bounds: offset={offset} size={size} base_len={}",
base.len()
)));
}
output.extend_from_slice(&base[start..end]);
} else if cmd > 0 {
let len = cmd as usize;
output.extend_from_slice(&delta[pos..pos + len]);
pos += len;
} else {
return Err(GitError::InvalidPack(
"delta instruction 0 is reserved".into(),
));
}
}
if output.len() != result_size as usize {
return Err(GitError::InvalidPack(format!(
"delta result size mismatch: expected {result_size}, got {}",
output.len()
)));
}
Ok(output)
}
/// Decodes a little-endian base-128 size varint.
///
/// Each byte supplies 7 bits, least significant group first, and bit 7 marks
/// that another byte follows.
///
/// Returns `(value, bytes_consumed)`.
///
/// Bits that would land beyond bit 63 are discarded, so an over-long encoding
/// yields the low 64 bits of the value instead of overflowing the shift.
///
/// # Panics
/// Panics if `data` is empty or ends before a byte with bit 7 clear is reached.
fn read_size_vlq(data: &[u8]) -> (u64, usize) {
    let mut value = 0u64;
    let mut shift = 0u32;
    let mut consumed = 0;
    loop {
        let byte = data[consumed];
        consumed += 1;
        // `checked_shl` yields `None` once the shift reaches 64 bits; a plain
        // `<<` would panic in debug builds and wrap the shift amount in release.
        if let Some(bits) = u64::from(byte & 0x7F).checked_shl(shift) {
            value |= bits;
        }
        shift = shift.saturating_add(7);
        if byte & 0x80 == 0 {
            break;
        }
    }
    (value, consumed)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a 12-byte pack header: `magic`, a big-endian `version`, and an
    /// object count of zero.
    fn pack_header(magic: &[u8; 4], version: u32) -> Vec<u8> {
        let mut bytes = magic.to_vec();
        bytes.extend_from_slice(&version.to_be_bytes());
        bytes.extend_from_slice(&0u32.to_be_bytes());
        bytes
    }

    /// Smoke test against the enclosing repository: reads the first indexed
    /// object of the first pack found. Does nothing when the repository has
    /// no pack directory or no `.idx` file.
    #[test]
    fn test_read_object_from_pack() {
        let git_dir = crate::git::raw::tests::find_repo_git_dir();
        let pack_dir = git_dir.join("objects/pack");
        if !pack_dir.exists() {
            return;
        }
        for entry in std::fs::read_dir(&pack_dir).unwrap().flatten() {
            let name = entry.file_name().to_string_lossy().to_string();
            if name.ends_with(".idx") {
                let pack_path = entry.path().with_extension("pack");
                let idx = PackIndex::open(entry.path()).unwrap();
                let pack = Packfile::open(&pack_path).unwrap();
                let oid = idx.oid_at(0).unwrap();
                let offset = idx.find(&oid).unwrap();
                let (obj_type, data) = pack.read_object_at(offset, &idx).unwrap();
                assert!(matches!(
                    obj_type,
                    ObjectType::Commit | ObjectType::Tree | ObjectType::Blob | ObjectType::Tag
                ));
                assert!(!data.is_empty());
                return;
            }
        }
    }

    /// Exercises each rejection path of the header check separately, plus the
    /// accepting path.
    #[test]
    fn test_pack_header_validation() {
        // Shorter than the 12-byte header: rejected by the length check.
        let bad_data = b"NOTPACK";
        let result = Packfile::from_bytes(bad_data);
        assert!(result.is_err());

        // Long enough, but the magic is wrong.
        assert!(matches!(
            Packfile::from_bytes(&pack_header(b"KCAP", 2)),
            Err(GitError::InvalidPack(_))
        ));

        // Correct magic, unsupported version.
        assert!(matches!(
            Packfile::from_bytes(&pack_header(b"PACK", 3)),
            Err(GitError::InvalidPack(_))
        ));

        // Well-formed header of an empty pack.
        assert!(Packfile::from_bytes(&pack_header(b"PACK", 2)).is_ok());
    }

    /// Checks object-header decoding for single-byte and continued headers.
    #[test]
    fn test_vlq_size_decode() {
        let data = [0x1A];
        let (obj_type, size, consumed) = parse_object_header(&data);
        assert_eq!(obj_type, 1);
        assert_eq!(size, 10);
        assert_eq!(consumed, 1);

        // Continuation byte adds bits above the low nibble: 0xF | (1 << 4).
        let data = [0x9F, 0x01];
        assert_eq!(parse_object_header(&data), (1, 31, 2));
    }
}