use std::path::Path;
use bytes::Bytes;
use super::{
ObjectType, PackObjectId, PackObjectRecord, decompress_pack_payload, has_zstd_magic,
pack_container_spec, pack_index::PackIndex, varint, verify_container,
};
use crate::{
object::ContentHash,
store::{Result, StoreError},
};
/// Largest declared output size accepted for a delta record; mirrors
/// `crate::delta::MAX_DELTA_OUTPUT_SIZE` and is checked in `read_delta_record`.
const MAX_PACK_DELTA_OUTPUT_SIZE: usize = crate::delta::MAX_DELTA_OUTPUT_SIZE;
/// Largest `depth` value `read_delta_record` accepts before it rejects a
/// delta chain as too deep.
const MAX_DELTA_CHAIN_DEPTH: usize = 50;
/// Backing storage for the raw pack bytes held by a `PackReader`.
enum PackData<'a> {
/// Pack bytes borrowed from the caller for the lifetime `'a`.
Borrowed(&'a [u8]),
/// Pack bytes owned by the reader as a `Bytes` buffer.
Owned(Bytes),
}
impl<'a> PackData<'a> {
    /// Exposes the pack bytes as a plain slice regardless of how they are held.
    fn as_slice(&self) -> &[u8] {
        match self {
            Self::Owned(shared) => shared,
            Self::Borrowed(raw) => raw,
        }
    }

    /// Returns the bytes in `range` as a `Bytes` value.
    ///
    /// Owned storage is sliced through `Bytes::slice`; borrowed storage has to
    /// be copied into a fresh `Bytes`. Panics if `range` is out of bounds, so
    /// callers must validate the range first.
    fn slice(&self, range: std::ops::Range<usize>) -> Bytes {
        match self {
            Self::Owned(shared) => shared.slice(range),
            Self::Borrowed(raw) => Bytes::copy_from_slice(&raw[range]),
        }
    }
}
/// Read-only accessor for a verified pack container and its index.
pub struct PackReader<'a> {
/// Raw pack bytes, either borrowed from the caller or owned.
data: PackData<'a>,
/// Parsed index used to map object ids to record offsets.
index: PackIndex,
/// End-of-content offset reported by `verify_container`; record reads are
/// bounds-checked against it.
content_end: usize,
}
impl PackReader<'static> {
    /// Opens a pack from disk by loading both the pack file and its index file.
    ///
    /// Both files are read fully before any validation happens; the loaded
    /// bytes are then handed to [`PackReader::from_bytes`].
    ///
    /// # Errors
    /// Fails if either file cannot be read, if the pack container does not
    /// verify, or if the index bytes cannot be parsed.
    pub fn open(pack_path: &Path, index_path: &Path) -> Result<Self> {
        let pack_bytes = crate::store::fs::read_file_bytes_for_pack(pack_path)?;
        let index_data = std::fs::read(index_path)?;
        Self::from_bytes(pack_bytes, index_data)
    }

    /// Builds a reader that owns its pack bytes.
    ///
    /// The container is verified first, then the index is parsed.
    ///
    /// # Errors
    /// Fails if container verification or index parsing fails.
    pub fn from_bytes(pack_data: impl Into<Bytes>, index_data: impl AsRef<[u8]>) -> Result<Self> {
        let owned: Bytes = pack_data.into();
        let (_, _, content_end) = verify_container(&owned, pack_container_spec())?;
        let index = PackIndex::from_bytes(index_data.as_ref())?;
        let data = PackData::Owned(owned);
        Ok(Self {
            data,
            index,
            content_end,
        })
    }
}
impl<'a> PackReader<'a> {
/// Builds a reader over pack bytes borrowed from the caller.
///
/// The container is verified first, then the index is parsed.
///
/// # Errors
/// Fails if container verification or index parsing fails.
pub fn from_slice(pack_data: &'a [u8], index_data: impl AsRef<[u8]>) -> Result<Self> {
    let spec = pack_container_spec();
    let (_, _, content_end) = verify_container(pack_data, spec)?;
    let index_bytes: &[u8] = index_data.as_ref();
    let index = PackIndex::from_bytes(index_bytes)?;
    let data = PackData::Borrowed(pack_data);
    Ok(Self {
        data,
        index,
        content_end,
    })
}
/// Returns the object ids reported by the pack index (`PackIndex::ids`).
pub fn list_ids(&self) -> Vec<PackObjectId> {
self.index.ids()
}
/// Returns only the hash-backed ids from [`Self::list_ids`], in the same
/// order; `ChangeId` entries are skipped.
pub fn list_hashes(&self) -> Vec<ContentHash> {
    let mut hashes = Vec::new();
    for id in self.list_ids() {
        match id {
            PackObjectId::Hash(hash) => hashes.push(hash),
            PackObjectId::ChangeId(_) => {}
        }
    }
    hashes
}
/// Reports whether the index has an entry for `id`. Only the index is
/// consulted; the pack record itself is not read or validated.
pub fn has_object(&self, id: &PackObjectId) -> bool {
self.index.find(id).is_some()
}
pub fn get_object(&self, id: &PackObjectId) -> Result<Option<(ObjectType, Vec<u8>)>> {
let offset = match self.index.find(id) {
Some(offset) => checked_index_offset(offset)?,
None => return Ok(None),
};
let record = self.read_record_at_depth(offset, 0)?;
verify_record_id_matches(id, &record.id)?;
Ok(Some((record.obj_type, record.data)))
}
/// Convenience wrapper: calls `get_object` with `hash` wrapped as
/// `PackObjectId::Hash`.
pub fn get_hashed_object(&self, hash: &ContentHash) -> Result<Option<(ObjectType, Vec<u8>)>> {
self.get_object(&PackObjectId::Hash(*hash))
}
pub fn get_object_bytes(&self, id: &PackObjectId) -> Result<Option<(ObjectType, Bytes)>> {
let Some(offset) = self.index.find(id) else {
return Ok(None);
};
let offset = checked_index_offset(offset)?;
if offset >= self.content_end {
return Err(StoreError::InvalidObject(
"Entry offset out of bounds".to_string(),
));
}
let (record_id, id_len) = PackObjectId::decode_tagged(self.content_from(offset)?)?;
verify_record_id_matches(id, &record_id)?;
let header_start = checked_index_add(offset, id_len, "record header start")?;
let (obj_type, uncompressed_size, type_len) =
varint::decode_type_and_size(self.content_from(header_start)?).ok_or_else(|| {
StoreError::InvalidObject("Truncated type+size varint".to_string())
})?;
let uncompressed_size = checked_decoded_size("uncompressed_size", uncompressed_size)?;
let varint_start = checked_index_add(header_start, type_len, "compressed_size start")?;
let (compressed_size, comp_len) = varint::decode_varint(self.content_from(varint_start)?)
.ok_or_else(truncated_compressed_size_varint)?;
let compressed_size = checked_decoded_size("compressed_size", compressed_size)?;
if obj_type != ObjectType::Delta && compressed_size == uncompressed_size {
let data_start = checked_index_add(varint_start, comp_len, "entry data start")?;
let data_end = checked_data_end(data_start, compressed_size, self.content_end)?;
return Ok(Some((obj_type, self.data.slice(data_start..data_end))));
}
let record = self.read_record_at_depth(offset, 0)?;
Ok(Some((record.obj_type, Bytes::from(record.data))))
}
/// Convenience wrapper: calls `get_object_bytes` with `hash` wrapped as
/// `PackObjectId::Hash`.
pub fn get_hashed_object_bytes(
&self,
hash: &ContentHash,
) -> Result<Option<(ObjectType, Bytes)>> {
self.get_object_bytes(&PackObjectId::Hash(*hash))
}
pub fn get_hashed_object_size(&self, hash: &ContentHash) -> Result<Option<u64>> {
let id = PackObjectId::Hash(*hash);
let Some(offset) = self.index.find(&id) else {
return Ok(None);
};
let offset = checked_index_offset(offset)?;
if offset >= self.content_end {
return Err(StoreError::InvalidObject(
"Entry offset out of bounds".to_string(),
));
}
let (record_id, id_len) = PackObjectId::decode_tagged(self.content_from(offset)?)?;
verify_record_id_matches(&id, &record_id)?;
let header_start = checked_index_add(offset, id_len, "record header start")?;
let (obj_type, uncompressed_size, _type_len) = super::varint::decode_type_and_size(
self.content_from(header_start)?,
)
.ok_or_else(|| StoreError::InvalidObject("Truncated type+size varint".to_string()))?;
if obj_type == ObjectType::Delta {
return Ok(Some(uncompressed_size));
}
Ok(Some(uncompressed_size))
}
fn read_record_at_depth(&self, offset: usize, depth: usize) -> Result<PackObjectRecord> {
if offset >= self.content_end {
return Err(StoreError::InvalidObject(
"Entry offset out of bounds".to_string(),
));
}
let (id, id_len) = PackObjectId::decode_tagged(self.content_from(offset)?)?;
let header_start = checked_index_add(offset, id_len, "record header start")?;
let (obj_type, uncompressed_size, type_len) =
varint::decode_type_and_size(self.content_from(header_start)?).ok_or_else(|| {
StoreError::InvalidObject("Truncated type+size varint".to_string())
})?;
let uncompressed_size = checked_decoded_size("uncompressed_size", uncompressed_size)?;
let varint_start = checked_index_add(header_start, type_len, "compressed_size start")?;
let (compressed_size, comp_len) = varint::decode_varint(self.content_from(varint_start)?)
.ok_or_else(truncated_compressed_size_varint)?;
let compressed_size = checked_decoded_size("compressed_size", compressed_size)?;
let mut data_start = checked_index_add(varint_start, comp_len, "entry data start")?;
let base_id = if obj_type == ObjectType::Delta {
let (base_id, base_len) = PackObjectId::decode_tagged(self.content_from(data_start)?)?;
data_start = checked_index_add(data_start, base_len, "delta data start")?;
Some(base_id)
} else {
None
};
let data_end = checked_data_end(data_start, compressed_size, self.content_end)?;
let stored_data = &self.data.as_slice()[data_start..data_end];
let decompressed = if obj_type == ObjectType::Delta {
if has_zstd_magic(stored_data) {
decompress_pack_payload(stored_data, 0)?
} else {
stored_data.to_vec()
}
} else if compressed_size != uncompressed_size {
decompress_pack_payload(stored_data, uncompressed_size)?
} else {
stored_data.to_vec()
};
let (resolved_type, final_data) = if obj_type == ObjectType::Delta {
self.read_delta_record(base_id, &decompressed, uncompressed_size, depth)?
} else {
(obj_type, decompressed)
};
if final_data.len() != uncompressed_size {
return Err(StoreError::InvalidObject(format!(
"Size mismatch: expected {}, got {}",
uncompressed_size,
final_data.len()
)));
}
Ok(PackObjectRecord {
id,
obj_type: resolved_type,
data: final_data,
delta_base: None,
path_hint: None,
})
}
fn read_delta_record(
&self,
base_id: Option<PackObjectId>,
delta: &[u8],
uncompressed_size: usize,
depth: usize,
) -> Result<(ObjectType, Vec<u8>)> {
if depth > MAX_DELTA_CHAIN_DEPTH {
return Err(StoreError::InvalidObject(format!(
"Delta chain depth {} exceeds max {}",
depth, MAX_DELTA_CHAIN_DEPTH
)));
}
if uncompressed_size > MAX_PACK_DELTA_OUTPUT_SIZE {
return Err(StoreError::InvalidObject(format!(
"Delta output size {} exceeds max {}",
uncompressed_size, MAX_PACK_DELTA_OUTPUT_SIZE
)));
}
let base_hash = Self::require_delta_base_hash(base_id)?;
let base_offset = self
.index
.find(&PackObjectId::Hash(base_hash))
.ok_or_else(|| StoreError::NotFound(base_hash.to_string()))?;
let base_offset = checked_index_offset(base_offset)?;
let base_record = self.read_record_at_depth(base_offset, depth + 1)?;
let base_type = base_record.obj_type;
let base_data = base_record.data;
let decoded = crate::delta::DeltaDecoder::decode(&base_data, delta, uncompressed_size)
.map_err(|error| StoreError::InvalidObject(format!("Delta decode failed: {error}")))?;
Ok((base_type, decoded))
}
/// Extracts the content hash a delta record must name as its base.
///
/// # Errors
/// Returns `StoreError::InvalidObject` when the base id is absent or is a
/// `ChangeId` rather than a hash.
fn require_delta_base_hash(base_id: Option<PackObjectId>) -> Result<ContentHash> {
    let reason = match base_id {
        Some(PackObjectId::Hash(hash)) => return Ok(hash),
        Some(PackObjectId::ChangeId(_)) => "pack delta base must be hash-backed content",
        None => "pack object type is Delta but base hash is missing",
    };
    Err(StoreError::InvalidObject(reason.to_string()))
}
/// Returns the pack content from `offset` up to `content_end`.
///
/// `offset == content_end` is allowed and yields an empty slice.
///
/// # Errors
/// Returns `StoreError::InvalidObject` when `offset` lies past `content_end`.
fn content_from(&self, offset: usize) -> Result<&[u8]> {
    let end = self.content_end;
    if offset <= end {
        Ok(&self.data.as_slice()[offset..end])
    } else {
        Err(StoreError::InvalidObject(
            "Entry header out of bounds".to_string(),
        ))
    }
}
}
/// Converts an index offset to `usize`, failing with
/// `StoreError::InvalidObject` if it does not fit on this platform.
fn checked_index_offset(offset: u64) -> Result<usize> {
    match usize::try_from(offset) {
        Ok(converted) => Ok(converted),
        Err(_) => Err(StoreError::InvalidObject(
            "Entry offset exceeds platform limits".to_string(),
        )),
    }
}
/// Converts a decoded size field to `usize`.
///
/// `field` names the header field for the error message. When it is exactly
/// `"uncompressed_size"`, the value is also capped at
/// `super::shared::MAX_PACK_OBJECT_OUTPUT_SIZE`.
///
/// # Errors
/// Returns `StoreError::InvalidObject` when the value does not fit in
/// `usize` or exceeds the output-size cap.
fn checked_decoded_size(field: &str, size: u64) -> Result<usize> {
    let Ok(size) = usize::try_from(size) else {
        return Err(StoreError::InvalidObject(format!(
            "Decoded {field} exceeds platform limits"
        )));
    };
    let limit = super::shared::MAX_PACK_OBJECT_OUTPUT_SIZE;
    let over_limit = field == "uncompressed_size" && size > limit;
    if over_limit {
        return Err(StoreError::InvalidObject(format!(
            "Pack object output size {size} exceeds max {}",
            limit
        )));
    }
    Ok(size)
}
/// Adds `len` to `start`, reporting overflow as `StoreError::InvalidObject`
/// with `field` naming the position being computed.
fn checked_index_add(start: usize, len: usize, field: &str) -> Result<usize> {
    match start.checked_add(len) {
        Some(sum) => Ok(sum),
        None => Err(StoreError::InvalidObject(format!(
            "{field} offset overflows platform limits"
        ))),
    }
}
/// Computes the exclusive end of a payload that starts at `data_start` and
/// spans `compressed_size` bytes.
///
/// # Errors
/// Returns `StoreError::InvalidObject` when the sum overflows `usize` or the
/// end lies past `content_end`.
fn checked_data_end(
    data_start: usize,
    compressed_size: usize,
    content_end: usize,
) -> Result<usize> {
    match data_start.checked_add(compressed_size) {
        None => Err(StoreError::InvalidObject(
            "Entry data range overflows platform limits".to_string(),
        )),
        Some(end) if end > content_end => Err(StoreError::InvalidObject(
            "Entry data out of bounds".to_string(),
        )),
        Some(end) => Ok(end),
    }
}
/// Builds the error used when the compressed-size varint cannot be decoded.
fn truncated_compressed_size_varint() -> StoreError {
    let message = String::from("Truncated compressed_size varint");
    StoreError::InvalidObject(message)
}
/// Checks that the id tagged on a pack record is the id that was looked up.
///
/// # Errors
/// Returns `StoreError::InvalidObject` describing both ids when they differ,
/// which indicates the index routed the lookup to the wrong record.
fn verify_record_id_matches(requested: &PackObjectId, found: &PackObjectId) -> Result<()> {
    if requested != found {
        return Err(StoreError::InvalidObject(format!(
            "pack index routed lookup for {requested:?} to record tagged {found:?} \
             — index is stale or corrupt; the loose-store path will re-promote on \
             the next read"
        )));
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::{PackObjectId, PackReader, verify_record_id_matches};
    use crate::{object::ContentHash, store::StoreError};

    /// Builds a hash-backed id whose 32 bytes are all `fill`.
    fn hash_id(fill: u8) -> PackObjectId {
        PackObjectId::Hash(ContentHash::from_bytes([fill; 32]))
    }

    /// A delta record without a base id must be rejected with the exact message.
    #[test]
    fn test_require_delta_base_hash_rejects_missing_hash() {
        let error =
            PackReader::require_delta_base_hash(None).expect_err("missing hash should fail");
        let is_expected = matches!(
            error,
            StoreError::InvalidObject(message)
                if message == "pack object type is Delta but base hash is missing"
        );
        assert!(is_expected);
    }

    /// Comparing an id with itself must succeed.
    #[test]
    fn verify_record_id_matches_accepts_identical_ids() {
        let id = hash_id(7);
        verify_record_id_matches(&id, &id).expect("matching ids must verify");
    }

    /// Two different hashes must produce the stale-index diagnostic.
    #[test]
    fn verify_record_id_matches_rejects_mismatched_ids() {
        let asked = hash_id(7);
        let found = hash_id(8);
        let error = verify_record_id_matches(&asked, &found)
            .expect_err("mismatched record id must error rather than silently route");
        assert!(
            matches!(&error, StoreError::InvalidObject(message) if message.contains("stale or corrupt")),
            "stale-index mismatch must surface as InvalidObject with the diagnostic phrase, got: {error:?}",
        );
    }
}