1use std::path::Path;
5
6use bytes::Bytes;
7use heddle_format::delta::{DeltaDecoder, MAX_DELTA_OUTPUT_SIZE};
8
9use super::{
10 ObjectType, PackObjectId, PackObjectRecord, decompress_pack_payload, has_zstd_magic,
11 pack_container_spec, pack_index::PackIndex, varint, verify_container,
12};
13use crate::{
14 object::ContentHash,
15 store::{Result, StoreError},
16};
17
/// Largest resolved size accepted for a delta record; `read_delta_record`
/// rejects any delta whose declared output size exceeds it. Aliases the delta
/// codec's own `MAX_DELTA_OUTPUT_SIZE`.
const MAX_PACK_DELTA_OUTPUT_SIZE: usize = MAX_DELTA_OUTPUT_SIZE;
/// Highest recursion depth at which `read_delta_record` still resolves a delta
/// base; a deeper chain is reported as an invalid object.
const MAX_DELTA_CHAIN_DEPTH: usize = 50;
20
21enum PackData<'a> {
30 Borrowed(&'a [u8]),
31 Owned(Bytes),
32}
33
34impl<'a> PackData<'a> {
35 fn as_slice(&self) -> &[u8] {
36 match self {
37 Self::Borrowed(data) => data,
38 Self::Owned(data) => data,
39 }
40 }
41
42 fn slice(&self, range: std::ops::Range<usize>) -> Bytes {
43 match self {
44 Self::Borrowed(data) => Bytes::copy_from_slice(&data[range]),
45 Self::Owned(data) => data.slice(range),
46 }
47 }
48}
49
50pub struct PackReader<'a> {
51 data: PackData<'a>,
52 index: PackIndex,
53 content_end: usize,
54}
55
56impl PackReader<'static> {
57 pub fn open(pack_path: &Path, index_path: &Path) -> Result<Self> {
61 let pack_bytes = crate::store::fs::read_file_bytes_for_pack(pack_path)?;
62 let index_data = std::fs::read(index_path)?;
63 let (_, _, content_end) = verify_container(&pack_bytes, pack_container_spec())?;
64 let index = PackIndex::from_bytes(&index_data)?;
65 Ok(Self {
66 data: PackData::Owned(pack_bytes),
67 index,
68 content_end,
69 })
70 }
71
72 pub fn from_bytes(pack_data: impl Into<Bytes>, index_data: impl AsRef<[u8]>) -> Result<Self> {
73 let pack_data = pack_data.into();
74 let (_, _, content_end) = verify_container(&pack_data, pack_container_spec())?;
75 let index = PackIndex::from_bytes(index_data.as_ref())?;
76 Ok(Self {
77 data: PackData::Owned(pack_data),
78 index,
79 content_end,
80 })
81 }
82}
83
84impl<'a> PackReader<'a> {
85 pub fn from_slice(pack_data: &'a [u8], index_data: impl AsRef<[u8]>) -> Result<Self> {
86 let (_, _, content_end) = verify_container(pack_data, pack_container_spec())?;
87 let index = PackIndex::from_bytes(index_data.as_ref())?;
88 Ok(Self {
89 data: PackData::Borrowed(pack_data),
90 index,
91 content_end,
92 })
93 }
94
95 pub fn list_ids(&self) -> Vec<PackObjectId> {
97 self.index.ids()
98 }
99
100 pub fn list_hashes(&self) -> Vec<ContentHash> {
101 self.list_ids()
102 .into_iter()
103 .filter_map(|id| match id {
104 PackObjectId::Hash(hash) => Some(hash),
105 PackObjectId::ChangeId(_) => None,
106 })
107 .collect()
108 }
109
110 pub fn has_object(&self, id: &PackObjectId) -> bool {
111 self.index.find(id).is_some()
112 }
113
114 pub fn get_object(&self, id: &PackObjectId) -> Result<Option<(ObjectType, Vec<u8>)>> {
127 let offset = match self.index.find(id) {
128 Some(offset) => checked_index_offset(offset)?,
129 None => return Ok(None),
130 };
131
132 let record = self.read_record_at_depth(offset, 0)?;
133 verify_record_id_matches(id, &record.id)?;
134 Ok(Some((record.obj_type, record.data)))
135 }
136
137 pub fn get_hashed_object(&self, hash: &ContentHash) -> Result<Option<(ObjectType, Vec<u8>)>> {
138 self.get_object(&PackObjectId::Hash(*hash))
139 }
140
141 pub fn get_object_bytes(&self, id: &PackObjectId) -> Result<Option<(ObjectType, Bytes)>> {
151 let Some(offset) = self.index.find(id) else {
152 return Ok(None);
153 };
154 let offset = checked_index_offset(offset)?;
155 if offset >= self.content_end {
156 return Err(StoreError::InvalidObject(
157 "Entry offset out of bounds".to_string(),
158 ));
159 }
160
161 let (record_id, id_len) = PackObjectId::decode_tagged(self.content_from(offset)?)?;
166 verify_record_id_matches(id, &record_id)?;
167 let header_start = checked_index_add(offset, id_len, "record header start")?;
168 let (obj_type, uncompressed_size, type_len) =
169 varint::decode_type_and_size(self.content_from(header_start)?).ok_or_else(|| {
170 StoreError::InvalidObject("Truncated type+size varint".to_string())
171 })?;
172 let uncompressed_size = checked_decoded_size("uncompressed_size", uncompressed_size)?;
173 let varint_start = checked_index_add(header_start, type_len, "compressed_size start")?;
174 let (compressed_size, comp_len) = varint::decode_varint(self.content_from(varint_start)?)
175 .ok_or_else(truncated_compressed_size_varint)?;
176 let compressed_size = checked_decoded_size("compressed_size", compressed_size)?;
177
178 if obj_type != ObjectType::Delta && compressed_size == uncompressed_size {
182 let data_start = checked_index_add(varint_start, comp_len, "entry data start")?;
183 let data_end = checked_data_end(data_start, compressed_size, self.content_end)?;
184 return Ok(Some((obj_type, self.data.slice(data_start..data_end))));
185 }
186
187 let record = self.read_record_at_depth(offset, 0)?;
191 Ok(Some((record.obj_type, Bytes::from(record.data))))
192 }
193
194 pub fn get_hashed_object_bytes(
195 &self,
196 hash: &ContentHash,
197 ) -> Result<Option<(ObjectType, Bytes)>> {
198 self.get_object_bytes(&PackObjectId::Hash(*hash))
199 }
200
201 pub fn get_hashed_object_size(&self, hash: &ContentHash) -> Result<Option<u64>> {
211 let id = PackObjectId::Hash(*hash);
212 let Some(offset) = self.index.find(&id) else {
213 return Ok(None);
214 };
215 let offset = checked_index_offset(offset)?;
216 if offset >= self.content_end {
217 return Err(StoreError::InvalidObject(
218 "Entry offset out of bounds".to_string(),
219 ));
220 }
221 let (record_id, id_len) = PackObjectId::decode_tagged(self.content_from(offset)?)?;
222 verify_record_id_matches(&id, &record_id)?;
223 let header_start = checked_index_add(offset, id_len, "record header start")?;
224 let (obj_type, uncompressed_size, _type_len) = super::varint::decode_type_and_size(
225 self.content_from(header_start)?,
226 )
227 .ok_or_else(|| StoreError::InvalidObject("Truncated type+size varint".to_string()))?;
228 if obj_type == ObjectType::Delta {
229 return Ok(Some(uncompressed_size));
234 }
235 Ok(Some(uncompressed_size))
236 }
237
238 fn read_record_at_depth(&self, offset: usize, depth: usize) -> Result<PackObjectRecord> {
239 if offset >= self.content_end {
240 return Err(StoreError::InvalidObject(
241 "Entry offset out of bounds".to_string(),
242 ));
243 }
244
245 let (id, id_len) = PackObjectId::decode_tagged(self.content_from(offset)?)?;
246 let header_start = checked_index_add(offset, id_len, "record header start")?;
247
248 let (obj_type, uncompressed_size, type_len) =
249 varint::decode_type_and_size(self.content_from(header_start)?).ok_or_else(|| {
250 StoreError::InvalidObject("Truncated type+size varint".to_string())
251 })?;
252 let uncompressed_size = checked_decoded_size("uncompressed_size", uncompressed_size)?;
253
254 let varint_start = checked_index_add(header_start, type_len, "compressed_size start")?;
255 let (compressed_size, comp_len) = varint::decode_varint(self.content_from(varint_start)?)
256 .ok_or_else(truncated_compressed_size_varint)?;
257 let compressed_size = checked_decoded_size("compressed_size", compressed_size)?;
258
259 let mut data_start = checked_index_add(varint_start, comp_len, "entry data start")?;
260
261 let base_id = if obj_type == ObjectType::Delta {
263 let (base_id, base_len) = PackObjectId::decode_tagged(self.content_from(data_start)?)?;
264 data_start = checked_index_add(data_start, base_len, "delta data start")?;
265 Some(base_id)
266 } else {
267 None
268 };
269
270 let data_end = checked_data_end(data_start, compressed_size, self.content_end)?;
271
272 let stored_data = &self.data.as_slice()[data_start..data_end];
273
274 let decompressed = if obj_type == ObjectType::Delta {
278 if has_zstd_magic(stored_data) {
279 decompress_pack_payload(stored_data, 0)?
280 } else {
281 stored_data.to_vec()
282 }
283 } else if compressed_size != uncompressed_size {
284 decompress_pack_payload(stored_data, uncompressed_size)?
285 } else {
286 stored_data.to_vec()
287 };
288
289 let (resolved_type, final_data) = if obj_type == ObjectType::Delta {
290 self.read_delta_record(base_id, &decompressed, uncompressed_size, depth)?
291 } else {
292 (obj_type, decompressed)
293 };
294
295 if final_data.len() != uncompressed_size {
296 return Err(StoreError::InvalidObject(format!(
297 "Size mismatch: expected {}, got {}",
298 uncompressed_size,
299 final_data.len()
300 )));
301 }
302
303 Ok(PackObjectRecord {
304 id,
305 obj_type: resolved_type,
306 data: final_data,
307 delta_base: None,
308 path_hint: None,
309 })
310 }
311
312 fn read_delta_record(
313 &self,
314 base_id: Option<PackObjectId>,
315 delta: &[u8],
316 uncompressed_size: usize,
317 depth: usize,
318 ) -> Result<(ObjectType, Vec<u8>)> {
319 if depth > MAX_DELTA_CHAIN_DEPTH {
320 return Err(StoreError::InvalidObject(format!(
321 "Delta chain depth {} exceeds max {}",
322 depth, MAX_DELTA_CHAIN_DEPTH
323 )));
324 }
325
326 if uncompressed_size > MAX_PACK_DELTA_OUTPUT_SIZE {
327 return Err(StoreError::InvalidObject(format!(
328 "Delta output size {} exceeds max {}",
329 uncompressed_size, MAX_PACK_DELTA_OUTPUT_SIZE
330 )));
331 }
332
333 let base_hash = Self::require_delta_base_hash(base_id)?;
334 let base_offset = self
335 .index
336 .find(&PackObjectId::Hash(base_hash))
337 .ok_or_else(|| StoreError::NotFound(base_hash.to_string()))?;
338 let base_offset = checked_index_offset(base_offset)?;
339 let base_record = self.read_record_at_depth(base_offset, depth + 1)?;
340 let base_type = base_record.obj_type;
341 let base_data = base_record.data;
342
343 let decoded = DeltaDecoder::decode(&base_data, delta, uncompressed_size)
344 .map_err(|error| StoreError::InvalidObject(format!("Delta decode failed: {error}")))?;
345
346 Ok((base_type, decoded))
347 }
348
349 fn require_delta_base_hash(base_id: Option<PackObjectId>) -> Result<ContentHash> {
350 match base_id {
351 Some(PackObjectId::Hash(hash)) => Ok(hash),
352 Some(PackObjectId::ChangeId(_)) => Err(StoreError::InvalidObject(
353 "pack delta base must be hash-backed content".into(),
354 )),
355 None => Err(StoreError::InvalidObject(
356 "pack object type is Delta but base hash is missing".into(),
357 )),
358 }
359 }
360
361 fn content_from(&self, offset: usize) -> Result<&[u8]> {
362 if offset > self.content_end {
363 return Err(StoreError::InvalidObject(
364 "Entry header out of bounds".to_string(),
365 ));
366 }
367 Ok(&self.data.as_slice()[offset..self.content_end])
368 }
369}
370
371fn checked_index_offset(offset: u64) -> Result<usize> {
372 usize::try_from(offset)
373 .map_err(|_| StoreError::InvalidObject("Entry offset exceeds platform limits".to_string()))
374}
375
376fn checked_decoded_size(field: &str, size: u64) -> Result<usize> {
377 let size = usize::try_from(size).map_err(|_| {
378 StoreError::InvalidObject(format!("Decoded {field} exceeds platform limits"))
379 })?;
380 if field == "uncompressed_size" && size > super::shared::MAX_PACK_OBJECT_OUTPUT_SIZE {
381 return Err(StoreError::InvalidObject(format!(
382 "Pack object output size {size} exceeds max {}",
383 super::shared::MAX_PACK_OBJECT_OUTPUT_SIZE
384 )));
385 }
386 Ok(size)
387}
388
389fn checked_index_add(start: usize, len: usize, field: &str) -> Result<usize> {
390 start.checked_add(len).ok_or_else(|| {
391 StoreError::InvalidObject(format!("{field} offset overflows platform limits"))
392 })
393}
394
395fn checked_data_end(
396 data_start: usize,
397 compressed_size: usize,
398 content_end: usize,
399) -> Result<usize> {
400 let data_end = data_start.checked_add(compressed_size).ok_or_else(|| {
401 StoreError::InvalidObject("Entry data range overflows platform limits".to_string())
402 })?;
403 if data_end > content_end {
404 return Err(StoreError::InvalidObject(
405 "Entry data out of bounds".to_string(),
406 ));
407 }
408 Ok(data_end)
409}
410
411fn truncated_compressed_size_varint() -> StoreError {
412 StoreError::InvalidObject("Truncated compressed_size varint".to_string())
413}
414
415fn verify_record_id_matches(requested: &PackObjectId, found: &PackObjectId) -> Result<()> {
424 if requested == found {
425 return Ok(());
426 }
427 Err(StoreError::InvalidObject(format!(
428 "pack index routed lookup for {requested:?} to record tagged {found:?} \
429 — index is stale or corrupt; the loose-store path will re-promote on \
430 the next read"
431 )))
432}
433
#[cfg(test)]
mod tests {
    use super::{PackObjectId, PackReader, verify_record_id_matches};
    use crate::{object::ContentHash, store::StoreError};

    /// Builds a hash-backed id whose 32 bytes are all `fill`.
    fn hash_id(fill: u8) -> PackObjectId {
        PackObjectId::Hash(ContentHash::from_bytes([fill; 32]))
    }

    /// A delta record without a base id must be rejected with the exact
    /// diagnostic message.
    #[test]
    fn test_require_delta_base_hash_rejects_missing_hash() {
        let outcome = PackReader::require_delta_base_hash(None);
        let error = outcome.expect_err("missing hash should fail");

        let is_expected = match error {
            StoreError::InvalidObject(message) => {
                message == "pack object type is Delta but base hash is missing"
            }
            _ => false,
        };
        assert!(is_expected);
    }

    /// Comparing an id against itself must succeed.
    #[test]
    fn verify_record_id_matches_accepts_identical_ids() {
        let id = hash_id(7);
        verify_record_id_matches(&id, &id).expect("matching ids must verify");
    }

    /// Two different hashes must surface the stale-index diagnostic.
    #[test]
    fn verify_record_id_matches_rejects_mismatched_ids() {
        let asked = hash_id(7);
        let found = hash_id(8);
        let error = verify_record_id_matches(&asked, &found)
            .expect_err("mismatched record id must error rather than silently route");

        let has_diagnostic = match &error {
            StoreError::InvalidObject(message) => message.contains("stale or corrupt"),
            _ => false,
        };
        assert!(
            has_diagnostic,
            "stale-index mismatch must surface as InvalidObject with the diagnostic phrase, got: {error:?}",
        );
    }
}