// objects/store/pack/pack_reader.rs
use std::path::Path;
5
6use super::{
7 ObjectType, PackObjectId, PackObjectRecord, decompress_pack_payload, has_zstd_magic,
8 pack_container_spec, pack_index::PackIndex, varint, verify_container,
9};
10use crate::{
11 object::ContentHash,
12 store::{Result, StoreError},
13};
14
15const MAX_PACK_DELTA_OUTPUT_SIZE: usize = crate::delta::MAX_DELTA_OUTPUT_SIZE;
16const MAX_DELTA_CHAIN_DEPTH: usize = 50;
17
18pub struct PackReader {
20 data: Vec<u8>,
21 index: PackIndex,
22 content_end: usize,
23}
24
25impl PackReader {
26 pub fn open(pack_path: &Path, index_path: &Path) -> Result<Self> {
28 let pack_data = std::fs::read(pack_path)?;
29 let index_data = std::fs::read(index_path)?;
30 Self::from_bytes(pack_data, index_data)
31 }
32
33 pub fn from_bytes(pack_data: Vec<u8>, index_data: Vec<u8>) -> Result<Self> {
34 let (_, _, content_end) = verify_container(&pack_data, pack_container_spec())?;
35 let index = PackIndex::from_bytes(&index_data)?;
36 Ok(Self {
37 data: pack_data,
38 index,
39 content_end,
40 })
41 }
42
43 pub fn list_ids(&self) -> Vec<PackObjectId> {
45 self.index.ids()
46 }
47
48 pub fn list_hashes(&self) -> Vec<ContentHash> {
49 self.list_ids()
50 .into_iter()
51 .filter_map(|id| match id {
52 PackObjectId::Hash(hash) => Some(hash),
53 PackObjectId::ChangeId(_) => None,
54 })
55 .collect()
56 }
57
58 pub fn has_object(&self, id: &PackObjectId) -> bool {
59 self.index.find(id).is_some()
60 }
61
62 pub fn get_object(&self, id: &PackObjectId) -> Result<Option<(ObjectType, Vec<u8>)>> {
64 let offset = match self.index.find(id) {
65 Some(offset) => offset,
66 None => return Ok(None),
67 };
68
69 let record = self.read_record_at_depth(offset as usize, 0)?;
70 Ok(Some((record.obj_type, record.data)))
71 }
72
73 pub fn get_hashed_object(&self, hash: &ContentHash) -> Result<Option<(ObjectType, Vec<u8>)>> {
74 self.get_object(&PackObjectId::Hash(*hash))
75 }
76
77 pub fn get_hashed_object_size(&self, hash: &ContentHash) -> Result<Option<u64>> {
87 let id = PackObjectId::Hash(*hash);
88 let Some(offset) = self.index.find(&id) else {
89 return Ok(None);
90 };
91 let offset = offset as usize;
92 if offset >= self.content_end {
93 return Err(StoreError::InvalidObject(
94 "Entry offset out of bounds".to_string(),
95 ));
96 }
97 let (_, id_len) = PackObjectId::decode_tagged(&self.data[offset..])?;
98 let header_start = offset + id_len;
99 let (obj_type, uncompressed_size, _type_len) =
100 super::varint::decode_type_and_size(&self.data[header_start..]).ok_or_else(|| {
101 StoreError::InvalidObject("Truncated type+size varint".to_string())
102 })?;
103 if obj_type == ObjectType::Delta {
104 return Ok(Some(uncompressed_size));
109 }
110 Ok(Some(uncompressed_size))
111 }
112
113 fn read_record_at_depth(&self, offset: usize, depth: usize) -> Result<PackObjectRecord> {
114 if offset >= self.content_end {
115 return Err(StoreError::InvalidObject(
116 "Entry offset out of bounds".to_string(),
117 ));
118 }
119
120 let (id, id_len) = PackObjectId::decode_tagged(&self.data[offset..])?;
121 let header_start = offset + id_len;
122
123 let (obj_type, uncompressed_size, type_len) =
124 varint::decode_type_and_size(&self.data[header_start..]).ok_or_else(|| {
125 StoreError::InvalidObject("Truncated type+size varint".to_string())
126 })?;
127 let uncompressed_size = uncompressed_size as usize;
128
129 let varint_start = header_start + type_len;
130 let (compressed_size, comp_len) = varint::decode_varint(&self.data[varint_start..])
131 .ok_or_else(|| {
132 StoreError::InvalidObject("Truncated compressed_size varint".to_string())
133 })?;
134 let compressed_size = compressed_size as usize;
135
136 let mut data_start = varint_start + comp_len;
137
138 let base_id = if obj_type == ObjectType::Delta {
140 let (base_id, base_len) = PackObjectId::decode_tagged(&self.data[data_start..])?;
141 data_start += base_len;
142 Some(base_id)
143 } else {
144 None
145 };
146
147 let data_end = data_start + compressed_size;
148 if data_end > self.content_end {
149 return Err(StoreError::InvalidObject(
150 "Entry data out of bounds".to_string(),
151 ));
152 }
153
154 let stored_data = &self.data[data_start..data_end];
155
156 let decompressed = if obj_type == ObjectType::Delta {
160 if has_zstd_magic(stored_data) {
161 decompress_pack_payload(stored_data, 0)?
162 } else {
163 stored_data.to_vec()
164 }
165 } else if compressed_size != uncompressed_size {
166 decompress_pack_payload(stored_data, uncompressed_size)?
167 } else {
168 stored_data.to_vec()
169 };
170
171 let (resolved_type, final_data) = if obj_type == ObjectType::Delta {
172 self.read_delta_record(base_id, &decompressed, uncompressed_size, depth)?
173 } else {
174 (obj_type, decompressed)
175 };
176
177 if final_data.len() != uncompressed_size {
178 return Err(StoreError::InvalidObject(format!(
179 "Size mismatch: expected {}, got {}",
180 uncompressed_size,
181 final_data.len()
182 )));
183 }
184
185 Ok(PackObjectRecord {
186 id,
187 obj_type: resolved_type,
188 data: final_data,
189 delta_base: None,
190 path_hint: None,
191 })
192 }
193
194 fn read_delta_record(
195 &self,
196 base_id: Option<PackObjectId>,
197 delta: &[u8],
198 uncompressed_size: usize,
199 depth: usize,
200 ) -> Result<(ObjectType, Vec<u8>)> {
201 if depth > MAX_DELTA_CHAIN_DEPTH {
202 return Err(StoreError::InvalidObject(format!(
203 "Delta chain depth {} exceeds max {}",
204 depth, MAX_DELTA_CHAIN_DEPTH
205 )));
206 }
207
208 if uncompressed_size > MAX_PACK_DELTA_OUTPUT_SIZE {
209 return Err(StoreError::InvalidObject(format!(
210 "Delta output size {} exceeds max {}",
211 uncompressed_size, MAX_PACK_DELTA_OUTPUT_SIZE
212 )));
213 }
214
215 let base_hash = Self::require_delta_base_hash(base_id)?;
216 let base_offset = self
217 .index
218 .find(&PackObjectId::Hash(base_hash))
219 .ok_or_else(|| StoreError::NotFound(base_hash.to_string()))?;
220 let base_record = self.read_record_at_depth(base_offset as usize, depth + 1)?;
221 let base_type = base_record.obj_type;
222 let base_data = base_record.data;
223
224 let decoded = crate::delta::DeltaDecoder::decode(&base_data, delta, uncompressed_size)
225 .map_err(|error| StoreError::InvalidObject(format!("Delta decode failed: {error}")))?;
226
227 Ok((base_type, decoded))
228 }
229
230 fn require_delta_base_hash(base_id: Option<PackObjectId>) -> Result<ContentHash> {
231 match base_id {
232 Some(PackObjectId::Hash(hash)) => Ok(hash),
233 Some(PackObjectId::ChangeId(_)) => Err(StoreError::InvalidObject(
234 "pack delta base must be hash-backed content".into(),
235 )),
236 None => Err(StoreError::InvalidObject(
237 "pack object type is Delta but base hash is missing".into(),
238 )),
239 }
240 }
241}
242
#[cfg(test)]
mod tests {
    use super::PackReader;
    use crate::store::StoreError;

    /// A delta entry with no base id must be rejected with the dedicated
    /// "base hash is missing" message.
    #[test]
    fn test_require_delta_base_hash_rejects_missing_hash() {
        let error =
            PackReader::require_delta_base_hash(None).expect_err("missing hash should fail");

        assert!(
            matches!(error, StoreError::InvalidObject(message) if message == "pack object type is Delta but base hash is missing")
        );
    }
}