Skip to main content

objects/store/pack/
pack_reader.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Pack reader for extracting objects from packfiles.
3
4use std::path::Path;
5
6use super::{
7    ObjectType, PackObjectId, PackObjectRecord, decompress_pack_payload, has_zstd_magic,
8    pack_container_spec, pack_index::PackIndex, varint, verify_container,
9};
10use crate::{
11    object::ContentHash,
12    store::{Result, StoreError},
13};
14
15const MAX_PACK_DELTA_OUTPUT_SIZE: usize = crate::delta::MAX_DELTA_OUTPUT_SIZE;
16const MAX_DELTA_CHAIN_DEPTH: usize = 50;
17
18/// Pack reader for extracting objects.
19pub struct PackReader {
20    data: Vec<u8>,
21    index: PackIndex,
22    content_end: usize,
23}
24
25impl PackReader {
26    /// Open a pack file.
27    pub fn open(pack_path: &Path, index_path: &Path) -> Result<Self> {
28        let pack_data = std::fs::read(pack_path)?;
29        let index_data = std::fs::read(index_path)?;
30        Self::from_bytes(pack_data, index_data)
31    }
32
33    pub fn from_bytes(pack_data: Vec<u8>, index_data: Vec<u8>) -> Result<Self> {
34        let (_, _, content_end) = verify_container(&pack_data, pack_container_spec())?;
35        let index = PackIndex::from_bytes(&index_data)?;
36        Ok(Self {
37            data: pack_data,
38            index,
39            content_end,
40        })
41    }
42
43    /// List all object ids in this pack.
44    pub fn list_ids(&self) -> Vec<PackObjectId> {
45        self.index.ids()
46    }
47
48    pub fn list_hashes(&self) -> Vec<ContentHash> {
49        self.list_ids()
50            .into_iter()
51            .filter_map(|id| match id {
52                PackObjectId::Hash(hash) => Some(hash),
53                PackObjectId::ChangeId(_) => None,
54            })
55            .collect()
56    }
57
58    pub fn has_object(&self, id: &PackObjectId) -> bool {
59        self.index.find(id).is_some()
60    }
61
62    /// Get an object from the pack.
63    pub fn get_object(&self, id: &PackObjectId) -> Result<Option<(ObjectType, Vec<u8>)>> {
64        let offset = match self.index.find(id) {
65            Some(offset) => offset,
66            None => return Ok(None),
67        };
68
69        let record = self.read_record_at_depth(offset as usize, 0)?;
70        Ok(Some((record.obj_type, record.data)))
71    }
72
73    pub fn get_hashed_object(&self, hash: &ContentHash) -> Result<Option<(ObjectType, Vec<u8>)>> {
74        self.get_object(&PackObjectId::Hash(*hash))
75    }
76
77    /// Read just the type+size header for an object without
78    /// decompressing its payload. Returns `Ok(None)` when the object
79    /// isn't in this pack.
80    ///
81    /// For non-delta entries this is one varint decode at the indexed
82    /// offset — much cheaper than `get_object`. Delta entries fall
83    /// back to a full read because their *resolved* size requires
84    /// chasing the base; in practice deltas are rare in the directory
85    /// listing hot path so the fallback is acceptable.
86    pub fn get_hashed_object_size(&self, hash: &ContentHash) -> Result<Option<u64>> {
87        let id = PackObjectId::Hash(*hash);
88        let Some(offset) = self.index.find(&id) else {
89            return Ok(None);
90        };
91        let offset = offset as usize;
92        if offset >= self.content_end {
93            return Err(StoreError::InvalidObject(
94                "Entry offset out of bounds".to_string(),
95            ));
96        }
97        let (_, id_len) = PackObjectId::decode_tagged(&self.data[offset..])?;
98        let header_start = offset + id_len;
99        let (obj_type, uncompressed_size, _type_len) =
100            super::varint::decode_type_and_size(&self.data[header_start..]).ok_or_else(|| {
101                StoreError::InvalidObject("Truncated type+size varint".to_string())
102            })?;
103        if obj_type == ObjectType::Delta {
104            // Delta entries record the *resolved* output size in the
105            // type+size varint already (see `read_record_at_depth`'s
106            // size-mismatch check), so we can still return without
107            // decompressing the payload.
108            return Ok(Some(uncompressed_size));
109        }
110        Ok(Some(uncompressed_size))
111    }
112
113    fn read_record_at_depth(&self, offset: usize, depth: usize) -> Result<PackObjectRecord> {
114        if offset >= self.content_end {
115            return Err(StoreError::InvalidObject(
116                "Entry offset out of bounds".to_string(),
117            ));
118        }
119
120        let (id, id_len) = PackObjectId::decode_tagged(&self.data[offset..])?;
121        let header_start = offset + id_len;
122
123        let (obj_type, uncompressed_size, type_len) =
124            varint::decode_type_and_size(&self.data[header_start..]).ok_or_else(|| {
125                StoreError::InvalidObject("Truncated type+size varint".to_string())
126            })?;
127        let uncompressed_size = uncompressed_size as usize;
128
129        let varint_start = header_start + type_len;
130        let (compressed_size, comp_len) = varint::decode_varint(&self.data[varint_start..])
131            .ok_or_else(|| {
132                StoreError::InvalidObject("Truncated compressed_size varint".to_string())
133            })?;
134        let compressed_size = compressed_size as usize;
135
136        let mut data_start = varint_start + comp_len;
137
138        // Delta entries carry a tagged base id in pack v2.
139        let base_id = if obj_type == ObjectType::Delta {
140            let (base_id, base_len) = PackObjectId::decode_tagged(&self.data[data_start..])?;
141            data_start += base_len;
142            Some(base_id)
143        } else {
144            None
145        };
146
147        let data_end = data_start + compressed_size;
148        if data_end > self.content_end {
149            return Err(StoreError::InvalidObject(
150                "Entry data out of bounds".to_string(),
151            ));
152        }
153
154        let stored_data = &self.data[data_start..data_end];
155
156        // Raw zstd (no wrapper). For non-delta entries, decompress
157        // if sizes differ. For delta entries, the stored data IS the delta
158        // payload (possibly zstd-compressed); check for zstd magic.
159        let decompressed = if obj_type == ObjectType::Delta {
160            if has_zstd_magic(stored_data) {
161                decompress_pack_payload(stored_data, 0)?
162            } else {
163                stored_data.to_vec()
164            }
165        } else if compressed_size != uncompressed_size {
166            decompress_pack_payload(stored_data, uncompressed_size)?
167        } else {
168            stored_data.to_vec()
169        };
170
171        let (resolved_type, final_data) = if obj_type == ObjectType::Delta {
172            self.read_delta_record(base_id, &decompressed, uncompressed_size, depth)?
173        } else {
174            (obj_type, decompressed)
175        };
176
177        if final_data.len() != uncompressed_size {
178            return Err(StoreError::InvalidObject(format!(
179                "Size mismatch: expected {}, got {}",
180                uncompressed_size,
181                final_data.len()
182            )));
183        }
184
185        Ok(PackObjectRecord {
186            id,
187            obj_type: resolved_type,
188            data: final_data,
189            delta_base: None,
190            path_hint: None,
191        })
192    }
193
194    fn read_delta_record(
195        &self,
196        base_id: Option<PackObjectId>,
197        delta: &[u8],
198        uncompressed_size: usize,
199        depth: usize,
200    ) -> Result<(ObjectType, Vec<u8>)> {
201        if depth > MAX_DELTA_CHAIN_DEPTH {
202            return Err(StoreError::InvalidObject(format!(
203                "Delta chain depth {} exceeds max {}",
204                depth, MAX_DELTA_CHAIN_DEPTH
205            )));
206        }
207
208        if uncompressed_size > MAX_PACK_DELTA_OUTPUT_SIZE {
209            return Err(StoreError::InvalidObject(format!(
210                "Delta output size {} exceeds max {}",
211                uncompressed_size, MAX_PACK_DELTA_OUTPUT_SIZE
212            )));
213        }
214
215        let base_hash = Self::require_delta_base_hash(base_id)?;
216        let base_offset = self
217            .index
218            .find(&PackObjectId::Hash(base_hash))
219            .ok_or_else(|| StoreError::NotFound(base_hash.to_string()))?;
220        let base_record = self.read_record_at_depth(base_offset as usize, depth + 1)?;
221        let base_type = base_record.obj_type;
222        let base_data = base_record.data;
223
224        let decoded = crate::delta::DeltaDecoder::decode(&base_data, delta, uncompressed_size)
225            .map_err(|error| StoreError::InvalidObject(format!("Delta decode failed: {error}")))?;
226
227        Ok((base_type, decoded))
228    }
229
230    fn require_delta_base_hash(base_id: Option<PackObjectId>) -> Result<ContentHash> {
231        match base_id {
232            Some(PackObjectId::Hash(hash)) => Ok(hash),
233            Some(PackObjectId::ChangeId(_)) => Err(StoreError::InvalidObject(
234                "pack delta base must be hash-backed content".into(),
235            )),
236            None => Err(StoreError::InvalidObject(
237                "pack object type is Delta but base hash is missing".into(),
238            )),
239        }
240    }
241}
242
#[cfg(test)]
mod tests {
    use super::PackReader;
    use crate::store::StoreError;

    /// A delta entry without any base id must be rejected with the exact
    /// "base hash is missing" diagnostic.
    #[test]
    fn test_require_delta_base_hash_rejects_missing_hash() {
        match PackReader::require_delta_base_hash(None) {
            Err(StoreError::InvalidObject(message)) => {
                assert!(message == "pack object type is Delta but base hash is missing");
            }
            Err(_) => panic!("expected StoreError::InvalidObject"),
            Ok(_) => panic!("missing hash should fail"),
        }
    }
}