1use std::fs;
13use std::io::Read;
14use std::path::Path;
15
16use flate2::read::ZlibDecoder;
17use sha1::{Digest, Sha1};
18
19use crate::error::{GitError, Result};
20use crate::hash::GitHash;
21use crate::object::{ObjectKind, RawObject};
22
23const IDX_V2_MAGIC: [u8; 4] = [0xff, 0x74, 0x4f, 0x63];
25const MAX_OBJECT_SIZE: u64 = 512 * 1024 * 1024;
27const MAX_DELTA_DEPTH: u32 = 64;
29
30pub fn read_packed(objects_dir: &Path, hash: &GitHash) -> Result<Option<RawObject>> {
33 let pack_dir = objects_dir.join("pack");
34 let Ok(entries) = fs::read_dir(&pack_dir) else {
35 return Ok(None);
36 };
37 let mut idx_paths: Vec<_> = entries
38 .flatten()
39 .map(|e| e.path())
40 .filter(|p| p.extension().is_some_and(|x| x == "idx"))
41 .collect();
42 idx_paths.sort(); for idx_path in idx_paths {
44 let idx = fs::read(&idx_path)?;
45 if let Some(offset) = idx_lookup(&idx, hash)? {
46 let pack = fs::read(idx_path.with_extension("pack"))?;
47 let (kind, data) = read_object_at(&pack, offset, objects_dir, 0)?;
48 let verified = verify(hash, kind, &data);
49 return Ok(Some(RawObject {
50 kind,
51 data,
52 verified,
53 }));
54 }
55 }
56 Ok(None)
57}
58
59pub fn list_packed(objects_dir: &Path) -> Result<Vec<GitHash>> {
67 let pack_dir = objects_dir.join("pack");
68 let Ok(entries) = fs::read_dir(&pack_dir) else {
69 return Ok(Vec::new());
70 };
71 let mut idx_paths: Vec<_> = entries
72 .flatten()
73 .map(|e| e.path())
74 .filter(|p| p.extension().is_some_and(|x| x == "idx"))
75 .collect();
76 idx_paths.sort();
77
78 let mut out = Vec::new();
79 for idx_path in idx_paths {
80 let idx = fs::read(&idx_path)?;
81 idx_names(&idx, &mut out)?;
82 }
83 Ok(out)
84}
85
86fn idx_names(idx: &[u8], out: &mut Vec<GitHash>) -> Result<()> {
88 if idx.len() < 8 || idx[0..4] != IDX_V2_MAGIC || be_u32(idx, 4)? != 2 {
89 return Err(GitError::OutOfBounds);
92 }
93 const FANOUT: usize = 8;
94 let count = be_u32(idx, FANOUT + 255 * 4)? as usize;
95 let names = FANOUT + 256 * 4;
96 for i in 0..count {
97 let name = idx
98 .get(names + i * 20..names + i * 20 + 20)
99 .ok_or(GitError::OutOfBounds)?;
100 out.push(GitHash::from_bytes(name)?);
101 }
102 Ok(())
103}
104
105fn idx_lookup(idx: &[u8], hash: &GitHash) -> Result<Option<u64>> {
108 if idx.len() < 8 || idx[0..4] != IDX_V2_MAGIC {
109 return Err(GitError::PackfileUnsupported(*hash));
111 }
112 if be_u32(idx, 4)? != 2 {
113 return Err(GitError::PackfileUnsupported(*hash));
114 }
115 const FANOUT: usize = 8;
116 let want = hash.as_bytes();
117 let first = want[0] as usize;
118 let mut lo = if first == 0 {
119 0
120 } else {
121 be_u32(idx, FANOUT + (first - 1) * 4)?
122 } as usize;
123 let mut hi = be_u32(idx, FANOUT + first * 4)? as usize;
124 let count = be_u32(idx, FANOUT + 255 * 4)? as usize;
125 let names = FANOUT + 256 * 4;
126 while lo < hi {
127 let mid = lo + (hi - lo) / 2;
128 let name = idx
129 .get(names + mid * 20..names + mid * 20 + 20)
130 .ok_or(GitError::OutOfBounds)?;
131 match name.cmp(want.as_slice()) {
132 std::cmp::Ordering::Less => lo = mid + 1,
133 std::cmp::Ordering::Greater => hi = mid,
134 std::cmp::Ordering::Equal => {
135 let offsets = names + count * 20 + count * 4;
137 let raw = be_u32(idx, offsets + mid * 4)?;
138 if raw & 0x8000_0000 == 0 {
139 return Ok(Some(u64::from(raw)));
140 }
141 let large = offsets + count * 4;
143 let slot = (raw & 0x7fff_ffff) as usize;
144 return Ok(Some(be_u64(idx, large + slot * 8)?));
145 }
146 }
147 }
148 Ok(None)
149}
150
151fn read_object_at(
153 pack: &[u8],
154 offset: u64,
155 objects_dir: &Path,
156 depth: u32,
157) -> Result<(ObjectKind, Vec<u8>)> {
158 if depth > MAX_DELTA_DEPTH {
159 return Err(GitError::InvalidObject("delta chain too deep".into()));
160 }
161 let off = usize::try_from(offset).map_err(|_| GitError::OutOfBounds)?;
162 let (type_id, _size, body) = parse_object_header(pack, off)?;
163 match type_id {
164 1..=4 => Ok((kind_from_type(type_id)?, inflate(pack, body)?)),
165 6 => {
166 let (back, delta_start) = parse_ofs_delta(pack, body)?;
168 let base_off = off.checked_sub(back).ok_or(GitError::OutOfBounds)?;
169 let (kind, base) = read_object_at(pack, base_off as u64, objects_dir, depth + 1)?;
170 Ok((kind, apply_delta(&base, &inflate(pack, delta_start)?)?))
171 }
172 7 => {
173 let base_hash =
175 GitHash::from_bytes(pack.get(body..body + 20).ok_or(GitError::OutOfBounds)?)?;
176 let delta = inflate(pack, body + 20)?;
177 let (kind, base) = resolve_base(objects_dir, &base_hash, depth + 1)?;
178 Ok((kind, apply_delta(&base, &delta)?))
179 }
180 other => Err(GitError::InvalidObject(format!(
181 "unknown pack object type {other}"
182 ))),
183 }
184}
185
186fn resolve_base(objects_dir: &Path, hash: &GitHash, depth: u32) -> Result<(ObjectKind, Vec<u8>)> {
188 if depth > MAX_DELTA_DEPTH {
189 return Err(GitError::InvalidObject("delta chain too deep".into()));
190 }
191 if let Some(obj) = read_packed(objects_dir, hash)? {
192 return Ok((obj.kind, obj.data));
193 }
194 let obj = crate::loose::read_loose(objects_dir, hash)?;
195 Ok((obj.kind, obj.data))
196}
197
198fn parse_object_header(pack: &[u8], off: usize) -> Result<(u8, usize, usize)> {
201 let mut pos = off;
202 let b = *pack.get(pos).ok_or(GitError::OutOfBounds)?;
203 pos += 1;
204 let type_id = (b >> 4) & 0x7;
205 let mut size = (b & 0x0f) as usize;
206 let mut shift = 4;
207 let mut more = b & 0x80 != 0;
208 while more {
209 let b = *pack.get(pos).ok_or(GitError::OutOfBounds)?;
210 pos += 1;
211 size |= ((b & 0x7f) as usize)
212 .checked_shl(shift)
213 .ok_or(GitError::OutOfBounds)?;
214 shift += 7;
215 more = b & 0x80 != 0;
216 }
217 Ok((type_id, size, pos))
218}
219
220fn parse_ofs_delta(pack: &[u8], off: usize) -> Result<(usize, usize)> {
222 let mut pos = off;
223 let mut b = *pack.get(pos).ok_or(GitError::OutOfBounds)?;
224 pos += 1;
225 let mut value = (b & 0x7f) as usize;
226 while b & 0x80 != 0 {
227 b = *pack.get(pos).ok_or(GitError::OutOfBounds)?;
228 pos += 1;
229 value = value
230 .checked_add(1)
231 .and_then(|v| v.checked_shl(7))
232 .map(|v| v | (b & 0x7f) as usize)
233 .ok_or(GitError::OutOfBounds)?;
234 }
235 Ok((value, pos))
236}
237
238pub fn apply_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
247 let (_src, mut p) = read_delta_size(delta, 0)?;
248 let (target_size, q) = read_delta_size(delta, p)?;
249 p = q;
250 let mut out = Vec::with_capacity(target_size.min(MAX_OBJECT_SIZE as usize));
251 while p < delta.len() {
252 let cmd = delta[p];
253 p += 1;
254 if cmd & 0x80 != 0 {
255 let mut copy_off = 0usize;
257 for i in 0..4 {
258 if cmd & (1 << i) != 0 {
259 copy_off |= (*delta.get(p).ok_or(GitError::OutOfBounds)? as usize) << (8 * i);
260 p += 1;
261 }
262 }
263 let mut copy_size = 0usize;
264 for i in 0..3 {
265 if cmd & (1 << (4 + i)) != 0 {
266 copy_size |= (*delta.get(p).ok_or(GitError::OutOfBounds)? as usize) << (8 * i);
267 p += 1;
268 }
269 }
270 if copy_size == 0 {
271 copy_size = 0x1_0000;
272 }
273 let end = copy_off
274 .checked_add(copy_size)
275 .ok_or(GitError::OutOfBounds)?;
276 out.extend_from_slice(base.get(copy_off..end).ok_or(GitError::OutOfBounds)?);
277 } else if cmd != 0 {
278 let n = cmd as usize;
280 out.extend_from_slice(delta.get(p..p + n).ok_or(GitError::OutOfBounds)?);
281 p += n;
282 } else {
283 return Err(GitError::InvalidObject("delta opcode 0 is reserved".into()));
284 }
285 }
286 if out.len() != target_size {
287 return Err(GitError::InvalidObject(format!(
288 "delta produced {} bytes, header said {target_size}",
289 out.len()
290 )));
291 }
292 Ok(out)
293}
294
295fn read_delta_size(delta: &[u8], mut p: usize) -> Result<(usize, usize)> {
297 let mut value = 0usize;
298 let mut shift = 0u32;
299 loop {
300 let b = *delta.get(p).ok_or(GitError::OutOfBounds)?;
301 p += 1;
302 value |= ((b & 0x7f) as usize)
303 .checked_shl(shift)
304 .ok_or(GitError::OutOfBounds)?;
305 shift += 7;
306 if b & 0x80 == 0 {
307 break;
308 }
309 }
310 Ok((value, p))
311}
312
313fn inflate(pack: &[u8], start: usize) -> Result<Vec<u8>> {
315 let compressed = pack.get(start..).ok_or(GitError::OutOfBounds)?;
316 let mut out = Vec::new();
317 ZlibDecoder::new(compressed)
318 .take(MAX_OBJECT_SIZE + 1)
319 .read_to_end(&mut out)
320 .map_err(|e| GitError::InvalidObject(format!("pack zlib failed: {e}")))?;
321 if out.len() as u64 > MAX_OBJECT_SIZE {
322 return Err(GitError::DeflateBomb);
323 }
324 Ok(out)
325}
326
327fn kind_from_type(type_id: u8) -> Result<ObjectKind> {
328 match type_id {
329 1 => Ok(ObjectKind::Commit),
330 2 => Ok(ObjectKind::Tree),
331 3 => Ok(ObjectKind::Blob),
332 4 => Ok(ObjectKind::Tag),
333 other => Err(GitError::InvalidObject(format!(
334 "pack object type {other} is not a base type"
335 ))),
336 }
337}
338
339fn kind_str(kind: ObjectKind) -> &'static [u8] {
340 match kind {
341 ObjectKind::Commit => b"commit",
342 ObjectKind::Tree => b"tree",
343 ObjectKind::Blob => b"blob",
344 ObjectKind::Tag => b"tag",
345 }
346}
347
348fn verify(hash: &GitHash, kind: ObjectKind, data: &[u8]) -> bool {
350 let mut h = Sha1::new();
351 h.update(kind_str(kind));
352 h.update(b" ");
353 h.update(data.len().to_string().as_bytes());
354 h.update(b"\0");
355 h.update(data);
356 let digest: [u8; 20] = h.finalize().into();
357 &digest == hash.as_bytes()
358}
359
360fn be_u32(b: &[u8], off: usize) -> Result<u32> {
361 let s = b.get(off..off + 4).ok_or(GitError::OutOfBounds)?;
362 Ok(u32::from_be_bytes([s[0], s[1], s[2], s[3]]))
363}
364
365fn be_u64(b: &[u8], off: usize) -> Result<u64> {
366 let s = b.get(off..off + 8).ok_or(GitError::OutOfBounds)?;
367 let mut a = [0u8; 8];
368 a.copy_from_slice(s);
369 Ok(u64::from_be_bytes(a))
370}