1use std::ops::Range;
2
3use gix_features::zlib;
4use smallvec::SmallVec;
5
6use crate::{
7 cache, data,
8 data::{File, delta, file::decode::Error},
9};
10
/// The answer of the `resolve` callback of [`File::decode_entry()`] when asked for the base of a ref-delta.
#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ResolvedBase {
    /// The base is the given entry, and delta resolution continues with it as the next link of the chain.
    InPack(data::Entry),
    /// The base is not an entry to continue with. Delta resolution stops following the chain, treats the
    /// first `end` bytes of the output buffer that was handed to the callback as the base object's data,
    /// and reports `kind` as the kind of the resulting object.
    // NOTE(review): presumably the callback wrote the base object into that buffer itself — confirm with callers.
    #[allow(missing_docs)]
    OutOfPack { kind: gix_object::Kind, end: usize },
}
22
/// Bookkeeping for one link of a delta chain while it is being resolved.
#[derive(Debug)]
struct Delta {
    /// The range of the delta instructions, i.e. the decompressed delta without its two header sizes,
    /// relative to the start of the region that holds the decompressed data of all deltas of the chain.
    data: Range<usize>,
    /// The size of the base object as decoded from the delta header, `0` until the header was read.
    base_size: usize,
    /// The size of the object produced by this delta as decoded from its header, `0` until the header was read.
    result_size: usize,

    /// The size of the decompressed delta, header included, as announced by the pack entry.
    decompressed_size: usize,
    /// The offset into the pack data at which the compressed delta begins.
    data_offset: data::Offset,
}
32
/// Information about an object that was decoded from a pack entry, as returned by [`File::decode_entry()`].
#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Outcome {
    /// The kind of the decoded object.
    pub kind: gix_object::Kind,
    /// The number of deltas that were applied to produce the object, `0` if none were applied.
    pub num_deltas: u32,
    /// The decompressed size as announced by the header of the decoded entry itself.
    /// For delta entries this is the size of the entry's delta data, not the size of the resulting object.
    pub decompressed_size: u64,
    /// The number of compressed bytes that were consumed from the pack for the decoded entry itself.
    pub compressed_size: usize,
    /// The size of the decoded object: the result size of the last applied delta, or the entry's
    /// decompressed size if no delta was applied.
    pub object_size: u64,
}
53
54impl Outcome {
55 pub(crate) fn default_from_kind(kind: gix_object::Kind) -> Self {
56 Self {
57 kind,
58 num_deltas: 0,
59 decompressed_size: 0,
60 compressed_size: 0,
61 object_size: 0,
62 }
63 }
64 fn from_object_entry(kind: gix_object::Kind, entry: &data::Entry, compressed_size: usize) -> Self {
65 Self {
66 kind,
67 num_deltas: 0,
68 decompressed_size: entry.decompressed_size,
69 compressed_size,
70 object_size: entry.decompressed_size,
71 }
72 }
73}
74
75impl<T> File<T>
77where
78 T: crate::FileData,
79{
80 fn decoded_object_size(&self, size: u64) -> Result<usize, Error> {
81 decoded_object_size(size, self.alloc_limit_bytes)
82 }
83
84 pub fn decompress_entry(
90 &self,
91 entry: &data::Entry,
92 inflate: &mut zlib::Inflate,
93 out: &mut [u8],
94 ) -> Result<usize, Error> {
95 let size: usize = entry.decompressed_size.try_into().map_err(|_| Error::OutOfMemory)?;
96 if out.len() < size {
97 return Err(Error::OutOfMemory);
98 }
99 self.decompress_entry_from_data_offset(entry.data_offset, inflate, &mut out[..size])
100 }
101
102 pub fn entry(&self, offset: data::Offset) -> Result<data::Entry, data::entry::decode::Error> {
106 let pack_offset: usize = offset.try_into().expect("offset representable by machine");
107 if pack_offset > self.data.len() {
108 return Err(data::entry::decode::Error::Corrupt {
109 message: "an entry offset pointing beyond pack data",
110 });
111 }
112
113 let object_data = &self.data[pack_offset..];
114 data::Entry::from_bytes(object_data, offset, self.hash_len)
115 }
116
117 pub(crate) fn decompress_entry_from_data_offset(
123 &self,
124 data_offset: data::Offset,
125 inflate: &mut zlib::Inflate,
126 out: &mut [u8],
127 ) -> Result<usize, Error> {
128 let (consumed_in, _consumed_out) =
129 self.decompress_complete_entry_from_data_offset(data_offset, inflate, out)?;
130 Ok(consumed_in)
131 }
132
133 pub(crate) fn decompress_complete_entry_from_data_offset(
141 &self,
142 data_offset: data::Offset,
143 inflate: &mut zlib::Inflate,
144 out: &mut [u8],
145 ) -> Result<(usize, usize), Error> {
146 let (status, consumed_in, consumed_out) =
147 self.decompress_entry_from_data_offset_unchecked(data_offset, inflate, out)?;
148 if status != zlib::Status::StreamEnd || consumed_out != out.len() {
149 return Err(data::entry::decode::Error::Corrupt {
150 message: "pack entry decompressed size does not match entry header",
151 }
152 .into());
153 }
154 Ok((consumed_in, consumed_out))
155 }
156
157 pub(crate) fn decompress_entry_from_data_offset_unchecked(
162 &self,
163 data_offset: data::Offset,
164 inflate: &mut zlib::Inflate,
165 out: &mut [u8],
166 ) -> Result<(zlib::Status, usize, usize), Error> {
167 let offset: usize = data_offset.try_into().expect("offset representable by machine");
168 if offset >= self.data.len() {
169 return Err(data::entry::decode::Error::Corrupt {
170 message: "an entry data offset pointing beyond pack data",
171 }
172 .into());
173 }
174
175 inflate.reset();
176 inflate.once(&self.data[offset..], out).map_err(Into::into)
177 }
178
179 pub fn decode_entry(
191 &self,
192 entry: data::Entry,
193 out: &mut Vec<u8>,
194 inflate: &mut zlib::Inflate,
195 resolve: &dyn Fn(&gix_hash::oid, &mut Vec<u8>) -> Option<ResolvedBase>,
196 delta_cache: &mut dyn cache::DecodeEntry,
197 ) -> Result<Outcome, Error> {
198 use crate::data::entry::Header::*;
199 match entry.header {
200 Tree | Blob | Commit | Tag => {
201 let size = self.decoded_object_size(entry.decompressed_size)?;
202 if let Some(additional) = size.checked_sub(out.len()) {
203 out.try_reserve(additional)?;
204 }
205 out.resize(size, 0);
206 self.decompress_entry(&entry, inflate, out.as_mut_slice())
207 .map(|consumed_input| {
208 Outcome::from_object_entry(
209 entry.header.as_kind().expect("a non-delta entry"),
210 &entry,
211 consumed_input,
212 )
213 })
214 }
215 OfsDelta { .. } | RefDelta { .. } => self.resolve_deltas(entry, resolve, inflate, out, delta_cache),
216 }
217 }
218
219 fn resolve_deltas(
223 &self,
224 last: data::Entry,
225 resolve: &dyn Fn(&gix_hash::oid, &mut Vec<u8>) -> Option<ResolvedBase>,
226 inflate: &mut zlib::Inflate,
227 out: &mut Vec<u8>,
228 cache: &mut dyn cache::DecodeEntry,
229 ) -> Result<Outcome, Error> {
230 let mut chain = SmallVec::<[Delta; 10]>::default();
232 let first_entry = last.clone();
233 let mut cursor = last;
234 let mut base_buffer_size: Option<usize> = None;
235 let mut object_kind: Option<gix_object::Kind> = None;
236 let mut consumed_input: Option<usize> = None;
237
238 let mut total_delta_data_size: u64 = 0;
240 while cursor.header.is_delta() {
241 if let Some((kind, packed_size)) = cache.get(self.id, cursor.data_offset, out) {
242 base_buffer_size = Some(out.len());
243 object_kind = Some(kind);
244 if total_delta_data_size == 0 {
247 consumed_input = Some(packed_size);
248 }
249 break;
250 }
251 total_delta_data_size = total_delta_data_size
254 .checked_add(cursor.decompressed_size)
255 .ok_or(Error::OutOfMemory)?;
256 if self
257 .alloc_limit_bytes
258 .is_some_and(|limit| total_delta_data_size > limit as u64)
259 {
260 return Err(Error::OutOfMemory);
261 }
262 let decompressed_size = self.decoded_object_size(cursor.decompressed_size)?;
263 chain.push(Delta {
264 data: Range {
265 start: 0,
266 end: decompressed_size,
267 },
268 base_size: 0,
269 result_size: 0,
270 decompressed_size,
271 data_offset: cursor.data_offset,
272 });
273 use crate::data::entry::Header;
274 cursor = match cursor.header {
275 Header::OfsDelta { base_distance } => {
276 self.entry(cursor.checked_base_pack_offset(base_distance).ok_or(
277 crate::data::entry::decode::Error::Corrupt {
278 message: "an ofs-delta base distance pointing before pack start",
279 },
280 )?)?
281 }
282 Header::RefDelta { base_id } => match resolve(base_id.as_ref(), out) {
283 Some(ResolvedBase::InPack(entry)) => entry,
284 Some(ResolvedBase::OutOfPack { end, kind }) => {
285 base_buffer_size = Some(end);
286 object_kind = Some(kind);
287 break;
288 }
289 None => return Err(Error::DeltaBaseUnresolved(base_id)),
290 },
291 _ => unreachable!("cursor.is_delta() only allows deltas here"),
292 };
293 }
294
295 if chain.is_empty() {
298 return Ok(Outcome::from_object_entry(
299 object_kind.expect("object kind as set by cache"),
300 &first_entry,
301 consumed_input.expect("consumed bytes as set by cache"),
302 ));
303 }
304
305 let total_delta_data_size: usize = total_delta_data_size.try_into().map_err(|_| Error::OutOfMemory)?;
309
310 let chain_len = chain.len();
311 let (first_buffer_end, second_buffer_end) = {
312 let delta_start = base_buffer_size.unwrap_or(0);
313
314 let delta_range = Range {
315 start: delta_start,
316 end: delta_start
317 .checked_add(total_delta_data_size)
318 .ok_or(Error::OutOfMemory)?,
319 };
320 out.try_reserve(delta_range.end.saturating_sub(out.len()))?;
321 out.resize(delta_range.end, 0);
322
323 let mut instructions = &mut out[delta_range.clone()];
324 let mut relative_delta_start = 0;
325 let mut biggest_result_size = 0;
326 for (delta_idx, delta) in chain.iter_mut().rev().enumerate() {
327 let (consumed_from_data_offset, consumed_out) = self.decompress_complete_entry_from_data_offset(
328 delta.data_offset,
329 inflate,
330 &mut instructions[..delta.decompressed_size],
331 )?;
332 let is_last_delta_to_be_applied = delta_idx + 1 == chain_len;
333 if is_last_delta_to_be_applied {
334 consumed_input = Some(consumed_from_data_offset);
335 }
336
337 let current_delta = &instructions[..consumed_out];
338 let (base_size, offset) = delta::decode_header_size(current_delta)?;
339 let mut bytes_consumed_by_header = offset;
340 biggest_result_size = biggest_result_size.max(base_size);
341 delta.base_size = self.decoded_object_size(base_size)?;
342
343 let (result_size, offset) = delta::decode_header_size(¤t_delta[offset..])?;
344 bytes_consumed_by_header += offset;
345 biggest_result_size = biggest_result_size.max(result_size);
346 delta.result_size = self.decoded_object_size(result_size)?;
347
348 delta.data.start = relative_delta_start + bytes_consumed_by_header;
350 delta.data.end = relative_delta_start + consumed_out;
351 relative_delta_start += delta.decompressed_size;
352
353 instructions = &mut instructions[delta.decompressed_size..];
354 }
355
356 if base_buffer_size.is_none() {
360 biggest_result_size = biggest_result_size.max(cursor.decompressed_size);
361 }
362 let biggest_result_size = self.decoded_object_size(biggest_result_size)?;
363 let first_buffer_size = biggest_result_size;
364 let second_buffer_size = first_buffer_size;
365 let out_size = first_buffer_size
366 .checked_add(second_buffer_size)
367 .and_then(|size| size.checked_add(total_delta_data_size))
368 .ok_or(Error::OutOfMemory)?;
369 out.try_reserve(out_size.saturating_sub(out.len()))?;
370 out.resize(out_size, 0);
371
372 let second_buffer_end = {
375 let end = first_buffer_size
376 .checked_add(second_buffer_size)
377 .ok_or(Error::OutOfMemory)?;
378 out.copy_within(delta_range, end);
381 end
382 };
383
384 if base_buffer_size.is_none() {
387 let base_entry = cursor;
388 debug_assert!(!base_entry.header.is_delta());
389 object_kind = base_entry.header.as_kind();
390 let base_size = self.decoded_object_size(base_entry.decompressed_size)?;
391 let out_base = &mut out[..base_size];
392 self.decompress_entry_from_data_offset(base_entry.data_offset, inflate, out_base)?;
393 }
394
395 (first_buffer_size, second_buffer_end)
396 };
397
398 let (buffers, instructions) = out.split_at_mut(second_buffer_end);
404 let (mut source_buf, mut target_buf) = buffers.split_at_mut(first_buffer_end);
405
406 let mut last_result_size = None;
407 for (
408 delta_idx,
409 Delta {
410 data,
411 base_size,
412 result_size,
413 ..
414 },
415 ) in chain.into_iter().rev().enumerate()
416 {
417 let data = &mut instructions[data];
418 if delta_idx + 1 == chain_len {
419 last_result_size = Some(result_size);
420 }
421 delta::apply(&source_buf[..base_size], &mut target_buf[..result_size], data)?;
422 std::mem::swap(&mut source_buf, &mut target_buf);
424 }
425
426 let last_result_size = last_result_size.expect("at least one delta chain item");
427 if chain_len % 2 == 1 {
436 target_buf[..last_result_size].copy_from_slice(&source_buf[..last_result_size]);
438 }
439 debug_assert!(out.len() >= last_result_size);
440 out.truncate(last_result_size);
441
442 let object_kind = object_kind.expect("a base object as root of any delta chain that we are here to resolve");
443 let consumed_input = consumed_input.expect("at least one decompressed delta object");
444 cache.put(
445 self.id,
446 first_entry.data_offset,
447 out.as_slice(),
448 object_kind,
449 consumed_input,
450 );
451 Ok(Outcome {
452 kind: object_kind,
453 num_deltas: chain_len as u32,
457 decompressed_size: first_entry.decompressed_size,
458 compressed_size: consumed_input,
459 object_size: last_result_size as u64,
460 })
461 }
462}
463
464fn decoded_object_size(size: u64, alloc_limit_bytes: Option<usize>) -> Result<usize, Error> {
466 let size: usize = size.try_into().map_err(|_| Error::OutOfMemory)?;
467 if alloc_limit_bytes.is_some_and(|limit| size > limit) {
468 return Err(Error::OutOfMemory);
469 }
470 Ok(size)
471}
472
#[cfg(test)]
mod tests {
    use gix_testtools::size_ok;

    use super::*;

    /// Guards against unnoticed growth of [`Outcome`], which is returned for every decoded entry.
    #[test]
    fn size_of_decode_entry_outcome() {
        let actual = std::mem::size_of::<Outcome>();
        let expected = 32;
        assert!(
            size_ok(actual, expected),
            "this shouldn't change without us noticing as it's returned a lot: {actual} <~ {expected}"
        );
    }
}