1use std::ops::Range;
2
3use gix_features::zlib;
4use smallvec::SmallVec;
5
6use crate::{
7 cache, data,
8 data::{delta, file::decode::Error, File},
9};
10
/// The value returned by the `resolve` callback of `File::decode_entry()`, telling the decoder where the base
/// object of a ref-delta was found.
#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ResolvedBase {
    /// The base is the given entry; the decoder keeps following the delta chain from it.
    InPack(data::Entry),
    /// The base is an object of `kind` that was found elsewhere. The decoder stops following the chain and treats
    /// the first `end` bytes of the output buffer that was passed to the callback as the data of the base object.
    #[allow(missing_docs)]
    OutOfPack { kind: gix_object::Kind, end: usize },
}
22
/// Bookkeeping for one delta of a chain while it is being resolved.
#[derive(Debug)]
struct Delta {
    /// The location of the delta instructions, i.e. the delta data without its two size headers, relative to
    /// the start of the area holding the decompressed data of all deltas of the chain.
    data: Range<usize>,
    /// The size of the base object as read from the first header of the delta data.
    base_size: usize,
    /// The size of the object produced by applying the delta, as read from the second header of the delta data.
    result_size: usize,

    /// The size of the decompressed delta data, headers included, as stated by the pack entry.
    decompressed_size: usize,
    /// The offset into the pack data at which the compressed delta data starts.
    data_offset: data::Offset,
}
32
/// Additional information about a decoded object, as returned by `File::decode_entry()`.
#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Outcome {
    /// The kind of the decoded object. For delta chains this is the kind of the base object.
    pub kind: gix_object::Kind,
    /// The number of deltas that were applied to produce the object, `0` if the entry was not resolved
    /// through a delta chain.
    pub num_deltas: u32,
    /// The decompressed size as stated by the entry that was decoded. For a delta entry this is the size of
    /// its delta data, not the size of the resulting object.
    pub decompressed_size: u64,
    /// The number of compressed bytes consumed from the pack for the entry that was decoded.
    pub compressed_size: usize,
    /// The size of the fully decoded object.
    pub object_size: u64,
}
53
54impl Outcome {
55 pub(crate) fn default_from_kind(kind: gix_object::Kind) -> Self {
56 Self {
57 kind,
58 num_deltas: 0,
59 decompressed_size: 0,
60 compressed_size: 0,
61 object_size: 0,
62 }
63 }
64 fn from_object_entry(kind: gix_object::Kind, entry: &data::Entry, compressed_size: usize) -> Self {
65 Self {
66 kind,
67 num_deltas: 0,
68 decompressed_size: entry.decompressed_size,
69 compressed_size,
70 object_size: entry.decompressed_size,
71 }
72 }
73}
74
75impl<T> File<T>
77where
78 T: crate::FileData,
79{
80 fn decoded_object_size(&self, size: u64) -> Result<usize, Error> {
81 decoded_object_size(size, self.alloc_limit_bytes)
82 }
83
84 pub fn decompress_entry(
94 &self,
95 entry: &data::Entry,
96 inflate: &mut zlib::Inflate,
97 out: &mut [u8],
98 ) -> Result<usize, Error> {
99 assert!(
100 out.len() as u64 >= entry.decompressed_size,
101 "output buffer isn't large enough to hold decompressed result, want {}, have {}",
102 entry.decompressed_size,
103 out.len()
104 );
105
106 self.decompress_entry_from_data_offset(entry.data_offset, inflate, out)
107 }
108
109 pub fn entry(&self, offset: data::Offset) -> Result<data::Entry, data::entry::decode::Error> {
113 let pack_offset: usize = offset.try_into().expect("offset representable by machine");
114 if pack_offset > self.data.len() {
115 return Err(data::entry::decode::Error::Corrupt {
116 message: "an entry offset pointing beyond pack data",
117 });
118 }
119
120 let object_data = &self.data[pack_offset..];
121 data::Entry::from_bytes(object_data, offset, self.hash_len)
122 }
123
124 pub(crate) fn decompress_entry_from_data_offset(
130 &self,
131 data_offset: data::Offset,
132 inflate: &mut zlib::Inflate,
133 out: &mut [u8],
134 ) -> Result<usize, Error> {
135 let offset: usize = data_offset.try_into().expect("offset representable by machine");
136 if offset >= self.data.len() {
137 return Err(data::entry::decode::Error::Corrupt {
138 message: "an entry data offset pointing beyond pack data",
139 }
140 .into());
141 }
142
143 inflate.reset();
144 inflate
145 .once(&self.data[offset..], out)
146 .map(|(_status, consumed_in, _consumed_out)| consumed_in)
147 .map_err(Into::into)
148 }
149
150 pub(crate) fn decompress_entry_from_data_offset_2(
152 &self,
153 data_offset: data::Offset,
154 inflate: &mut zlib::Inflate,
155 out: &mut [u8],
156 ) -> Result<(usize, usize), Error> {
157 let offset: usize = data_offset.try_into().expect("offset representable by machine");
158 if offset >= self.data.len() {
159 return Err(data::entry::decode::Error::Corrupt {
160 message: "an entry data offset pointing beyond pack data",
161 }
162 .into());
163 }
164
165 inflate.reset();
166 inflate
167 .once(&self.data[offset..], out)
168 .map(|(_status, consumed_in, consumed_out)| (consumed_in, consumed_out))
169 .map_err(Into::into)
170 }
171
172 pub fn decode_entry(
184 &self,
185 entry: data::Entry,
186 out: &mut Vec<u8>,
187 inflate: &mut zlib::Inflate,
188 resolve: &dyn Fn(&gix_hash::oid, &mut Vec<u8>) -> Option<ResolvedBase>,
189 delta_cache: &mut dyn cache::DecodeEntry,
190 ) -> Result<Outcome, Error> {
191 use crate::data::entry::Header::*;
192 match entry.header {
193 Tree | Blob | Commit | Tag => {
194 let size = self.decoded_object_size(entry.decompressed_size)?;
195 if let Some(additional) = size.checked_sub(out.len()) {
196 out.try_reserve(additional)?;
197 }
198 out.resize(size, 0);
199 self.decompress_entry(&entry, inflate, out.as_mut_slice())
200 .map(|consumed_input| {
201 Outcome::from_object_entry(
202 entry.header.as_kind().expect("a non-delta entry"),
203 &entry,
204 consumed_input,
205 )
206 })
207 }
208 OfsDelta { .. } | RefDelta { .. } => self.resolve_deltas(entry, resolve, inflate, out, delta_cache),
209 }
210 }
211
212 fn resolve_deltas(
216 &self,
217 last: data::Entry,
218 resolve: &dyn Fn(&gix_hash::oid, &mut Vec<u8>) -> Option<ResolvedBase>,
219 inflate: &mut zlib::Inflate,
220 out: &mut Vec<u8>,
221 cache: &mut dyn cache::DecodeEntry,
222 ) -> Result<Outcome, Error> {
223 let mut chain = SmallVec::<[Delta; 10]>::default();
225 let first_entry = last.clone();
226 let mut cursor = last;
227 let mut base_buffer_size: Option<usize> = None;
228 let mut object_kind: Option<gix_object::Kind> = None;
229 let mut consumed_input: Option<usize> = None;
230
231 let mut total_delta_data_size: u64 = 0;
233 while cursor.header.is_delta() {
234 if let Some((kind, packed_size)) = cache.get(self.id, cursor.data_offset, out) {
235 base_buffer_size = Some(out.len());
236 object_kind = Some(kind);
237 if total_delta_data_size == 0 {
240 consumed_input = Some(packed_size);
241 }
242 break;
243 }
244 total_delta_data_size = total_delta_data_size
247 .checked_add(cursor.decompressed_size)
248 .ok_or(Error::OutOfMemory)?;
249 let decompressed_size = self.decoded_object_size(cursor.decompressed_size)?;
250 chain.push(Delta {
251 data: Range {
252 start: 0,
253 end: decompressed_size,
254 },
255 base_size: 0,
256 result_size: 0,
257 decompressed_size,
258 data_offset: cursor.data_offset,
259 });
260 use crate::data::entry::Header;
261 cursor = match cursor.header {
262 Header::OfsDelta { base_distance } => {
263 self.entry(cursor.checked_base_pack_offset(base_distance).ok_or(
264 crate::data::entry::decode::Error::Corrupt {
265 message: "an ofs-delta base distance pointing before pack start",
266 },
267 )?)?
268 }
269 Header::RefDelta { base_id } => match resolve(base_id.as_ref(), out) {
270 Some(ResolvedBase::InPack(entry)) => entry,
271 Some(ResolvedBase::OutOfPack { end, kind }) => {
272 base_buffer_size = Some(end);
273 object_kind = Some(kind);
274 break;
275 }
276 None => return Err(Error::DeltaBaseUnresolved(base_id)),
277 },
278 _ => unreachable!("cursor.is_delta() only allows deltas here"),
279 };
280 }
281
282 if chain.is_empty() {
285 return Ok(Outcome::from_object_entry(
286 object_kind.expect("object kind as set by cache"),
287 &first_entry,
288 consumed_input.expect("consumed bytes as set by cache"),
289 ));
290 }
291
292 let total_delta_data_size: usize = total_delta_data_size.try_into().map_err(|_| Error::OutOfMemory)?;
296
297 let chain_len = chain.len();
298 let (first_buffer_end, second_buffer_end) = {
299 let delta_start = base_buffer_size.unwrap_or(0);
300
301 let delta_range = Range {
302 start: delta_start,
303 end: delta_start
304 .checked_add(total_delta_data_size)
305 .ok_or(Error::OutOfMemory)?,
306 };
307 out.try_reserve(delta_range.end.saturating_sub(out.len()))?;
308 out.resize(delta_range.end, 0);
309
310 let mut instructions = &mut out[delta_range.clone()];
311 let mut relative_delta_start = 0;
312 let mut biggest_result_size = 0;
313 for (delta_idx, delta) in chain.iter_mut().rev().enumerate() {
314 let consumed_from_data_offset = self.decompress_entry_from_data_offset(
315 delta.data_offset,
316 inflate,
317 &mut instructions[..delta.decompressed_size],
318 )?;
319 let is_last_delta_to_be_applied = delta_idx + 1 == chain_len;
320 if is_last_delta_to_be_applied {
321 consumed_input = Some(consumed_from_data_offset);
322 }
323
324 let (base_size, offset) = delta::decode_header_size(instructions)?;
325 let mut bytes_consumed_by_header = offset;
326 biggest_result_size = biggest_result_size.max(base_size);
327 delta.base_size = self.decoded_object_size(base_size)?;
328
329 let (result_size, offset) = delta::decode_header_size(&instructions[offset..])?;
330 bytes_consumed_by_header += offset;
331 biggest_result_size = biggest_result_size.max(result_size);
332 delta.result_size = self.decoded_object_size(result_size)?;
333
334 delta.data.start = relative_delta_start + bytes_consumed_by_header;
336 relative_delta_start += delta.decompressed_size;
337 delta.data.end = relative_delta_start;
338
339 instructions = &mut instructions[delta.decompressed_size..];
340 }
341
342 let biggest_result_size = self.decoded_object_size(biggest_result_size)?;
346 let first_buffer_size = biggest_result_size;
347 let second_buffer_size = first_buffer_size;
348 let out_size = first_buffer_size
349 .checked_add(second_buffer_size)
350 .and_then(|size| size.checked_add(total_delta_data_size))
351 .ok_or(Error::OutOfMemory)?;
352 out.try_reserve(out_size.saturating_sub(out.len()))?;
353 out.resize(out_size, 0);
354
355 let second_buffer_end = {
358 let end = first_buffer_size
359 .checked_add(second_buffer_size)
360 .ok_or(Error::OutOfMemory)?;
361 out.copy_within(delta_range, end);
364 end
365 };
366
367 if base_buffer_size.is_none() {
370 let base_entry = cursor;
371 debug_assert!(!base_entry.header.is_delta());
372 object_kind = base_entry.header.as_kind();
373 let out_base = &mut out[..out_size - total_delta_data_size];
374 self.decompress_entry_from_data_offset(base_entry.data_offset, inflate, out_base)?;
375 }
376
377 (first_buffer_size, second_buffer_end)
378 };
379
380 let (buffers, instructions) = out.split_at_mut(second_buffer_end);
386 let (mut source_buf, mut target_buf) = buffers.split_at_mut(first_buffer_end);
387
388 let mut last_result_size = None;
389 for (
390 delta_idx,
391 Delta {
392 data,
393 base_size,
394 result_size,
395 ..
396 },
397 ) in chain.into_iter().rev().enumerate()
398 {
399 let data = &mut instructions[data];
400 if delta_idx + 1 == chain_len {
401 last_result_size = Some(result_size);
402 }
403 delta::apply(&source_buf[..base_size], &mut target_buf[..result_size], data)?;
404 std::mem::swap(&mut source_buf, &mut target_buf);
406 }
407
408 let last_result_size = last_result_size.expect("at least one delta chain item");
409 if chain_len % 2 == 1 {
418 target_buf[..last_result_size].copy_from_slice(&source_buf[..last_result_size]);
420 }
421 debug_assert!(out.len() >= last_result_size);
422 out.truncate(last_result_size);
423
424 let object_kind = object_kind.expect("a base object as root of any delta chain that we are here to resolve");
425 let consumed_input = consumed_input.expect("at least one decompressed delta object");
426 cache.put(
427 self.id,
428 first_entry.data_offset,
429 out.as_slice(),
430 object_kind,
431 consumed_input,
432 );
433 Ok(Outcome {
434 kind: object_kind,
435 num_deltas: chain_len as u32,
439 decompressed_size: first_entry.decompressed_size,
440 compressed_size: consumed_input,
441 object_size: last_result_size as u64,
442 })
443 }
444}
445
446fn decoded_object_size(size: u64, alloc_limit_bytes: Option<usize>) -> Result<usize, Error> {
448 let size: usize = size.try_into().map_err(|_| Error::OutOfMemory)?;
449 if alloc_limit_bytes.is_some_and(|limit| size > limit) {
450 return Err(Error::OutOfMemory);
451 }
452 Ok(size)
453}
454
#[cfg(test)]
mod tests {
    use std::mem::size_of;

    use gix_testtools::size_ok;

    use super::Outcome;

    /// Guards against `Outcome` growing unnoticed.
    #[test]
    fn size_of_decode_entry_outcome() {
        let (actual, expected) = (size_of::<Outcome>(), 32);
        assert!(
            size_ok(actual, expected),
            "this shouldn't change without use noticing as it's returned a lot: {actual} <~ {expected}"
        );
    }
}