use std::{convert::TryInto, ops::Range};

use git_features::zlib;
use smallvec::SmallVec;

use crate::{
    cache, data,
    data::{delta, file::decode::Error, File},
};

/// Tells where the base object of a ref-delta lives, as returned by the `resolve` function passed to
/// `File::decode_entry()`.
#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
pub enum ResolvedBase {
    /// The base object is itself an entry within this pack.
    InPack(data::Entry),
    #[allow(missing_docs)]
    OutOfPack { kind: git_object::Kind, end: usize },
}

/// A delta within a chain of deltas, pointing at its instructions and carrying the sizes decoded
/// from its header.
#[derive(Debug)]
struct Delta {
    /// The range of this delta's instructions within the shared instruction buffer.
    data: Range<usize>,
    /// The expected size of the base object, as decoded from the delta header.
    base_size: usize,
    /// The size of the object this delta produces, as decoded from the delta header.
    result_size: usize,

    /// The decompressed size of this delta entry's data.
    decompressed_size: usize,
    /// The offset at which this delta's compressed data starts within the pack.
    data_offset: data::Offset,
}

/// Information about an object decoded by `File::decode_entry()`, whose data is written into the
/// caller-provided output buffer.
#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
pub struct Outcome {
    /// The kind of the decoded object.
    pub kind: git_object::Kind,
    /// The number of deltas that had to be applied to fully decode the object.
    pub num_deltas: u32,
    /// The decompressed size, in bytes, of the pack entry at which decoding started.
    pub decompressed_size: u64,
    /// The amount of compressed bytes read from the pack for the entry at which decoding started.
    pub compressed_size: usize,
    /// The size, in bytes, of the fully decoded object.
    pub object_size: u64,
}

impl Outcome {
    pub(crate) fn default_from_kind(kind: git_object::Kind) -> Self {
        Self {
            kind,
            num_deltas: 0,
            decompressed_size: 0,
            compressed_size: 0,
            object_size: 0,
        }
    }
    fn from_object_entry(kind: git_object::Kind, entry: &data::Entry, compressed_size: usize) -> Self {
        Self {
            kind,
            num_deltas: 0,
            decompressed_size: entry.decompressed_size,
            compressed_size,
            object_size: entry.decompressed_size,
        }
    }
}

impl File {
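    /// Decompress the given non-delta `entry` into `out` and return the amount of compressed bytes
    /// read from the pack.
    ///
    /// Panics if `out` is smaller than `entry.decompressed_size`.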
    pub fn decompress_entry(&self, entry: &data::Entry, out: &mut [u8]) -> Result<usize, Error> {
        assert!(
            out.len() as u64 >= entry.decompressed_size,
            "output buffer isn't large enough to hold decompressed result, want {}, have {}",
            entry.decompressed_size,
            out.len()
        );

        self.decompress_entry_from_data_offset(entry.data_offset, out)
            .map_err(Into::into)
    }

    fn assure_v2(&self) {
        assert!(
            matches!(self.version, crate::data::Version::V2),
            "Only V2 is implemented"
        );
    }

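    /// Parse the pack entry header at the given pack `offset` and return it as a `data::Entry`.
    ///
    /// Panics if `offset` is out of bounds or if this is not a V2 pack.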
    pub fn entry(&self, offset: data::Offset) -> data::Entry {
        self.assure_v2();
        let pack_offset: usize = offset.try_into().expect("offset representable by machine");
        assert!(pack_offset <= self.data.len(), "offset out of bounds");

        let object_data = &self.data[pack_offset..];
        data::Entry::from_bytes(object_data, offset, self.hash_len)
    }

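    /// Decompress the entry whose compressed data begins at `data_offset` into `out`, returning the
    /// amount of compressed bytes consumed from the pack.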
    pub(crate) fn decompress_entry_from_data_offset(
        &self,
        data_offset: data::Offset,
        out: &mut [u8],
    ) -> Result<usize, zlib::inflate::Error> {
        let offset: usize = data_offset.try_into().expect("offset representable by machine");
        assert!(offset < self.data.len(), "entry offset out of bounds");

        zlib::Inflate::default()
            .once(&self.data[offset..], out)
            .map(|(_status, consumed_in, _consumed_out)| consumed_in)
    }

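    /// Like `decompress_entry_from_data_offset()`, but additionally returns the amount of
    /// decompressed bytes written to `out` as the second tuple field.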
    pub(crate) fn decompress_entry_from_data_offset_2(
        &self,
        data_offset: data::Offset,
        out: &mut [u8],
    ) -> Result<(usize, usize), zlib::inflate::Error> {
        let offset: usize = data_offset.try_into().expect("offset representable by machine");
        assert!(offset < self.data.len(), "entry offset out of bounds");

        zlib::Inflate::default()
            .once(&self.data[offset..], out)
            .map(|(_status, consumed_in, consumed_out)| (consumed_in, consumed_out))
    }

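    /// Decode `entry`, resolving delta chains as needed, and write the fully decoded object into `out`,
    /// returning information about it as [`Outcome`].
    ///
    /// `resolve` is consulted for ref-delta bases that need to be looked up by id, and `delta_cache`
    /// is queried and fed to avoid decoding the same bases repeatedly.
    ///
    /// The sketch below illustrates one possible way to call this method using only items from this
    /// module; the helper name and the trivial `resolve` closure are illustrative assumptions, not part
    /// of this crate's API. A closure returning `None` makes ref-deltas whose base is outside this pack
    /// fail with `Error::DeltaBaseUnresolved`.
    ///
    /// ```ignore
    /// // Hypothetical helper: decode the object at `offset` within `pack`, reusing `buf` and `cache`.
    /// fn object_at(
    ///     pack: &File,
    ///     offset: data::Offset,
    ///     buf: &mut Vec<u8>,
    ///     cache: &mut impl cache::DecodeEntry,
    /// ) -> Result<Outcome, Error> {
    ///     let entry = pack.entry(offset);
    ///     pack.decode_entry(entry, buf, |_id, _out| None, cache)
    /// }
    /// ```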
    pub fn decode_entry(
        &self,
        entry: data::Entry,
        out: &mut Vec<u8>,
        resolve: impl Fn(&git_hash::oid, &mut Vec<u8>) -> Option<ResolvedBase>,
        delta_cache: &mut impl cache::DecodeEntry,
    ) -> Result<Outcome, Error> {
        use crate::data::entry::Header::*;
        match entry.header {
            Tree | Blob | Commit | Tag => {
                out.resize(
                    entry
                        .decompressed_size
                        .try_into()
                        .expect("size representable by machine"),
                    0,
                );
                self.decompress_entry(&entry, out.as_mut_slice()).map(|consumed_input| {
                    Outcome::from_object_entry(
                        entry.header.as_kind().expect("a non-delta entry"),
                        &entry,
                        consumed_input,
                    )
                })
            }
            OfsDelta { .. } | RefDelta { .. } => self.resolve_deltas(entry, resolve, out, delta_cache),
        }
    }

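    // Delta resolution in three phases:
    // 1. Walk the ofs/ref-delta chain, recording each delta, until a usable base is found: a cached
    //    object, an out-of-pack base provided by `resolve`, or a plain entry in this pack.
    // 2. Decompress all delta data into a scratch region of `out` and read the base/result sizes from
    //    each delta header.
    // 3. Apply the deltas on top of the base, ping-ponging between two result buffers, then shrink
    //    `out` to the final object.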
    fn resolve_deltas(
        &self,
        last: data::Entry,
        resolve: impl Fn(&git_hash::oid, &mut Vec<u8>) -> Option<ResolvedBase>,
        out: &mut Vec<u8>,
        cache: &mut impl cache::DecodeEntry,
    ) -> Result<Outcome, Error> {
        // Chains of up to 10 deltas are tracked without a heap allocation.
        let mut chain = SmallVec::<[Delta; 10]>::default();
        let first_entry = last.clone();
        let mut cursor = last;
        let mut base_buffer_size: Option<usize> = None;
        let mut object_kind: Option<git_object::Kind> = None;
        let mut consumed_input: Option<usize> = None;

        let mut total_delta_data_size: u64 = 0;
        // Walk the chain of deltas, recording each one, until something usable as a base is found.
        while cursor.header.is_delta() {
            if let Some((kind, packed_size)) = cache.get(self.id, cursor.data_offset, out) {
                // The cache wrote the fully decoded object for this entry into `out`.
                base_buffer_size = Some(out.len());
                object_kind = Some(kind);
                if total_delta_data_size == 0 {
                    // The cache hit was the very first entry, so its packed size is the one to report.
                    consumed_input = Some(packed_size);
                }
                break;
            }
            total_delta_data_size += cursor.decompressed_size;
            let decompressed_size = cursor
                .decompressed_size
                .try_into()
                .expect("a single delta size small enough to fit a usize");
            chain.push(Delta {
                data: Range {
                    start: 0,
                    end: decompressed_size,
                },
                base_size: 0,
                result_size: 0,
                decompressed_size,
                data_offset: cursor.data_offset,
            });
            use crate::data::entry::Header;
            cursor = match cursor.header {
                Header::OfsDelta { base_distance } => self.entry(cursor.base_pack_offset(base_distance)),
                Header::RefDelta { base_id } => match resolve(base_id.as_ref(), out) {
                    Some(ResolvedBase::InPack(entry)) => entry,
                    Some(ResolvedBase::OutOfPack { end, kind }) => {
                        base_buffer_size = Some(end);
                        object_kind = Some(kind);
                        break;
                    }
                    None => return Err(Error::DeltaBaseUnresolved(base_id)),
                },
                _ => unreachable!("cursor.is_delta() only allows deltas here"),
            };
        }

        // An empty chain means the cache already provided the complete first object in `out`.
        if chain.is_empty() {
            return Ok(Outcome::from_object_entry(
                object_kind.expect("object kind as set by cache"),
                &first_entry,
                consumed_input.expect("consumed bytes as set by cache"),
            ));
        };

        let total_delta_data_size: usize = total_delta_data_size.try_into().expect("delta data to fit in memory");

        let chain_len = chain.len();
        let (first_buffer_end, second_buffer_end) = {
            // Decompress all delta data back-to-back into `out`, right behind the base object if one
            // was provided by the cache or the `resolve` function.
            let delta_start = base_buffer_size.unwrap_or(0);
            out.resize(delta_start + total_delta_data_size, 0);

            let delta_range = Range {
                start: delta_start,
                end: delta_start + total_delta_data_size,
            };
            let mut instructions = &mut out[delta_range.clone()];
            let mut relative_delta_start = 0;
            let mut biggest_result_size = 0;
            for (delta_idx, delta) in chain.iter_mut().rev().enumerate() {
                let consumed_from_data_offset = self.decompress_entry_from_data_offset(
                    delta.data_offset,
                    &mut instructions[..delta.decompressed_size],
                )?;
                let is_last_delta_to_be_applied = delta_idx + 1 == chain_len;
                if is_last_delta_to_be_applied {
                    consumed_input = Some(consumed_from_data_offset);
                }

                let (base_size, offset) = delta::decode_header_size(instructions);
                let mut bytes_consumed_by_header = offset;
                biggest_result_size = biggest_result_size.max(base_size);
                delta.base_size = base_size.try_into().expect("base size fits into usize");

                let (result_size, offset) = delta::decode_header_size(&instructions[offset..]);
                bytes_consumed_by_header += offset;
                biggest_result_size = biggest_result_size.max(result_size);
                delta.result_size = result_size.try_into().expect("result size fits into usize");

                // Whatever follows the two size headers is the delta's instruction stream.
                delta.data.start = relative_delta_start + bytes_consumed_by_header;
                relative_delta_start += delta.decompressed_size;
                delta.data.end = relative_delta_start;

                instructions = &mut instructions[delta.decompressed_size..];
            }

            // Now that the biggest intermediate object size is known, lay out `out` as
            // [source buffer][target buffer][delta instructions], moving the instructions to the end.
            let biggest_result_size: usize = biggest_result_size
                .try_into()
                .expect("biggest result size small enough to fit into usize");
            let first_buffer_size = biggest_result_size;
            let second_buffer_size = first_buffer_size;
            out.resize(first_buffer_size + second_buffer_size + total_delta_data_size, 0);

            let second_buffer_end = {
                let end = first_buffer_size + second_buffer_size;
                if delta_range.start < end {
                    out.copy_within(delta_range, end);
                } else {
                    let (buffers, instructions) = out.split_at_mut(end);
                    instructions.copy_from_slice(&buffers[delta_range]);
                }
                end
            };

            if base_buffer_size.is_none() {
                // The chain ended at a non-delta entry within this pack, which serves as the base object.
                let base_entry = cursor;
                debug_assert!(!base_entry.header.is_delta());
                object_kind = base_entry.header.as_kind();
                self.decompress_entry_from_data_offset(base_entry.data_offset, out)?;
            }

            (first_buffer_size, second_buffer_end)
        };

        let (buffers, instructions) = out.split_at_mut(second_buffer_end);
        let (mut source_buf, mut target_buf) = buffers.split_at_mut(first_buffer_end);

        // Apply the deltas from the base object upwards, alternating between the two result buffers.
        let mut last_result_size = None;
        for (
            delta_idx,
            Delta {
                data,
                base_size,
                result_size,
                ..
            },
        ) in chain.into_iter().rev().enumerate()
        {
            let data = &mut instructions[data];
            if delta_idx + 1 == chain_len {
                last_result_size = Some(result_size);
            }
            delta::apply(&source_buf[..base_size], &mut target_buf[..result_size], data);
            std::mem::swap(&mut source_buf, &mut target_buf);
        }

        let last_result_size = last_result_size.expect("at least one delta chain item");
        if chain_len % 2 == 1 {
            // After an odd number of swaps the final result sits in the second buffer, so move it to
            // the front of `out` before truncating below.
            target_buf[..last_result_size].copy_from_slice(&source_buf[..last_result_size]);
        }
        out.resize(last_result_size, 0);

        let object_kind = object_kind.expect("a base object as root of any delta chain that we are here to resolve");
        let consumed_input = consumed_input.expect("at least one decompressed delta object");
        cache.put(
            self.id,
            first_entry.data_offset,
            out.as_slice(),
            object_kind,
            consumed_input,
        );
        Ok(Outcome {
            kind: object_kind,
            num_deltas: chain_len as u32,
            decompressed_size: first_entry.decompressed_size,
            compressed_size: consumed_input,
            object_size: last_result_size as u64,
        })
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn size_of_decode_entry_outcome() {
        assert_eq!(
            std::mem::size_of::<Outcome>(),
            32,
            "this shouldn't change without us noticing as it's returned a lot"
        );
    }
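
    #[test]
    fn default_from_kind_zeroes_all_counters() {
        // A small sanity check for `Outcome::default_from_kind()` as defined above: a fresh,
        // non-delta outcome starts with all sizes and the delta count at zero.
        let outcome = Outcome::default_from_kind(git_object::Kind::Blob);
        assert_eq!(outcome.kind, git_object::Kind::Blob);
        assert_eq!(outcome.num_deltas, 0);
        assert_eq!(outcome.decompressed_size, 0);
        assert_eq!(outcome.compressed_size, 0);
        assert_eq!(outcome.object_size, 0);
    }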
}