gix_pack/data/file/decode/header.rs
1use gix_features::zlib;
2
3use crate::{
4 data,
5 data::{File, delta, file::decode::Error},
6};
7
8/// A return value of a resolve function, which given an [`ObjectId`][gix_hash::ObjectId] determines where an object can be found.
9#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
10#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
11pub enum ResolvedBase {
12 /// Indicate an object is within this pack, at the given entry, and thus can be looked up locally.
13 InPack(data::Entry),
14 /// Indicates the object of `kind` was found outside of the pack.
15 OutOfPack {
16 /// The kind of object we found when reading the header of the out-of-pack base.
17 kind: gix_object::Kind,
18 /// The amount of deltas encountered if the object was packed as well.
19 num_deltas: Option<u32>,
20 },
21}
22
23/// Additional information and statistics about a successfully decoded object produced by [`File::decode_header()`].
24///
25/// Useful to understand the effectiveness of the pack compression or the cost of decompression.
26#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone, Copy)]
27#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
28pub struct Outcome {
29 /// The kind of resolved object.
30 pub kind: gix_object::Kind,
31 /// The decompressed size of the object.
32 pub object_size: u64,
33 /// The amount of deltas in the chain of objects that had to be resolved beforehand.
34 pub num_deltas: u32,
35}
36
37/// Obtain object information quickly.
38impl<T> File<T>
39where
40 T: crate::FileData,
41{
42 /// Resolve the object header information starting at `entry`, following the chain of entries as needed.
43 ///
44 /// The `entry` determines which object to decode, and is commonly obtained with the help of a pack index file or through pack iteration.
45 /// `inflate` will be used for (partially) decompressing entries, and will be reset before first use, but not after the last use.
46 ///
47 /// `resolve` is a function to lookup objects with the given [`ObjectId`][gix_hash::ObjectId], in case the full object id
48 /// is used to refer to a base object, instead of an in-pack offset.
49 ///
50 /// For delta entries, this only probes the initial delta header bytes to determine the result
51 /// object size. It can reject streams that end or overflow within that probe, but it does not
52 /// fully validate that the compressed stream produces exactly the decompressed size declared in
53 /// the pack entry header. Use [`File::decode_entry()`][crate::data::File::decode_entry()] when
54 /// callers need that full validation.
55 pub fn decode_header(
56 &self,
57 mut entry: data::Entry,
58 inflate: &mut zlib::Inflate,
59 resolve: &dyn Fn(&gix_hash::oid) -> Option<ResolvedBase>,
60 ) -> Result<Outcome, Error> {
61 use crate::data::entry::Header::*;
62 let mut num_deltas = 0;
63 let mut first_delta_decompressed_size = None::<u64>;
64 loop {
65 match entry.header {
66 Tree | Blob | Commit | Tag => {
67 return Ok(Outcome {
68 kind: entry.header.as_kind().expect("always valid for non-refs"),
69 object_size: first_delta_decompressed_size.unwrap_or(entry.decompressed_size),
70 num_deltas,
71 });
72 }
73 OfsDelta { base_distance } => {
74 num_deltas += 1;
75 if first_delta_decompressed_size.is_none() {
76 first_delta_decompressed_size = Some(self.decode_delta_object_size(inflate, &entry)?);
77 }
78 entry = self.entry(entry.checked_base_pack_offset(base_distance).ok_or(
79 crate::data::entry::decode::Error::Corrupt {
80 message: "an ofs-delta base distance pointing before pack start",
81 },
82 )?)?;
83 }
84 RefDelta { base_id } => {
85 num_deltas += 1;
86 if first_delta_decompressed_size.is_none() {
87 first_delta_decompressed_size = Some(self.decode_delta_object_size(inflate, &entry)?);
88 }
89 match resolve(base_id.as_ref()) {
90 Some(ResolvedBase::InPack(base_entry)) => entry = base_entry,
91 Some(ResolvedBase::OutOfPack {
92 kind,
93 num_deltas: origin_num_deltas,
94 }) => {
95 return Ok(Outcome {
96 kind,
97 object_size: first_delta_decompressed_size.unwrap_or(entry.decompressed_size),
98 num_deltas: origin_num_deltas.unwrap_or_default() + num_deltas,
99 });
100 }
101 None => return Err(Error::DeltaBaseUnresolved(base_id)),
102 }
103 }
104 }
105 }
106 }
107
108 /// Decode the result object size from the initial delta header bytes in `inflate`, using `entry`
109 /// for offsets.
110 ///
111 /// This intentionally mirrors Git's cheap header probe: only the first 20 decompressed bytes
112 /// are inspected, which is enough for the two `u64` varints that make up a valid delta header.
113 /// If the zlib stream ends within that probe, we can reject declared-size mismatches here.
114 /// Otherwise this result only proves that the delta header prefix is parseable; full
115 /// decompression through `decode_entry()` must still validate that the stream length matches
116 /// the pack entry header.
117 #[inline]
118 fn decode_delta_object_size(&self, inflate: &mut zlib::Inflate, entry: &data::Entry) -> Result<u64, Error> {
119 let mut buf = [0_u8; 20];
120 let max_size = entry.decompressed_size.min(buf.len() as u64) as usize;
121 let (status, _consumed_in, consumed_out) =
122 self.decompress_entry_from_data_offset_unchecked(entry.data_offset, inflate, &mut buf[..max_size])?;
123 if status == zlib::Status::StreamEnd {
124 if consumed_out as u64 != entry.decompressed_size {
125 return Err(data::entry::decode::Error::Corrupt {
126 message: "pack entry decompressed to fewer bytes than declared in the entry header",
127 }
128 .into());
129 }
130 } else if entry.decompressed_size == max_size as u64 {
131 return Err(data::entry::decode::Error::Corrupt {
132 message: "pack entry decompressed to more bytes than declared in the entry header",
133 }
134 .into());
135 }
136 let buf = &buf[..consumed_out];
137 let (_base_size, offset) = delta::decode_header_size(buf)?;
138 let (result_size, _offset) = delta::decode_header_size(&buf[offset..])?;
139 Ok(result_size)
140 }
141}
142
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the in-memory size of [`Outcome`] so that an accidental growth of the type is caught
    /// by the test suite.
    #[test]
    fn size_of_decode_entry_outcome() {
        assert_eq!(
            std::mem::size_of::<Outcome>(),
            16,
            "this shouldn't change without us noticing as it's returned a lot"
        );
    }
}