pub enum Header {
    Commit,
    Tree,
    Blob,
    Tag,
    RefDelta {
        base_id: ObjectId,
    },
    OfsDelta {
        base_distance: u64,
    },
}
The header portion of a pack data entry, identifying the kind of stored object.
Variants
Commit
The object is a commit
Tree
The object is a tree
Blob
The object is a blob
Tag
The object is a tag
RefDelta
Describes a delta object that must be applied to a base. The base object is identified by the base_id field and is looked up in the parent repository rather than within this pack.
Most commonly used in thin packs received from a server, to refer to objects that are not part of the pack but are expected to be present in the receiver's repository.
Note
This could also be an object within this pack, if its LSB-encoded offset would be larger than 20 bytes, which is unlikely to happen.
The naming is exactly the same as the canonical implementation uses, namely REF_DELTA.
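For illustration, a minimal sketch of consuming such an entry, assuming the crate is used as git_pack and using the null id as a stand-in for a real base id:

use git_pack::data::entry::Header;

let header = Header::RefDelta {
    base_id: git_hash::Kind::Sha1.null(), // placeholder; a real pack stores the actual base id
};
if let Header::RefDelta { base_id } = &header {
    // A receiver would look `base_id` up in its local object database
    // and apply the delta on top of the base object found there.
    assert!(base_id.is_null());
}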
OfsDelta
Describes a delta object whose base object is present in this pack.
The base object's position is encoded as a distance from this object's pack offset, so that base_pack_offset = this_objects_pack_offset - base_distance.
Note
The naming is exactly the same as the canonical implementation uses, namely OFS_DELTA.
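A small sketch of that arithmetic, with made-up offsets for illustration:

use git_pack::data::entry::Header;

let this_objects_pack_offset: u64 = 4096;
let header = Header::OfsDelta { base_distance: 512 };
if let Header::OfsDelta { base_distance } = header {
    // The base entry starts 512 bytes before this entry within the same pack.
    assert_eq!(this_objects_pack_offset - base_distance, 3584);
}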
Implementations
impl Header
pub fn verified_base_pack_offset(
    pack_offset: Offset,
    distance: u64
) -> Option<Offset>
Safely subtract distance from pack_offset, returning None if the subtraction would underflow or if distance is 0 (which would be a no-op).
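A minimal usage sketch, assuming Offset is an alias for u64 as used elsewhere in this crate:

use git_pack::data::entry::Header;

// A delta at pack offset 1024 whose base starts 512 bytes earlier:
assert_eq!(Header::verified_base_pack_offset(1024, 512), Some(512));
// A distance of 0 would be a no-op and is rejected…
assert_eq!(Header::verified_base_pack_offset(1024, 0), None);
// …as is a distance that would underflow the offset.
assert_eq!(Header::verified_base_pack_offset(100, 200), None);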
Examples found in repository
pub fn write_data_iter_to_stream<F, F2>(
version: crate::index::Version,
make_resolver: F,
entries: impl Iterator<Item = Result<crate::data::input::Entry, crate::data::input::Error>>,
thread_limit: Option<usize>,
mut root_progress: impl Progress,
out: impl io::Write,
should_interrupt: &AtomicBool,
object_hash: git_hash::Kind,
pack_version: crate::data::Version,
) -> Result<Outcome, Error>
where
F: FnOnce() -> io::Result<F2>,
F2: for<'r> Fn(crate::data::EntryRange, &'r mut Vec<u8>) -> Option<()> + Send + Clone,
{
if version != crate::index::Version::default() {
return Err(Error::Unsupported(version));
}
let mut num_objects: usize = 0;
let mut last_seen_trailer = None;
let anticipated_num_objects = entries.size_hint().1.unwrap_or_else(|| entries.size_hint().0);
let mut tree = Tree::with_capacity(anticipated_num_objects)?;
let indexing_start = std::time::Instant::now();
root_progress.init(Some(4), progress::steps());
let mut objects_progress = root_progress.add_child_with_id("indexing", *b"IWIO"); /* Index Write Index Objects */
objects_progress.init(entries.size_hint().1, progress::count("objects"));
let mut decompressed_progress = root_progress.add_child_with_id("decompressing", *b"IWDB"); /* Index Write Decompressed Bytes */
decompressed_progress.init(None, progress::bytes());
let mut pack_entries_end: u64 = 0;
for entry in entries {
let crate::data::input::Entry {
header,
pack_offset,
crc32,
header_size,
compressed: _,
compressed_size,
decompressed_size,
trailer,
} = entry?;
decompressed_progress.inc_by(decompressed_size as usize);
let entry_len = header_size as u64 + compressed_size;
pack_entries_end = pack_offset + entry_len;
let crc32 = crc32.expect("crc32 to be computed by the iterator. Caller assures correct configuration.");
use crate::data::entry::Header::*;
match header {
Tree | Blob | Commit | Tag => {
tree.add_root(
pack_offset,
TreeEntry {
id: object_hash.null(),
crc32,
},
)?;
}
RefDelta { .. } => return Err(Error::IteratorInvariantNoRefDelta),
OfsDelta { base_distance } => {
let base_pack_offset =
crate::data::entry::Header::verified_base_pack_offset(pack_offset, base_distance).ok_or(
Error::IteratorInvariantBaseOffset {
pack_offset,
distance: base_distance,
},
)?;
tree.add_child(
base_pack_offset,
pack_offset,
TreeEntry {
id: object_hash.null(),
crc32,
},
)?;
}
};
last_seen_trailer = trailer;
num_objects += 1;
objects_progress.inc();
}
if num_objects != anticipated_num_objects {
objects_progress.info(format!(
"{} objects were resolved into {} objects during thin-pack resolution",
anticipated_num_objects, num_objects
));
}
let num_objects: u32 = num_objects
.try_into()
.map_err(|_| Error::IteratorInvariantTooManyObjects(num_objects))?;
objects_progress.show_throughput(indexing_start);
decompressed_progress.show_throughput(indexing_start);
drop(objects_progress);
drop(decompressed_progress);
root_progress.inc();
let resolver = make_resolver()?;
let sorted_pack_offsets_by_oid = {
let traverse::Outcome { roots, children } = tree.traverse(
resolver,
pack_entries_end,
|| (),
|data,
_progress,
traverse::Context {
entry,
decompressed: bytes,
..
}| {
modify_base(data, entry, bytes, version.hash());
Ok::<_, Error>(())
},
traverse::Options {
object_progress: root_progress.add_child_with_id("Resolving", *b"IWRO"), /* Index Write Resolve Objects */
size_progress: root_progress.add_child_with_id("Decoding", *b"IWDB"), /* Index Write Decode Bytes */
thread_limit,
should_interrupt,
object_hash,
},
)?;
root_progress.inc();
let mut items = roots;
items.extend(children);
{
let _progress = root_progress.add_child_with_id("sorting by id", *b"info");
items.sort_by_key(|e| e.data.id);
}
root_progress.inc();
items
};
let pack_hash = match last_seen_trailer {
Some(ph) => ph,
None if num_objects == 0 => {
let header = crate::data::header::encode(pack_version, 0);
let mut hasher = git_features::hash::hasher(object_hash);
hasher.update(&header);
git_hash::ObjectId::from(hasher.digest())
}
None => return Err(Error::IteratorInvariantTrailer),
};
let index_hash = encode::write_to(
out,
sorted_pack_offsets_by_oid,
&pack_hash,
version,
root_progress.add_child_with_id("writing index file", *b"IWBW"), /* Index Write Bytes Written */
)?;
root_progress.show_throughput_with(
indexing_start,
num_objects as usize,
progress::count("objects").expect("unit always set"),
progress::MessageLevel::Success,
);
Ok(Outcome {
index_version: version,
index_hash,
data_hash: pack_hash,
num_objects,
})
}
pub fn as_kind(&self) -> Option<Kind>
Convert the header's object kind into git_object::Kind if possible.
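A short sketch of the expected behaviour; delta headers carry no object kind of their own:

use git_pack::data::entry::Header;

assert_eq!(Header::Blob.as_kind(), Some(git_object::Kind::Blob));
assert_eq!(Header::OfsDelta { base_distance: 42 }.as_kind(), None);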
Examples found in repository
fn modify_base(entry: &mut TreeEntry, pack_entry: &crate::data::Entry, decompressed: &[u8], hash: git_hash::Kind) {
fn compute_hash(kind: git_object::Kind, bytes: &[u8], object_hash: git_hash::Kind) -> git_hash::ObjectId {
let mut hasher = git_features::hash::hasher(object_hash);
hasher.update(&git_object::encode::loose_header(kind, bytes.len()));
hasher.update(bytes);
git_hash::ObjectId::from(hasher.digest())
}
let object_kind = pack_entry.header.as_kind().expect("base object as source of iteration");
let id = compute_hash(object_kind, decompressed, hash);
entry.id = id;
}
More examples
pub fn decode_entry(
&self,
entry: data::Entry,
out: &mut Vec<u8>,
resolve: impl Fn(&git_hash::oid, &mut Vec<u8>) -> Option<ResolvedBase>,
delta_cache: &mut impl cache::DecodeEntry,
) -> Result<Outcome, Error> {
use crate::data::entry::Header::*;
match entry.header {
Tree | Blob | Commit | Tag => {
out.resize(
entry
.decompressed_size
.try_into()
.expect("size representable by machine"),
0,
);
self.decompress_entry(&entry, out.as_mut_slice()).map(|consumed_input| {
Outcome::from_object_entry(
entry.header.as_kind().expect("a non-delta entry"),
&entry,
consumed_input,
)
})
}
OfsDelta { .. } | RefDelta { .. } => self.resolve_deltas(entry, resolve, out, delta_cache),
}
}
/// resolve: technically, this shouldn't ever be required as stored local packs don't refer to objects by id
/// that are outside of the pack. Unless, of course, the ref refers to an object within this pack, which means
/// it's very, very large, as 20 bytes are smaller than the corresponding MSB encoded number
fn resolve_deltas(
&self,
last: data::Entry,
resolve: impl Fn(&git_hash::oid, &mut Vec<u8>) -> Option<ResolvedBase>,
out: &mut Vec<u8>,
cache: &mut impl cache::DecodeEntry,
) -> Result<Outcome, Error> {
// all deltas, from the one that produces the desired object (first) to the oldest at the end of the chain
let mut chain = SmallVec::<[Delta; 10]>::default();
let first_entry = last.clone();
let mut cursor = last;
let mut base_buffer_size: Option<usize> = None;
let mut object_kind: Option<git_object::Kind> = None;
let mut consumed_input: Option<usize> = None;
// Find the first full base, either an undeltified object in the pack or a reference to another object.
let mut total_delta_data_size: u64 = 0;
while cursor.header.is_delta() {
if let Some((kind, packed_size)) = cache.get(self.id, cursor.data_offset, out) {
base_buffer_size = Some(out.len());
object_kind = Some(kind);
// If the input entry is a cache hit, keep the packed size as it must be returned.
// Otherwise, the packed size will be determined later when decompressing the input delta
if total_delta_data_size == 0 {
consumed_input = Some(packed_size);
}
break;
}
total_delta_data_size += cursor.decompressed_size;
let decompressed_size = cursor
.decompressed_size
.try_into()
.expect("a single delta size small enough to fit a usize");
chain.push(Delta {
data: Range {
start: 0,
end: decompressed_size,
},
base_size: 0,
result_size: 0,
decompressed_size,
data_offset: cursor.data_offset,
});
use crate::data::entry::Header;
cursor = match cursor.header {
Header::OfsDelta { base_distance } => self.entry(cursor.base_pack_offset(base_distance)),
Header::RefDelta { base_id } => match resolve(base_id.as_ref(), out) {
Some(ResolvedBase::InPack(entry)) => entry,
Some(ResolvedBase::OutOfPack { end, kind }) => {
base_buffer_size = Some(end);
object_kind = Some(kind);
break;
}
None => return Err(Error::DeltaBaseUnresolved(base_id)),
},
_ => unreachable!("cursor.is_delta() only allows deltas here"),
};
}
// This can happen if the cache held the first entry itself
// We will just treat it as an object then, even though it's technically incorrect.
if chain.is_empty() {
return Ok(Outcome::from_object_entry(
object_kind.expect("object kind as set by cache"),
&first_entry,
consumed_input.expect("consumed bytes as set by cache"),
));
};
// First pass will decompress all delta data and keep it in our output buffer
// [<possibly resolved base object>]<delta-1..delta-n>...
// so that we can find the biggest result size.
let total_delta_data_size: usize = total_delta_data_size.try_into().expect("delta data to fit in memory");
let chain_len = chain.len();
let (first_buffer_end, second_buffer_end) = {
let delta_start = base_buffer_size.unwrap_or(0);
out.resize(delta_start + total_delta_data_size, 0);
let delta_range = Range {
start: delta_start,
end: delta_start + total_delta_data_size,
};
let mut instructions = &mut out[delta_range.clone()];
let mut relative_delta_start = 0;
let mut biggest_result_size = 0;
for (delta_idx, delta) in chain.iter_mut().rev().enumerate() {
let consumed_from_data_offset = self.decompress_entry_from_data_offset(
delta.data_offset,
&mut instructions[..delta.decompressed_size],
)?;
let is_last_delta_to_be_applied = delta_idx + 1 == chain_len;
if is_last_delta_to_be_applied {
consumed_input = Some(consumed_from_data_offset);
}
let (base_size, offset) = delta::decode_header_size(instructions);
let mut bytes_consumed_by_header = offset;
biggest_result_size = biggest_result_size.max(base_size);
delta.base_size = base_size.try_into().expect("base size fits into usize");
let (result_size, offset) = delta::decode_header_size(&instructions[offset..]);
bytes_consumed_by_header += offset;
biggest_result_size = biggest_result_size.max(result_size);
delta.result_size = result_size.try_into().expect("result size fits into usize");
// the absolute location into the instructions buffer, so we keep track of the end point of the last
delta.data.start = relative_delta_start + bytes_consumed_by_header;
relative_delta_start += delta.decompressed_size;
delta.data.end = relative_delta_start;
instructions = &mut instructions[delta.decompressed_size..];
}
// Now we can produce a buffer like this
// [<biggest-result-buffer, possibly filled with resolved base object data>]<biggest-result-buffer><delta-1..delta-n>
// from [<possibly resolved base object>]<delta-1..delta-n>...
let biggest_result_size: usize = biggest_result_size
.try_into()
.expect("biggest result size small enough to fit into usize");
let first_buffer_size = biggest_result_size;
let second_buffer_size = first_buffer_size;
out.resize(first_buffer_size + second_buffer_size + total_delta_data_size, 0);
// Now 'rescue' the deltas, because in the next step we possibly overwrite that portion
// of memory with the base object (in the majority of cases)
let second_buffer_end = {
let end = first_buffer_size + second_buffer_size;
if delta_range.start < end {
// …this means that the delta size is even larger than two uncompressed worst-case
// intermediate results combined. It would already be undesirable to have it bigger
// than the target size (as you could just store the object in whole).
// However, this just means that it reuses existing deltas smartly, which as we rightfully
// remember stand for an object each. However, this means a lot of data is read to restore
// a single object sometimes. Fair enough - package size is minimized that way.
out.copy_within(delta_range, end);
} else {
let (buffers, instructions) = out.split_at_mut(end);
instructions.copy_from_slice(&buffers[delta_range]);
}
end
};
// If we don't have an out-of-pack object already, fill the base-buffer by decompressing the full object
// at which the cursor is left after the iteration
if base_buffer_size.is_none() {
let base_entry = cursor;
debug_assert!(!base_entry.header.is_delta());
object_kind = base_entry.header.as_kind();
self.decompress_entry_from_data_offset(base_entry.data_offset, out)?;
}
(first_buffer_size, second_buffer_end)
};
// From oldest to most recent, apply all deltas, swapping the buffer back and forth
// TODO: once we have more tests, we could optimize this memory-intensive work to
// analyse the delta-chains to only copy data once - after all, with 'copy-from-base' deltas,
// all data originates from one base at some point.
// `out` is: [source-buffer][target-buffer][max-delta-instructions-buffer]
let (buffers, instructions) = out.split_at_mut(second_buffer_end);
let (mut source_buf, mut target_buf) = buffers.split_at_mut(first_buffer_end);
let mut last_result_size = None;
for (
delta_idx,
Delta {
data,
base_size,
result_size,
..
},
) in chain.into_iter().rev().enumerate()
{
let data = &mut instructions[data];
if delta_idx + 1 == chain_len {
last_result_size = Some(result_size);
}
delta::apply(&source_buf[..base_size], &mut target_buf[..result_size], data);
// use the target as source for the next delta
std::mem::swap(&mut source_buf, &mut target_buf);
}
let last_result_size = last_result_size.expect("at least one delta chain item");
// uneven chains leave the target buffer after the source buffer
// FIXME(Performance) If delta-chains are uneven, we know we will have to copy bytes over here
// Instead we could use a different start buffer, to naturally end up with the result in the
// right one.
// However, this is a bit more complicated than just that - you have to deal with the base
// object, which should also be placed in the second buffer right away. You don't have that
// control/knowledge for out-of-pack bases, so this is a special case to deal with, too.
// Maybe these invariants can be represented in the type system though.
if chain_len % 2 == 1 {
// this seems inverted, but remember: we swapped the buffers on the last iteration
target_buf[..last_result_size].copy_from_slice(&source_buf[..last_result_size]);
}
out.resize(last_result_size, 0);
let object_kind = object_kind.expect("a base object as root of any delta chain that we are here to resolve");
let consumed_input = consumed_input.expect("at least one decompressed delta object");
cache.put(
self.id,
first_entry.data_offset,
out.as_slice(),
object_kind,
consumed_input,
);
Ok(Outcome {
kind: object_kind,
// technically depending on the cache, the chain size is not correct as it might
// have been cut short by a cache hit. The caller must deactivate the cache to get
// actual results
num_deltas: chain_len as u32,
decompressed_size: first_entry.decompressed_size,
compressed_size: consumed_input,
object_size: last_result_size as u64,
})
}
pub fn decode_header(
&self,
mut entry: data::Entry,
resolve: impl Fn(&git_hash::oid) -> Option<ResolvedBase>,
) -> Result<Outcome, Error> {
use crate::data::entry::Header::*;
let mut num_deltas = 0;
let mut first_delta_decompressed_size = None::<u64>;
loop {
match entry.header {
Tree | Blob | Commit | Tag => {
return Ok(Outcome {
kind: entry.header.as_kind().expect("always valid for non-refs"),
object_size: first_delta_decompressed_size.unwrap_or(entry.decompressed_size),
num_deltas,
});
}
OfsDelta { base_distance } => {
num_deltas += 1;
if first_delta_decompressed_size.is_none() {
first_delta_decompressed_size = Some(self.decode_delta_object_size(&entry)?);
}
entry = self.entry(entry.base_pack_offset(base_distance))
}
RefDelta { base_id } => {
num_deltas += 1;
if first_delta_decompressed_size.is_none() {
first_delta_decompressed_size = Some(self.decode_delta_object_size(&entry)?);
}
match resolve(base_id.as_ref()) {
Some(ResolvedBase::InPack(base_entry)) => entry = base_entry,
Some(ResolvedBase::OutOfPack {
kind,
num_deltas: origin_num_deltas,
}) => {
return Ok(Outcome {
kind,
object_size: first_delta_decompressed_size.unwrap_or(entry.decompressed_size),
num_deltas: origin_num_deltas.unwrap_or_default() + num_deltas,
})
}
None => return Err(Error::DeltaBaseUnresolved(base_id)),
}
}
};
}
}
pub fn traverse_with_index<P, Processor, E>(
&self,
pack: &crate::data::File,
new_processor: impl Fn() -> Processor + Send + Clone,
mut progress: P,
should_interrupt: &AtomicBool,
Options { check, thread_limit }: Options,
) -> Result<Outcome<P>, Error<E>>
where
P: Progress,
Processor: FnMut(
git_object::Kind,
&[u8],
&index::Entry,
&mut <P::SubProgress as Progress>::SubProgress,
) -> Result<(), E>,
E: std::error::Error + Send + Sync + 'static,
{
let (verify_result, traversal_result) = parallel::join(
{
let pack_progress = progress.add_child_with_id(
format!(
"Hash of pack '{}'",
pack.path().file_name().expect("pack has filename").to_string_lossy()
),
*b"PTHP", /* Pack Traverse Hash Pack bytes */
);
let index_progress = progress.add_child_with_id(
format!(
"Hash of index '{}'",
self.path.file_name().expect("index has filename").to_string_lossy()
),
*b"PTHI", /* Pack Traverse Hash Index bytes */
);
move || {
let res = self.possibly_verify(pack, check, pack_progress, index_progress, should_interrupt);
if res.is_err() {
should_interrupt.store(true, Ordering::SeqCst);
}
res
}
},
|| -> Result<_, Error<_>> {
let sorted_entries = index_entries_sorted_by_offset_ascending(
self,
progress.add_child_with_id("collecting sorted index", *b"PTCE"),
); /* Pack Traverse Collect sorted Entries */
let tree = crate::cache::delta::Tree::from_offsets_in_pack(
pack.path(),
sorted_entries.into_iter().map(Entry::from),
|e| e.index_entry.pack_offset,
|id| self.lookup(id).map(|idx| self.pack_offset_at_index(idx)),
progress.add_child_with_id("indexing", *b"PTDI"), /* Pack Traverse Delta Index creation */
should_interrupt,
self.object_hash,
)?;
let mut outcome = digest_statistics(tree.traverse(
|slice, out| pack.entry_slice(slice).map(|entry| out.copy_from_slice(entry)),
pack.pack_end() as u64,
new_processor,
|data,
progress,
traverse::Context {
entry: pack_entry,
entry_end,
decompressed: bytes,
state: ref mut processor,
level,
}| {
let object_kind = pack_entry.header.as_kind().expect("non-delta object");
data.level = level;
data.decompressed_size = pack_entry.decompressed_size;
data.object_kind = object_kind;
data.compressed_size = entry_end - pack_entry.data_offset;
data.object_size = bytes.len() as u64;
let result = crate::index::traverse::process_entry(
check,
object_kind,
bytes,
progress,
&data.index_entry,
|| {
// TODO: Fix this - we overwrite the header of 'data' which also changes the computed entry size,
// causing index and pack to seemingly mismatch. This is surprising, and should be done differently.
// debug_assert_eq!(&data.index_entry.pack_offset, &pack_entry.pack_offset());
git_features::hash::crc32(
pack.entry_slice(data.index_entry.pack_offset..entry_end)
.expect("slice pointing into the pack (by now data is verified)"),
)
},
processor,
);
match result {
Err(err @ Error::PackDecode { .. }) if !check.fatal_decode_error() => {
progress.info(format!("Ignoring decode error: {}", err));
Ok(())
}
res => res,
}
},
crate::cache::delta::traverse::Options {
object_progress: progress.add_child_with_id("Resolving", *b"PTRO"), /* Pack Traverse Resolve Objects */
size_progress: progress.add_child_with_id("Decoding", *b"PTDB"), /* Pack Traverse Decode Bytes */
thread_limit,
should_interrupt,
object_hash: self.object_hash,
},
)?);
outcome.pack_size = pack.data_len() as u64;
Ok(outcome)
},
);
Ok(Outcome {
actual_index_checksum: verify_result?,
statistics: traversal_result?,
progress,
})
}
pub fn as_type_id(&self) -> u8
Convert this header's object kind into the pack's internal type id representation.
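A hedged sketch of the expected values, which mirror the canonical pack format's type ids (deltas use ids 6 and 7):

use git_pack::data::entry::Header;

assert_eq!(Header::Commit.as_type_id(), 1);
assert_eq!(Header::Tree.as_type_id(), 2);
assert_eq!(Header::Blob.as_type_id(), 3);
assert_eq!(Header::Tag.as_type_id(), 4);
assert_eq!(Header::OfsDelta { base_distance: 1 }.as_type_id(), 6);
assert_eq!(Header::RefDelta { base_id: git_hash::Kind::Sha1.null() }.as_type_id(), 7);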
Examples found in repository
pub fn write_to(&self, decompressed_size_in_bytes: u64, mut out: impl io::Write) -> io::Result<usize> {
let mut size = decompressed_size_in_bytes;
let mut written = 1;
let mut c: u8 = (self.as_type_id() << 4) | (size as u8 & 0b0000_1111);
size >>= 4;
while size != 0 {
out.write_all(&[c | 0b1000_0000])?;
written += 1;
c = size as u8 & 0b0111_1111;
size >>= 7;
}
out.write_all(&[c])?;
use Header::*;
match self {
RefDelta { base_id: oid } => {
out.write_all(oid.as_slice())?;
written += oid.as_slice().len();
}
OfsDelta { base_distance } => {
let mut buf = [0u8; 10];
let buf = leb64_encode(*base_distance, &mut buf);
out.write_all(buf)?;
written += buf.len();
}
Blob | Tree | Commit | Tag => {}
}
Ok(written)
}
pub fn is_delta(&self) -> bool
Returns true if this is a delta object, i.e. not a full object.
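A minimal sketch:

use git_pack::data::entry::Header;

assert!(!Header::Blob.is_delta());
assert!(Header::OfsDelta { base_distance: 7 }.is_delta());
assert!(Header::RefDelta { base_id: git_hash::Kind::Sha1.null() }.is_delta());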
Examples found in repository
See resolve_deltas in the decode_entry example above, which loops while cursor.header.is_delta() to walk a delta chain down to its base object.
impl Header
pub fn write_to(
    &self,
    decompressed_size_in_bytes: u64,
    out: impl Write
) -> Result<usize>
Encode this header, along with the given decompressed_size_in_bytes, into the out write stream for use within a data pack.
Returns the number of bytes written to out.
decompressed_size_in_bytes is the full size in bytes of the object that this header represents.
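A minimal sketch that checks the first byte's layout against the implementation shown above: the type id occupies bits 4 to 6, the low four bits of the size fill bits 0 to 3, and the MSB acts as continuation bit for larger sizes:

use git_pack::data::entry::Header;

let mut buf = Vec::new();
let written = Header::Blob
    .write_to(13, &mut buf)
    .expect("writing to a Vec cannot fail");
assert_eq!(written, buf.len());
// Size 13 fits into four bits, so a single byte without continuation bit suffices.
assert_eq!(buf[0], (3 << 4) | 13); // type id 3 = Blob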
Examples found in repository
fn next_inner(&mut self, entry: input::Entry) -> Result<input::Entry, input::Error> {
if self.num_entries == 0 {
let header_bytes = crate::data::header::encode(self.data_version, 0);
self.output.write_all(&header_bytes[..])?;
}
self.num_entries += 1;
entry.header.write_to(entry.decompressed_size, &mut self.output)?;
std::io::copy(
&mut entry
.compressed
.as_deref()
.expect("caller must configure generator to keep compressed bytes"),
&mut self.output,
)?;
Ok(entry)
}
More examples
fn next_inner(&mut self) -> Result<u64, Error<E>> {
let previous_written = self.written;
if let Some((version, num_entries)) = self.header_info.take() {
let header_bytes = crate::data::header::encode(version, num_entries);
self.output.write_all(&header_bytes[..])?;
self.written += header_bytes.len() as u64;
}
match self.input.next() {
Some(entries) => {
for entry in entries.map_err(Error::Input)? {
if entry.is_invalid() {
self.pack_offsets_and_validity.push((0, false));
continue;
};
self.pack_offsets_and_validity.push((self.written, true));
let header = entry.to_entry_header(self.entry_version, |index| {
let (base_offset, is_valid_object) = self.pack_offsets_and_validity[index];
if !is_valid_object {
unreachable!("if you see this the object database is correct as a delta refers to a non-existing object")
}
self.written - base_offset
});
self.written += header.write_to(entry.decompressed_size as u64, &mut self.output)? as u64;
self.written += std::io::copy(&mut &*entry.compressed_data, &mut self.output)?;
}
}
None => {
let digest = self.output.hash.clone().digest();
self.output.inner.write_all(&digest[..])?;
self.written += digest.len() as u64;
self.output.inner.flush()?;
self.is_done = true;
self.trailer = Some(git_hash::ObjectId::from(digest));
}
};
Ok(self.written - previous_written)
}
fn next_inner(&mut self) -> Result<input::Entry, input::Error> {
self.objects_left -= 1; // even an error counts as objects
// Read header
let entry = match self.hash.take() {
Some(hash) => {
let mut read = read_and_pass_to(
&mut self.read,
hash::Write {
inner: io::sink(),
hash,
},
);
let res = crate::data::Entry::from_read(&mut read, self.offset, self.hash_len);
self.hash = Some(read.write.hash);
res
}
None => crate::data::Entry::from_read(&mut self.read, self.offset, self.hash_len),
}
.map_err(input::Error::from)?;
// Decompress object to learn its compressed bytes
let mut decompressor = self
.decompressor
.take()
.unwrap_or_else(|| Box::new(Decompress::new(true)));
let compressed_buf = self.compressed_buf.take().unwrap_or_else(|| Vec::with_capacity(4096));
decompressor.reset(true);
let mut decompressed_reader = ReadBoxed {
inner: read_and_pass_to(
&mut self.read,
if self.compressed.keep() {
Vec::with_capacity(entry.decompressed_size as usize)
} else {
compressed_buf
},
),
decompressor,
};
let bytes_copied = io::copy(&mut decompressed_reader, &mut io::sink())?;
if bytes_copied != entry.decompressed_size {
return Err(input::Error::IncompletePack {
actual: bytes_copied,
expected: entry.decompressed_size,
});
}
let pack_offset = self.offset;
let compressed_size = decompressed_reader.decompressor.total_in();
self.offset += entry.header_size() as u64 + compressed_size;
self.decompressor = Some(decompressed_reader.decompressor);
let mut compressed = decompressed_reader.inner.write;
debug_assert_eq!(
compressed_size,
compressed.len() as u64,
"we must track exactly the same amount of bytes as read by the decompressor"
);
if let Some(hash) = self.hash.as_mut() {
hash.update(&compressed);
}
let crc32 = if self.compressed.crc32() {
let mut header_buf = [0u8; 12 + git_hash::Kind::longest().len_in_bytes()];
let header_len = entry.header.write_to(bytes_copied, header_buf.as_mut())?;
let state = git_features::hash::crc32_update(0, &header_buf[..header_len]);
Some(git_features::hash::crc32_update(state, &compressed))
} else {
None
};
let compressed = if self.compressed.keep() {
Some(compressed)
} else {
compressed.clear();
self.compressed_buf = Some(compressed);
None
};
// The last object gets the trailer (which is potentially verified)
let trailer = self.try_read_trailer()?;
Ok(input::Entry {
header: entry.header,
header_size: entry.header_size() as u16,
compressed,
compressed_size,
crc32,
pack_offset,
decompressed_size: bytes_copied,
trailer,
})
}
pub fn size(&self, decompressed_size: u64) -> usize
The size of the header in bytes when serialized.
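A hedged sketch relating size to the encoding above: the first byte holds the type id and four size bits, each further seven bits of size cost one continuation byte, and a RefDelta additionally embeds the full base id:

use git_pack::data::entry::Header;

assert_eq!(Header::Blob.size(15), 1); // four size bits fit into the first byte
assert_eq!(Header::Blob.size(16), 2); // one continuation byte is needed
assert_eq!(
    Header::RefDelta { base_id: git_hash::Kind::Sha1.null() }.size(15),
    1 + 20 // header byte plus a 20-byte SHA-1 base id
);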
Examples found in repository
pub fn from_data_obj(obj: &git_object::Data<'_>, pack_offset: u64) -> Result<Self, input::Error> {
let header = to_header(obj.kind);
let compressed = compress_data(obj)?;
let compressed_size = compressed.len() as u64;
let mut entry = input::Entry {
header,
header_size: header.size(obj.data.len() as u64) as u16,
pack_offset,
compressed: Some(compressed),
compressed_size,
crc32: None,
decompressed_size: obj.data.len() as u64,
trailer: None,
};
entry.crc32 = Some(entry.compute_crc32());
Ok(entry)
}
Trait Implementations
impl<'de> Deserialize<'de> for Header
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where __D: Deserializer<'de>
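A hypothetical round-trip via serde_json, assuming the crate's serde feature is enabled and the default externally tagged enum representation:

let header: git_pack::data::entry::Header =
    serde_json::from_str("\"Commit\"").expect("valid JSON for a unit variant");
assert_eq!(header, git_pack::data::entry::Header::Commit);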
impl Ord for Header
impl PartialEq<Header> for Header
impl PartialOrd<Header> for Header
fn le(&self, other: &Rhs) -> bool
Tests less than or equal to (for self and other) and is used by the <= operator.
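Assuming these comparison impls are derived, variants order by their declaration order; a small sketch:

use git_pack::data::entry::Header;

assert!(Header::Commit <= Header::Tree); // Commit is declared before Tree
assert!(Header::Tag < Header::OfsDelta { base_distance: 1 });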