use crate::{
pack::cache,
pack::{Entry, Error, File},
zlib::Inflate,
};
use git_object as object;
use smallvec::SmallVec;
use std::{convert::TryInto, io, ops::Range};
/// Book-keeping for one delta in a chain while it is being resolved.
#[derive(Debug)]
struct Delta {
    // Range of this delta's instruction bytes within the shared instruction
    // buffer, with the two size headers already skipped.
    data: Range<usize>,
    // Expected size of the base object, parsed from the delta's first header varint.
    base_size: usize,
    // Size of the object this delta produces, parsed from the second header varint.
    result_size: usize,
    // Decompressed size of the whole delta stream, headers included.
    decompressed_size: usize,
    // Offset of the compressed delta data within the pack.
    data_offset: u64,
}
/// Returned by the `resolve` callback of `File::decode_entry` to describe where
/// the base object of a ref-delta was found.
#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
pub enum ResolvedBase {
    /// The base object lives in this pack as the given entry.
    InPack(Entry),
    /// The resolver wrote the base object into the output buffer itself;
    /// `end` is the number of bytes it occupies there.
    OutOfPack { kind: object::Kind, end: usize },
}
/// Classification and size statistics produced by `File::decode_entry`.
#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
pub struct DecodeEntryResult {
    /// The kind of the decoded object.
    pub kind: object::Kind,
    /// Number of deltas applied to produce the object; zero for base objects.
    pub num_deltas: u32,
    /// Decompressed size in bytes of the decoded pack entry itself.
    pub decompressed_size: u64,
    /// Number of compressed bytes the entry consumed within the pack.
    pub compressed_size: usize,
    /// Size in bytes of the fully resolved object.
    pub object_size: u64,
}
impl DecodeEntryResult {
pub fn default_from_kind(kind: object::Kind) -> Self {
Self {
kind,
num_deltas: 0,
decompressed_size: 0,
compressed_size: 0,
object_size: 0,
}
}
fn from_object_entry(kind: object::Kind, entry: &Entry, compressed_size: usize) -> Self {
Self {
kind,
num_deltas: 0,
decompressed_size: entry.decompressed_size,
compressed_size,
object_size: entry.decompressed_size,
}
}
}
impl File {
    /// Decompress the object described by `entry` into `out`, returning the
    /// number of compressed bytes consumed from the pack data.
    ///
    /// # Panics
    /// If `out` is smaller than the entry's decompressed size.
    pub fn decompress_entry(&self, entry: &Entry, out: &mut [u8]) -> Result<usize, Error> {
        assert!(
            out.len() as u64 >= entry.decompressed_size,
            "output buffer isn't large enough to hold decompressed result, want {}, have {}",
            entry.decompressed_size,
            out.len()
        );
        self.decompress_entry_from_data_offset(entry.data_offset, out)
    }
    /// Inflate the zlib stream that starts at `data_offset` within the mapped
    /// pack data into `out`, returning the number of compressed bytes consumed.
    fn decompress_entry_from_data_offset(&self, data_offset: u64, out: &mut [u8]) -> Result<usize, Error> {
        let offset: usize = data_offset.try_into().expect("offset representable by machine");
        assert!(offset < self.data.len(), "entry offset out of bounds");
        // One-shot inflation of a single stream; only the consumed input count
        // is of interest here.
        Inflate::default()
            .once(&self.data[offset..], &mut std::io::Cursor::new(out), true)
            .map_err(|e| Error::ZlibInflate(e, "Failed to decompress pack entry"))
            .map(|(_, consumed_in, _)| consumed_in)
    }
    /// Decode `entry`, writing the fully resolved object data into `out`.
    ///
    /// Base objects (tree/blob/commit/tag) are decompressed directly; delta
    /// objects are resolved via `resolve_deltas`, with `resolve` locating
    /// ref-delta bases and `cache` short-circuiting previously decoded chains.
    pub fn decode_entry(
        &self,
        entry: Entry,
        out: &mut Vec<u8>,
        resolve: impl Fn(&object::Id, &mut Vec<u8>) -> Option<ResolvedBase>,
        cache: &mut impl cache::DecodeEntry,
    ) -> Result<DecodeEntryResult, Error> {
        use crate::pack::decoded::Header::*;
        match entry.header {
            Tree | Blob | Commit | Tag => {
                // Non-delta object: size the buffer exactly, then inflate in place.
                out.resize(
                    entry
                        .decompressed_size
                        .try_into()
                        .expect("size representable by machine"),
                    0,
                );
                self.decompress_entry(&entry, out.as_mut_slice()).map(|consumed_input| {
                    DecodeEntryResult::from_object_entry(
                        entry.header.to_kind().expect("a non-delta entry"),
                        &entry,
                        consumed_input,
                    )
                })
            }
            OfsDelta { .. } | RefDelta { .. } => self.resolve_deltas(entry, resolve, out, cache),
        }
    }
    /// Resolve the delta chain ending at `last`, leaving the reconstructed
    /// object in `out` and returning decode statistics.
    ///
    /// Works in three phases: (1) walk from the tip towards the base,
    /// collecting per-delta metadata; (2) lay `out` out as
    /// `[source buffer][target buffer][instruction bytes]` and decompress all
    /// delta streams; (3) apply the deltas base-first, ping-ponging between the
    /// two scratch buffers.
    fn resolve_deltas(
        &self,
        last: Entry,
        resolve: impl Fn(&object::Id, &mut Vec<u8>) -> Option<ResolvedBase>,
        out: &mut Vec<u8>,
        cache: &mut impl cache::DecodeEntry,
    ) -> Result<DecodeEntryResult, Error> {
        use crate::pack::decoded::Header;
        // Chains are usually short; the SmallVec keeps them off the heap.
        let mut chain = SmallVec::<[Delta; 10]>::default();
        let first_entry = last.clone();
        let mut cursor = last;
        let mut base_buffer_size: Option<usize> = None;
        let mut object_kind: Option<object::Kind> = None;
        let mut consumed_input: Option<usize> = None;
        let mut total_delta_data_size: u64 = 0;
        // Phase 1: walk tip -> base. Stops early when the cache or the
        // resolver already placed a base object into `out`.
        while cursor.header.is_delta() {
            if let Some((kind, packed_size)) = cache.get(cursor.data_offset, out) {
                base_buffer_size = Some(out.len());
                object_kind = Some(kind);
                // Only when the tip itself was cached is its packed size the
                // compressed size to report.
                if total_delta_data_size == 0 {
                    consumed_input = Some(packed_size);
                }
                break;
            }
            total_delta_data_size += cursor.decompressed_size;
            let decompressed_size = cursor
                .decompressed_size
                .try_into()
                .expect("a single delta size small enough to fit a usize");
            chain.push(Delta {
                // `data` is refined in phase 2 once the headers are parsed.
                data: Range {
                    start: 0,
                    end: decompressed_size,
                },
                base_size: 0,
                result_size: 0,
                decompressed_size,
                data_offset: cursor.data_offset,
            });
            cursor = match cursor.header {
                Header::OfsDelta { pack_offset } => self.entry(pack_offset),
                Header::RefDelta { oid } => match resolve(&oid, out) {
                    Some(ResolvedBase::InPack(entry)) => entry,
                    Some(ResolvedBase::OutOfPack { end, kind }) => {
                        base_buffer_size = Some(end);
                        object_kind = Some(kind);
                        break;
                    }
                    None => return Err(Error::DeltaBaseUnresolved(oid)),
                },
                _ => unreachable!("cursor.is_delta() only allows deltas here"),
            };
        }
        // The tip entry itself came straight from the cache — nothing to apply.
        if chain.is_empty() {
            return Ok(DecodeEntryResult::from_object_entry(
                object_kind.expect("object kind as set by cache"),
                &first_entry,
                consumed_input.expect("consumed bytes as set by cache"),
            ));
        };
        let total_delta_data_size: usize = total_delta_data_size.try_into().expect("delta data to fit in memory");
        let chain_len = chain.len();
        // Phase 2: decompress every delta's instruction stream and parse its
        // two leading size varints to learn the largest intermediate result.
        let (first_buffer_end, second_buffer_end) = {
            let delta_start = base_buffer_size.unwrap_or(0);
            out.resize(delta_start + total_delta_data_size, 0);
            let delta_range = Range {
                start: delta_start,
                end: delta_start + total_delta_data_size,
            };
            let mut instructions = &mut out[delta_range.clone()];
            let mut relative_delta_start = 0;
            let mut biggest_result_size = 0;
            // Iterate base-first (reverse of push order above).
            for (delta_idx, delta) in chain.iter_mut().rev().enumerate() {
                let consumed_from_data_offset = self.decompress_entry_from_data_offset(
                    delta.data_offset,
                    &mut instructions[..delta.decompressed_size],
                )?;
                // The last element is the entry the caller asked about; its
                // compressed size is the one to report.
                if delta_idx + 1 == chain_len {
                    consumed_input = Some(consumed_from_data_offset);
                }
                // Every delta stream begins with two varints: expected base
                // size, then result size.
                let (base_size, offset) = delta_header_size_ofs(instructions);
                let mut bytes_consumed_by_header = offset;
                biggest_result_size = biggest_result_size.max(base_size);
                delta.base_size = base_size.try_into().expect("base size fits into usize");
                let (result_size, offset) = delta_header_size_ofs(&instructions[offset..]);
                bytes_consumed_by_header += offset;
                biggest_result_size = biggest_result_size.max(result_size);
                delta.result_size = result_size.try_into().expect("result size fits into usize");
                // Record where this delta's instructions live relative to the
                // start of the instruction area, headers excluded.
                delta.data.start = relative_delta_start + bytes_consumed_by_header;
                relative_delta_start += delta.decompressed_size;
                delta.data.end = relative_delta_start;
                instructions = &mut instructions[delta.decompressed_size..];
            }
            let biggest_result_size: usize = biggest_result_size
                .try_into()
                .expect("biggest result size small enough to fit into usize");
            // Two scratch buffers, each large enough for the biggest
            // intermediate object, followed by the instruction bytes.
            let first_buffer_size = biggest_result_size;
            let second_buffer_size = first_buffer_size;
            out.resize(first_buffer_size + second_buffer_size + total_delta_data_size, 0);
            // Relocate the instruction bytes to sit after both scratch buffers.
            let second_buffer_end = {
                let end = first_buffer_size + second_buffer_size;
                if delta_range.start < end {
                    // Regions may overlap — copy_within handles that.
                    out.copy_within(delta_range, end);
                } else {
                    // Disjoint regions: a plain slice-to-slice copy suffices.
                    let (buffers, instructions) = out.split_at_mut(end);
                    instructions.copy_from_slice(&buffers[delta_range]);
                }
                end
            };
            // No cached/resolved base: it must be a plain object in this pack.
            // Decompress it into the first scratch buffer and cache it.
            if base_buffer_size.is_none() {
                let base_entry = cursor;
                debug_assert!(!base_entry.header.is_delta());
                object_kind = base_entry.header.to_kind();
                let packed_size = self.decompress_entry_from_data_offset(base_entry.data_offset, out)?;
                cache.put(
                    base_entry.data_offset,
                    &out[..base_entry
                        .decompressed_size
                        .try_into()
                        .expect("successful decompression should make this successful too")],
                    object_kind.expect("non-delta object"),
                    packed_size,
                );
            }
            (first_buffer_size, second_buffer_end)
        };
        // Phase 3: apply deltas base-first, alternating source/target roles.
        let (buffers, instructions) = out.split_at_mut(second_buffer_end);
        let (mut source_buf, mut target_buf) = buffers.split_at_mut(first_buffer_end);
        let mut last_result_size = None;
        for (
            delta_idx,
            Delta {
                data,
                base_size,
                result_size,
                ..
            },
        ) in chain.iter().rev().enumerate()
        {
            let data = &mut instructions[data.clone()];
            if delta_idx + 1 == chain_len {
                last_result_size = Some(*result_size);
            }
            apply_delta(&source_buf[..*base_size], &mut target_buf[..*result_size], data);
            // The freshly built result becomes the base for the next delta.
            std::mem::swap(&mut source_buf, &mut target_buf);
        }
        let last_result_size = last_result_size.expect("at least one delta chain item");
        // After the final swap the result is in `source_buf`. For odd chain
        // lengths that is the second scratch area, so move the bytes to the
        // front of `out`, which the truncation below preserves.
        if chain.len() % 2 == 1 {
            target_buf[..last_result_size].copy_from_slice(&source_buf[..last_result_size]);
        }
        out.resize(last_result_size, 0);
        let object_kind = object_kind.expect("a base object as root of any delta chain that we are here to resolve");
        let consumed_input = consumed_input.expect("at least one decompressed delta object");
        // Remember the fully resolved object so future chains can stop here.
        cache.put(first_entry.data_offset, out.as_slice(), object_kind, consumed_input);
        Ok(DecodeEntryResult {
            kind: object_kind,
            num_deltas: chain.len() as u32,
            // NOTE(review): this reports the tip entry's own decompressed size,
            // not the sum over the chain — confirm that is the intended meaning.
            decompressed_size: first_entry.decompressed_size as u64,
            compressed_size: consumed_input,
            object_size: last_result_size as u64,
        })
    }
}
/// Apply the git delta instruction stream `data` on top of `base`, writing the
/// reconstructed object into `target`.
///
/// `target` must be sized exactly to the delta's declared result size; the two
/// trailing assertions verify that every instruction byte and every output
/// byte was consumed.
fn apply_delta(base: &[u8], mut target: &mut [u8], data: &[u8]) {
    let mut pos = 0;
    while let Some(instruction) = data.get(pos) {
        pos += 1;
        match instruction {
            // High bit set: copy `size` bytes from `base` starting at `ofs`.
            // Each set low bit pulls in one more little-endian byte of the
            // offset (bits 0-3) or size (bits 4-6).
            copy if copy & 0b1000_0000 != 0 => {
                let mut ofs: u32 = 0;
                for &(mask, shift) in &[
                    (0b0000_0001u8, 0u32),
                    (0b0000_0010, 8),
                    (0b0000_0100, 16),
                    (0b0000_1000, 24),
                ] {
                    if copy & mask != 0 {
                        ofs |= (data[pos] as u32) << shift;
                        pos += 1;
                    }
                }
                let mut size: u32 = 0;
                for &(mask, shift) in &[(0b0001_0000u8, 0u32), (0b0010_0000, 8), (0b0100_0000, 16)] {
                    if copy & mask != 0 {
                        size |= (data[pos] as u32) << shift;
                        pos += 1;
                    }
                }
                // A size of zero encodes the maximum copy span of 64KB.
                if size == 0 {
                    size = 0x10000;
                }
                let start = ofs as usize;
                io::Write::write(&mut target, &base[start..start + size as usize])
                    .expect("delta copy from base: byte slices must match");
            }
            0 => panic!("encountered unsupported command code: 0"),
            // High bit clear: the low seven bits give the count of literal
            // bytes to insert directly from the instruction stream.
            insert_len => {
                io::Write::write(&mut target, &data[pos..pos + *insert_len as usize])
                    .expect("delta copy data: slice sizes to match up");
                pos += *insert_len as usize;
            }
        }
    }
    assert_eq!(pos, data.len());
    assert_eq!(target.len(), 0);
}
/// Parse the variable-length size field at the start of a delta stream.
///
/// Each byte contributes its low seven bits, least-significant group first; a
/// clear high bit terminates the sequence. Returns the decoded value and the
/// number of bytes consumed (all of `d` if no terminator was seen).
fn delta_header_size_ofs(d: &[u8]) -> (u64, usize) {
    let mut value = 0u64;
    for (idx, &byte) in d.iter().enumerate() {
        value |= u64::from(byte & 0x7f) << (idx * 7);
        if byte & 0x80 == 0 {
            return (value, idx + 1);
        }
    }
    // Input ended while the continuation bit was still set.
    (value, d.len())
}