//! A low-level Git packfile builder.
//!
//! This implementation requires the caller to push directories to the packfile manually, in the
//! order that Git expects.
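//!
//! A minimal usage sketch (illustrative only, hence `ignore`; the blob contents
//! here are made up):
//!
//! ```ignore
//! use bytes::{Bytes, BytesMut};
//!
//! // A single-blob packfile: wrap the blob in an entry, then encode the pack.
//! let entries = [PackFileEntry::Blob(Bytes::from_static(b"hello world"))];
//! let packfile = PackFile::new(&entries);
//!
//! let mut buf = BytesMut::new();
//! packfile.encode_to(&mut buf).expect("failed to encode packfile");
//! // `buf` now holds the 'PACK' header, the compressed entry and the SHA-1 trailer.
//! ```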
use std::{
convert::TryInto,
fmt::{Display, Formatter, Write},
io::Write as IoWrite,
};
use bytes::{BufMut, Bytes, BytesMut};
use flate2::{write::ZlibEncoder, Compression};
use sha1::Digest;
use tracing::instrument;
use crate::{util::ArcOrCowStr, Error};
pub type HashOutput = [u8; 20];
// The packfile itself is a very simple format. There is a header, a
// series of packed objects (each with its own header and body) and
// then a checksum trailer. The first four bytes are the string 'PACK',
// which acts as a signature confirming you're reading the start of a
// packfile. This is followed by a 4-byte packfile version number and
// then a 4-byte count of the entries in the file.
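//
// For example, a version-2 packfile holding 3 entries begins with the 12
// header bytes `50 41 43 4b 00 00 00 02 00 00 00 03` ('P' 'A' 'C' 'K',
// version, entry count) and ends with a 20-byte SHA-1 of everything
// preceding it.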
pub struct PackFile<'a> {
entries: &'a [PackFileEntry],
}
impl<'a> PackFile<'a> {
#[must_use]
pub fn new(entries: &'a [PackFileEntry]) -> Self {
Self { entries }
}
#[must_use]
pub const fn header_size() -> usize {
"PACK".len() + std::mem::size_of::<u32>() + std::mem::size_of::<u32>()
}
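/// Size of the packfile trailer: a 20-byte SHA-1 checksum of everything written before it.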
#[must_use]
pub const fn footer_size() -> usize {
20
}
#[instrument(skip(self, original_buf), err)]
pub fn encode_to(&self, original_buf: &mut BytesMut) -> Result<(), Error> {
let mut buf = original_buf.split_off(original_buf.len());
buf.reserve(Self::header_size() + Self::footer_size());
// header
buf.extend_from_slice(b"PACK"); // magic header
buf.put_u32(2); // version
buf.put_u32(
self.entries
.len()
.try_into()
.map_err(Error::EntriesExceedsU32)?,
); // number of entries in the packfile
// body
for entry in self.entries {
entry.encode_to(&mut buf)?;
}
// footer
buf.extend_from_slice(&sha1::Sha1::digest(&buf[..]));
original_buf.unsplit(buf);
Ok(())
}
}
#[derive(Debug, Clone)]
pub struct Commit {
pub tree: HashOutput,
// pub parent: [u8; 20],
pub author: CommitUserInfo,
pub committer: CommitUserInfo,
// pub gpgsig: &str,
pub message: &'static str,
}
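// Encoded as (see `encode_to` below):
//
// tree <40-char hex tree hash>
// author <CommitUserInfo>
// committer <CommitUserInfo>
// <blank line>
// <message>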
impl Commit {
#[instrument(skip(self, out), err)]
fn encode_to(&self, out: &mut BytesMut) -> Result<(), Error> {
let mut tree_hex = [0_u8; 20 * 2];
hex::encode_to_slice(self.tree, &mut tree_hex).map_err(Error::EncodeTreeHash)?;
out.write_str("tree ")?;
out.extend_from_slice(&tree_hex);
out.write_char('\n')?;
writeln!(out, "author {}", self.author)?;
writeln!(out, "committer {}", self.committer)?;
write!(out, "\n{}", self.message)?;
Ok(())
}
#[must_use]
pub fn size(&self) -> usize {
let mut len = 0;
len += "tree ".len() + (self.tree.len() * 2) + "\n".len();
len += "author ".len() + self.author.size() + "\n".len();
len += "committer ".len() + self.committer.size() + "\n".len();
len += "\n".len() + self.message.len();
len
}
}
#[derive(Clone, Copy, Debug)]
pub struct CommitUserInfo {
pub name: &'static str,
pub email: &'static str,
pub time: time::OffsetDateTime,
}
impl Display for CommitUserInfo {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(
f,
"{} <{}> {} +0000",
self.name,
self.email,
self.time.unix_timestamp()
)
}
}
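// Rendered as the standard Git ident line, e.g.
// `Jordan Doyle <jordan@doyle.la> 1630244577 +0000` (the timezone is always
// written as +0000 since only a unix timestamp is stored).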
impl CommitUserInfo {
#[must_use]
pub fn size(&self) -> usize {
let timestamp_len = itoa::Buffer::new().format(self.time.unix_timestamp()).len();
self.name.len()
+ "< ".len()
+ self.email.len()
+ "> ".len()
+ timestamp_len
+ " +0000".len()
}
}
#[derive(Debug, Copy, Clone)]
pub enum TreeItemKind {
File,
Directory,
}
impl TreeItemKind {
#[must_use]
pub const fn mode(&self) -> &'static str {
match self {
Self::File => "100644",
Self::Directory => "40000",
}
}
}
#[derive(Debug)]
pub struct TreeItem {
pub kind: TreeItemKind,
pub name: ArcOrCowStr,
pub hash: HashOutput,
pub sort_name: String,
}
// `[mode] [name]\0[hash]`
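// e.g. a regular file named `foo.txt` with hash bytes H is encoded as the
// ASCII `100644 foo.txt\0` followed by the 20 raw (non-hex) bytes of H.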
impl TreeItem {
#[instrument(skip(self, out), err)]
fn encode_to(&self, out: &mut BytesMut) -> Result<(), Error> {
out.write_str(self.kind.mode())?;
write!(out, " {}\0", self.name)?;
out.extend_from_slice(&self.hash);
Ok(())
}
#[must_use]
pub fn size(&self) -> usize {
self.kind.mode().len() + " ".len() + self.name.len() + "\0".len() + self.hash.len()
}
}
#[derive(Debug)] // could be Copy, but the Tree variant holds a Vec<TreeItem>
pub enum PackFileEntry {
// jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3 | gzip -dc
// commit 1068tree 0d586b48bc42e8591773d3d8a7223551c39d453c
// parent c2a862612a14346ae95234f26efae1ee69b5b7a9
// author Jordan Doyle <jordan@doyle.la> 1630244577 +0100
// committer Jordan Doyle <jordan@doyle.la> 1630244577 +0100
// gpgsig -----BEGIN PGP SIGNATURE-----
//
// iQIzBAABCAAdFiEEMn1zof7yzaURQBGDHqa65vZtxJoFAmErjuEACgkQHqa65vZt
// xJqhvhAAieKXnGRjT926qzozcvarC8D3TlA+Z1wVXueTAWqfusNIP0zCun/crOb2
// tOULO+/DXVBmwu5eInAf+t/wvlnIsrzJonhVr1ZT0f0vDX6fs2vflWg4UCVEuTsZ
// tg+aTjcibwnmViIM9XVOzhU8Au2OIqMQLyQOMWSt8NhY0W2WhBCdQvhktvK1V8W6
// omPs04SrR39xWBDQaxsXYxq/1ZKUYXDwudvEfv14EvrxG1vWumpUVJd7Ib5w4gXX
// fYa95DxYL720ZaiWPIYEG8FMBzSOpo6lUzY9g2/o/wKwSQZJNvpaMGCuouy8Fb+E
// UaqC0XPxqpKG9duXPgCldUr+P7++48CF5zc358RBGz5OCNeTREsIQQo5PUO1k+wO
// FnGOQTT8vvNOrxBgb3QgKu67RVwWDc6JnQCNpUrhUJrXMDWnYLBqo4Y+CdKGSQ4G
// hW8V/hVTOlJZNi8bbU4v53cxh4nXiMM6NKUblUKs65ar3/2dkojwunz7r7GVZ6mG
// QUpr9+ybG61XDqd1ad1A/B/i3WdWixTmJS3K/4uXjFjFX1f3RAk7O0gHc9I8HYOE
// Vd8UsHzLOWAUHeaqbsd6xx3GCXF4D5D++kh9OY9Ov7CXlqbYbHd6Atg+PQ7VnqNf
// bDqWN0Q2qcKX3k4ggtucmkkA6gP+K3+F5ANQj3AsGMQeddowC0Y=
// =fXoH
// -----END PGP SIGNATURE-----
//
// test
Commit(Commit),
// jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - 0d/586b48bc42e8591773d3d8a7223551c39d453c | gzip -dc
// tree 20940000 .cargo���CYy��Ve�������100644 .gitignore�K��_ow�]����4�n�ݺ100644 Cargo.lock�7�3-�?/��
// kt��c0C�100644 Cargo.toml�6�&(��]\8@�SHA�]f40000 src0QW��ƅ���b[�!�S&N�100644 test�G2Y�gN�b9vj?��Ut�
Tree(Vec<TreeItem>),
// jordan@Jordans-MacBook-Pro-2 objects % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3| gzip -dc
// blob 23try and find me in .git
Blob(Bytes),
// Tag,
// OfsDelta,
// RefDelta,
}
impl PackFileEntry {
#[instrument(skip(self, buf))]
fn write_header(&self, buf: &mut BytesMut) {
let mut size = self.uncompressed_size();
// write header
{
let mut val = 0b1000_0000_u8;
val |= match self {
Self::Commit(_) => 0b001,
Self::Tree(_) => 0b010,
Self::Blob(_) => 0b011,
// Self::Tag => 0b100,
// Self::OfsDelta => 0b110,
// Self::RefDelta => 0b111,
} << 4;
// pack the 4 LSBs of the size into the header
#[allow(clippy::cast_possible_truncation)] // value is masked
{
val |= (size & 0b1111) as u8;
}
size >>= 4;
buf.put_u8(val);
}
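// Worked example: a 300-byte blob. 300 = 0b1_0010_1100, so the header byte
// above is 0b1011_1100 (0xbc: continuation bit, type 011, low nibble 1100)
// and `size` is now 18. 18 fits in 7 bits, so the loop below emits a single
// 0x12 byte with the MSB clear and stops.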
// write size bytes
loop {
// take the next 7 LSBs of `size` and shift them off for the next iteration
#[allow(clippy::cast_possible_truncation)] // value is masked
let mut val = (size & 0b111_1111) as u8;
size >>= 7;
if size != 0 {
// MSB set to 1 implies there's more size bytes to come, otherwise
// the data starts after this byte
val |= 1 << 7;
}
buf.put_u8(val);
if size == 0 {
break;
}
}
}
#[instrument(skip(self, original_out), err)]
pub fn encode_to(&self, original_out: &mut BytesMut) -> Result<(), Error> {
self.write_header(original_out); // TODO: this needs space reserving for it
// TODO: is there a way to stream through the ZlibEncoder so we don't need
// this intermediate BytesMut and Vec?
let mut out = BytesMut::new();
let size = self.uncompressed_size();
original_out.reserve(size);
// the data ends up getting compressed but we'll need at least this many bytes
out.reserve(size);
match self {
Self::Commit(commit) => {
commit.encode_to(&mut out)?;
}
Self::Tree(items) => {
for item in items {
item.encode_to(&mut out)?;
}
}
Self::Blob(data) => {
out.extend_from_slice(data);
}
}
debug_assert_eq!(out.len(), size);
let mut e = ZlibEncoder::new(Vec::new(), Compression::default());
e.write_all(&out).map_err(Error::CompressWrite)?;
let compressed_data = e.finish().map_err(Error::Compress)?;
original_out.extend_from_slice(&compressed_data);
Ok(())
}
#[instrument(skip(self))]
#[must_use]
pub fn uncompressed_size(&self) -> usize {
match self {
Self::Commit(commit) => commit.size(),
Self::Tree(items) => items.iter().map(TreeItem::size).sum(),
Self::Blob(data) => data.len(),
}
}
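/// Computes the Git object ID of this entry: the SHA-1 of the loose-object
/// style header `"<type> <size>\0"` followed by the uncompressed body.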
#[instrument(skip(self), err)]
pub fn hash(&self) -> Result<HashOutput, Error> {
let size = self.uncompressed_size();
let file_prefix = match self {
Self::Commit(_) => "commit",
Self::Tree(_) => "tree",
Self::Blob(_) => "blob",
};
let size_len = itoa::Buffer::new().format(size).len();
let mut out =
BytesMut::with_capacity(file_prefix.len() + " ".len() + size_len + "\n".len() + size);
write!(out, "{file_prefix} {size}\0")?;
match self {
Self::Commit(commit) => {
commit.encode_to(&mut out)?;
}
Self::Tree(items) => {
for item in items {
item.encode_to(&mut out)?;
}
}
Self::Blob(blob) => {
out.extend_from_slice(blob);
}
}
Ok(sha1::Sha1::digest(&out).into())
}
}
#[cfg(test)]
mod test {
mod packfile_entry {
use crate::low_level::PackFileEntry;
use bytes::{Bytes, BytesMut};
#[test]
fn header_size_bytes_large() {
let entry = PackFileEntry::Blob(Bytes::from(vec![0u8; 16]));
let mut header = BytesMut::new();
entry.write_header(&mut header);
assert_eq!(header.to_vec(), &[0xb0, 0x01]);
}
#[test]
fn header_size_bytes_small() {
let entry = PackFileEntry::Blob(Bytes::from(vec![0u8; 15]));
let mut header = BytesMut::new();
entry.write_header(&mut header);
assert_eq!(header.to_vec(), &[0xbf, 0x00]);
}
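// Added example mirroring the tests above: a 300-byte blob whose size needs a
// continuation nibble beyond the first header byte (expected bytes follow from
// `write_header`'s bit-packing: 0xbc = MSB | blob type | low nibble of 300,
// then 0x12 = 300 >> 4).
#[test]
fn header_size_bytes_multi_byte() {
let entry = PackFileEntry::Blob(Bytes::from(vec![0u8; 300]));
let mut header = BytesMut::new();
entry.write_header(&mut header);
assert_eq!(header.to_vec(), &[0xbc, 0x12]);
}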
}
}