packfile/
low_level.rs

1//! A low-level Git packfile builder.
2//!
3//! This implementation requires the caller to push directories to the packfile manually, in the
4//! order that Git expects.
5
6use std::{
7    convert::TryInto,
8    fmt::{Display, Formatter, Write},
9    io::Write as IoWrite,
10};
11
12use bytes::{BufMut, Bytes, BytesMut};
13use flate2::{write::ZlibEncoder, Compression};
14use sha1::Digest;
15
16use crate::{util::ArcOrCowStr, Error};
17
/// Raw (binary, not hex-encoded) 20-byte hash, as returned by
/// [`PackFileEntry::hash`] and stored in tree items and commits.
pub type HashOutput = [u8; 20];
19
20// The packfile itself is a very simple format. There is a header, a
21// series of packed objects (each with it's own header and body) and
22// then a checksum trailer. The first four bytes is the string 'PACK',
23// which is sort of used to make sure you're getting the start of the
24// packfile correctly. This is followed by a 4-byte packfile version
25// number and then a 4-byte number of entries in that file.
26pub struct PackFile<'a> {
27    entries: &'a [PackFileEntry],
28}
29
30impl<'a> PackFile<'a> {
31    #[must_use]
32    pub fn new(entries: &'a [PackFileEntry]) -> Self {
33        Self { entries }
34    }
35
36    #[must_use]
37    pub const fn header_size() -> usize {
38        "PACK".len() + std::mem::size_of::<u32>() + std::mem::size_of::<u32>()
39    }
40
41    #[must_use]
42    pub const fn footer_size() -> usize {
43        20
44    }
45
46    #[cfg_attr(
47        feature = "tracing",
48        tracing::instrument(skip(self, original_buf), err)
49    )]
50    pub fn encode_to(&self, original_buf: &mut BytesMut) -> Result<(), Error> {
51        let mut buf = original_buf.split_off(original_buf.len());
52        buf.reserve(Self::header_size() + Self::footer_size());
53
54        // header
55        buf.extend_from_slice(b"PACK"); // magic header
56        buf.put_u32(2); // version
57        buf.put_u32(
58            self.entries
59                .len()
60                .try_into()
61                .map_err(Error::EntriesExceedsU32)?,
62        ); // number of entries in the packfile
63
64        // body
65        for entry in self.entries {
66            entry.encode_to(&mut buf)?;
67        }
68
69        // footer
70        buf.extend_from_slice(&sha1::Sha1::digest(&buf[..]));
71
72        original_buf.unsplit(buf);
73
74        Ok(())
75    }
76}
77
78#[derive(Debug, Clone)]
79pub struct Commit {
80    pub tree: HashOutput,
81    // pub parent: [u8; 20],
82    pub author: CommitUserInfo,
83    pub committer: CommitUserInfo,
84    // pub gpgsig: &str,
85    pub message: &'static str,
86}
87
88impl Commit {
89    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, out), err))]
90    fn encode_to(&self, out: &mut BytesMut) -> Result<(), Error> {
91        let mut tree_hex = [0_u8; 20 * 2];
92        hex::encode_to_slice(self.tree, &mut tree_hex).map_err(Error::EncodeTreeHash)?;
93
94        out.write_str("tree ")?;
95        out.extend_from_slice(&tree_hex);
96        out.write_char('\n')?;
97
98        writeln!(out, "author {}", self.author)?;
99        writeln!(out, "committer {}", self.committer)?;
100        write!(out, "\n{}", self.message)?;
101
102        Ok(())
103    }
104
105    #[must_use]
106    pub fn size(&self) -> usize {
107        let mut len = 0;
108        len += "tree ".len() + (self.tree.len() * 2) + "\n".len();
109        len += "author ".len() + self.author.size() + "\n".len();
110        len += "committer ".len() + self.committer.size() + "\n".len();
111        len += "\n".len() + self.message.len();
112        len
113    }
114}
115
116#[derive(Clone, Copy, Debug)]
117pub struct CommitUserInfo {
118    pub name: &'static str,
119    pub email: &'static str,
120    pub time: time::OffsetDateTime,
121}
122
123impl Display for CommitUserInfo {
124    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
125        write!(
126            f,
127            "{} <{}> {} +0000",
128            self.name,
129            self.email,
130            self.time.unix_timestamp()
131        )
132    }
133}
134
135impl CommitUserInfo {
136    #[must_use]
137    pub fn size(&self) -> usize {
138        let timestamp_len = itoa::Buffer::new().format(self.time.unix_timestamp()).len();
139
140        self.name.len()
141            + "< ".len()
142            + self.email.len()
143            + "> ".len()
144            + timestamp_len
145            + " +0000".len()
146    }
147}
148
/// The kind of entry a [`TreeItem`] refers to.
///
/// Comparable and hashable so callers can test or key on the kind directly.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum TreeItemKind {
    File,
    Directory,
}

impl TreeItemKind {
    /// Mode string written in front of the entry name inside a tree object:
    /// `100644` for files and `40000` for directories.
    #[must_use]
    pub const fn mode(&self) -> &'static str {
        match *self {
            Self::File => "100644",
            Self::Directory => "40000",
        }
    }
}
164
165#[derive(Debug)]
166pub struct TreeItem {
167    pub kind: TreeItemKind,
168    pub name: ArcOrCowStr,
169    pub hash: HashOutput,
170    pub sort_name: String,
171}
172
173// `[mode] [name]\0[hash]`
174impl TreeItem {
175    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, out), err))]
176    fn encode_to(&self, out: &mut BytesMut) -> Result<(), Error> {
177        out.write_str(self.kind.mode())?;
178        write!(out, " {}\0", self.name)?;
179        out.extend_from_slice(&self.hash);
180        Ok(())
181    }
182
183    #[must_use]
184    pub fn size(&self) -> usize {
185        self.kind.mode().len() + " ".len() + self.name.len() + "\0".len() + self.hash.len()
186    }
187}
188
189#[derive(Debug)] // could be copy but Vec<TreeItem<'a>>
190pub enum PackFileEntry {
191    // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3 | gzip -dc
192    // commit 1068tree 0d586b48bc42e8591773d3d8a7223551c39d453c
193    // parent c2a862612a14346ae95234f26efae1ee69b5b7a9
194    // author Jordan Doyle <jordan@doyle.la> 1630244577 +0100
195    // committer Jordan Doyle <jordan@doyle.la> 1630244577 +0100
196    // gpgsig -----BEGIN PGP SIGNATURE-----
197    //
198    // iQIzBAABCAAdFiEEMn1zof7yzaURQBGDHqa65vZtxJoFAmErjuEACgkQHqa65vZt
199    // xJqhvhAAieKXnGRjT926qzozcvarC8D3TlA+Z1wVXueTAWqfusNIP0zCun/crOb2
200    // tOULO+/DXVBmwu5eInAf+t/wvlnIsrzJonhVr1ZT0f0vDX6fs2vflWg4UCVEuTsZ
201    // tg+aTjcibwnmViIM9XVOzhU8Au2OIqMQLyQOMWSt8NhY0W2WhBCdQvhktvK1V8W6
202    // omPs04SrR39xWBDQaxsXYxq/1ZKUYXDwudvEfv14EvrxG1vWumpUVJd7Ib5w4gXX
203    // fYa95DxYL720ZaiWPIYEG8FMBzSOpo6lUzY9g2/o/wKwSQZJNvpaMGCuouy8Fb+E
204    // UaqC0XPxqpKG9duXPgCldUr+P7++48CF5zc358RBGz5OCNeTREsIQQo5PUO1k+wO
205    // FnGOQTT8vvNOrxBgb3QgKu67RVwWDc6JnQCNpUrhUJrXMDWnYLBqo4Y+CdKGSQ4G
206    // hW8V/hVTOlJZNi8bbU4v53cxh4nXiMM6NKUblUKs65ar3/2dkojwunz7r7GVZ6mG
207    // QUpr9+ybG61XDqd1ad1A/B/i3WdWixTmJS3K/4uXjFjFX1f3RAk7O0gHc9I8HYOE
208    // Vd8UsHzLOWAUHeaqbsd6xx3GCXF4D5D++kh9OY9Ov7CXlqbYbHd6Atg+PQ7VnqNf
209    // bDqWN0Q2qcKX3k4ggtucmkkA6gP+K3+F5ANQj3AsGMQeddowC0Y=
210    // =fXoH
211    // -----END PGP SIGNATURE-----
212    //
213    // test
214    Commit(Commit),
215    // jordan@Jordans-MacBook-Pro-2 0d % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - 0d/586b48bc42e8591773d3d8a7223551c39d453c | gzip -dc
216    // tree 20940000 .cargo���CYy��Ve�������100644 .gitignore�K��_ow�]����4�n�ݺ100644 Cargo.lock�7�3-�?/��
217    // kt��c0C�100644 Cargo.toml�6�&(��]\8@�SHA�]f40000 src0QW��ƅ���b[�!�S&N�100644 test�G2Y�gN�b9vj?��Ut�
218    Tree(Vec<TreeItem>),
219    // jordan@Jordans-MacBook-Pro-2 objects % printf "\x1f\x8b\x08\x00\x00\x00\x00\x00" | cat - f5/473259d9674ed66239766a013f96a3550374e3| gzip -dc
220    // blob 23try and find me in .git
221    Blob(Bytes),
222    // Tag,
223    // OfsDelta,
224    // RefDelta,
225}
226
227impl PackFileEntry {
228    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, buf)))]
229    fn write_header(&self, buf: &mut BytesMut) {
230        let mut size = self.uncompressed_size();
231
232        // write header
233        {
234            let mut val = 0b1000_0000_u8;
235
236            val |= match self {
237                Self::Commit(_) => 0b001,
238                Self::Tree(_) => 0b010,
239                Self::Blob(_) => 0b011,
240                // Self::Tag => 0b100,
241                // Self::OfsDelta => 0b110,
242                // Self::RefDelta => 0b111,
243            } << 4;
244
245            // pack the 4 LSBs of the size into the header
246            #[allow(clippy::cast_possible_truncation)] // value is masked
247            {
248                val |= (size & 0b1111) as u8;
249            }
250            size >>= 4;
251
252            buf.put_u8(val);
253        }
254
255        // write size bytes
256        loop {
257            // read 7 LSBs from the `size` and push them off for the next iteration
258            #[allow(clippy::cast_possible_truncation)] // value is masked
259            let mut val = (size & 0b111_1111) as u8;
260            size >>= 7;
261
262            if size != 0 {
263                // MSB set to 1 implies there's more size bytes to come, otherwise
264                // the data starts after this byte
265                val |= 1 << 7;
266            }
267
268            buf.put_u8(val);
269
270            if size == 0 {
271                break;
272            }
273        }
274    }
275
276    #[cfg_attr(
277        feature = "tracing",
278        tracing::instrument(skip(self, original_out), err)
279    )]
280    pub fn encode_to(&self, original_out: &mut BytesMut) -> Result<(), Error> {
281        self.write_header(original_out); // TODO: this needs space reserving for it
282
283        // todo is there a way to stream through the zlibencoder so we don't have to
284        // have this intermediate bytesmut and vec?
285        let mut out = BytesMut::new();
286
287        let size = self.uncompressed_size();
288        original_out.reserve(size);
289        // the data ends up getting compressed but we'll need at least this many bytes
290        out.reserve(size);
291
292        match self {
293            Self::Commit(commit) => {
294                commit.encode_to(&mut out)?;
295            }
296            Self::Tree(items) => {
297                for item in items {
298                    item.encode_to(&mut out)?;
299                }
300            }
301            Self::Blob(data) => {
302                out.extend_from_slice(data);
303            }
304        }
305
306        debug_assert_eq!(out.len(), size);
307
308        let mut e = ZlibEncoder::new(Vec::new(), Compression::default());
309        e.write_all(&out).map_err(Error::CompressWrite)?;
310        let compressed_data = e.finish().map_err(Error::Compress)?;
311
312        original_out.extend_from_slice(&compressed_data);
313
314        Ok(())
315    }
316
317    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self)))]
318    #[must_use]
319    pub fn uncompressed_size(&self) -> usize {
320        match self {
321            Self::Commit(commit) => commit.size(),
322            Self::Tree(items) => items.iter().map(TreeItem::size).sum(),
323            Self::Blob(data) => data.len(),
324        }
325    }
326
327    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), err))]
328    pub fn hash(&self) -> Result<HashOutput, Error> {
329        let size = self.uncompressed_size();
330
331        let file_prefix = match self {
332            Self::Commit(_) => "commit",
333            Self::Tree(_) => "tree",
334            Self::Blob(_) => "blob",
335        };
336
337        let size_len = itoa::Buffer::new().format(size).len();
338
339        let mut out =
340            BytesMut::with_capacity(file_prefix.len() + " ".len() + size_len + "\n".len() + size);
341
342        write!(out, "{file_prefix} {size}\0")?;
343        match self {
344            Self::Commit(commit) => {
345                commit.encode_to(&mut out)?;
346            }
347            Self::Tree(items) => {
348                for item in items {
349                    item.encode_to(&mut out)?;
350                }
351            }
352            Self::Blob(blob) => {
353                out.extend_from_slice(blob);
354            }
355        }
356
357        Ok(sha1::Sha1::digest(&out).into())
358    }
359}
360
361#[cfg(test)]
362mod test {
363    mod packfile {
364        use crate::low_level::{
365            Commit, CommitUserInfo, PackFile, PackFileEntry, TreeItem, TreeItemKind,
366        };
367        use bytes::{Bytes, BytesMut};
368
369        fn example() -> Bytes {
370            let blob = PackFileEntry::Blob(Bytes::from("hello world"));
371
372            let tree = PackFileEntry::Tree(vec![TreeItem {
373                kind: TreeItemKind::File,
374                name: "helloworld.txt".into(),
375                hash: blob.hash().unwrap(),
376                sort_name: "helloworld.txt".to_string(),
377            }]);
378
379            let commit = PackFileEntry::Commit(Commit {
380                tree: tree.hash().unwrap(),
381                author: CommitUserInfo {
382                    name: "example",
383                    email: "example@me.com",
384                    time: time::OffsetDateTime::UNIX_EPOCH,
385                },
386                committer: CommitUserInfo {
387                    name: "example",
388                    email: "example@me.com",
389                    time: time::OffsetDateTime::UNIX_EPOCH,
390                },
391                message: "initial commit",
392            });
393
394            let mut out = BytesMut::new();
395
396            PackFile::new(&[blob, tree, commit])
397                .encode_to(&mut out)
398                .unwrap();
399
400            out.freeze()
401        }
402
403        #[test]
404        fn snapshot() {
405            let actual = example();
406            insta::assert_debug_snapshot!(actual);
407        }
408
409        #[test]
410        fn is_readable_by_git() {
411            let stdout = crate::test::verify_pack_file(example());
412
413            insta::with_settings!({filters => vec![
414                (r"/(.*)/example.pack", "/path/to/example.pack")
415            ]}, {
416                insta::assert_snapshot!(stdout);
417            });
418        }
419    }
420
421    mod packfile_entry {
422        use crate::low_level::PackFileEntry;
423        use bytes::{Bytes, BytesMut};
424
425        #[test]
426        fn header_size_bytes_large() {
427            let entry = PackFileEntry::Blob(Bytes::from(vec![0u8; 16]));
428
429            let mut header = BytesMut::new();
430            entry.write_header(&mut header);
431
432            assert_eq!(header.to_vec(), &[0xb0, 0x01]);
433        }
434
435        #[test]
436        fn header_size_bytes_small() {
437            let entry = PackFileEntry::Blob(Bytes::from(vec![0u8; 15]));
438
439            let mut header = BytesMut::new();
440            entry.write_header(&mut header);
441
442            assert_eq!(header.to_vec(), &[0xbf, 0x00]);
443        }
444
445        mod commit {
446            use crate::low_level::{Commit, CommitUserInfo, PackFileEntry};
447            use bytes::BytesMut;
448
449            fn example() -> PackFileEntry {
450                PackFileEntry::Commit(Commit {
451                    tree: [0; 20],
452                    author: CommitUserInfo {
453                        name: "author",
454                        email: "author@example.com",
455                        time: time::OffsetDateTime::from_unix_timestamp(1_688_494_158).unwrap(),
456                    },
457                    committer: CommitUserInfo {
458                        name: "committer",
459                        email: "committer@example.com",
460                        time: time::OffsetDateTime::from_unix_timestamp(1_687_494_158).unwrap(),
461                    },
462                    message: "hello world!",
463                })
464            }
465
466            #[test]
467            fn hash() {
468                let commit = example();
469
470                let actual = hex::encode(commit.hash().unwrap());
471                let expected = "0cc33510a70f7e9ad5f35738385d7ace25d0bbf4";
472                assert_eq!(actual, expected);
473            }
474
475            #[test]
476            fn uncompressed_size() {
477                let commit = example();
478
479                let actual = commit.uncompressed_size();
480                let expected = 172;
481                assert_eq!(actual, expected);
482            }
483
484            #[test]
485            fn headers() {
486                let commit = example();
487
488                let mut actual = BytesMut::new();
489                commit.write_header(&mut actual);
490
491                let expected = &[0x9c, 0x0a];
492
493                assert_eq!(actual.to_vec(), expected);
494            }
495
496            #[test]
497            fn full() {
498                let commit = example();
499
500                let mut actual = BytesMut::new();
501                commit.encode_to(&mut actual).unwrap();
502
503                insta::assert_debug_snapshot!(actual);
504            }
505        }
506
507        mod tree {
508            use crate::low_level::{PackFileEntry, TreeItem, TreeItemKind};
509            use bytes::BytesMut;
510
511            fn example() -> PackFileEntry {
512                PackFileEntry::Tree(vec![TreeItem {
513                    kind: TreeItemKind::File,
514                    name: "hello".into(),
515                    hash: [0u8; 20],
516                    sort_name: "/hello".to_string(),
517                }])
518            }
519
520            #[test]
521            fn hash() {
522                let commit = example();
523
524                let actual = hex::encode(commit.hash().unwrap());
525                let expected = "9fc911650c548e4aa7b6dfd085a9347df8743e17";
526                assert_eq!(actual, expected);
527            }
528
529            #[test]
530            fn uncompressed_size() {
531                let commit = example();
532
533                let actual = commit.uncompressed_size();
534                let expected = 33;
535                assert_eq!(actual, expected);
536            }
537
538            #[test]
539            fn headers() {
540                let commit = example();
541
542                let mut actual = BytesMut::new();
543                commit.write_header(&mut actual);
544
545                let expected = &[0xa1, 0x02];
546
547                assert_eq!(actual.to_vec(), expected);
548            }
549
550            #[test]
551            fn full() {
552                let commit = example();
553
554                let mut actual = BytesMut::new();
555                commit.encode_to(&mut actual).unwrap();
556
557                insta::assert_debug_snapshot!(actual);
558            }
559        }
560
561        mod blob {
562            use crate::low_level::PackFileEntry;
563            use bytes::{Bytes, BytesMut};
564
565            fn example() -> PackFileEntry {
566                PackFileEntry::Blob(Bytes::from("hello world"))
567            }
568
569            #[test]
570            fn hash() {
571                let commit = example();
572
573                let actual = hex::encode(commit.hash().unwrap());
574                let expected = "95d09f2b10159347eece71399a7e2e907ea3df4f";
575                assert_eq!(actual, expected);
576            }
577
578            #[test]
579            fn uncompressed_size() {
580                let commit = example();
581
582                let actual = commit.uncompressed_size();
583                let expected = 11;
584                assert_eq!(actual, expected);
585            }
586
587            #[test]
588            fn headers() {
589                let commit = example();
590
591                let mut actual = BytesMut::new();
592                commit.write_header(&mut actual);
593
594                let expected = &[0xbb, 0x00];
595
596                assert_eq!(actual.to_vec(), expected);
597            }
598
599            #[test]
600            fn full() {
601                let commit = example();
602
603                let mut actual = BytesMut::new();
604                commit.encode_to(&mut actual).unwrap();
605
606                insta::assert_debug_snapshot!(actual);
607            }
608        }
609    }
610}