guts_git/
pack.rs

1//! Git pack file format implementation.
2//!
3//! Pack files are the format used by git for efficient object transfer.
4//! See: https://git-scm.com/docs/pack-format
5
6use crate::{GitError, Result};
7use flate2::read::ZlibDecoder;
8use flate2::write::ZlibEncoder;
9use flate2::Compression;
10use guts_storage::{GitObject, ObjectId, ObjectStore, ObjectType};
11use sha1::{Digest, Sha1};
12use std::io::{Read, Write};
13
/// Magic bytes at the start of a pack file; written first by the builder and
/// required by the parser.
const PACK_SIGNATURE: &[u8; 4] = b"PACK";
/// Pack file version we support. The builder always emits this version and
/// the parser rejects any other value.
const PACK_VERSION: u32 = 2;
18
19/// Builds a pack file from a set of objects.
20pub struct PackBuilder {
21    objects: Vec<GitObject>,
22}
23
24impl PackBuilder {
25    /// Creates a new pack builder.
26    pub fn new() -> Self {
27        Self {
28            objects: Vec::new(),
29        }
30    }
31
32    /// Adds an object to the pack.
33    pub fn add(&mut self, object: GitObject) {
34        self.objects.push(object);
35    }
36
37    /// Adds an object from the store by ID.
38    pub fn add_from_store(&mut self, store: &ObjectStore, id: &ObjectId) -> Result<()> {
39        let object = store.get(id)?;
40        self.objects.push(object);
41        Ok(())
42    }
43
44    /// Builds the pack file.
45    pub fn build(self) -> Result<Vec<u8>> {
46        let mut pack = Vec::new();
47
48        // Write header
49        pack.extend_from_slice(PACK_SIGNATURE);
50        pack.extend_from_slice(&PACK_VERSION.to_be_bytes());
51        pack.extend_from_slice(&(self.objects.len() as u32).to_be_bytes());
52
53        // Write objects
54        for object in &self.objects {
55            Self::write_object(&mut pack, object)?;
56        }
57
58        // Compute and append checksum
59        let mut hasher = Sha1::new();
60        hasher.update(&pack);
61        let checksum = hasher.finalize();
62        pack.extend_from_slice(&checksum);
63
64        Ok(pack)
65    }
66
67    /// Writes a single object entry.
68    fn write_object(pack: &mut Vec<u8>, object: &GitObject) -> Result<()> {
69        let obj_type = object.object_type.pack_type();
70        let size = object.data.len();
71
72        // Write type and size in variable-length encoding
73        // First byte: (MSB=more bytes) (3 bits type) (4 bits size)
74        let mut first_byte = (obj_type << 4) | ((size & 0x0F) as u8);
75        let mut remaining_size = size >> 4;
76
77        if remaining_size > 0 {
78            first_byte |= 0x80; // More bytes follow
79        }
80        pack.push(first_byte);
81
82        // Additional size bytes (7 bits each, MSB=continue)
83        while remaining_size > 0 {
84            let mut byte = (remaining_size & 0x7F) as u8;
85            remaining_size >>= 7;
86            if remaining_size > 0 {
87                byte |= 0x80;
88            }
89            pack.push(byte);
90        }
91
92        // Compress and write data
93        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
94        encoder
95            .write_all(&object.data)
96            .map_err(|e| GitError::InvalidPack(e.to_string()))?;
97        let compressed = encoder
98            .finish()
99            .map_err(|e| GitError::InvalidPack(e.to_string()))?;
100        pack.extend_from_slice(&compressed);
101
102        Ok(())
103    }
104}
105
106impl Default for PackBuilder {
107    fn default() -> Self {
108        Self::new()
109    }
110}
111
112/// Parses a pack file and extracts objects.
113pub struct PackParser<'a> {
114    data: &'a [u8],
115    pos: usize,
116}
117
118impl<'a> PackParser<'a> {
119    /// Creates a new pack parser.
120    pub fn new(data: &'a [u8]) -> Self {
121        Self { data, pos: 0 }
122    }
123
124    /// Parses the pack file and stores objects.
125    pub fn parse(&mut self, store: &ObjectStore) -> Result<Vec<ObjectId>> {
126        // Verify header
127        if self.data.len() < 12 {
128            return Err(GitError::InvalidPack("pack too small".to_string()));
129        }
130
131        if &self.data[0..4] != PACK_SIGNATURE {
132            return Err(GitError::InvalidPack("invalid signature".to_string()));
133        }
134
135        let version = u32::from_be_bytes([self.data[4], self.data[5], self.data[6], self.data[7]]);
136        if version != PACK_VERSION {
137            return Err(GitError::InvalidPack(format!(
138                "unsupported version: {}",
139                version
140            )));
141        }
142
143        let object_count =
144            u32::from_be_bytes([self.data[8], self.data[9], self.data[10], self.data[11]]) as usize;
145
146        self.pos = 12;
147
148        // Parse objects
149        let mut ids = Vec::with_capacity(object_count);
150        for _ in 0..object_count {
151            let id = self.parse_object(store)?;
152            ids.push(id);
153        }
154
155        // Verify checksum (last 20 bytes)
156        let checksum_start = self.data.len() - 20;
157        let mut hasher = Sha1::new();
158        hasher.update(&self.data[..checksum_start]);
159        let computed = hasher.finalize();
160
161        if computed.as_slice() != &self.data[checksum_start..] {
162            return Err(GitError::InvalidPack("checksum mismatch".to_string()));
163        }
164
165        Ok(ids)
166    }
167
168    /// Parses a single object.
169    fn parse_object(&mut self, store: &ObjectStore) -> Result<ObjectId> {
170        if self.pos >= self.data.len() {
171            return Err(GitError::InvalidPack("unexpected end of pack".to_string()));
172        }
173
174        // Read type and size
175        let first_byte = self.data[self.pos];
176        self.pos += 1;
177
178        let obj_type_code = (first_byte >> 4) & 0x07;
179        let mut size = (first_byte & 0x0F) as usize;
180        let mut shift = 4;
181
182        // Read remaining size bytes
183        if first_byte & 0x80 != 0 {
184            loop {
185                if self.pos >= self.data.len() {
186                    return Err(GitError::InvalidPack("unexpected end in size".to_string()));
187                }
188                let byte = self.data[self.pos];
189                self.pos += 1;
190                size |= ((byte & 0x7F) as usize) << shift;
191                shift += 7;
192                if byte & 0x80 == 0 {
193                    break;
194                }
195            }
196        }
197
198        let object_type = ObjectType::from_pack_type(obj_type_code)?;
199
200        // Decompress data
201        let remaining = &self.data[self.pos..self.data.len() - 20]; // Exclude checksum
202        let mut decoder = ZlibDecoder::new(remaining);
203        let mut decompressed = vec![0u8; size];
204        decoder
205            .read_exact(&mut decompressed)
206            .map_err(|e| GitError::InvalidPack(format!("decompression failed: {}", e)))?;
207
208        // Update position based on how much was consumed
209        let consumed = decoder.total_in() as usize;
210        self.pos += consumed;
211
212        // Create and store object
213        let object = GitObject::new(object_type, decompressed);
214        let id = object.id;
215        store.put(object);
216
217        Ok(id)
218    }
219}
220
#[cfg(test)]
mod tests {
    use super::*;

    /// Packs `objects`, parses the resulting pack into `store`, and returns
    /// the IDs reported by the parser.
    fn roundtrip_into(store: &ObjectStore, objects: Vec<GitObject>) -> Vec<ObjectId> {
        let mut builder = PackBuilder::new();
        for object in objects {
            builder.add(object);
        }
        let pack = builder.build().unwrap();
        PackParser::new(&pack).parse(store).unwrap()
    }

    /// Asserts that parsing `pack` into a fresh store is rejected.
    fn assert_parse_fails(pack: &[u8]) {
        let store = ObjectStore::new();
        let result = PackParser::new(pack).parse(&store);
        assert!(result.is_err());
    }

    #[test]
    fn test_pack_roundtrip() {
        // Create some objects
        let blob1 = GitObject::blob(b"Hello, World!".to_vec());
        let blob2 = GitObject::blob(b"Goodbye, World!".to_vec());

        let id1 = blob1.id;
        let id2 = blob2.id;

        // Build the pack and parse it into a new store
        let store = ObjectStore::new();
        let ids = roundtrip_into(&store, vec![blob1, blob2]);

        assert_eq!(ids.len(), 2);
        assert!(ids.contains(&id1));
        assert!(ids.contains(&id2));

        // Verify the content of both objects, not just the first one
        let obj1 = store.get(&id1).unwrap();
        assert_eq!(obj1.data.as_ref(), b"Hello, World!");
        let obj2 = store.get(&id2).unwrap();
        assert_eq!(obj2.data.as_ref(), b"Goodbye, World!");
    }

    #[test]
    fn test_pack_empty() {
        // Empty pack should still have valid header and checksum
        let pack = PackBuilder::new().build().unwrap();

        // Should have header (12 bytes) + checksum (20 bytes)
        assert_eq!(pack.len(), 32);

        // Parse empty pack
        let store = ObjectStore::new();
        let ids = PackParser::new(&pack).parse(&store).unwrap();
        assert!(ids.is_empty());
    }

    #[test]
    fn test_pack_single_object() {
        let blob = GitObject::blob(b"single".to_vec());
        let id = blob.id;

        let store = ObjectStore::new();
        let ids = roundtrip_into(&store, vec![blob]);

        assert_eq!(ids.len(), 1);
        assert_eq!(ids[0], id);
    }

    #[test]
    fn test_pack_all_object_types() {
        // Test blob, tree, commit, and tag
        let objects = vec![
            GitObject::blob(b"blob content".to_vec()),
            GitObject::new(ObjectType::Tree, b"tree content".to_vec()),
            GitObject::new(ObjectType::Commit, b"commit content".to_vec()),
            GitObject::new(ObjectType::Tag, b"tag content".to_vec()),
        ];
        let expected_ids: Vec<_> = objects.iter().map(|o| o.id).collect();

        let store = ObjectStore::new();
        let parsed_ids = roundtrip_into(&store, objects);

        assert_eq!(parsed_ids.len(), 4);
        for id in &expected_ids {
            assert!(parsed_ids.contains(id));
        }
    }

    #[test]
    fn test_pack_large_object() {
        // Test with a large object (1MB)
        let large_data: Vec<u8> = (0..1024 * 1024).map(|i| (i % 256) as u8).collect();
        let blob = GitObject::blob(large_data.clone());
        let id = blob.id;

        let store = ObjectStore::new();
        let ids = roundtrip_into(&store, vec![blob]);

        assert_eq!(ids.len(), 1);
        assert_eq!(ids[0], id);

        let obj = store.get(&id).unwrap();
        assert_eq!(obj.data.len(), large_data.len());
        // Compare content too; plain `assert!` keeps a failure from dumping 1MB.
        let bytes: &[u8] = obj.data.as_ref();
        assert!(bytes == large_data.as_slice());
    }

    #[test]
    fn test_pack_invalid_signature() {
        let mut pack = vec![b'P', b'A', b'C', b'X']; // Wrong signature
        pack.extend_from_slice(&[0, 0, 0, 2]); // Version
        pack.extend_from_slice(&[0, 0, 0, 0]); // Object count
        pack.extend_from_slice(&[0u8; 20]); // Fake checksum

        assert_parse_fails(&pack);
    }

    #[test]
    fn test_pack_invalid_version() {
        let mut pack = b"PACK".to_vec();
        pack.extend_from_slice(&[0, 0, 0, 99]); // Invalid version
        pack.extend_from_slice(&[0, 0, 0, 0]); // Object count
        pack.extend_from_slice(&[0u8; 20]); // Fake checksum

        assert_parse_fails(&pack);
    }

    #[test]
    fn test_pack_too_small() {
        let pack = vec![0u8; 10]; // Too small for header

        assert_parse_fails(&pack);
    }

    #[test]
    fn test_pack_checksum_mismatch() {
        // Build a valid pack
        let mut builder = PackBuilder::new();
        builder.add(GitObject::blob(b"test".to_vec()));
        let mut pack = builder.build().unwrap();

        // Corrupt the checksum
        let len = pack.len();
        pack[len - 1] ^= 0xFF;

        assert_parse_fails(&pack);
    }

    #[test]
    fn test_pack_builder_default() {
        let builder = PackBuilder::default();
        let pack = builder.build().unwrap();
        assert!(!pack.is_empty());
    }

    #[test]
    fn test_pack_add_from_store() {
        let store = ObjectStore::new();
        let blob = GitObject::blob(b"stored".to_vec());
        let id = blob.id;
        store.put(blob);

        let mut builder = PackBuilder::new();
        builder.add_from_store(&store, &id).unwrap();
        let pack = builder.build().unwrap();

        let store2 = ObjectStore::new();
        let ids = PackParser::new(&pack).parse(&store2).unwrap();

        assert_eq!(ids.len(), 1);
        assert_eq!(ids[0], id);
    }

    #[test]
    fn test_pack_many_objects() {
        // Test with many small objects
        let objects: Vec<GitObject> = (0..100)
            .map(|i| GitObject::blob(format!("object {}", i).into_bytes()))
            .collect();
        let expected_ids: Vec<ObjectId> = objects.iter().map(|o| o.id).collect();

        let store = ObjectStore::new();
        let ids = roundtrip_into(&store, objects);

        assert_eq!(ids.len(), 100);
        for id in &expected_ids {
            assert!(ids.contains(id));
        }
    }

    #[test]
    fn test_pack_binary_content() {
        // Test with binary content including null bytes
        let binary_data: Vec<u8> = (0..256).map(|i| i as u8).collect();
        let blob = GitObject::blob(binary_data.clone());
        let id = blob.id;

        let store = ObjectStore::new();
        let ids = roundtrip_into(&store, vec![blob]);

        let obj = store.get(&ids[0]).unwrap();
        assert_eq!(obj.data.as_ref(), binary_data.as_slice());
        assert_eq!(ids[0], id);
    }
}
462
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        /// Property: a single blob survives build -> parse with its ID and
        /// bytes intact.
        #[test]
        fn prop_pack_roundtrip_blob(data in prop::collection::vec(any::<u8>(), 0..10000)) {
            let original = GitObject::blob(data.clone());
            let expected_id = original.id;

            let mut packer = PackBuilder::new();
            packer.add(original);
            let bytes = packer.build().unwrap();

            let target = ObjectStore::new();
            let mut reader = PackParser::new(&bytes);
            let parsed = reader.parse(&target).unwrap();

            prop_assert_eq!(parsed.len(), 1);
            prop_assert_eq!(parsed[0], expected_id);

            let fetched = target.get(&expected_id).unwrap();
            prop_assert_eq!(fetched.data.as_ref(), data.as_slice());
        }

        /// Property: several distinct blobs all come back out of the pack.
        #[test]
        fn prop_pack_roundtrip_multiple(
            blobs in prop::collection::vec(prop::collection::vec(any::<u8>(), 1..1000), 1..20)
        ) {
            // Keep only the first blob for each object ID so the pack holds
            // no duplicates.
            let mut known = std::collections::HashSet::new();
            let mut unique: Vec<GitObject> = Vec::new();
            for payload in blobs.iter() {
                let candidate = GitObject::blob(payload.clone());
                if known.insert(candidate.id) {
                    unique.push(candidate);
                }
            }

            if unique.is_empty() {
                return Ok(());
            }

            let wanted: Vec<ObjectId> = unique.iter().map(|o| o.id).collect();

            let mut packer = PackBuilder::new();
            for object in unique {
                packer.add(object);
            }
            let bytes = packer.build().unwrap();

            let target = ObjectStore::new();
            let mut reader = PackParser::new(&bytes);
            let parsed = reader.parse(&target).unwrap();

            prop_assert_eq!(parsed.len(), wanted.len());
            for id in &wanted {
                prop_assert!(parsed.contains(id));
            }
        }

        /// Property: arbitrary bytes never make the parser panic.
        #[test]
        fn prop_invalid_pack_no_panic(data in prop::collection::vec(any::<u8>(), 0..1000)) {
            let target = ObjectStore::new();
            let mut reader = PackParser::new(&data);
            // Either outcome is acceptable here; only a panic fails the test.
            let _ = reader.parse(&target);
        }

        /// Property: flipping any byte of the trailing checksum is rejected.
        #[test]
        fn prop_corrupted_checksum_detected(
            content in prop::collection::vec(any::<u8>(), 1..1000),
            corrupt_byte in 0u8..20
        ) {
            let mut packer = PackBuilder::new();
            packer.add(GitObject::blob(content));
            let mut bytes = packer.build().unwrap();

            // Pick a byte inside the checksum (last 20 bytes) and invert it.
            let offset_from_end = corrupt_byte as usize % 20;
            let victim = bytes.len() - 1 - offset_from_end;
            bytes[victim] ^= 0xFF;

            let target = ObjectStore::new();
            let mut reader = PackParser::new(&bytes);
            let outcome = reader.parse(&target);
            prop_assert!(outcome.is_err());
        }
    }
}