1use crate::{GitError, Result};
7use flate2::read::ZlibDecoder;
8use flate2::write::ZlibEncoder;
9use flate2::Compression;
10use guts_storage::{GitObject, ObjectId, ObjectStore, ObjectType};
11use sha1::{Digest, Sha1};
12use std::io::{Read, Write};
13
/// Four-byte magic value written at the very start of every pack and
/// required by the parser before anything else is read.
const PACK_SIGNATURE: &[u8; 4] = b"PACK";
/// The only pack format version this module writes and accepts.
const PACK_VERSION: u32 = 2;
18
19pub struct PackBuilder {
21 objects: Vec<GitObject>,
22}
23
24impl PackBuilder {
25 pub fn new() -> Self {
27 Self {
28 objects: Vec::new(),
29 }
30 }
31
32 pub fn add(&mut self, object: GitObject) {
34 self.objects.push(object);
35 }
36
37 pub fn add_from_store(&mut self, store: &ObjectStore, id: &ObjectId) -> Result<()> {
39 let object = store.get(id)?;
40 self.objects.push(object);
41 Ok(())
42 }
43
44 pub fn build(self) -> Result<Vec<u8>> {
46 let mut pack = Vec::new();
47
48 pack.extend_from_slice(PACK_SIGNATURE);
50 pack.extend_from_slice(&PACK_VERSION.to_be_bytes());
51 pack.extend_from_slice(&(self.objects.len() as u32).to_be_bytes());
52
53 for object in &self.objects {
55 Self::write_object(&mut pack, object)?;
56 }
57
58 let mut hasher = Sha1::new();
60 hasher.update(&pack);
61 let checksum = hasher.finalize();
62 pack.extend_from_slice(&checksum);
63
64 Ok(pack)
65 }
66
67 fn write_object(pack: &mut Vec<u8>, object: &GitObject) -> Result<()> {
69 let obj_type = object.object_type.pack_type();
70 let size = object.data.len();
71
72 let mut first_byte = (obj_type << 4) | ((size & 0x0F) as u8);
75 let mut remaining_size = size >> 4;
76
77 if remaining_size > 0 {
78 first_byte |= 0x80; }
80 pack.push(first_byte);
81
82 while remaining_size > 0 {
84 let mut byte = (remaining_size & 0x7F) as u8;
85 remaining_size >>= 7;
86 if remaining_size > 0 {
87 byte |= 0x80;
88 }
89 pack.push(byte);
90 }
91
92 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
94 encoder
95 .write_all(&object.data)
96 .map_err(|e| GitError::InvalidPack(e.to_string()))?;
97 let compressed = encoder
98 .finish()
99 .map_err(|e| GitError::InvalidPack(e.to_string()))?;
100 pack.extend_from_slice(&compressed);
101
102 Ok(())
103 }
104}
105
106impl Default for PackBuilder {
107 fn default() -> Self {
108 Self::new()
109 }
110}
111
/// Cursor-based reader that decodes a pack held entirely in memory.
pub struct PackParser<'a> {
    /// The complete pack bytes, including header and trailing checksum.
    data: &'a [u8],
    /// Offset into `data` of the next byte to be consumed.
    pos: usize,
}
117
118impl<'a> PackParser<'a> {
119 pub fn new(data: &'a [u8]) -> Self {
121 Self { data, pos: 0 }
122 }
123
124 pub fn parse(&mut self, store: &ObjectStore) -> Result<Vec<ObjectId>> {
126 if self.data.len() < 12 {
128 return Err(GitError::InvalidPack("pack too small".to_string()));
129 }
130
131 if &self.data[0..4] != PACK_SIGNATURE {
132 return Err(GitError::InvalidPack("invalid signature".to_string()));
133 }
134
135 let version = u32::from_be_bytes([self.data[4], self.data[5], self.data[6], self.data[7]]);
136 if version != PACK_VERSION {
137 return Err(GitError::InvalidPack(format!(
138 "unsupported version: {}",
139 version
140 )));
141 }
142
143 let object_count =
144 u32::from_be_bytes([self.data[8], self.data[9], self.data[10], self.data[11]]) as usize;
145
146 self.pos = 12;
147
148 let mut ids = Vec::with_capacity(object_count);
150 for _ in 0..object_count {
151 let id = self.parse_object(store)?;
152 ids.push(id);
153 }
154
155 let checksum_start = self.data.len() - 20;
157 let mut hasher = Sha1::new();
158 hasher.update(&self.data[..checksum_start]);
159 let computed = hasher.finalize();
160
161 if computed.as_slice() != &self.data[checksum_start..] {
162 return Err(GitError::InvalidPack("checksum mismatch".to_string()));
163 }
164
165 Ok(ids)
166 }
167
168 fn parse_object(&mut self, store: &ObjectStore) -> Result<ObjectId> {
170 if self.pos >= self.data.len() {
171 return Err(GitError::InvalidPack("unexpected end of pack".to_string()));
172 }
173
174 let first_byte = self.data[self.pos];
176 self.pos += 1;
177
178 let obj_type_code = (first_byte >> 4) & 0x07;
179 let mut size = (first_byte & 0x0F) as usize;
180 let mut shift = 4;
181
182 if first_byte & 0x80 != 0 {
184 loop {
185 if self.pos >= self.data.len() {
186 return Err(GitError::InvalidPack("unexpected end in size".to_string()));
187 }
188 let byte = self.data[self.pos];
189 self.pos += 1;
190 size |= ((byte & 0x7F) as usize) << shift;
191 shift += 7;
192 if byte & 0x80 == 0 {
193 break;
194 }
195 }
196 }
197
198 let object_type = ObjectType::from_pack_type(obj_type_code)?;
199
200 let remaining = &self.data[self.pos..self.data.len() - 20]; let mut decoder = ZlibDecoder::new(remaining);
203 let mut decompressed = vec![0u8; size];
204 decoder
205 .read_exact(&mut decompressed)
206 .map_err(|e| GitError::InvalidPack(format!("decompression failed: {}", e)))?;
207
208 let consumed = decoder.total_in() as usize;
210 self.pos += consumed;
211
212 let object = GitObject::new(object_type, decompressed);
214 let id = object.id;
215 store.put(object);
216
217 Ok(id)
218 }
219}
220
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a pack containing `objects` in the given order.
    fn build_pack(objects: Vec<GitObject>) -> Vec<u8> {
        let mut builder = PackBuilder::new();
        for object in objects {
            builder.add(object);
        }
        builder.build().unwrap()
    }

    /// Parses `pack` into a fresh store and returns both the store and the
    /// parse outcome, so callers can inspect either.
    fn parse_pack(pack: &[u8]) -> (ObjectStore, Result<Vec<ObjectId>>) {
        let store = ObjectStore::new();
        let outcome = PackParser::new(pack).parse(&store);
        (store, outcome)
    }

    #[test]
    fn test_pack_roundtrip() {
        let blob1 = GitObject::blob(b"Hello, World!".to_vec());
        let blob2 = GitObject::blob(b"Goodbye, World!".to_vec());

        let id1 = blob1.id;
        let id2 = blob2.id;

        let pack = build_pack(vec![blob1, blob2]);

        let (store, outcome) = parse_pack(&pack);
        let ids = outcome.unwrap();

        assert_eq!(ids.len(), 2);
        assert!(ids.contains(&id1));
        assert!(ids.contains(&id2));

        // Both payloads must survive the round trip, not just the first.
        let obj1 = store.get(&id1).unwrap();
        assert_eq!(obj1.data.as_ref(), b"Hello, World!");
        let obj2 = store.get(&id2).unwrap();
        assert_eq!(obj2.data.as_ref(), b"Goodbye, World!");
    }

    #[test]
    fn test_pack_empty() {
        let pack = PackBuilder::new().build().unwrap();

        // 12-byte header followed by the 20-byte checksum.
        assert_eq!(pack.len(), 32);

        let (_store, outcome) = parse_pack(&pack);
        assert!(outcome.unwrap().is_empty());
    }

    #[test]
    fn test_pack_single_object() {
        let blob = GitObject::blob(b"single".to_vec());
        let id = blob.id;

        let pack = build_pack(vec![blob]);

        let (_store, outcome) = parse_pack(&pack);
        let ids = outcome.unwrap();

        assert_eq!(ids.len(), 1);
        assert_eq!(ids[0], id);
    }

    #[test]
    fn test_pack_all_object_types() {
        let blob = GitObject::blob(b"blob content".to_vec());
        let tree = GitObject::new(ObjectType::Tree, b"tree content".to_vec());
        let commit = GitObject::new(ObjectType::Commit, b"commit content".to_vec());
        let tag = GitObject::new(ObjectType::Tag, b"tag content".to_vec());

        let ids: Vec<_> = [&blob, &tree, &commit, &tag].iter().map(|o| o.id).collect();

        let pack = build_pack(vec![blob, tree, commit, tag]);

        let (_store, outcome) = parse_pack(&pack);
        let parsed_ids = outcome.unwrap();

        assert_eq!(parsed_ids.len(), 4);
        for id in &ids {
            assert!(parsed_ids.contains(id));
        }
    }

    #[test]
    fn test_pack_large_object() {
        // 1 MiB of a repeating byte pattern exercises the multi-byte size header.
        let large_data: Vec<u8> = (0..1024 * 1024).map(|i| (i % 256) as u8).collect();
        let blob = GitObject::blob(large_data.clone());
        let id = blob.id;

        let pack = build_pack(vec![blob]);

        let (store, outcome) = parse_pack(&pack);
        let ids = outcome.unwrap();

        assert_eq!(ids.len(), 1);
        assert_eq!(ids[0], id);

        let obj = store.get(&id).unwrap();
        assert_eq!(obj.data.len(), large_data.len());
    }

    #[test]
    fn test_pack_invalid_signature() {
        let mut pack = vec![b'P', b'A', b'C', b'X'];
        pack.extend_from_slice(&[0, 0, 0, 2]); // version
        pack.extend_from_slice(&[0, 0, 0, 0]); // object count
        pack.extend_from_slice(&[0u8; 20]); // checksum placeholder

        let (_store, outcome) = parse_pack(&pack);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_pack_invalid_version() {
        let mut pack = b"PACK".to_vec();
        pack.extend_from_slice(&[0, 0, 0, 99]); // unsupported version
        pack.extend_from_slice(&[0, 0, 0, 0]); // object count
        pack.extend_from_slice(&[0u8; 20]); // checksum placeholder

        let (_store, outcome) = parse_pack(&pack);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_pack_too_small() {
        let pack = vec![0u8; 10];

        let (_store, outcome) = parse_pack(&pack);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_pack_checksum_mismatch() {
        let blob = GitObject::blob(b"test".to_vec());
        let mut pack = build_pack(vec![blob]);

        // Flip every bit of the final checksum byte.
        let len = pack.len();
        pack[len - 1] ^= 0xFF;

        let (_store, outcome) = parse_pack(&pack);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_pack_builder_default() {
        let builder = PackBuilder::default();
        let pack = builder.build().unwrap();
        assert!(!pack.is_empty());
    }

    #[test]
    fn test_pack_add_from_store() {
        let store = ObjectStore::new();
        let blob = GitObject::blob(b"stored".to_vec());
        let id = blob.id;
        store.put(blob);

        let mut builder = PackBuilder::new();
        builder.add_from_store(&store, &id).unwrap();
        let pack = builder.build().unwrap();

        let (_store2, outcome) = parse_pack(&pack);
        let ids = outcome.unwrap();

        assert_eq!(ids.len(), 1);
        assert_eq!(ids[0], id);
    }

    #[test]
    fn test_pack_many_objects() {
        let mut objects = Vec::new();
        let mut expected_ids = Vec::new();

        for i in 0..100 {
            let blob = GitObject::blob(format!("object {}", i).into_bytes());
            expected_ids.push(blob.id);
            objects.push(blob);
        }

        let pack = build_pack(objects);

        let (_store, outcome) = parse_pack(&pack);
        let ids = outcome.unwrap();

        assert_eq!(ids.len(), 100);
        for id in &expected_ids {
            assert!(ids.contains(id));
        }
    }

    #[test]
    fn test_pack_binary_content() {
        // Every possible byte value once.
        let binary_data: Vec<u8> = (0..256).map(|i| i as u8).collect();
        let blob = GitObject::blob(binary_data.clone());
        let id = blob.id;

        let pack = build_pack(vec![blob]);

        let (store, outcome) = parse_pack(&pack);
        let ids = outcome.unwrap();

        let obj = store.get(&ids[0]).unwrap();
        assert_eq!(obj.data.as_ref(), binary_data.as_slice());
        assert_eq!(ids[0], id);
    }
}
462
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    /// Serializes `objects` into a pack, panicking if the builder fails.
    fn pack_of(objects: Vec<GitObject>) -> Vec<u8> {
        let mut builder = PackBuilder::new();
        for object in objects {
            builder.add(object);
        }
        builder.build().unwrap()
    }

    proptest! {
        // Any blob survives a build/parse round trip with id and content intact.
        #[test]
        fn prop_pack_roundtrip_blob(data in prop::collection::vec(any::<u8>(), 0..10000)) {
            let blob = GitObject::blob(data.clone());
            let id = blob.id;

            let pack = pack_of(vec![blob]);

            let store = ObjectStore::new();
            let ids = PackParser::new(&pack).parse(&store).unwrap();

            prop_assert_eq!(ids.len(), 1);
            prop_assert_eq!(ids[0], id);

            let stored = store.get(&id).unwrap();
            prop_assert_eq!(stored.data.as_ref(), data.as_slice());
        }

        // Several distinct blobs all come back out of one pack.
        #[test]
        fn prop_pack_roundtrip_multiple(
            blobs in prop::collection::vec(prop::collection::vec(any::<u8>(), 1..1000), 1..20)
        ) {
            // Keep only the first blob for each id so duplicates are packed once.
            let mut seen_ids = std::collections::HashSet::new();
            let mut objects = Vec::new();
            for data in &blobs {
                let candidate = GitObject::blob(data.clone());
                if seen_ids.insert(candidate.id) {
                    objects.push(candidate);
                }
            }

            if objects.is_empty() {
                return Ok(());
            }

            let expected_ids: Vec<ObjectId> = objects.iter().map(|o| o.id).collect();

            let pack = pack_of(objects);

            let store = ObjectStore::new();
            let ids = PackParser::new(&pack).parse(&store).unwrap();

            prop_assert_eq!(ids.len(), expected_ids.len());
            for id in &expected_ids {
                prop_assert!(ids.contains(id));
            }
        }

        // Arbitrary bytes may be rejected, but parsing must never panic.
        #[test]
        fn prop_invalid_pack_no_panic(data in prop::collection::vec(any::<u8>(), 0..1000)) {
            let store = ObjectStore::new();
            let _ = PackParser::new(&data).parse(&store);
        }

        // Flipping any single byte of the trailing checksum is detected.
        #[test]
        fn prop_corrupted_checksum_detected(
            content in prop::collection::vec(any::<u8>(), 1..1000),
            corrupt_byte in 0u8..20
        ) {
            let mut pack = pack_of(vec![GitObject::blob(content)]);

            // Offset measured backwards from the final byte, within the 20-byte trailer.
            let offset_from_end = usize::from(corrupt_byte) % 20;
            let target = pack.len() - 1 - offset_from_end;
            pack[target] ^= 0xFF;

            let store = ObjectStore::new();
            let outcome = PackParser::new(&pack).parse(&store);
            prop_assert!(outcome.is_err());
        }
    }
}