1use sha1::{Sha1, Digest};
7use flate2::write::ZlibEncoder;
8use flate2::read::ZlibDecoder;
9use flate2::Compression;
10use std::io::{Read, Write};
11
12use crate::object::{ObjectId, ObjectType, GitObject};
13use crate::storage::GitStorage;
14use crate::{Error, Result};
15
/// On-wire object type codes used in packfile entry headers.
///
/// The discriminant is stored in bits 4-6 of the first header byte
/// (see `write_pack_object` / `parse_pack_object`); `#[repr(u8)]` pins
/// each variant to its wire value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum PackObjectType {
    Commit = 1,
    Tree = 2,
    Blob = 3,
    Tag = 4,
}
26
27impl PackObjectType {
28 pub fn from_object_type(t: ObjectType) -> Self {
29 match t {
30 ObjectType::Commit => PackObjectType::Commit,
31 ObjectType::Tree => PackObjectType::Tree,
32 ObjectType::Blob => PackObjectType::Blob,
33 ObjectType::Tag => PackObjectType::Tag,
34 }
35 }
36
37 pub fn to_object_type(self) -> ObjectType {
38 match self {
39 PackObjectType::Commit => ObjectType::Commit,
40 PackObjectType::Tree => ObjectType::Tree,
41 PackObjectType::Blob => ObjectType::Blob,
42 PackObjectType::Tag => ObjectType::Tag,
43 }
44 }
45
46 pub fn from_u8(v: u8) -> Option<Self> {
47 match v {
48 1 => Some(PackObjectType::Commit),
49 2 => Some(PackObjectType::Tree),
50 3 => Some(PackObjectType::Blob),
51 4 => Some(PackObjectType::Tag),
52 _ => None,
53 }
54 }
55}
56
57pub fn generate_packfile(storage: &GitStorage, oids: &[ObjectId]) -> Result<Vec<u8>> {
59 let mut pack = Vec::new();
60
61 pack.extend_from_slice(b"PACK");
63 pack.extend_from_slice(&2u32.to_be_bytes()); pack.extend_from_slice(&(oids.len() as u32).to_be_bytes());
65
66 for oid in oids {
68 let obj = storage.read_object(oid)?;
69 write_pack_object(&mut pack, &obj)?;
70 }
71
72 let mut hasher = Sha1::new();
74 hasher.update(&pack);
75 let checksum = hasher.finalize();
76 pack.extend_from_slice(&checksum);
77
78 Ok(pack)
79}
80
81fn write_pack_object(pack: &mut Vec<u8>, obj: &GitObject) -> Result<()> {
83 let pack_type = PackObjectType::from_object_type(obj.obj_type);
84 let size = obj.content.len();
85
86 let mut c = ((pack_type as u8) << 4) | ((size & 0x0F) as u8);
89 let mut remaining = size >> 4;
90
91 if remaining > 0 {
92 c |= 0x80; }
94 pack.push(c);
95
96 while remaining > 0 {
98 let mut byte = (remaining & 0x7F) as u8;
99 remaining >>= 7;
100 if remaining > 0 {
101 byte |= 0x80;
102 }
103 pack.push(byte);
104 }
105
106 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
108 encoder.write_all(&obj.content)?;
109 let compressed = encoder.finish()?;
110 pack.extend_from_slice(&compressed);
111
112 Ok(())
113}
114
115pub fn parse_packfile(storage: &GitStorage, data: &[u8]) -> Result<Vec<ObjectId>> {
117 if data.len() < 20 {
118 return Err(Error::PackError("packfile too small".into()));
119 }
120
121 if &data[0..4] != b"PACK" {
123 return Err(Error::PackError("invalid packfile magic".into()));
124 }
125
126 let version = u32::from_be_bytes([data[4], data[5], data[6], data[7]]);
127 if version != 2 {
128 return Err(Error::PackError(format!("unsupported pack version: {}", version)));
129 }
130
131 let object_count = u32::from_be_bytes([data[8], data[9], data[10], data[11]]);
132
133 let checksum_start = data.len() - 20;
135 let mut hasher = Sha1::new();
136 hasher.update(&data[..checksum_start]);
137 let computed = hasher.finalize();
138 if &computed[..] != &data[checksum_start..] {
139 return Err(Error::PackError("checksum mismatch".into()));
140 }
141
142 let mut pos = 12; let mut oids = Vec::with_capacity(object_count as usize);
145
146 for _ in 0..object_count {
147 let (obj, bytes_consumed) = parse_pack_object(&data[pos..checksum_start])?;
148 pos += bytes_consumed;
149
150 let oid = storage.write_object(&obj)?;
151 oids.push(oid);
152 }
153
154 Ok(oids)
155}
156
157fn parse_pack_object(data: &[u8]) -> Result<(GitObject, usize)> {
159 let mut pos = 0;
160
161 let first_byte = data[pos];
163 pos += 1;
164
165 let type_bits = (first_byte >> 4) & 0x07;
166 let pack_type = PackObjectType::from_u8(type_bits)
167 .ok_or_else(|| Error::PackError(format!("unsupported object type: {}", type_bits)))?;
168
169 let mut size = (first_byte & 0x0F) as usize;
170 let mut shift = 4;
171
172 if first_byte & 0x80 != 0 {
174 loop {
175 if pos >= data.len() {
176 return Err(Error::PackError("truncated size".into()));
177 }
178 let byte = data[pos];
179 pos += 1;
180 size |= ((byte & 0x7F) as usize) << shift;
181 shift += 7;
182 if byte & 0x80 == 0 {
183 break;
184 }
185 }
186 }
187
188 let mut decoder = ZlibDecoder::new(&data[pos..]);
190 let mut content = vec![0u8; size];
191 decoder.read_exact(&mut content)?;
192
193 let compressed_size = decoder.total_in() as usize;
195 pos += compressed_size;
196
197 let obj = GitObject::new(pack_type.to_object_type(), content);
198 Ok((obj, pos))
199}
200
/// Builds a packfile from a want/have negotiation: objects reachable
/// from the wanted tips are packed, stopping at objects the receiver
/// already has.
pub struct PackBuilder<'a> {
    storage: &'a GitStorage,
    // Tip objects the receiver asked for (roots of the reachability walk).
    want: Vec<ObjectId>,
    // Objects the receiver already has; the walk stops at these.
    have: Vec<ObjectId>,
}
210
211impl<'a> PackBuilder<'a> {
212 pub fn new(storage: &'a GitStorage) -> Self {
213 Self {
214 storage,
215 want: Vec::new(),
216 have: Vec::new(),
217 }
218 }
219
220 pub fn want(&mut self, oid: ObjectId) {
221 self.want.push(oid);
222 }
223
224 pub fn have(&mut self, oid: ObjectId) {
225 self.have.push(oid);
226 }
227
228 pub fn build(self) -> Result<Vec<u8>> {
230 let mut needed = std::collections::HashSet::new();
231 let have_set: std::collections::HashSet<_> = self.have.iter().copied().collect();
232
233 for oid in &self.want {
235 Self::walk_object_static(self.storage, *oid, &have_set, &mut needed)?;
236 }
237
238 let oids: Vec<_> = needed.into_iter().collect();
240 generate_packfile(self.storage, &oids)
241 }
242
243 fn walk_object_static(
245 storage: &GitStorage,
246 oid: ObjectId,
247 have: &std::collections::HashSet<ObjectId>,
248 needed: &mut std::collections::HashSet<ObjectId>,
249 ) -> Result<()> {
250 if have.contains(&oid) || needed.contains(&oid) {
251 return Ok(());
252 }
253
254 if !storage.has_object(&oid)? {
255 return Ok(()); }
257
258 needed.insert(oid);
259
260 let obj = storage.read_object(&oid)?;
261
262 match obj.obj_type {
263 ObjectType::Commit => {
264 let content = String::from_utf8_lossy(&obj.content);
266 for line in content.lines() {
267 if let Some(tree_hex) = line.strip_prefix("tree ") {
268 if let Some(tree_oid) = ObjectId::from_hex(tree_hex.trim()) {
269 Self::walk_object_static(storage, tree_oid, have, needed)?;
270 }
271 } else if let Some(parent_hex) = line.strip_prefix("parent ") {
272 if let Some(parent_oid) = ObjectId::from_hex(parent_hex.trim()) {
273 Self::walk_object_static(storage, parent_oid, have, needed)?;
274 }
275 } else if line.is_empty() {
276 break; }
278 }
279 }
280 ObjectType::Tree => {
281 let entries = crate::object::parse_tree(&obj.content)?;
283 for entry in entries {
284 Self::walk_object_static(storage, entry.oid, have, needed)?;
285 }
286 }
287 ObjectType::Tag => {
288 let content = String::from_utf8_lossy(&obj.content);
290 for line in content.lines() {
291 if let Some(obj_hex) = line.strip_prefix("object ") {
292 if let Some(obj_oid) = ObjectId::from_hex(obj_hex.trim()) {
293 Self::walk_object_static(storage, obj_oid, have, needed)?;
294 }
295 }
296 }
297 }
298 ObjectType::Blob => {
299 }
301 }
302
303 Ok(())
304 }
305}
306
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Round-trips two blobs through generate/parse and checks they
    /// land intact in a second, independent store.
    #[test]
    fn test_packfile_roundtrip() {
        let src_dir = tempdir().unwrap();
        let src = GitStorage::open(src_dir.path().join("git")).unwrap();

        let hello = src.write_blob(b"hello").unwrap();
        let world = src.write_blob(b"world").unwrap();

        let pack = generate_packfile(&src, &[hello, world]).unwrap();
        assert_eq!(&pack[0..4], b"PACK");

        let dst_dir = tempdir().unwrap();
        let dst = GitStorage::open(dst_dir.path().join("git")).unwrap();
        let unpacked = parse_packfile(&dst, &pack).unwrap();

        assert_eq!(unpacked.len(), 2);
        assert!(dst.has_object(&hello).unwrap());
        assert!(dst.has_object(&world).unwrap());
    }
}
336}