git_plumber/git/pack/
object.rs

1use flate2::read::ZlibDecoder;
2use nom::{
3    IResult,
4    error::{Error, ErrorKind},
5};
6use std::fmt;
7use std::io::Read;
8
9use crate::git::pack::PackError;
10use crate::git::pack::delta;
11
/// Object type tag carried in the 3-bit type field of a pack entry header.
///
/// The discriminant of each variant is the numeric value stored in the header.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ObjectType {
    /// Type 0: not a valid object type.
    Invalid = 0,
    /// Type 1: commit object.
    Commit = 1,
    /// Type 2: tree object.
    Tree = 2,
    /// Type 3: blob object.
    Blob = 3,
    /// Type 4: tag object.
    Tag = 4,
    /// Type 5: reserved for future expansion.
    Reserved = 5,
    /// Type 6: delta whose base is identified by an offset.
    OfsDelta = 6,
    /// Type 7: delta whose base is identified by a 20-byte object id.
    RefDelta = 7,
}
23
24impl TryFrom<u8> for ObjectType {
25    type Error = PackError;
26
27    fn try_from(value: u8) -> Result<Self, Self::Error> {
28        match value {
29            0 => Ok(ObjectType::Invalid),
30            1 => Ok(ObjectType::Commit),
31            2 => Ok(ObjectType::Tree),
32            3 => Ok(ObjectType::Blob),
33            4 => Ok(ObjectType::Tag),
34            5 => Ok(ObjectType::Reserved),
35            6 => Ok(ObjectType::OfsDelta),
36            7 => Ok(ObjectType::RefDelta),
37            _ => Err(PackError::InvalidObjectType(value)),
38        }
39    }
40}
41
42impl fmt::Display for ObjectType {
43    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
44        write!(
45            f,
46            "{}",
47            match self {
48                Self::Invalid => "invalid",
49                Self::Commit => "commit",
50                Self::Tree => "tree",
51                Self::Blob => "blob",
52                Self::Tag => "tag",
53                Self::Reserved => "reserved",
54                Self::OfsDelta => "ofs_delta",
55                Self::RefDelta => "ref_delta",
56            }
57        )
58    }
59}
60
61#[derive(Debug, Clone, PartialEq, Eq)]
62pub enum ObjectHeader {
63    Regular {
64        obj_type: ObjectType, // Commit, Tree, Blob, Tag
65        uncompressed_data_size: usize,
66        raw_data: Vec<u8>,
67    },
68    OfsDelta {
69        uncompressed_data_size: usize,
70        base_offset: i64,
71        raw_data: Vec<u8>,
72    },
73    RefDelta {
74        uncompressed_data_size: usize,
75        base_ref: [u8; 20],
76        raw_data: Vec<u8>,
77    },
78}
79
80impl ObjectHeader {
81    // Helper method to get the object type
82    pub fn obj_type(&self) -> ObjectType {
83        match self {
84            Self::Regular { obj_type, .. } => *obj_type,
85            Self::OfsDelta { .. } => ObjectType::OfsDelta,
86            Self::RefDelta { .. } => ObjectType::RefDelta,
87        }
88    }
89
90    // Helper method to get the uncompressed size
91    pub fn uncompressed_data_size(&self) -> usize {
92        match self {
93            Self::Regular {
94                uncompressed_data_size,
95                ..
96            }
97            | Self::OfsDelta {
98                uncompressed_data_size,
99                ..
100            }
101            | Self::RefDelta {
102                uncompressed_data_size,
103                ..
104            } => *uncompressed_data_size,
105        }
106    }
107
108    // Helper method to get the raw header data
109    pub fn raw_data(&self) -> &[u8] {
110        match self {
111            Self::Regular { raw_data, .. }
112            | Self::OfsDelta { raw_data, .. }
113            | Self::RefDelta { raw_data, .. } => raw_data,
114        }
115    }
116
117    /// Every byte of the header has its Most Significant Bit used as
118    /// a continuation bit:
119    /// 0 -> this is the last byte
120    /// 1 -> there is the next byte
121    ///
122    /// After the continuation bit in the first byte there are 3 bits for the type.
123    /// Type 5 is reserved for future expansion. Type 0 is invalid.
124    pub fn parse(input: &[u8]) -> IResult<&[u8], Self> {
125        let original_input = input;
126        let mut i = 0;
127
128        // Check if we have at least one byte
129        if i >= input.len() {
130            return Err(nom::Err::Incomplete(nom::Needed::new(1)));
131        }
132
133        // First byte special handling
134        let first_byte = input[i];
135        i += 1;
136
137        let obj_type: ObjectType = ((first_byte >> 4) & 0x7).try_into().unwrap();
138        let mut size = (first_byte & 0x0F) as usize;
139
140        // If MSB is set, we have more bytes for the size
141        if first_byte & 0x80 != 0 {
142            let mut shift = 4; // We already have 4 bits from the first byte
143
144            // Process additional bytes
145            loop {
146                if i >= input.len() {
147                    return Err(nom::Err::Incomplete(nom::Needed::new(1)));
148                }
149
150                let byte = input[i];
151                i += 1;
152
153                // Add the 7 least significant bits to our size, shifted appropriately
154                size |= ((byte & 0x7F) as usize) << shift;
155                shift += 7;
156
157                // If MSB is 0, we're done
158                if byte & 0x80 == 0 {
159                    break;
160                }
161            }
162        }
163
164        // Handle delta objects
165        let header = match obj_type {
166            ObjectType::OfsDelta => {
167                // Parse variable-length offset encoding (see git packfile format)
168                let mut offset: u64 = 0;
169                let mut c: u8;
170                loop {
171                    if i >= input.len() {
172                        return Err(nom::Err::Incomplete(nom::Needed::new(1)));
173                    }
174                    c = input[i];
175                    i += 1;
176                    offset = (offset << 7) | (u64::from(c) & 0x7F);
177                    if c & 0x80 == 0 {
178                        break;
179                    }
180                }
181                // Calculate header size and store raw data
182                let header_size = i;
183                let raw_data = original_input[..header_size].to_vec();
184
185                // The offset is stored as the distance backwards from the current object's header
186                Self::OfsDelta {
187                    uncompressed_data_size: size,
188                    base_offset: offset as i64,
189                    raw_data,
190                }
191            }
192            ObjectType::RefDelta => {
193                if i + 20 > input.len() {
194                    return Err(nom::Err::Incomplete(nom::Needed::new(20)));
195                }
196                // Read the 20-byte base object SHA-1
197                let mut ref_bytes = [0u8; 20];
198                ref_bytes.copy_from_slice(&input[i..i + 20]);
199                i += 20;
200
201                // Calculate header size and store raw data
202                let header_size = i;
203                let raw_data = original_input[..header_size].to_vec();
204
205                Self::RefDelta {
206                    uncompressed_data_size: size,
207                    base_ref: ref_bytes,
208                    raw_data,
209                }
210            }
211            _ => {
212                // Calculate header size and store raw data for regular objects
213                let header_size = i;
214                let raw_data = original_input[..header_size].to_vec();
215
216                Self::Regular {
217                    obj_type,
218                    uncompressed_data_size: size,
219                    raw_data,
220                }
221            }
222        };
223
224        Ok((&input[i..], header))
225    }
226}
227
228#[derive(Debug, Clone)]
229pub struct Object {
230    pub header: ObjectHeader,
231    pub uncompressed_data: Vec<u8>,
232    pub compressed_data: Vec<u8>, // Raw compressed bytes
233    pub compressed_size: usize,   // Size of the compressed data
234    pub data_offset: usize,       // Where compressed data begins
235}
236
237impl Object {
238    pub fn parse(input: &[u8]) -> IResult<&[u8], Self> {
239        let (input, header) = ObjectHeader::parse(input)?;
240        let pre_parse_input_size = input.len();
241        let (remaining_input, data) = Self::parse_data(input, header.uncompressed_data_size())?;
242        let compressed_size = pre_parse_input_size - remaining_input.len();
243
244        // Store the compressed data bytes (before they were consumed by parse_data)
245        let compressed_data = input[..compressed_size].to_vec();
246
247        // If this is a delta object, parse and display the delta instructions
248        let obj_type = header.obj_type();
249        let uncompressed_data =
250            if obj_type == ObjectType::OfsDelta || obj_type == ObjectType::RefDelta {
251                delta::parse_delta_object(&data)
252            } else {
253                data
254            };
255
256        Ok((
257            remaining_input,
258            Self {
259                header,
260                uncompressed_data,
261                compressed_data,
262                compressed_size,
263                data_offset: 0,
264            },
265        ))
266    }
267
268    /// Parses the compressed object data.
269    /// Returns the decompressed data and the remaining input.
270    /// The input should start with a zlib header (0x78).
271    fn parse_data(input: &[u8], max_size: usize) -> IResult<&[u8], Vec<u8>> {
272        // Check for zlib header
273        if input.is_empty() || input[0] != 0x78 {
274            return Err(nom::Err::Error(Error::new(input, ErrorKind::Tag)));
275        }
276
277        // Create a decoder
278        let mut decoder = ZlibDecoder::new(input);
279        let mut decompressed = Vec::with_capacity(max_size);
280
281        // Read all decompressed data
282        match decoder.read_to_end(&mut decompressed) {
283            Ok(_) => {
284                // Get the number of bytes consumed by the decoder
285                let consumed = usize::try_from(decoder.total_in()).unwrap();
286                Ok((&input[consumed..], decompressed))
287            }
288            Err(_) => Err(nom::Err::Error(Error::new(input, ErrorKind::Tag))),
289        }
290    }
291}
292
293impl fmt::Display for Object {
294    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
295        let obj_type = self.header.obj_type();
296        writeln!(f, "Object type: {obj_type}")?;
297        writeln!(f, "Object size: {}", self.header.uncompressed_data_size())?;
298        writeln!(f, "Object compressed size: {}", self.compressed_size)?;
299
300        if obj_type == ObjectType::OfsDelta || obj_type == ObjectType::RefDelta {
301            if let Ok((_, instructions)) = delta::parse_delta_instructions(&self.uncompressed_data)
302            {
303                writeln!(f, "Delta instructions:")?;
304                for (i, instruction) in instructions.iter().enumerate() {
305                    writeln!(f, "  {}. {}", i + 1, instruction)?;
306                }
307            }
308        } else {
309            writeln!(
310                f,
311                "Object data: {:?}",
312                String::from_utf8_lossy(&self.uncompressed_data)
313            )?;
314        }
315        Ok(())
316    }
317}
318
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// A two-byte commit header followed by the first byte of a zlib stream.
    #[test]
    fn parse_object_header() {
        // 0x9e = 1001 1110
        //   1    - continuation bit (another size byte follows)
        //   001  - object type (commit)
        //   1110 - 4 least significant bits of the size
        // 0x0e = 0000 1110
        //   0        - continuation bit (last size byte)
        //   000 1110 - next 7 bits of the size
        // 0x78 - not part of the header (first byte of the zlib stream)
        //
        // size = 0b1110 | (0b000_1110 << 4) = 238
        let data = &[0x9e, 0x0e, 0x78];

        let (_, header) = ObjectHeader::parse(data).unwrap();
        if let ObjectHeader::Regular {
            obj_type,
            uncompressed_data_size,
            raw_data,
        } = header
        {
            assert_eq!(obj_type, ObjectType::Commit);
            assert_eq!(uncompressed_data_size, 238);
            // Only the two header bytes are kept as raw data.
            assert_eq!(raw_data.len(), 2);
            assert_eq!(raw_data, vec![0x9e, 0x0e]);
        } else {
            panic!("Expected Regular header variant");
        }
    }

    /// Round-trips a small payload through zlib and `Object::parse_data`.
    #[test]
    fn parse_object_data() {
        // TODO: use a real object
        let test_data = b"Hello, World!";
        let compressed = {
            let mut encoder =
                flate2::write::ZlibEncoder::new(Vec::new(), flate2::Compression::default());
            encoder.write_all(test_data).unwrap();
            encoder.finish().unwrap()
        };

        let (remaining, decompressed) = Object::parse_data(&compressed, test_data.len()).unwrap();

        // The payload survives the round trip and the whole stream is consumed.
        assert_eq!(decompressed, test_data);
        assert!(remaining.is_empty());
    }
}
369}