guts_storage/
object.rs

1//! Git object types and utilities.
2
3use crate::{Result, StorageError};
4use bytes::Bytes;
5use serde::{Deserialize, Deserializer, Serialize, Serializer};
6use sha1::{Digest, Sha1};
7use std::fmt;
8
/// Identifier of a git object: the 20 raw bytes of a SHA-1 digest.
///
/// The value is cheap to copy; equality and hashing operate on the raw bytes.
#[derive(Copy, Clone, Eq, PartialEq, Hash)]
pub struct ObjectId([u8; 20]);
12
13impl Serialize for ObjectId {
14    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
15    where
16        S: Serializer,
17    {
18        serializer.serialize_str(&self.to_hex())
19    }
20}
21
22impl<'de> Deserialize<'de> for ObjectId {
23    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
24    where
25        D: Deserializer<'de>,
26    {
27        let s = String::deserialize(deserializer)?;
28        ObjectId::from_hex(&s).map_err(serde::de::Error::custom)
29    }
30}
31
32impl ObjectId {
33    /// Creates an ObjectId from raw bytes.
34    pub fn from_bytes(bytes: [u8; 20]) -> Self {
35        Self(bytes)
36    }
37
38    /// Creates an ObjectId from a hex string.
39    pub fn from_hex(hex: &str) -> Result<Self> {
40        if hex.len() != 40 {
41            return Err(StorageError::InvalidObject(format!(
42                "invalid object id length: {}",
43                hex.len()
44            )));
45        }
46        let mut bytes = [0u8; 20];
47        hex::decode_to_slice(hex, &mut bytes)
48            .map_err(|e| StorageError::InvalidObject(e.to_string()))?;
49        Ok(Self(bytes))
50    }
51
52    /// Returns the raw bytes.
53    pub fn as_bytes(&self) -> &[u8; 20] {
54        &self.0
55    }
56
57    /// Returns the hex representation.
58    pub fn to_hex(&self) -> String {
59        hex::encode(self.0)
60    }
61
62    /// Computes the SHA-1 hash of data with a git object header.
63    pub fn hash_object(object_type: ObjectType, data: &[u8]) -> Self {
64        let header = format!("{} {}\0", object_type.as_str(), data.len());
65        let mut hasher = Sha1::new();
66        hasher.update(header.as_bytes());
67        hasher.update(data);
68        let result = hasher.finalize();
69        let mut bytes = [0u8; 20];
70        bytes.copy_from_slice(&result);
71        Self(bytes)
72    }
73}
74
75impl fmt::Debug for ObjectId {
76    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
77        write!(f, "ObjectId({})", self.to_hex())
78    }
79}
80
81impl fmt::Display for ObjectId {
82    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
83        write!(f, "{}", self.to_hex())
84    }
85}
86
/// The kinds of object this module models.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ObjectType {
    /// The contents of a file.
    Blob,
    /// A directory listing.
    Tree,
    /// A commit.
    Commit,
    /// An annotated tag.
    Tag,
}
99
100impl ObjectType {
101    /// Returns the string representation used in git.
102    pub fn as_str(&self) -> &'static str {
103        match self {
104            Self::Blob => "blob",
105            Self::Tree => "tree",
106            Self::Commit => "commit",
107            Self::Tag => "tag",
108        }
109    }
110
111    /// Parses an object type from a string.
112    pub fn parse(s: &str) -> Result<Self> {
113        match s {
114            "blob" => Ok(Self::Blob),
115            "tree" => Ok(Self::Tree),
116            "commit" => Ok(Self::Commit),
117            "tag" => Ok(Self::Tag),
118            _ => Err(StorageError::InvalidObject(format!(
119                "unknown object type: {}",
120                s
121            ))),
122        }
123    }
124
125    /// Returns the type code used in pack files.
126    pub fn pack_type(&self) -> u8 {
127        match self {
128            Self::Commit => 1,
129            Self::Tree => 2,
130            Self::Blob => 3,
131            Self::Tag => 4,
132        }
133    }
134
135    /// Parses an object type from a pack file type code.
136    pub fn from_pack_type(code: u8) -> Result<Self> {
137        match code {
138            1 => Ok(Self::Commit),
139            2 => Ok(Self::Tree),
140            3 => Ok(Self::Blob),
141            4 => Ok(Self::Tag),
142            _ => Err(StorageError::InvalidObject(format!(
143                "unknown pack type: {}",
144                code
145            ))),
146        }
147    }
148}
149
150/// A git object (blob, tree, commit, or tag).
151#[derive(Debug, Clone)]
152pub struct GitObject {
153    /// The object's unique identifier (SHA-1 hash).
154    pub id: ObjectId,
155    /// The type of object.
156    pub object_type: ObjectType,
157    /// The raw object data (uncompressed).
158    pub data: Bytes,
159}
160
161impl GitObject {
162    /// Creates a new git object, computing its ID from the data.
163    pub fn new(object_type: ObjectType, data: impl Into<Bytes>) -> Self {
164        let data = data.into();
165        let id = ObjectId::hash_object(object_type, &data);
166        Self {
167            id,
168            object_type,
169            data,
170        }
171    }
172
173    /// Creates a blob object from file content.
174    pub fn blob(content: impl Into<Bytes>) -> Self {
175        Self::new(ObjectType::Blob, content)
176    }
177
178    /// Creates a commit object.
179    pub fn commit(
180        tree_id: &ObjectId,
181        parents: &[ObjectId],
182        author: &str,
183        committer: &str,
184        message: &str,
185    ) -> Self {
186        let mut content = format!("tree {}\n", tree_id);
187        for parent in parents {
188            content.push_str(&format!("parent {}\n", parent));
189        }
190        content.push_str(&format!("author {}\n", author));
191        content.push_str(&format!("committer {}\n", committer));
192        content.push_str(&format!("\n{}", message));
193        Self::new(ObjectType::Commit, content.into_bytes())
194    }
195
196    /// Returns the size of the object data.
197    pub fn size(&self) -> usize {
198        self.data.len()
199    }
200}
201
#[cfg(test)]
mod tests {
    // Unit tests for object ids, object types, and git object construction.
    // Hash expectations are values produced by `git hash-object`.

    use super::*;

    #[test]
    fn test_object_id_hex_roundtrip() {
        let hex = "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3";
        let id = ObjectId::from_hex(hex).unwrap();
        assert_eq!(id.to_hex(), hex);
    }

    #[test]
    fn test_blob_hash() {
        // "hello\n" should hash to a well-known value
        let obj = GitObject::blob(b"hello\n".to_vec());
        // This is the actual git hash for "hello\n"
        assert_eq!(obj.id.to_hex(), "ce013625030ba8dba906f756967f9e9ca394464a");
    }

    #[test]
    fn test_object_type_roundtrip() {
        for ot in [
            ObjectType::Blob,
            ObjectType::Tree,
            ObjectType::Commit,
            ObjectType::Tag,
        ] {
            let s = ot.as_str();
            let parsed = ObjectType::parse(s).unwrap();
            assert_eq!(ot, parsed);
        }
    }

    #[test]
    fn test_object_id_from_bytes() {
        let bytes = [0xab; 20];
        let id = ObjectId::from_bytes(bytes);
        assert_eq!(*id.as_bytes(), bytes);
    }

    #[test]
    fn test_object_id_invalid_hex_length() {
        // Too short.
        let result = ObjectId::from_hex("abc");
        assert!(result.is_err());

        // Too long (42 characters).
        let result = ObjectId::from_hex("a94a8fe5ccb19ba61c4c0873d391e987982fbbd3ff");
        assert!(result.is_err());

        // Empty input is a length error too.
        assert!(ObjectId::from_hex("").is_err());
    }

    #[test]
    fn test_object_id_invalid_hex_chars() {
        // Correct length, but not hex digits.
        let result = ObjectId::from_hex("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz");
        assert!(result.is_err());
    }

    #[test]
    fn test_object_id_display() {
        let id = ObjectId::from_bytes([0u8; 20]);
        assert_eq!(format!("{}", id), "0".repeat(40));
    }

    #[test]
    fn test_object_id_debug() {
        let id = ObjectId::from_bytes([0u8; 20]);
        let debug = format!("{:?}", id);
        assert!(debug.contains("ObjectId"));
        assert!(debug.contains(&"0".repeat(40)));
    }

    #[test]
    fn test_object_type_parse_invalid() {
        let result = ObjectType::parse("invalid");
        assert!(result.is_err());
    }

    #[test]
    fn test_object_type_pack_type_roundtrip() {
        for ot in [
            ObjectType::Commit,
            ObjectType::Tree,
            ObjectType::Blob,
            ObjectType::Tag,
        ] {
            let code = ot.pack_type();
            let parsed = ObjectType::from_pack_type(code).unwrap();
            assert_eq!(ot, parsed);
        }
    }

    #[test]
    fn test_object_type_from_pack_type_invalid() {
        assert!(ObjectType::from_pack_type(0).is_err());
        assert!(ObjectType::from_pack_type(5).is_err());
        assert!(ObjectType::from_pack_type(255).is_err());
    }

    #[test]
    fn test_git_object_blob() {
        let content = b"Hello, World!";
        let obj = GitObject::blob(content.to_vec());

        assert_eq!(obj.object_type, ObjectType::Blob);
        assert_eq!(obj.data.as_ref(), content);
        assert_eq!(obj.size(), content.len());
    }

    #[test]
    fn test_git_object_commit() {
        let tree_id = ObjectId::from_bytes([1u8; 20]);
        let parents = vec![ObjectId::from_bytes([2u8; 20])];
        let author = "Alice <alice@example.com> 1234567890 +0000";
        let committer = "Bob <bob@example.com> 1234567890 +0000";
        let message = "Initial commit";

        let obj = GitObject::commit(&tree_id, &parents, author, committer, message);

        assert_eq!(obj.object_type, ObjectType::Commit);
        let content = String::from_utf8_lossy(&obj.data);
        assert!(content.contains(&format!("tree {}", tree_id)));
        assert!(content.contains(&format!("parent {}", parents[0])));
        assert!(content.contains(author));
        assert!(content.contains(committer));
        assert!(content.contains(message));

        // Pin the exact layout: header lines, a blank line, then the message.
        let expected = format!(
            "tree {}\nparent {}\nauthor {}\ncommitter {}\n\n{}",
            tree_id, parents[0], author, committer, message
        );
        assert_eq!(&*content, expected.as_str());
    }

    #[test]
    fn test_git_object_commit_no_parents() {
        let tree_id = ObjectId::from_bytes([1u8; 20]);
        let parents: Vec<ObjectId> = vec![];
        let author = "Alice <alice@example.com>";
        let message = "First commit";

        let obj = GitObject::commit(&tree_id, &parents, author, author, message);

        let content = String::from_utf8_lossy(&obj.data);
        assert!(!content.contains("parent"));
        // With no parents the author line directly follows the tree line.
        assert!(content.starts_with(&format!("tree {}\nauthor ", tree_id)));
    }

    #[test]
    fn test_git_object_commit_multiple_parents() {
        let tree_id = ObjectId::from_bytes([1u8; 20]);
        let parents = vec![
            ObjectId::from_bytes([2u8; 20]),
            ObjectId::from_bytes([3u8; 20]),
        ];
        let author = "Alice <alice@example.com>";
        let message = "Merge commit";

        let obj = GitObject::commit(&tree_id, &parents, author, author, message);

        let content = String::from_utf8_lossy(&obj.data);
        let first = content.find(&format!("parent {}", parents[0]));
        let second = content.find(&format!("parent {}", parents[1]));
        assert!(first.is_some());
        assert!(second.is_some());
        // Parent order is significant for merges and must be preserved.
        assert!(first < second);
    }

    #[test]
    fn test_git_object_new() {
        let data = b"tree data";
        let obj = GitObject::new(ObjectType::Tree, data.to_vec());

        assert_eq!(obj.object_type, ObjectType::Tree);
        assert_eq!(obj.data.as_ref(), data);
        // The id must be the hash of the same type and data.
        assert_eq!(obj.id, ObjectId::hash_object(ObjectType::Tree, data));
    }

    #[test]
    fn test_object_id_hash_object() {
        // Known git hash for "blob 4\0test" (`printf test | git hash-object --stdin`).
        let id = ObjectId::hash_object(ObjectType::Blob, b"test");
        assert_eq!(id.to_hex(), "30d74d258442c7c65512eafab474568dd706c430");

        // The object type is part of the hashed header, so it must affect the id.
        let as_tree = ObjectId::hash_object(ObjectType::Tree, b"test");
        assert_ne!(id, as_tree);
    }

    #[test]
    fn test_git_object_clone() {
        let obj = GitObject::blob(b"data".to_vec());
        let cloned = obj.clone();

        assert_eq!(obj.id, cloned.id);
        assert_eq!(obj.object_type, cloned.object_type);
        assert_eq!(obj.data, cloned.data);
    }

    #[test]
    fn test_object_id_serialization() {
        let id = ObjectId::from_bytes([0xab; 20]);
        let json = serde_json::to_string(&id).unwrap();
        // The wire format is the hex string, not an array of bytes.
        assert_eq!(json, format!("\"{}\"", "ab".repeat(20)));

        let parsed: ObjectId = serde_json::from_str(&json).unwrap();
        assert_eq!(id, parsed);

        // Malformed ids are rejected during deserialization.
        assert!(serde_json::from_str::<ObjectId>("\"abc\"").is_err());
    }

    #[test]
    fn test_object_id_equality() {
        let id1 = ObjectId::from_bytes([1u8; 20]);
        let id2 = ObjectId::from_bytes([1u8; 20]);
        let id3 = ObjectId::from_bytes([2u8; 20]);

        assert_eq!(id1, id2);
        assert_ne!(id1, id3);
    }

    #[test]
    fn test_object_id_hash_trait() {
        use std::collections::HashSet;

        let id1 = ObjectId::from_bytes([1u8; 20]);
        let id2 = ObjectId::from_bytes([2u8; 20]);

        let mut set = HashSet::new();
        set.insert(id1);
        set.insert(id2);
        set.insert(id1); // Duplicate

        assert_eq!(set.len(), 2);
    }

    #[test]
    fn test_object_id_copy_trait() {
        let id1 = ObjectId::from_bytes([1u8; 20]);
        let id2 = id1; // Copy
        assert_eq!(id1, id2);
    }

    #[test]
    fn test_git_object_size() {
        let obj = GitObject::blob(b"12345".to_vec());
        assert_eq!(obj.size(), 5);
    }

    #[test]
    fn test_git_object_empty_blob() {
        let obj = GitObject::blob(b"".to_vec());
        assert_eq!(obj.size(), 0);
        // Empty blob has a known git hash
        assert_eq!(obj.id.to_hex(), "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391");
    }
}