hashtree_git/
object.rs

1//! Git object types and serialization
2//!
3//! Git has four object types: blob, tree, commit, and tag.
4//! Each is content-addressed by SHA-1 hash of: "{type} {size}\0{content}"
5
6use sha1::{Sha1, Digest};
7use std::fmt;
8
/// The kind of a git object: blob, tree, commit, or tag.
///
/// The lowercase name returned by [`ObjectType::as_str`] is the token that
/// appears in object headers of the form `"{type} {size}\0"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ObjectType {
    /// Written as `"blob"`.
    Blob,
    /// Written as `"tree"`.
    Tree,
    /// Written as `"commit"`.
    Commit,
    /// Written as `"tag"`.
    Tag,
}

impl ObjectType {
    /// Returns the lowercase name of this object type.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Blob => "blob",
            Self::Tree => "tree",
            Self::Commit => "commit",
            Self::Tag => "tag",
        }
    }

    /// Looks up an object type by its exact lowercase name.
    ///
    /// Returns `None` for anything that is not one of the four names
    /// produced by [`ObjectType::as_str`]; the comparison is case-sensitive.
    pub fn from_str(s: &str) -> Option<Self> {
        // Reuse `as_str` as the single source of truth for the names.
        [Self::Blob, Self::Tree, Self::Commit, Self::Tag]
            .iter()
            .copied()
            .find(|kind| kind.as_str() == s)
    }
}

impl fmt::Display for ObjectType {
    /// Writes the same lowercase name that [`ObjectType::as_str`] returns.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.as_str())
    }
}
44
/// An object identifier: the raw 20 bytes of a SHA-1 digest.
///
/// The wrapped array is private. Equality and hashing operate directly on
/// those 20 bytes, and the type is `Copy`.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub struct ObjectId(
    /// Raw digest bytes.
    [u8; 20],
);
48
49impl ObjectId {
50    pub const ZERO: ObjectId = ObjectId([0u8; 20]);
51
52    pub fn from_bytes(bytes: &[u8]) -> Option<Self> {
53        if bytes.len() == 20 {
54            let mut arr = [0u8; 20];
55            arr.copy_from_slice(bytes);
56            Some(ObjectId(arr))
57        } else {
58            None
59        }
60    }
61
62    pub fn from_hex(hex: &str) -> Option<Self> {
63        if hex.len() != 40 {
64            return None;
65        }
66        let bytes = hex::decode(hex).ok()?;
67        Self::from_bytes(&bytes)
68    }
69
70    pub fn as_bytes(&self) -> &[u8; 20] {
71        &self.0
72    }
73
74    pub fn to_hex(&self) -> String {
75        hex::encode(self.0)
76    }
77
78    /// Compute object ID from raw object data (type + content)
79    pub fn hash_object(obj_type: ObjectType, content: &[u8]) -> Self {
80        let header = format!("{} {}\0", obj_type.as_str(), content.len());
81        let mut hasher = Sha1::new();
82        hasher.update(header.as_bytes());
83        hasher.update(content);
84        let result = hasher.finalize();
85        let mut id = [0u8; 20];
86        id.copy_from_slice(&result);
87        ObjectId(id)
88    }
89}
90
91impl fmt::Debug for ObjectId {
92    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
93        write!(f, "ObjectId({})", self.to_hex())
94    }
95}
96
97impl fmt::Display for ObjectId {
98    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
99        write!(f, "{}", self.to_hex())
100    }
101}
102
103/// A git object with type and content
104#[derive(Debug, Clone)]
105pub struct GitObject {
106    pub obj_type: ObjectType,
107    pub content: Vec<u8>,
108}
109
110impl GitObject {
111    pub fn new(obj_type: ObjectType, content: Vec<u8>) -> Self {
112        Self { obj_type, content }
113    }
114
115    pub fn blob(content: Vec<u8>) -> Self {
116        Self::new(ObjectType::Blob, content)
117    }
118
119    pub fn id(&self) -> ObjectId {
120        ObjectId::hash_object(self.obj_type, &self.content)
121    }
122
123    /// Serialize to loose object format (for storage)
124    pub fn to_loose_format(&self) -> Vec<u8> {
125        let header = format!("{} {}\0", self.obj_type.as_str(), self.content.len());
126        let mut data = header.into_bytes();
127        data.extend_from_slice(&self.content);
128        data
129    }
130
131    /// Parse from loose object format
132    pub fn from_loose_format(data: &[u8]) -> crate::Result<Self> {
133        let null_pos = data.iter().position(|&b| b == 0)
134            .ok_or_else(|| crate::Error::InvalidObjectFormat("missing null byte".into()))?;
135
136        let header = std::str::from_utf8(&data[..null_pos])
137            .map_err(|_| crate::Error::InvalidObjectFormat("invalid header".into()))?;
138
139        let mut parts = header.split(' ');
140        let type_str = parts.next()
141            .ok_or_else(|| crate::Error::InvalidObjectFormat("missing type".into()))?;
142        let size_str = parts.next()
143            .ok_or_else(|| crate::Error::InvalidObjectFormat("missing size".into()))?;
144
145        let obj_type = ObjectType::from_str(type_str)
146            .ok_or_else(|| crate::Error::InvalidObjectType(type_str.into()))?;
147        let size: usize = size_str.parse()
148            .map_err(|_| crate::Error::InvalidObjectFormat("invalid size".into()))?;
149
150        let content = data[null_pos + 1..].to_vec();
151        if content.len() != size {
152            return Err(crate::Error::InvalidObjectFormat(
153                format!("size mismatch: expected {}, got {}", size, content.len())
154            ));
155        }
156
157        Ok(Self { obj_type, content })
158    }
159}
160
161/// Tree entry (mode, name, object id)
162#[derive(Debug, Clone)]
163pub struct TreeEntry {
164    pub mode: u32,
165    pub name: String,
166    pub oid: ObjectId,
167}
168
169impl TreeEntry {
170    pub fn new(mode: u32, name: String, oid: ObjectId) -> Self {
171        Self { mode, name, oid }
172    }
173
174    /// Parse mode from octal string
175    pub fn mode_str(&self) -> String {
176        format!("{:o}", self.mode)
177    }
178
179    pub fn is_tree(&self) -> bool {
180        self.mode == 0o40000
181    }
182
183    pub fn is_blob(&self) -> bool {
184        self.mode == 0o100644 || self.mode == 0o100755
185    }
186}
187
188/// Parse tree content into entries
189pub fn parse_tree(content: &[u8]) -> crate::Result<Vec<TreeEntry>> {
190    let mut entries = Vec::new();
191    let mut pos = 0;
192
193    while pos < content.len() {
194        // Find space after mode
195        let space_pos = content[pos..].iter().position(|&b| b == b' ')
196            .ok_or_else(|| crate::Error::InvalidObjectFormat("tree: missing space".into()))?;
197        let mode_str = std::str::from_utf8(&content[pos..pos + space_pos])
198            .map_err(|_| crate::Error::InvalidObjectFormat("tree: invalid mode".into()))?;
199        let mode = u32::from_str_radix(mode_str, 8)
200            .map_err(|_| crate::Error::InvalidObjectFormat("tree: invalid mode octal".into()))?;
201        pos += space_pos + 1;
202
203        // Find null after name
204        let null_pos = content[pos..].iter().position(|&b| b == 0)
205            .ok_or_else(|| crate::Error::InvalidObjectFormat("tree: missing null".into()))?;
206        let name = std::str::from_utf8(&content[pos..pos + null_pos])
207            .map_err(|_| crate::Error::InvalidObjectFormat("tree: invalid name".into()))?
208            .to_string();
209        pos += null_pos + 1;
210
211        // Read 20-byte SHA
212        if pos + 20 > content.len() {
213            return Err(crate::Error::InvalidObjectFormat("tree: truncated sha".into()));
214        }
215        let oid = ObjectId::from_bytes(&content[pos..pos + 20])
216            .ok_or_else(|| crate::Error::InvalidObjectFormat("tree: invalid sha".into()))?;
217        pos += 20;
218
219        entries.push(TreeEntry { mode, name, oid });
220    }
221
222    Ok(entries)
223}
224
225/// Serialize tree entries to content
226pub fn serialize_tree(entries: &[TreeEntry]) -> Vec<u8> {
227    let mut content = Vec::new();
228    for entry in entries {
229        content.extend_from_slice(entry.mode_str().as_bytes());
230        content.push(b' ');
231        content.extend_from_slice(entry.name.as_bytes());
232        content.push(0);
233        content.extend_from_slice(entry.oid.as_bytes());
234    }
235    content
236}
237
#[cfg(test)]
mod tests {
    use super::*;

    /// Hex -> `ObjectId` -> hex must give back the same string.
    #[test]
    fn test_object_id_hex() {
        let input = "da39a3ee5e6b4b0d3255bfef95601890afd80709";
        let round_tripped = ObjectId::from_hex(input).map(|oid| oid.to_hex());
        assert_eq!(round_tripped.as_deref(), Some(input));
    }

    /// The empty blob has a well-known ID.
    #[test]
    fn test_blob_hash() {
        let actual = ObjectId::hash_object(ObjectType::Blob, &[]).to_hex();
        assert_eq!(actual, "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391");
    }

    /// The blob "hello world\n" has a well-known ID.
    #[test]
    fn test_hello_world_blob() {
        let actual = ObjectId::hash_object(ObjectType::Blob, b"hello world\n").to_hex();
        assert_eq!(actual, "3b18e512dba79e4c8300dd08aeb37f8e728b8dad");
    }

    /// Serializing then parsing a blob preserves its type and content.
    #[test]
    fn test_loose_format_roundtrip() {
        let encoded = GitObject::blob(b"test content".to_vec()).to_loose_format();
        let GitObject { obj_type, content } = GitObject::from_loose_format(&encoded).unwrap();
        assert_eq!(obj_type, ObjectType::Blob);
        assert_eq!(content, b"test content");
    }
}