Skip to main content

git_remote_htree/git/
object.rs

1//! Git object types and serialization
2//!
3//! Git has four object types: blob, tree, commit, and tag.
4//! Each is content-addressed by SHA-1 hash of: "{type} {size}\0{content}"
5
6use super::{Error, Result};
7use sha1::{Digest, Sha1};
8use std::fmt;
9
/// Kind tag of a git object.
///
/// There are exactly four kinds: blob, tree, commit, and tag. The lowercase
/// name returned by [`ObjectType::as_str`] is the spelling used when the
/// object header is built.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ObjectType {
    /// A `blob` object.
    Blob,
    /// A `tree` object.
    Tree,
    /// A `commit` object.
    Commit,
    /// A `tag` object.
    Tag,
}

impl ObjectType {
    /// Returns the lowercase name of this object type
    /// (`"blob"`, `"tree"`, `"commit"`, or `"tag"`).
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Blob => "blob",
            Self::Tree => "tree",
            Self::Commit => "commit",
            Self::Tag => "tag",
        }
    }

    /// Looks up an object type by its exact lowercase name.
    ///
    /// Returns `None` for any string that is not one of the four names
    /// produced by [`ObjectType::as_str`]; matching is case-sensitive.
    #[allow(dead_code)]
    pub fn from_str(s: &str) -> Option<Self> {
        const ALL: [ObjectType; 4] = [
            ObjectType::Blob,
            ObjectType::Tree,
            ObjectType::Commit,
            ObjectType::Tag,
        ];
        // Derive the lookup from `as_str` so the two directions cannot drift apart.
        ALL.iter().copied().find(|kind| kind.as_str() == s)
    }
}

/// Displays the same lowercase name as [`ObjectType::as_str`].
impl fmt::Display for ObjectType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.as_str())
    }
}
46
47/// A 20-byte SHA-1 object ID
48#[derive(Clone, Copy, PartialEq, Eq, Hash)]
49pub struct ObjectId([u8; 20]);
50
51impl ObjectId {
52    pub fn from_bytes(bytes: &[u8]) -> Option<Self> {
53        if bytes.len() == 20 {
54            let mut arr = [0u8; 20];
55            arr.copy_from_slice(bytes);
56            Some(ObjectId(arr))
57        } else {
58            None
59        }
60    }
61
62    pub fn from_hex(hex: &str) -> Option<Self> {
63        if hex.len() != 40 {
64            return None;
65        }
66        let bytes = hex::decode(hex).ok()?;
67        Self::from_bytes(&bytes)
68    }
69
70    pub fn to_hex(&self) -> String {
71        hex::encode(self.0)
72    }
73
74    /// Compute object ID from raw object data (type + content)
75    pub fn hash_object(obj_type: ObjectType, content: &[u8]) -> Self {
76        let header = format!("{} {}\0", obj_type.as_str(), content.len());
77        let mut hasher = Sha1::new();
78        hasher.update(header.as_bytes());
79        hasher.update(content);
80        let result = hasher.finalize();
81        let mut id = [0u8; 20];
82        id.copy_from_slice(&result);
83        ObjectId(id)
84    }
85}
86
87impl fmt::Debug for ObjectId {
88    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
89        write!(f, "ObjectId({})", self.to_hex())
90    }
91}
92
93impl fmt::Display for ObjectId {
94    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
95        write!(f, "{}", self.to_hex())
96    }
97}
98
99/// A git object with type and content
100#[derive(Debug, Clone)]
101pub struct GitObject {
102    pub obj_type: ObjectType,
103    pub content: Vec<u8>,
104}
105
106impl GitObject {
107    pub fn new(obj_type: ObjectType, content: Vec<u8>) -> Self {
108        Self { obj_type, content }
109    }
110
111    pub fn id(&self) -> ObjectId {
112        ObjectId::hash_object(self.obj_type, &self.content)
113    }
114
115    /// Serialize to loose object format (for storage)
116    pub fn to_loose_format(&self) -> Vec<u8> {
117        let header = format!("{} {}\0", self.obj_type.as_str(), self.content.len());
118        let mut data = header.into_bytes();
119        data.extend_from_slice(&self.content);
120        data
121    }
122
123    /// Parse from loose object format
124    #[allow(dead_code)]
125    pub fn from_loose_format(data: &[u8]) -> Result<Self> {
126        let null_pos = data
127            .iter()
128            .position(|&b| b == 0)
129            .ok_or_else(|| Error::InvalidObjectFormat("missing null byte".into()))?;
130
131        let header = std::str::from_utf8(&data[..null_pos])
132            .map_err(|_| Error::InvalidObjectFormat("invalid header".into()))?;
133
134        let mut parts = header.split(' ');
135        let type_str = parts
136            .next()
137            .ok_or_else(|| Error::InvalidObjectFormat("missing type".into()))?;
138        let size_str = parts
139            .next()
140            .ok_or_else(|| Error::InvalidObjectFormat("missing size".into()))?;
141
142        let obj_type = ObjectType::from_str(type_str)
143            .ok_or_else(|| Error::InvalidObjectType(type_str.into()))?;
144        let size: usize = size_str
145            .parse()
146            .map_err(|_| Error::InvalidObjectFormat("invalid size".into()))?;
147
148        let content = data[null_pos + 1..].to_vec();
149        if content.len() != size {
150            return Err(Error::InvalidObjectFormat(format!(
151                "size mismatch: expected {}, got {}",
152                size,
153                content.len()
154            )));
155        }
156
157        Ok(Self { obj_type, content })
158    }
159}
160
161/// Tree entry (mode, name, object id)
162#[derive(Debug, Clone)]
163pub struct TreeEntry {
164    pub mode: u32,
165    pub name: String,
166    pub oid: ObjectId,
167}
168
169impl TreeEntry {
170    pub fn is_tree(&self) -> bool {
171        self.mode == 0o40000
172    }
173}
174
175/// Parse tree content into entries
176pub fn parse_tree(content: &[u8]) -> Result<Vec<TreeEntry>> {
177    let mut entries = Vec::new();
178    let mut pos = 0;
179
180    while pos < content.len() {
181        let space_pos = content[pos..]
182            .iter()
183            .position(|&b| b == b' ')
184            .ok_or_else(|| Error::InvalidObjectFormat("tree: missing space".into()))?;
185        let mode_str = std::str::from_utf8(&content[pos..pos + space_pos])
186            .map_err(|_| Error::InvalidObjectFormat("tree: invalid mode".into()))?;
187        let mode = u32::from_str_radix(mode_str, 8)
188            .map_err(|_| Error::InvalidObjectFormat("tree: invalid mode octal".into()))?;
189        pos += space_pos + 1;
190
191        let null_pos = content[pos..]
192            .iter()
193            .position(|&b| b == 0)
194            .ok_or_else(|| Error::InvalidObjectFormat("tree: missing null".into()))?;
195        let name = std::str::from_utf8(&content[pos..pos + null_pos])
196            .map_err(|_| Error::InvalidObjectFormat("tree: invalid name".into()))?
197            .to_string();
198        pos += null_pos + 1;
199
200        if pos + 20 > content.len() {
201            return Err(Error::InvalidObjectFormat("tree: truncated sha".into()));
202        }
203        let oid = ObjectId::from_bytes(&content[pos..pos + 20])
204            .ok_or_else(|| Error::InvalidObjectFormat("tree: invalid sha".into()))?;
205        pos += 20;
206
207        entries.push(TreeEntry { mode, name, oid });
208    }
209
210    Ok(entries)
211}
212
// Test helpers: compiled only for test builds and re-exported from this module.
#[cfg(test)]
pub use test_helpers::*;

#[cfg(test)]
mod test_helpers {
    use super::*;

    impl TreeEntry {
        /// Test-only constructor assembling an entry from its three fields.
        pub fn new(mode: u32, name: String, oid: ObjectId) -> Self {
            TreeEntry { mode, name, oid }
        }
    }

    /// Encodes entries as `"{octal mode} {name}\0"` followed by the 20 raw
    /// ID bytes, one record after another in the given order.
    pub fn serialize_tree(entries: &[TreeEntry]) -> Vec<u8> {
        entries.iter().fold(Vec::new(), |mut buf, entry| {
            let mode_field = format!("{:o} ", entry.mode);
            buf.extend_from_slice(mode_field.as_bytes());
            buf.extend_from_slice(entry.name.as_bytes());
            buf.push(0);
            buf.extend_from_slice(&entry.oid.0);
            buf
        })
    }
}
239
#[cfg(test)]
mod tests {
    use super::*;

    /// Hex -> `ObjectId` -> hex must give back the original string.
    #[test]
    fn test_object_id_hex() {
        let hex = "da39a3ee5e6b4b0d3255bfef95601890afd80709";
        let roundtripped = ObjectId::from_hex(hex).map(|oid| oid.to_hex());
        assert_eq!(roundtripped.as_deref(), Some(hex));
    }

    /// The empty blob hashes to git's well-known empty-blob ID.
    #[test]
    fn test_blob_hash() {
        assert_eq!(
            ObjectId::hash_object(ObjectType::Blob, b"").to_hex(),
            "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"
        );
    }

    /// A small non-empty blob hashes to its known ID.
    #[test]
    fn test_hello_world_blob() {
        let oid = ObjectId::hash_object(ObjectType::Blob, b"hello world\n");
        assert_eq!(oid.to_hex(), "3b18e512dba79e4c8300dd08aeb37f8e728b8dad");
    }

    /// Serializing to loose format and parsing back preserves type and content.
    #[test]
    fn test_loose_format_roundtrip() {
        let payload = b"test content";
        let loose = GitObject::new(ObjectType::Blob, payload.to_vec()).to_loose_format();
        let GitObject { obj_type, content } = GitObject::from_loose_format(&loose).unwrap();
        assert_eq!(obj_type, ObjectType::Blob);
        assert_eq!(content, payload);
    }
}