Skip to main content

git_remote_htree/git/
object.rs

//! Git object types and their serialization.
//!
//! Git has four object types: blob, tree, commit, and tag.
//! Each object is content-addressed by the SHA-1 hash of `"{type} {size}\0{content}"`.
5
6use super::{Error, Result};
7use sha1::{Digest, Sha1};
8use std::{fmt, str::FromStr};
9
/// Kind of a git object.
///
/// Exactly four kinds exist. Each kind has a lowercase textual name that is
/// written into the object header before hashing.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ObjectType {
    /// Written as `blob` in object headers.
    Blob,
    /// Written as `tree` in object headers.
    Tree,
    /// Written as `commit` in object headers.
    Commit,
    /// Written as `tag` in object headers.
    Tag,
}
18
19impl ObjectType {
20    pub fn as_str(&self) -> &'static str {
21        match self {
22            ObjectType::Blob => "blob",
23            ObjectType::Tree => "tree",
24            ObjectType::Commit => "commit",
25            ObjectType::Tag => "tag",
26        }
27    }
28
29    #[allow(dead_code)]
30    pub fn parse(s: &str) -> Option<Self> {
31        match s {
32            "blob" => Some(ObjectType::Blob),
33            "tree" => Some(ObjectType::Tree),
34            "commit" => Some(ObjectType::Commit),
35            "tag" => Some(ObjectType::Tag),
36            _ => None,
37        }
38    }
39}
40
41impl FromStr for ObjectType {
42    type Err = ();
43
44    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
45        Self::parse(s).ok_or(())
46    }
47}
48
49impl fmt::Display for ObjectType {
50    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
51        write!(f, "{}", self.as_str())
52    }
53}
54
/// Identifier of a git object: the raw 20 bytes of a SHA-1 digest.
///
/// The bytes are private; values are built through the associated
/// constructors and rendered as 40 hex characters.
#[derive(Copy, Clone, Eq, PartialEq, Hash)]
pub struct ObjectId([u8; 20]);
58
59impl ObjectId {
60    pub fn from_bytes(bytes: &[u8]) -> Option<Self> {
61        if bytes.len() == 20 {
62            let mut arr = [0u8; 20];
63            arr.copy_from_slice(bytes);
64            Some(ObjectId(arr))
65        } else {
66            None
67        }
68    }
69
70    pub fn from_hex(hex: &str) -> Option<Self> {
71        if hex.len() != 40 {
72            return None;
73        }
74        let bytes = hex::decode(hex).ok()?;
75        Self::from_bytes(&bytes)
76    }
77
78    pub fn to_hex(&self) -> String {
79        hex::encode(self.0)
80    }
81
82    /// Compute object ID from raw object data (type + content)
83    pub fn hash_object(obj_type: ObjectType, content: &[u8]) -> Self {
84        let header = format!("{} {}\0", obj_type.as_str(), content.len());
85        let mut hasher = Sha1::new();
86        hasher.update(header.as_bytes());
87        hasher.update(content);
88        let result = hasher.finalize();
89        let mut id = [0u8; 20];
90        id.copy_from_slice(&result);
91        ObjectId(id)
92    }
93}
94
95impl fmt::Debug for ObjectId {
96    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
97        write!(f, "ObjectId({})", self.to_hex())
98    }
99}
100
101impl fmt::Display for ObjectId {
102    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
103        write!(f, "{}", self.to_hex())
104    }
105}
106
107/// A git object with type and content
108#[derive(Debug, Clone)]
109pub struct GitObject {
110    pub obj_type: ObjectType,
111    pub content: Vec<u8>,
112}
113
114impl GitObject {
115    pub fn new(obj_type: ObjectType, content: Vec<u8>) -> Self {
116        Self { obj_type, content }
117    }
118
119    pub fn id(&self) -> ObjectId {
120        ObjectId::hash_object(self.obj_type, &self.content)
121    }
122
123    /// Serialize to loose object format (for storage)
124    pub fn to_loose_format(&self) -> Vec<u8> {
125        let header = format!("{} {}\0", self.obj_type.as_str(), self.content.len());
126        let mut data = header.into_bytes();
127        data.extend_from_slice(&self.content);
128        data
129    }
130
131    /// Parse from loose object format
132    #[allow(dead_code)]
133    pub fn from_loose_format(data: &[u8]) -> Result<Self> {
134        let null_pos = data
135            .iter()
136            .position(|&b| b == 0)
137            .ok_or_else(|| Error::InvalidObjectFormat("missing null byte".into()))?;
138
139        let header = std::str::from_utf8(&data[..null_pos])
140            .map_err(|_| Error::InvalidObjectFormat("invalid header".into()))?;
141
142        let mut parts = header.split(' ');
143        let type_str = parts
144            .next()
145            .ok_or_else(|| Error::InvalidObjectFormat("missing type".into()))?;
146        let size_str = parts
147            .next()
148            .ok_or_else(|| Error::InvalidObjectFormat("missing size".into()))?;
149
150        let obj_type = type_str
151            .parse::<ObjectType>()
152            .map_err(|_| Error::InvalidObjectType(type_str.into()))?;
153        let size: usize = size_str
154            .parse()
155            .map_err(|_| Error::InvalidObjectFormat("invalid size".into()))?;
156
157        let content = data[null_pos + 1..].to_vec();
158        if content.len() != size {
159            return Err(Error::InvalidObjectFormat(format!(
160                "size mismatch: expected {}, got {}",
161                size,
162                content.len()
163            )));
164        }
165
166        Ok(Self { obj_type, content })
167    }
168}
169
170/// Tree entry (mode, name, object id)
171#[derive(Debug, Clone)]
172pub struct TreeEntry {
173    pub mode: u32,
174    pub name: String,
175    pub oid: ObjectId,
176}
177
178impl TreeEntry {
179    pub fn is_tree(&self) -> bool {
180        self.mode == 0o40000
181    }
182}
183
184/// Parse tree content into entries
185pub fn parse_tree(content: &[u8]) -> Result<Vec<TreeEntry>> {
186    let mut entries = Vec::new();
187    let mut pos = 0;
188
189    while pos < content.len() {
190        let space_pos = content[pos..]
191            .iter()
192            .position(|&b| b == b' ')
193            .ok_or_else(|| Error::InvalidObjectFormat("tree: missing space".into()))?;
194        let mode_str = std::str::from_utf8(&content[pos..pos + space_pos])
195            .map_err(|_| Error::InvalidObjectFormat("tree: invalid mode".into()))?;
196        let mode = u32::from_str_radix(mode_str, 8)
197            .map_err(|_| Error::InvalidObjectFormat("tree: invalid mode octal".into()))?;
198        pos += space_pos + 1;
199
200        let null_pos = content[pos..]
201            .iter()
202            .position(|&b| b == 0)
203            .ok_or_else(|| Error::InvalidObjectFormat("tree: missing null".into()))?;
204        let name = std::str::from_utf8(&content[pos..pos + null_pos])
205            .map_err(|_| Error::InvalidObjectFormat("tree: invalid name".into()))?
206            .to_string();
207        pos += null_pos + 1;
208
209        if pos + 20 > content.len() {
210            return Err(Error::InvalidObjectFormat("tree: truncated sha".into()));
211        }
212        let oid = ObjectId::from_bytes(&content[pos..pos + 20])
213            .ok_or_else(|| Error::InvalidObjectFormat("tree: invalid sha".into()))?;
214        pos += 20;
215
216        entries.push(TreeEntry { mode, name, oid });
217    }
218
219    Ok(entries)
220}
221
// Helpers compiled only for tests and re-exported at module level.
#[cfg(test)]
pub use test_helpers::*;

#[cfg(test)]
mod test_helpers {
    use super::*;

    impl TreeEntry {
        /// Test-only constructor taking the three fields in declaration order.
        pub fn new(mode: u32, name: String, oid: ObjectId) -> Self {
            TreeEntry { mode, name, oid }
        }
    }

    /// Encodes entries as tree content: for each entry, the mode in octal, a
    /// space, the name, a NUL byte, then the 20 raw ID bytes.
    pub fn serialize_tree(entries: &[TreeEntry]) -> Vec<u8> {
        entries.iter().fold(Vec::new(), |mut out, item| {
            let mode_text = format!("{:o}", item.mode);
            out.extend_from_slice(mode_text.as_bytes());
            out.push(b' ');
            out.extend_from_slice(item.name.as_bytes());
            out.push(0);
            out.extend_from_slice(&item.oid.0);
            out
        })
    }
}
248
#[cfg(test)]
mod tests {
    use super::*;

    /// Decoding a hex ID and encoding it again reproduces the input.
    #[test]
    fn test_object_id_hex() {
        let input = "da39a3ee5e6b4b0d3255bfef95601890afd80709";
        let roundtripped = ObjectId::from_hex(input).unwrap().to_hex();
        assert_eq!(roundtripped, input);
    }

    /// Hashing an empty blob gives the expected fixed ID.
    #[test]
    fn test_blob_hash() {
        let actual = ObjectId::hash_object(ObjectType::Blob, &[]).to_hex();
        assert_eq!(actual, "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391");
    }

    /// Hashing a small non-empty blob gives the expected fixed ID.
    #[test]
    fn test_hello_world_blob() {
        let payload = b"hello world\n";
        let actual = ObjectId::hash_object(ObjectType::Blob, payload).to_hex();
        assert_eq!(actual, "3b18e512dba79e4c8300dd08aeb37f8e728b8dad");
    }

    /// Serializing to loose format and parsing back keeps type and content.
    #[test]
    fn test_loose_format_roundtrip() {
        let original = GitObject::new(ObjectType::Blob, b"test content".to_vec());
        let encoded = original.to_loose_format();
        let decoded = GitObject::from_loose_format(&encoded).unwrap();
        assert_eq!(decoded.obj_type, ObjectType::Blob);
        assert_eq!(decoded.content, b"test content");
    }
}
283}