// git_plumber/git/loose_object.rs

1use flate2::read::ZlibDecoder;
2use std::fs;
3use std::io::Read;
4use std::path::Path;
5use thiserror::Error;
6
7#[derive(Debug, Error)]
8pub enum LooseObjectError {
9    #[error("IO error: {0}")]
10    IoError(#[from] std::io::Error),
11
12    #[error("Invalid object format: {0}")]
13    InvalidFormat(String),
14
15    #[error("Unknown object type: {0}")]
16    UnknownType(String),
17
18    #[error("Decompression error: {0}")]
19    DecompressionError(String),
20}
21
/// The kind of object recorded in a loose object's header.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LooseObjectType {
    /// A commit object.
    Commit,
    /// A tree object.
    Tree,
    /// A blob object.
    Blob,
    /// A tag object.
    Tag,
}
29
30impl std::fmt::Display for LooseObjectType {
31    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
32        match self {
33            Self::Commit => write!(f, "commit"),
34            Self::Tree => write!(f, "tree"),
35            Self::Blob => write!(f, "blob"),
36            Self::Tag => write!(f, "tag"),
37        }
38    }
39}
40
41impl std::str::FromStr for LooseObjectType {
42    type Err = LooseObjectError;
43
44    fn from_str(s: &str) -> Result<Self, Self::Err> {
45        match s {
46            "commit" => Ok(Self::Commit),
47            "tree" => Ok(Self::Tree),
48            "blob" => Ok(Self::Blob),
49            "tag" => Ok(Self::Tag),
50            _ => Err(LooseObjectError::UnknownType(s.to_string())),
51        }
52    }
53}
54
/// Parsed representation of a commit object.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommitObject {
    /// Value of the `tree` header.
    pub tree: String,
    /// Values of the `parent` headers, in the order they appear.
    pub parents: Vec<String>,
    /// The `author` header without its trailing date fields.
    pub author: String,
    /// The last two space-separated fields of the `author` header.
    pub author_date: String,
    /// The `committer` header without its trailing date fields.
    pub committer: String,
    /// The last two space-separated fields of the `committer` header.
    pub committer_date: String,
    /// Everything after the first blank line, joined with `\n`.
    pub message: String,
}
66
67// Parsed tree entry structure
68#[derive(Debug, Clone)]
69pub struct TreeEntry {
70    pub mode: String,
71    pub name: String,
72    pub sha1: String,
73    pub object_type: TreeEntryType,
74}
75
/// What a tree entry points at, as derived from its mode.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TreeEntryType {
    /// A regular file; also used as the fallback for unrecognized modes.
    Blob,
    /// A nested tree (directory).
    Tree,
    /// An executable file (mode `100755`).
    Executable,
    /// A symbolic link (mode `120000`).
    Symlink,
    /// A submodule entry (mode `160000`).
    Submodule,
}
84
85// Parsed tree object structure
86#[derive(Debug, Clone)]
87pub struct TreeObject {
88    pub entries: Vec<TreeEntry>,
89}
90
/// Parsed representation of a tag object.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TagObject {
    /// Value of the `object` header.
    pub object: String,
    /// Value of the `type` header, kept as the raw text.
    pub object_type: String,
    /// Value of the `tag` header.
    pub tag: String,
    /// The `tagger` header without its trailing date fields, if one was parsed.
    pub tagger: Option<String>,
    /// The last two space-separated fields of the `tagger` header, if one was parsed.
    pub tagger_date: Option<String>,
    /// Everything after the first blank line, joined with `\n`.
    pub message: String,
}
101
102// Parsed object content
103#[derive(Debug, Clone)]
104pub enum ParsedContent {
105    Commit(CommitObject),
106    Tree(TreeObject),
107    Blob(Vec<u8>),
108    Tag(TagObject),
109}
110
111#[derive(Debug, Clone)]
112pub struct LooseObject {
113    pub object_type: LooseObjectType,
114    pub size: usize,
115    pub content: Vec<u8>,
116    pub object_id: String,
117    pub parsed_content: Option<ParsedContent>,
118}
119
120impl LooseObject {
121    /// Read and parse a loose object from the given path
122    ///
123    /// # Errors
124    ///
125    /// This function will return an error if:
126    /// - The file path cannot be read
127    /// - The object ID cannot be extracted from the path
128    /// - The file cannot be decompressed
129    /// - The object format is invalid
130    /// - The object type is unknown
131    pub fn read_from_path(path: &Path) -> Result<Self, LooseObjectError> {
132        // Extract object ID from path
133        let object_id = Self::extract_object_id(path)?;
134
135        // Read the compressed file
136        let compressed_data = fs::read(path)?;
137
138        // Decompress the data
139        let mut decoder = ZlibDecoder::new(&compressed_data[..]);
140        let mut decompressed = Vec::new();
141        decoder
142            .read_to_end(&mut decompressed)
143            .map_err(|e| LooseObjectError::DecompressionError(e.to_string()))?;
144
145        // Parse the object header and content
146        Self::parse_object_data(&decompressed, object_id)
147    }
148
149    /// Extract object ID from the file path
150    /// Path format: .git/objects/ab/cdef123456...
151    fn extract_object_id(path: &Path) -> Result<String, LooseObjectError> {
152        let filename = path
153            .file_name()
154            .ok_or_else(|| LooseObjectError::InvalidFormat("No filename".to_string()))?
155            .to_string_lossy();
156
157        let parent_dir = path
158            .parent()
159            .ok_or_else(|| LooseObjectError::InvalidFormat("No parent directory".to_string()))?
160            .file_name()
161            .ok_or_else(|| LooseObjectError::InvalidFormat("No parent directory name".to_string()))?
162            .to_string_lossy();
163
164        if parent_dir.len() != 2 {
165            return Err(LooseObjectError::InvalidFormat(
166                "Parent directory should be 2 characters".to_string(),
167            ));
168        }
169
170        if filename.len() != 38 {
171            return Err(LooseObjectError::InvalidFormat(
172                "Filename should be 38 characters".to_string(),
173            ));
174        }
175
176        Ok(format!("{parent_dir}{filename}"))
177    }
178
179    /// Parse the decompressed object data
180    /// Format: "<type> <size>\0<content>"
181    fn parse_object_data(data: &[u8], object_id: String) -> Result<Self, LooseObjectError> {
182        // Find the null terminator that separates header from content
183        let null_pos = data.iter().position(|&b| b == 0).ok_or_else(|| {
184            LooseObjectError::InvalidFormat("No null terminator found".to_string())
185        })?;
186
187        // Split header and content
188        let header = &data[..null_pos];
189        let content = &data[null_pos + 1..];
190
191        // Parse header: "<type> <size>"
192        let header_str = String::from_utf8_lossy(header);
193        let parts: Vec<&str> = header_str.split(' ').collect();
194
195        if parts.len() != 2 {
196            return Err(LooseObjectError::InvalidFormat(
197                "Header should contain type and size".to_string(),
198            ));
199        }
200
201        let object_type = parts[0].parse::<LooseObjectType>()?;
202        let size = parts[1]
203            .parse::<usize>()
204            .map_err(|_| LooseObjectError::InvalidFormat("Invalid size".to_string()))?;
205
206        // Verify size matches content length
207        if size != content.len() {
208            return Err(LooseObjectError::InvalidFormat(format!(
209                "Size mismatch: header says {}, content is {}",
210                size,
211                content.len()
212            )));
213        }
214
215        // Parse type-specific content
216        let parsed_content = match object_type {
217            LooseObjectType::Commit => {
218                Some(ParsedContent::Commit(Self::parse_commit_content(content)))
219            }
220            LooseObjectType::Tree => Some(ParsedContent::Tree(Self::parse_tree_content(content))),
221            LooseObjectType::Blob => Some(ParsedContent::Blob(content.to_vec())),
222            LooseObjectType::Tag => Some(ParsedContent::Tag(Self::parse_tag_content(content))),
223        };
224
225        Ok(Self {
226            object_type,
227            size,
228            content: content.to_vec(),
229            object_id,
230            parsed_content,
231        })
232    }
233
234    /// Parse commit object content
235    fn parse_commit_content(content: &[u8]) -> CommitObject {
236        let content_str = String::from_utf8_lossy(content);
237        let lines = content_str.lines();
238
239        let mut tree = String::new();
240        let mut parents = Vec::new();
241        let mut author = String::new();
242        let mut author_date = String::new();
243        let mut committer = String::new();
244        let mut committer_date = String::new();
245        let mut message = String::new();
246
247        // Parse header lines
248        let mut in_message = false;
249        for line in lines {
250            if in_message {
251                if !message.is_empty() {
252                    message.push('\n');
253                }
254                message.push_str(line);
255            } else if line.is_empty() {
256                in_message = true;
257            } else if let Some(stripped) = line.strip_prefix("tree ") {
258                tree = stripped.to_string();
259            } else if let Some(stripped) = line.strip_prefix("parent ") {
260                parents.push(stripped.to_string());
261            } else if let Some(author_line) = line.strip_prefix("author ") {
262                if let Some(date_start) = author_line.rfind(' ')
263                    && let Some(name_end) = author_line[..date_start].rfind(' ')
264                {
265                    author = author_line[..name_end].to_string();
266                    author_date = author_line[name_end + 1..].to_string();
267                }
268            } else if let Some(committer_line) = line.strip_prefix("committer ")
269                && let Some(date_start) = committer_line.rfind(' ')
270                && let Some(name_end) = committer_line[..date_start].rfind(' ')
271            {
272                committer = committer_line[..name_end].to_string();
273                committer_date = committer_line[name_end + 1..].to_string();
274            }
275        }
276
277        CommitObject {
278            tree,
279            parents,
280            author,
281            author_date,
282            committer,
283            committer_date,
284            message,
285        }
286    }
287
288    /// Parse tree object content
289    fn parse_tree_content(content: &[u8]) -> TreeObject {
290        let mut entries = Vec::new();
291        let mut i = 0;
292
293        while i < content.len() {
294            // Read mode (until space)
295            let mode_start = i;
296            while i < content.len() && content[i] != b' ' {
297                i += 1;
298            }
299            if i >= content.len() {
300                break;
301            }
302            let mode = String::from_utf8_lossy(&content[mode_start..i]).to_string();
303            i += 1; // Skip space
304
305            // Read filename (until null)
306            let name_start = i;
307            while i < content.len() && content[i] != 0 {
308                i += 1;
309            }
310            if i >= content.len() {
311                break;
312            }
313            let name = String::from_utf8_lossy(&content[name_start..i]).to_string();
314            i += 1; // Skip null
315
316            // Read SHA-1 (20 bytes)
317            if i + 20 > content.len() {
318                break;
319            }
320            let sha1 = hex::encode(&content[i..i + 20]);
321            i += 20;
322
323            // Determine object type from mode
324            let object_type = match mode.as_str() {
325                "100755" => TreeEntryType::Executable,
326                "120000" => TreeEntryType::Symlink,
327                "160000" => TreeEntryType::Submodule,
328                "040000" => TreeEntryType::Tree,
329                _ => TreeEntryType::Blob, // Default fallback for "100644" and others
330            };
331
332            entries.push(TreeEntry {
333                mode,
334                name,
335                sha1,
336                object_type,
337            });
338        }
339
340        TreeObject { entries }
341    }
342
343    /// Parse tag object content
344    fn parse_tag_content(content: &[u8]) -> TagObject {
345        let content_str = String::from_utf8_lossy(content);
346        let lines = content_str.lines();
347
348        let mut object = String::new();
349        let mut object_type = String::new();
350        let mut tag = String::new();
351        let mut tagger = None;
352        let mut tagger_date = None;
353        let mut message = String::new();
354
355        // Parse header lines
356        let mut in_message = false;
357        for line in lines {
358            if in_message {
359                if !message.is_empty() {
360                    message.push('\n');
361                }
362                message.push_str(line);
363            } else if line.is_empty() {
364                in_message = true;
365            } else if let Some(stripped) = line.strip_prefix("object ") {
366                object = stripped.to_string();
367            } else if let Some(stripped) = line.strip_prefix("type ") {
368                object_type = stripped.to_string();
369            } else if let Some(stripped) = line.strip_prefix("tag ") {
370                tag = stripped.to_string();
371            } else if let Some(tagger_line) = line.strip_prefix("tagger ")
372                && let Some(date_start) = tagger_line.rfind(' ')
373                && let Some(name_end) = tagger_line[..date_start].rfind(' ')
374            {
375                tagger = Some(tagger_line[..name_end].to_string());
376                tagger_date = Some(tagger_line[name_end + 1..].to_string());
377            }
378        }
379
380        TagObject {
381            object,
382            object_type,
383            tag,
384            tagger,
385            tagger_date,
386            message,
387        }
388    }
389
390    /// Get the content as a UTF-8 string (for text objects like commits)
391    #[must_use]
392    pub fn content_as_string(&self) -> String {
393        String::from_utf8_lossy(&self.content).to_string()
394    }
395
396    /// Check if this object is binary (likely a blob)
397    #[must_use]
398    pub fn is_binary(&self) -> bool {
399        // Simple heuristic: if content contains null bytes, it's likely binary
400        self.content.contains(&0) || matches!(self.object_type, LooseObjectType::Blob)
401    }
402
403    /// Get parsed content if available
404    #[must_use]
405    pub const fn get_parsed_content(&self) -> Option<&ParsedContent> {
406        self.parsed_content.as_ref()
407    }
408}
409
#[cfg(test)]
mod tests {
    use super::*;
    use flate2::Compression;
    use flate2::write::ZlibEncoder;
    use std::io::Write;

    /// Payload shared by the blob tests.
    const PAYLOAD: &[u8] = b"Hello, World!";

    /// Build the uncompressed form of a blob: `blob <len>\0<payload>`.
    fn blob_bytes(payload: &[u8]) -> Vec<u8> {
        let mut data = format!("blob {}\0", payload.len()).into_bytes();
        data.extend_from_slice(payload);
        data
    }

    #[test]
    fn test_extract_object_id() {
        let path = Path::new(".git/objects/ab/cdef1234567890123456789012345678901234");
        let object_id = LooseObject::extract_object_id(path).unwrap();
        assert_eq!(object_id, "abcdef1234567890123456789012345678901234");
    }

    #[test]
    fn test_extract_object_id_rejects_bad_lengths() {
        let long_dir = Path::new(".git/objects/abc/cdef1234567890123456789012345678901234");
        assert!(matches!(
            LooseObject::extract_object_id(long_dir),
            Err(LooseObjectError::InvalidFormat(_))
        ));

        let short_file = Path::new(".git/objects/ab/cdef");
        assert!(matches!(
            LooseObject::extract_object_id(short_file),
            Err(LooseObjectError::InvalidFormat(_))
        ));
    }

    #[test]
    fn test_parse_object_data() {
        let data = blob_bytes(PAYLOAD);

        let object = LooseObject::parse_object_data(&data, "test123".to_string()).unwrap();
        assert_eq!(object.object_type, LooseObjectType::Blob);
        assert_eq!(object.size, 13);
        assert_eq!(object.content, PAYLOAD);
        assert_eq!(object.object_id, "test123");
    }

    #[test]
    fn test_parse_object_data_rejects_size_mismatch() {
        let err = LooseObject::parse_object_data(b"blob 5\0abc", "test123".to_string())
            .unwrap_err();
        assert!(matches!(err, LooseObjectError::InvalidFormat(_)));
    }

    #[test]
    fn test_parse_object_data_rejects_unknown_type() {
        let err = LooseObject::parse_object_data(b"bogus 0\0", "test123".to_string())
            .unwrap_err();
        assert!(matches!(err, LooseObjectError::UnknownType(ref name) if name == "bogus"));
    }

    #[test]
    fn test_parse_object_data_requires_null_terminator() {
        let err = LooseObject::parse_object_data(b"blob 3abc", "test123".to_string())
            .unwrap_err();
        assert!(matches!(err, LooseObjectError::InvalidFormat(_)));
    }

    #[test]
    fn test_create_and_read_loose_object() {
        let temp_dir = tempfile::tempdir().unwrap();
        let objects_dir = temp_dir.path().join("objects").join("ab");
        std::fs::create_dir_all(&objects_dir).unwrap();

        // Compress the object the same way it is stored on disk.
        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(&blob_bytes(PAYLOAD)).unwrap();
        let compressed = encoder.finish().unwrap();

        let file_path = objects_dir.join("cdef1234567890123456789012345678901234");
        std::fs::write(&file_path, compressed).unwrap();

        let object = LooseObject::read_from_path(&file_path).unwrap();
        assert_eq!(object.object_type, LooseObjectType::Blob);
        assert_eq!(object.size, 13);
        assert_eq!(object.content, PAYLOAD);
        assert_eq!(object.object_id, "abcdef1234567890123456789012345678901234");
    }

    #[test]
    fn test_parse_commit_content() {
        let content = b"tree 1234567890123456789012345678901234567890\nparent abcdef1234567890123456789012345678901234\nauthor John Doe <john@example.com> 1234567890 +0000\ncommitter John Doe <john@example.com> 1234567890 +0000\n\nInitial commit\n";

        let commit = LooseObject::parse_commit_content(content);
        assert_eq!(commit.tree, "1234567890123456789012345678901234567890");
        assert_eq!(commit.parents.len(), 1);
        assert_eq!(
            commit.parents[0],
            "abcdef1234567890123456789012345678901234"
        );
        assert!(commit.author.contains("John Doe"));
        assert_eq!(commit.author_date, "1234567890 +0000");
        assert_eq!(commit.committer_date, "1234567890 +0000");
        assert_eq!(commit.message, "Initial commit");
    }

    #[test]
    fn test_parse_tree_content() {
        let mut content = Vec::new();
        content.extend_from_slice(b"100644 file.txt\0");
        content.extend_from_slice(&[0xab; 20]);
        content.extend_from_slice(b"100755 run.sh\0");
        content.extend_from_slice(&[0xcd; 20]);

        let tree = LooseObject::parse_tree_content(&content);
        assert_eq!(tree.entries.len(), 2);

        assert_eq!(tree.entries[0].mode, "100644");
        assert_eq!(tree.entries[0].name, "file.txt");
        assert_eq!(tree.entries[0].sha1, "ab".repeat(20));
        assert!(matches!(tree.entries[0].object_type, TreeEntryType::Blob));

        assert_eq!(tree.entries[1].name, "run.sh");
        assert_eq!(tree.entries[1].sha1, "cd".repeat(20));
        assert!(matches!(
            tree.entries[1].object_type,
            TreeEntryType::Executable
        ));
    }

    #[test]
    fn test_parse_tree_content_stops_at_truncated_entry() {
        let tree = LooseObject::parse_tree_content(b"100644 file.txt\0tooshort");
        assert!(tree.entries.is_empty());
    }

    #[test]
    fn test_parse_tag_content() {
        let content = b"object 1234567890123456789012345678901234567890\ntype commit\ntag v1.0\ntagger Jane Doe <jane@example.com> 1234567890 +0100\n\nRelease\n";

        let tag = LooseObject::parse_tag_content(content);
        assert_eq!(tag.object, "1234567890123456789012345678901234567890");
        assert_eq!(tag.object_type, "commit");
        assert_eq!(tag.tag, "v1.0");
        assert_eq!(tag.tagger.as_deref(), Some("Jane Doe <jane@example.com>"));
        assert_eq!(tag.tagger_date.as_deref(), Some("1234567890 +0100"));
        assert_eq!(tag.message, "Release");
    }
}
483}