1use flate2::read::ZlibDecoder;
2use std::fs;
3use std::io::Read;
4use std::path::Path;
5use thiserror::Error;
6
/// Errors produced while locating, decompressing, or parsing a loose object.
#[derive(Debug, Error)]
pub enum LooseObjectError {
    /// An underlying I/O operation failed. Built automatically from
    /// [`std::io::Error`] through the `#[from]` conversion.
    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),

    /// The object path or the decompressed data did not have the expected
    /// layout; the payload is a human-readable reason.
    #[error("Invalid object format: {0}")]
    InvalidFormat(String),

    /// The header named an object type that has no [`LooseObjectType`]
    /// variant; the payload is the unrecognised type string.
    #[error("Unknown object type: {0}")]
    UnknownType(String),

    /// The zlib stream could not be decompressed; the payload is the
    /// stringified decoder error.
    #[error("Decompression error: {0}")]
    DecompressionError(String),
}
21
/// The object kind named in a loose object's header.
///
/// Converts to and from the lowercase names `commit`, `tree`, `blob` and
/// `tag` through the `Display` and `FromStr` implementations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LooseObjectType {
    /// Header type `commit`.
    Commit,
    /// Header type `tree`.
    Tree,
    /// Header type `blob`.
    Blob,
    /// Header type `tag`.
    Tag,
}
29
30impl std::fmt::Display for LooseObjectType {
31 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
32 match self {
33 Self::Commit => write!(f, "commit"),
34 Self::Tree => write!(f, "tree"),
35 Self::Blob => write!(f, "blob"),
36 Self::Tag => write!(f, "tag"),
37 }
38 }
39}
40
41impl std::str::FromStr for LooseObjectType {
42 type Err = LooseObjectError;
43
44 fn from_str(s: &str) -> Result<Self, Self::Err> {
45 match s {
46 "commit" => Ok(Self::Commit),
47 "tree" => Ok(Self::Tree),
48 "blob" => Ok(Self::Blob),
49 "tag" => Ok(Self::Tag),
50 _ => Err(LooseObjectError::UnknownType(s.to_string())),
51 }
52 }
53}
54
55#[derive(Debug, Clone)]
57pub struct CommitObject {
58 pub tree: String,
59 pub parents: Vec<String>,
60 pub author: String,
61 pub author_date: String,
62 pub committer: String,
63 pub committer_date: String,
64 pub message: String,
65}
66
/// One entry of a tree object.
///
/// Implements `PartialEq`/`Eq` so parsed entries can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TreeEntry {
    /// The mode text exactly as it appears before the first space of the entry.
    pub mode: String,
    /// The entry name (lossily decoded as UTF-8).
    pub name: String,
    /// Hex encoding of the entry's 20 raw object-id bytes.
    pub sha1: String,
    /// Classification derived from `mode`.
    pub object_type: TreeEntryType,
}

/// Classification of a tree entry, derived from its mode string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TreeEntryType {
    /// Any mode not recognised as one of the other variants.
    Blob,
    /// A nested tree (subdirectory).
    Tree,
    /// Mode `100755`.
    Executable,
    /// Mode `120000`.
    Symlink,
    /// Mode `160000`.
    Submodule,
}

/// The parsed entries of a tree object, in the order they were stored.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TreeObject {
    /// Entries in on-disk order.
    pub entries: Vec<TreeEntry>,
}
90
91#[derive(Debug, Clone)]
93pub struct TagObject {
94 pub object: String,
95 pub object_type: String,
96 pub tag: String,
97 pub tagger: Option<String>,
98 pub tagger_date: Option<String>,
99 pub message: String,
100}
101
102#[derive(Debug, Clone)]
104pub enum ParsedContent {
105 Commit(CommitObject),
106 Tree(TreeObject),
107 Blob(Vec<u8>),
108 Tag(TagObject),
109}
110
111#[derive(Debug, Clone)]
112pub struct LooseObject {
113 pub object_type: LooseObjectType,
114 pub size: usize,
115 pub content: Vec<u8>,
116 pub object_id: String,
117 pub parsed_content: Option<ParsedContent>,
118}
119
120impl LooseObject {
121 pub fn read_from_path(path: &Path) -> Result<Self, LooseObjectError> {
132 let object_id = Self::extract_object_id(path)?;
134
135 let compressed_data = fs::read(path)?;
137
138 let mut decoder = ZlibDecoder::new(&compressed_data[..]);
140 let mut decompressed = Vec::new();
141 decoder
142 .read_to_end(&mut decompressed)
143 .map_err(|e| LooseObjectError::DecompressionError(e.to_string()))?;
144
145 Self::parse_object_data(&decompressed, object_id)
147 }
148
149 fn extract_object_id(path: &Path) -> Result<String, LooseObjectError> {
152 let filename = path
153 .file_name()
154 .ok_or_else(|| LooseObjectError::InvalidFormat("No filename".to_string()))?
155 .to_string_lossy();
156
157 let parent_dir = path
158 .parent()
159 .ok_or_else(|| LooseObjectError::InvalidFormat("No parent directory".to_string()))?
160 .file_name()
161 .ok_or_else(|| LooseObjectError::InvalidFormat("No parent directory name".to_string()))?
162 .to_string_lossy();
163
164 if parent_dir.len() != 2 {
165 return Err(LooseObjectError::InvalidFormat(
166 "Parent directory should be 2 characters".to_string(),
167 ));
168 }
169
170 if filename.len() != 38 {
171 return Err(LooseObjectError::InvalidFormat(
172 "Filename should be 38 characters".to_string(),
173 ));
174 }
175
176 Ok(format!("{parent_dir}{filename}"))
177 }
178
179 fn parse_object_data(data: &[u8], object_id: String) -> Result<Self, LooseObjectError> {
182 let null_pos = data.iter().position(|&b| b == 0).ok_or_else(|| {
184 LooseObjectError::InvalidFormat("No null terminator found".to_string())
185 })?;
186
187 let header = &data[..null_pos];
189 let content = &data[null_pos + 1..];
190
191 let header_str = String::from_utf8_lossy(header);
193 let parts: Vec<&str> = header_str.split(' ').collect();
194
195 if parts.len() != 2 {
196 return Err(LooseObjectError::InvalidFormat(
197 "Header should contain type and size".to_string(),
198 ));
199 }
200
201 let object_type = parts[0].parse::<LooseObjectType>()?;
202 let size = parts[1]
203 .parse::<usize>()
204 .map_err(|_| LooseObjectError::InvalidFormat("Invalid size".to_string()))?;
205
206 if size != content.len() {
208 return Err(LooseObjectError::InvalidFormat(format!(
209 "Size mismatch: header says {}, content is {}",
210 size,
211 content.len()
212 )));
213 }
214
215 let parsed_content = match object_type {
217 LooseObjectType::Commit => {
218 Some(ParsedContent::Commit(Self::parse_commit_content(content)))
219 }
220 LooseObjectType::Tree => Some(ParsedContent::Tree(Self::parse_tree_content(content))),
221 LooseObjectType::Blob => Some(ParsedContent::Blob(content.to_vec())),
222 LooseObjectType::Tag => Some(ParsedContent::Tag(Self::parse_tag_content(content))),
223 };
224
225 Ok(Self {
226 object_type,
227 size,
228 content: content.to_vec(),
229 object_id,
230 parsed_content,
231 })
232 }
233
234 fn parse_commit_content(content: &[u8]) -> CommitObject {
236 let content_str = String::from_utf8_lossy(content);
237 let lines = content_str.lines();
238
239 let mut tree = String::new();
240 let mut parents = Vec::new();
241 let mut author = String::new();
242 let mut author_date = String::new();
243 let mut committer = String::new();
244 let mut committer_date = String::new();
245 let mut message = String::new();
246
247 let mut in_message = false;
249 for line in lines {
250 if in_message {
251 if !message.is_empty() {
252 message.push('\n');
253 }
254 message.push_str(line);
255 } else if line.is_empty() {
256 in_message = true;
257 } else if let Some(stripped) = line.strip_prefix("tree ") {
258 tree = stripped.to_string();
259 } else if let Some(stripped) = line.strip_prefix("parent ") {
260 parents.push(stripped.to_string());
261 } else if let Some(author_line) = line.strip_prefix("author ") {
262 if let Some(date_start) = author_line.rfind(' ')
263 && let Some(name_end) = author_line[..date_start].rfind(' ')
264 {
265 author = author_line[..name_end].to_string();
266 author_date = author_line[name_end + 1..].to_string();
267 }
268 } else if let Some(committer_line) = line.strip_prefix("committer ")
269 && let Some(date_start) = committer_line.rfind(' ')
270 && let Some(name_end) = committer_line[..date_start].rfind(' ')
271 {
272 committer = committer_line[..name_end].to_string();
273 committer_date = committer_line[name_end + 1..].to_string();
274 }
275 }
276
277 CommitObject {
278 tree,
279 parents,
280 author,
281 author_date,
282 committer,
283 committer_date,
284 message,
285 }
286 }
287
288 fn parse_tree_content(content: &[u8]) -> TreeObject {
290 let mut entries = Vec::new();
291 let mut i = 0;
292
293 while i < content.len() {
294 let mode_start = i;
296 while i < content.len() && content[i] != b' ' {
297 i += 1;
298 }
299 if i >= content.len() {
300 break;
301 }
302 let mode = String::from_utf8_lossy(&content[mode_start..i]).to_string();
303 i += 1; let name_start = i;
307 while i < content.len() && content[i] != 0 {
308 i += 1;
309 }
310 if i >= content.len() {
311 break;
312 }
313 let name = String::from_utf8_lossy(&content[name_start..i]).to_string();
314 i += 1; if i + 20 > content.len() {
318 break;
319 }
320 let sha1 = hex::encode(&content[i..i + 20]);
321 i += 20;
322
323 let object_type = match mode.as_str() {
325 "100755" => TreeEntryType::Executable,
326 "120000" => TreeEntryType::Symlink,
327 "160000" => TreeEntryType::Submodule,
328 "040000" => TreeEntryType::Tree,
329 _ => TreeEntryType::Blob, };
331
332 entries.push(TreeEntry {
333 mode,
334 name,
335 sha1,
336 object_type,
337 });
338 }
339
340 TreeObject { entries }
341 }
342
343 fn parse_tag_content(content: &[u8]) -> TagObject {
345 let content_str = String::from_utf8_lossy(content);
346 let lines = content_str.lines();
347
348 let mut object = String::new();
349 let mut object_type = String::new();
350 let mut tag = String::new();
351 let mut tagger = None;
352 let mut tagger_date = None;
353 let mut message = String::new();
354
355 let mut in_message = false;
357 for line in lines {
358 if in_message {
359 if !message.is_empty() {
360 message.push('\n');
361 }
362 message.push_str(line);
363 } else if line.is_empty() {
364 in_message = true;
365 } else if let Some(stripped) = line.strip_prefix("object ") {
366 object = stripped.to_string();
367 } else if let Some(stripped) = line.strip_prefix("type ") {
368 object_type = stripped.to_string();
369 } else if let Some(stripped) = line.strip_prefix("tag ") {
370 tag = stripped.to_string();
371 } else if let Some(tagger_line) = line.strip_prefix("tagger ")
372 && let Some(date_start) = tagger_line.rfind(' ')
373 && let Some(name_end) = tagger_line[..date_start].rfind(' ')
374 {
375 tagger = Some(tagger_line[..name_end].to_string());
376 tagger_date = Some(tagger_line[name_end + 1..].to_string());
377 }
378 }
379
380 TagObject {
381 object,
382 object_type,
383 tag,
384 tagger,
385 tagger_date,
386 message,
387 }
388 }
389
390 #[must_use]
392 pub fn content_as_string(&self) -> String {
393 String::from_utf8_lossy(&self.content).to_string()
394 }
395
396 #[must_use]
398 pub fn is_binary(&self) -> bool {
399 self.content.contains(&0) || matches!(self.object_type, LooseObjectType::Blob)
401 }
402
403 #[must_use]
405 pub const fn get_parsed_content(&self) -> Option<&ParsedContent> {
406 self.parsed_content.as_ref()
407 }
408}
409
#[cfg(test)]
mod tests {
    use super::*;
    use flate2::Compression;
    use flate2::write::ZlibEncoder;
    use std::io::Write;

    /// Object id used by the path-based tests, split as `ab/` + 38 characters.
    const OBJECT_ID: &str = "abcdef1234567890123456789012345678901234";

    /// Builds the uncompressed `blob <len>\0<payload>` byte sequence.
    fn raw_blob(payload: &[u8]) -> Vec<u8> {
        let mut raw = format!("blob {}\0", payload.len()).into_bytes();
        raw.extend_from_slice(payload);
        raw
    }

    #[test]
    fn test_extract_object_id() {
        let path = Path::new(".git/objects/ab/cdef1234567890123456789012345678901234");
        let object_id =
            LooseObject::extract_object_id(path).expect("well-formed loose-object path");
        assert_eq!(object_id, OBJECT_ID);
    }

    #[test]
    fn test_parse_object_data() {
        let payload = b"Hello, World!";
        let raw = raw_blob(payload);

        let object = LooseObject::parse_object_data(&raw, "test123".to_string())
            .expect("valid blob data");
        assert_eq!(object.object_type, LooseObjectType::Blob);
        assert_eq!(object.size, 13);
        assert_eq!(object.content, payload);
        assert_eq!(object.object_id, "test123");
    }

    #[test]
    fn test_create_and_read_loose_object() {
        // Lay out `<tmp>/objects/ab/<38 chars>` like a real object store.
        let temp_dir = tempfile::tempdir().expect("create temp dir");
        let fan_out_dir = temp_dir.path().join("objects").join(&OBJECT_ID[..2]);
        std::fs::create_dir_all(&fan_out_dir).expect("create fan-out dir");

        let payload = b"Hello, World!";
        let raw = raw_blob(payload);

        // Compress the raw object the same way it is stored on disk.
        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(&raw).expect("compress object");
        let compressed = encoder.finish().expect("finish zlib stream");

        let file_path = fan_out_dir.join(&OBJECT_ID[2..]);
        std::fs::write(&file_path, compressed).expect("write object file");

        let object = LooseObject::read_from_path(&file_path).expect("read object back");
        assert_eq!(object.object_type, LooseObjectType::Blob);
        assert_eq!(object.size, 13);
        assert_eq!(object.content, payload);
        assert_eq!(object.object_id, OBJECT_ID);
    }

    #[test]
    fn test_parse_commit_content() {
        let content = b"tree 1234567890123456789012345678901234567890\nparent abcdef1234567890123456789012345678901234\nauthor John Doe <john@example.com> 1234567890 +0000\ncommitter John Doe <john@example.com> 1234567890 +0000\n\nInitial commit\n";

        let commit = LooseObject::parse_commit_content(content);
        assert_eq!(commit.tree, "1234567890123456789012345678901234567890");
        assert_eq!(commit.parents.len(), 1);
        assert_eq!(commit.parents[0], OBJECT_ID);
        assert!(commit.author.contains("John Doe"));
        assert_eq!(commit.message, "Initial commit");
    }
}