git_plumber/git/
repository.rs

1use crate::git::loose_object::{LooseObject, LooseObjectError};
2use crate::git::pack::{PackError, PackIndex};
3use std::collections::HashMap;
4use std::fs;
5use std::path::{Path, PathBuf};
6use thiserror::Error;
7
8/// Represents a group of pack-related files with the same base name
9#[derive(Debug, Clone)]
10pub struct PackGroup {
11    pub base_name: String,
12    pub pack_file: Option<PathBuf>,
13    pub idx_file: Option<PathBuf>,
14    pub rev_file: Option<PathBuf>,
15    pub mtimes_file: Option<PathBuf>,
16}
17
18impl PackGroup {
19    /// Creates a new PackGroup with the given base name
20    pub fn new(base_name: &str) -> Self {
21        Self {
22            base_name: base_name.to_string(),
23            pack_file: None,
24            idx_file: None,
25            rev_file: None,
26            mtimes_file: None,
27        }
28    }
29
30    /// Returns true if this group has at least a .pack file
31    pub fn is_valid(&self) -> bool {
32        self.pack_file.is_some()
33    }
34
35    /// Returns true if this group has both .pack and .idx files
36    pub fn has_index(&self) -> bool {
37        self.pack_file.is_some() && self.idx_file.is_some()
38    }
39
40    /// Returns all available file paths in this group
41    pub fn get_all_files(&self) -> Vec<(&str, &PathBuf)> {
42        let mut files = Vec::new();
43
44        if let Some(ref path) = self.pack_file {
45            files.push(("packfile", path));
46        }
47        if let Some(ref path) = self.idx_file {
48            files.push(("index", path));
49        }
50        if let Some(ref path) = self.rev_file {
51            files.push(("xedni", path)); // reversed index
52        }
53        if let Some(ref path) = self.mtimes_file {
54            files.push(("mtime", path)); // mtimes
55        }
56
57        files
58    }
59
60    /// Load and parse the index file if available
61    pub fn load_index(&self) -> Result<Option<PackIndex>, PackError> {
62        if let Some(ref idx_path) = self.idx_file {
63            match std::fs::read(idx_path) {
64                Ok(data) => match PackIndex::parse(&data) {
65                    Ok((_, index)) => Ok(Some(index)),
66                    Err(e) => Err(PackError::ParseError(format!(
67                        "Failed to parse index: {:?}",
68                        e
69                    ))),
70                },
71                Err(e) => Err(PackError::DecompressionError(e)),
72            }
73        } else {
74            Ok(None)
75        }
76    }
77
78    /// Look up an object by SHA-1 hash using the index file
79    /// Returns the byte offset in the pack file if found
80    pub fn lookup_object_offset(&self, sha1: &[u8; 20]) -> Result<Option<u64>, PackError> {
81        match self.load_index()? {
82            Some(index) => Ok(index.lookup_object(sha1)),
83            None => Ok(None),
84        }
85    }
86
87    /// Get basic statistics about the pack group
88    pub fn get_stats(&self) -> Result<PackGroupStats, PackError> {
89        let mut stats = PackGroupStats {
90            base_name: self.base_name.clone(),
91            has_pack: self.pack_file.is_some(),
92            has_index: self.idx_file.is_some(),
93            has_rev: self.rev_file.is_some(),
94            has_mtimes: self.mtimes_file.is_some(),
95            object_count: None,
96            pack_size: None,
97            index_size: None,
98        };
99
100        // Get pack file size
101        if let Some(ref pack_path) = self.pack_file
102            && let Ok(metadata) = std::fs::metadata(pack_path)
103        {
104            stats.pack_size = Some(metadata.len());
105        }
106
107        // Get index file size and object count
108        if let Some(ref idx_path) = self.idx_file {
109            if let Ok(metadata) = std::fs::metadata(idx_path) {
110                stats.index_size = Some(metadata.len());
111            }
112
113            // Load index to get object count
114            if let Ok(Some(index)) = self.load_index() {
115                stats.object_count = Some(index.object_count());
116            }
117        }
118
119        Ok(stats)
120    }
121}
122
/// Summary of which files a pack group has and how large they are.
#[derive(Debug, Clone)]
pub struct PackGroupStats {
    /// Base name of the group the statistics describe.
    pub base_name: String,
    /// Whether a `.pack` file is present.
    pub has_pack: bool,
    /// Whether an `.idx` file is present.
    pub has_index: bool,
    /// Whether a `.rev` file is present.
    pub has_rev: bool,
    /// Whether a `.mtimes` file is present.
    pub has_mtimes: bool,
    /// Number of objects reported by the index, when it could be loaded.
    pub object_count: Option<usize>,
    /// Size of the pack file in bytes, when its metadata could be read.
    pub pack_size: Option<u64>,
    /// Size of the index file in bytes, when its metadata could be read.
    pub index_size: Option<u64>,
}

impl std::fmt::Display for PackGroupStats {
    /// Writes a multi-line, human-readable report.
    ///
    /// Optional values are printed only when known; the presence flags are
    /// always printed, each as a check mark or a cross.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        fn mark(present: bool) -> &'static str {
            if present { "✓" } else { "✗" }
        }

        writeln!(f, "Pack Group: {}", self.base_name)?;

        if let Some(objects) = self.object_count {
            writeln!(f, "Objects: {}", objects)?;
        }
        if let Some(bytes) = self.pack_size {
            writeln!(f, "Pack size: {} bytes", bytes)?;
        }
        if let Some(bytes) = self.index_size {
            writeln!(f, "Index size: {} bytes", bytes)?;
        }

        writeln!(f, "Files present:")?;
        writeln!(f, "  Pack: {}", mark(self.has_pack))?;
        writeln!(f, "  Index: {}", mark(self.has_index))?;
        writeln!(f, "  Rev: {}", mark(self.has_rev))?;
        writeln!(f, "  Mtimes: {}", mark(self.has_mtimes))
    }
}
161
162#[derive(Debug, Error)]
163pub enum RepositoryError {
164    #[error("IO error: {0}")]
165    IoError(#[from] std::io::Error),
166
167    #[error("Not a git repository: {0}")]
168    NotGitRepository(String),
169
170    #[error("Loose object error: {0}")]
171    LooseObjectError(#[from] LooseObjectError),
172
173    #[error("Pack error: {0}")]
174    PackError(#[from] PackError),
175}
176
/// Aggregate counters describing the loose objects of a repository.
#[derive(Debug, Clone, Default)]
pub struct LooseObjectStats {
    /// Number of loose objects that were counted.
    pub total_count: usize,
    /// Sum of the `size` values of all counted objects.
    pub total_size: usize,
    /// Number of commit objects.
    pub commit_count: usize,
    /// Number of tree objects.
    pub tree_count: usize,
    /// Number of blob objects.
    pub blob_count: usize,
    /// Number of tag objects.
    pub tag_count: usize,
}

impl LooseObjectStats {
    /// Renders the counters as a two-line, human-readable summary:
    /// totals on the first line, the per-type breakdown on the second.
    #[must_use]
    pub fn summary(&self) -> String {
        let Self {
            total_count,
            total_size,
            commit_count,
            tree_count,
            blob_count,
            tag_count,
        } = self;

        format!(
            "Total: {} objects ({} bytes)\nCommits: {}, Trees: {}, Blobs: {}, Annotated Tags: {}",
            total_count, total_size, commit_count, tree_count, blob_count, tag_count
        )
    }
}
203
/// Handle to a Git repository on disk.
pub struct Repository {
    /// Directory that contains the repository's `.git` entry.
    path: PathBuf,
}
208
209impl Repository {
210    /// Creates a new Repository instance from a path
211    ///
212    /// # Errors
213    ///
214    /// This function will return an error if:
215    /// - The provided path does not contain a .git directory
216    pub fn new(path: impl AsRef<Path>) -> Result<Self, RepositoryError> {
217        let path = path.as_ref().to_path_buf();
218        let git_dir = path.join(".git");
219
220        if !git_dir.exists() {
221            return Err(RepositoryError::NotGitRepository(
222                "No .git directory found".to_string(),
223            ));
224        }
225
226        Ok(Self { path })
227    }
228
229    /// Returns the path to the repository
230    #[must_use]
231    pub fn get_path(&self) -> &Path {
232        &self.path
233    }
234
235    /// Lists all pack files in the repository
236    ///
237    /// # Errors
238    ///
239    /// This function will return an error if:
240    /// - File system operations fail when reading the objects/pack directory
241    pub fn list_pack_files(&self) -> Result<Vec<PathBuf>, RepositoryError> {
242        let pack_dir = self.path.join(".git/objects/pack");
243
244        if !pack_dir.exists() {
245            return Ok(Vec::new()); // Return empty list if pack directory doesn't exist
246        }
247
248        let mut pack_files = Vec::new();
249        for entry in fs::read_dir(pack_dir)? {
250            let entry = entry?;
251            let path = entry.path();
252            if path.extension().is_some_and(|ext| ext == "pack") {
253                pack_files.push(path);
254            }
255        }
256
257        Ok(pack_files)
258    }
259
260    /// Lists all pack-related files grouped by their base name (without extension)
261    ///
262    /// Returns a map where keys are base names (e.g., "pack-abc123") and values are
263    /// structs containing paths to all related files (.pack, .idx, .rev, .mtimes)
264    ///
265    /// # Errors
266    ///
267    /// This function will return an error if:
268    /// - File system operations fail when reading the objects/pack directory
269    pub fn list_pack_groups(&self) -> Result<HashMap<String, PackGroup>, RepositoryError> {
270        let pack_dir = self.path.join(".git/objects/pack");
271
272        if !pack_dir.exists() {
273            return Ok(HashMap::new());
274        }
275
276        let mut pack_groups: HashMap<String, PackGroup> = HashMap::new();
277
278        for entry in fs::read_dir(pack_dir)? {
279            let entry = entry?;
280            let path = entry.path();
281
282            if let Some(extension) = path.extension().and_then(|ext| ext.to_str())
283                && let Some(file_stem) = path.file_stem().and_then(|stem| stem.to_str())
284            {
285                let group = pack_groups
286                    .entry(file_stem.to_string())
287                    .or_insert_with(|| PackGroup::new(file_stem));
288
289                match extension {
290                    "pack" => group.pack_file = Some(path),
291                    "idx" => group.idx_file = Some(path),
292                    "rev" => group.rev_file = Some(path),
293                    "mtimes" => group.mtimes_file = Some(path),
294                    _ => {} // Ignore other extensions
295                }
296            }
297        }
298
299        Ok(pack_groups)
300    }
301
302    /// Lists all head refs (local branches) in the repository
303    ///
304    /// # Errors
305    ///
306    /// This function will return an error if:
307    /// - File system operations fail when reading the refs/heads directory
308    pub fn list_head_refs(&self) -> Result<Vec<PathBuf>, RepositoryError> {
309        Self::list_refs_in_dir(self.path.join(".git/refs/heads"))
310    }
311
312    /// Lists all remote refs grouped by remote name
313    ///
314    /// # Errors
315    ///
316    /// This function will return an error if:
317    /// - File system operations fail when reading the refs/remotes directory
318    pub fn list_remote_refs(&self) -> Result<Vec<(String, Vec<PathBuf>)>, RepositoryError> {
319        let remotes_dir = self.path.join(".git/refs/remotes");
320        if !remotes_dir.exists() {
321            return Ok(Vec::new());
322        }
323
324        let mut remotes = Vec::new();
325        for entry in fs::read_dir(remotes_dir)? {
326            let entry = entry?;
327            if entry.path().is_dir() {
328                let remote_name = entry.file_name().to_string_lossy().to_string();
329
330                let remote_refs = Self::list_refs_in_dir(entry.path())?;
331                remotes.push((remote_name, remote_refs));
332            }
333        }
334
335        Ok(remotes)
336    }
337
338    /// Lists all tag refs in the repository
339    ///
340    /// # Errors
341    ///
342    /// This function will return an error if:
343    /// - File system operations fail when reading the refs/tags directory
344    pub fn list_tag_refs(&self) -> Result<Vec<PathBuf>, RepositoryError> {
345        Self::list_refs_in_dir(self.path.join(".git/refs/tags"))
346    }
347
348    /// Checks if stash ref exists
349    ///
350    /// # Errors
351    ///
352    /// This function will return an error if:
353    /// - File system operations fail when checking for stash refs
354    pub fn has_stash_ref(&self) -> Result<bool, RepositoryError> {
355        let stash_path = self.path.join(".git/refs/stash");
356        Ok(stash_path.exists())
357    }
358
359    /// Helper method to list refs in a directory
360    fn list_refs_in_dir(dir_path: PathBuf) -> Result<Vec<PathBuf>, RepositoryError> {
361        if !dir_path.exists() {
362            return Ok(Vec::new());
363        }
364
365        let mut refs = Vec::new();
366        for entry in fs::read_dir(dir_path)? {
367            let entry = entry?;
368            let path = entry.path();
369            if path.is_file() {
370                refs.push(path);
371            }
372        }
373
374        Ok(refs)
375    }
376
377    /// Lists a sample of loose objects in the repository
378    /// Limit parameter controls the maximum number of objects to return
379    ///
380    /// # Errors
381    ///
382    /// This function will return an error if:
383    /// - File system operations fail when reading loose object directories
384    pub fn list_loose_objects(&self, limit: usize) -> Result<Vec<PathBuf>, RepositoryError> {
385        let objects_dir = self.path.join(".git/objects");
386        if !objects_dir.exists() {
387            return Ok(Vec::new());
388        }
389
390        let mut loose_objects = Vec::new();
391        let mut count = 0;
392
393        for entry in fs::read_dir(&objects_dir)? {
394            let entry = entry?;
395            let dir_name = entry.file_name().to_string_lossy().to_string();
396
397            // Skip info and pack directories
398            if dir_name == "info" || dir_name == "pack" || !entry.path().is_dir() {
399                continue;
400            }
401
402            if let Ok(subentries) = fs::read_dir(entry.path()) {
403                for subentry in subentries.flatten() {
404                    if count < limit {
405                        loose_objects.push(subentry.path());
406                        count += 1;
407                    } else {
408                        return Ok(loose_objects);
409                    }
410                }
411            }
412        }
413
414        Ok(loose_objects)
415    }
416
417    /// Reads and parses a loose object from the given path
418    ///
419    /// # Errors
420    ///
421    /// This function will return an error if:
422    /// - The file cannot be read
423    /// - The object cannot be decompressed or parsed
424    pub fn read_loose_object(&self, path: &Path) -> Result<LooseObject, RepositoryError> {
425        Ok(LooseObject::read_from_path(path)?)
426    }
427
428    /// Reads and parses a loose object by its hash
429    ///
430    /// # Errors
431    ///
432    /// This function will return an error if:
433    /// - The object file cannot be found or read
434    /// - The object cannot be decompressed or parsed
435    pub fn read_loose_object_by_hash(&self, hash: &str) -> Result<LooseObject, RepositoryError> {
436        if hash.len() != 40 {
437            return Err(RepositoryError::LooseObjectError(
438                LooseObjectError::InvalidFormat("Hash must be 40 characters".to_string()),
439            ));
440        }
441
442        let (dir, file) = hash.split_at(2);
443        let path = self.path.join(".git/objects").join(dir).join(file);
444
445        self.read_loose_object(&path)
446    }
447
448    /// List parsed loose objects with a limit
449    ///
450    /// # Errors
451    ///
452    /// This function will return an error if:
453    /// - File system operations fail when reading loose object directories  
454    /// - Objects cannot be parsed or decompressed
455    pub fn list_parsed_loose_objects(
456        &self,
457        limit: usize,
458    ) -> Result<Vec<LooseObject>, RepositoryError> {
459        let loose_object_paths = self.list_loose_objects(limit)?;
460        let mut parsed_objects = Vec::new();
461
462        for path in loose_object_paths {
463            match self.read_loose_object(&path) {
464                Ok(object) => parsed_objects.push(object),
465                Err(e) => {
466                    // Log the error but continue processing other objects
467                    eprintln!(
468                        "Warning: Failed to parse loose object {}: {e}",
469                        path.display()
470                    );
471                }
472            }
473        }
474
475        Ok(parsed_objects)
476    }
477
478    /// Check if a loose object exists by its hash
479    #[must_use]
480    pub fn loose_object_exists(&self, hash: &str) -> bool {
481        if hash.len() != 40 {
482            return false;
483        }
484
485        let (dir, file) = hash.split_at(2);
486        let path = self.path.join(".git/objects").join(dir).join(file);
487
488        path.exists() && path.is_file()
489    }
490
491    /// Get statistics about all loose objects in the repository
492    ///
493    /// # Errors
494    ///
495    /// This function will return an error if:
496    /// - File system operations fail when reading loose object directories
497    /// - Objects cannot be parsed or analyzed
498    pub fn get_loose_object_stats(&self) -> Result<LooseObjectStats, RepositoryError> {
499        let objects_dir = self.path.join(".git/objects");
500        if !objects_dir.exists() {
501            return Ok(LooseObjectStats::default());
502        }
503
504        let mut stats = LooseObjectStats::default();
505
506        for entry in fs::read_dir(&objects_dir)? {
507            let entry = entry?;
508            let dir_name = entry.file_name().to_string_lossy().to_string();
509
510            // Skip info and pack directories
511            if dir_name == "info" || dir_name == "pack" || !entry.path().is_dir() {
512                continue;
513            }
514
515            if let Ok(subentries) = fs::read_dir(entry.path()) {
516                for subentry in subentries.flatten() {
517                    if let Ok(object) = self.read_loose_object(&subentry.path()) {
518                        stats.total_count += 1;
519                        stats.total_size += object.size;
520
521                        match object.object_type {
522                            crate::git::loose_object::LooseObjectType::Commit => {
523                                stats.commit_count += 1;
524                            }
525                            crate::git::loose_object::LooseObjectType::Tree => {
526                                stats.tree_count += 1;
527                            }
528                            crate::git::loose_object::LooseObjectType::Blob => {
529                                stats.blob_count += 1;
530                            }
531                            crate::git::loose_object::LooseObjectType::Tag => stats.tag_count += 1,
532                        }
533                    }
534                }
535            }
536        }
537
538        Ok(stats)
539    }
540}
541
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// `list_pack_files` must report the `.pack` entries of `objects/pack` and
    /// nothing else.
    #[test]
    fn test_list_pack_files() {
        let workdir = tempfile::tempdir().unwrap();
        let pack_dir = workdir.path().join(".git/objects/pack");
        fs::create_dir_all(&pack_dir).unwrap();

        // Two packs plus one index file; the index must not be reported.
        for name in ["pack-1.pack", "pack-2.pack", "pack-1.idx"] {
            fs::write(pack_dir.join(name), b"").unwrap();
        }

        let repo = Repository::new(workdir.path()).unwrap();
        let found = repo.list_pack_files().unwrap();

        assert_eq!(found.len(), 2);
        assert!(found.iter().all(|p| p.extension().unwrap() == "pack"));
    }
}