Skip to main content

null_e/analysis/
git.rs

//! Git repository analysis and optimization
//!
//! Detects:
//! - Large `.git` directories that could benefit from `git gc`
//! - Loose objects that need packing
//! - Git LFS cache
//!
//! Detection of large files in history is listed as a goal but is not
//! implemented by the code in this module.
8
9use super::{Recommendation, RecommendationKind, RiskLevel};
10use crate::cleaners::calculate_dir_size;
11use crate::error::Result;
12use rayon::prelude::*;
13use std::path::{Path, PathBuf};
14use std::process::Command;
15use walkdir::WalkDir;
16
/// Git repository analyzer.
///
/// Holds the thresholds that decide which repositories are worth reporting
/// and when running `git gc` is suggested.
#[derive(Debug, Clone)]
pub struct GitAnalyzer {
    /// Minimum `.git` size, in bytes, for a repository to be reported
    /// (default 100 MB).
    pub min_git_size: u64,
    /// Loose-object count above which `git gc` is suggested (default 1000).
    pub min_loose_objects: usize,
}
24
25impl Default for GitAnalyzer {
26    fn default() -> Self {
27        Self {
28            min_git_size: 100_000_000, // 100MB
29            min_loose_objects: 1000,
30        }
31    }
32}
33
/// Information about a git repository.
///
/// Plain data record; equality compares every field, which makes the
/// struct convenient to assert on in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GitRepoInfo {
    /// Path to the repository root (the directory that contains `.git`).
    pub path: PathBuf,
    /// Size of the `.git` directory, in bytes.
    pub git_size: u64,
    /// Number of loose objects.
    pub loose_objects: usize,
    /// Number of pack files.
    pub pack_count: usize,
    /// Combined size of the pack files, in bytes.
    pub pack_size: u64,
    /// Whether running `git gc` would help.
    pub gc_recommended: bool,
    /// Estimated savings from `git gc`, in bytes.
    pub estimated_savings: u64,
    /// Last commit date (if available).
    pub last_commit: Option<String>,
}
54
55impl GitAnalyzer {
56    /// Create a new git analyzer
57    pub fn new() -> Self {
58        Self::default()
59    }
60
61    /// Scan a directory for git repositories and analyze them
62    pub fn scan(&self, root: &Path, max_depth: usize) -> Result<Vec<Recommendation>> {
63        let repos = self.find_git_repos(root, max_depth)?;
64
65        let recommendations: Vec<Recommendation> = repos
66            .par_iter()
67            .filter_map(|repo_path| self.analyze_repo(repo_path).ok())
68            .flatten()
69            .collect();
70
71        Ok(recommendations)
72    }
73
74    /// Find all .git directories under root
75    fn find_git_repos(&self, root: &Path, max_depth: usize) -> Result<Vec<PathBuf>> {
76        let mut repos = Vec::new();
77
78        for entry in WalkDir::new(root)
79            .max_depth(max_depth)
80            .follow_links(false)
81            .into_iter()
82            .filter_map(|e| e.ok())
83        {
84            let path = entry.path();
85
86            // Skip common non-project directories
87            let name = path.file_name().map(|n| n.to_string_lossy()).unwrap_or_default();
88            if name == "node_modules" || name == ".cargo" || name == "target" || name == "venv" {
89                continue;
90            }
91
92            if path.is_dir() && path.file_name().map(|n| n == ".git").unwrap_or(false) {
93                if let Some(parent) = path.parent() {
94                    repos.push(parent.to_path_buf());
95                }
96            }
97        }
98
99        Ok(repos)
100    }
101
102    /// Analyze a single git repository
103    fn analyze_repo(&self, repo_path: &Path) -> Result<Vec<Recommendation>> {
104        let mut recommendations = Vec::new();
105        let git_dir = repo_path.join(".git");
106
107        if !git_dir.exists() {
108            return Ok(recommendations);
109        }
110
111        // Calculate .git size
112        let (git_size, _) = calculate_dir_size(&git_dir)?;
113
114        if git_size < self.min_git_size {
115            return Ok(recommendations);
116        }
117
118        // Count loose objects
119        let objects_dir = git_dir.join("objects");
120        let (loose_count, loose_size) = self.count_loose_objects(&objects_dir);
121
122        // Count pack files
123        let pack_dir = objects_dir.join("pack");
124        let (pack_count, pack_size) = self.count_packs(&pack_dir);
125
126        // Check if gc would help
127        let gc_recommended = loose_count > self.min_loose_objects ||
128                            (loose_size > 50_000_000 && loose_count > 500);
129
130        // Estimate savings (loose objects can usually be compressed 50-80%)
131        let estimated_savings = if gc_recommended {
132            (loose_size as f64 * 0.6) as u64
133        } else {
134            0
135        };
136
137        // Get last commit info
138        let last_commit = self.get_last_commit_date(repo_path);
139
140        let _info = GitRepoInfo {
141            path: repo_path.to_path_buf(),
142            git_size,
143            loose_objects: loose_count,
144            pack_count,
145            pack_size,
146            gc_recommended,
147            estimated_savings,
148            last_commit: last_commit.clone(),
149        };
150
151        // Create recommendation for large .git
152        if git_size > self.min_git_size {
153            let title = format!(
154                "Large .git: {} ({})",
155                repo_path.file_name().unwrap_or_default().to_string_lossy(),
156                format_size(git_size)
157            );
158
159            let description = if gc_recommended {
160                format!(
161                    "{} loose objects ({} bytes). Running 'git gc' could save ~{}.",
162                    loose_count,
163                    format_size(loose_size),
164                    format_size(estimated_savings)
165                )
166            } else {
167                format!(
168                    "Large repository with {} pack files. Already well-packed.",
169                    pack_count
170                )
171            };
172
173            let fix_command = if gc_recommended {
174                Some(format!("cd {:?} && git gc --aggressive --prune=now", repo_path))
175            } else {
176                None
177            };
178
179            recommendations.push(Recommendation {
180                kind: RecommendationKind::GitOptimization,
181                title,
182                description,
183                path: repo_path.to_path_buf(),
184                potential_savings: estimated_savings,
185                fix_command,
186                risk: RiskLevel::None,
187            });
188        }
189
190        Ok(recommendations)
191    }
192
193    /// Count loose objects in objects directory
194    fn count_loose_objects(&self, objects_dir: &Path) -> (usize, u64) {
195        let mut count = 0;
196        let mut size = 0u64;
197
198        // Loose objects are in objects/XX/YYYYYYYY... subdirectories
199        if let Ok(entries) = std::fs::read_dir(objects_dir) {
200            for entry in entries.filter_map(|e| e.ok()) {
201                let name = entry.file_name();
202                let name_str = name.to_string_lossy();
203
204                // Skip pack and info directories
205                if name_str == "pack" || name_str == "info" {
206                    continue;
207                }
208
209                // Two-character hex directories contain loose objects
210                if name_str.len() == 2 && name_str.chars().all(|c| c.is_ascii_hexdigit()) {
211                    if let Ok(subentries) = std::fs::read_dir(entry.path()) {
212                        for subentry in subentries.filter_map(|e| e.ok()) {
213                            count += 1;
214                            if let Ok(meta) = subentry.metadata() {
215                                size += meta.len();
216                            }
217                        }
218                    }
219                }
220            }
221        }
222
223        (count, size)
224    }
225
226    /// Count pack files
227    fn count_packs(&self, pack_dir: &Path) -> (usize, u64) {
228        let mut count = 0;
229        let mut size = 0u64;
230
231        if let Ok(entries) = std::fs::read_dir(pack_dir) {
232            for entry in entries.filter_map(|e| e.ok()) {
233                let name = entry.file_name();
234                if name.to_string_lossy().ends_with(".pack") {
235                    count += 1;
236                    if let Ok(meta) = entry.metadata() {
237                        size += meta.len();
238                    }
239                }
240            }
241        }
242
243        (count, size)
244    }
245
246    /// Get last commit date
247    fn get_last_commit_date(&self, repo_path: &Path) -> Option<String> {
248        let output = Command::new("git")
249            .args(["log", "-1", "--format=%ci"])
250            .current_dir(repo_path)
251            .output()
252            .ok()?;
253
254        if output.status.success() {
255            let date = String::from_utf8_lossy(&output.stdout);
256            Some(date.trim().to_string())
257        } else {
258            None
259        }
260    }
261
262    /// Detect Git LFS cache
263    pub fn detect_lfs_cache(&self) -> Result<Vec<Recommendation>> {
264        let mut recommendations = Vec::new();
265        let home = dirs::home_dir().unwrap_or_default();
266
267        // Git LFS cache locations
268        let lfs_paths = [
269            home.join(".cache/git-lfs"),
270            home.join("Library/Caches/git-lfs"), // macOS
271        ];
272
273        for lfs_path in lfs_paths {
274            if !lfs_path.exists() {
275                continue;
276            }
277
278            let (size, file_count) = calculate_dir_size(&lfs_path)?;
279            if size < 100_000_000 {
280                continue;
281            }
282
283            recommendations.push(Recommendation {
284                kind: RecommendationKind::GitLfsCache,
285                title: format!("Git LFS Cache ({})", format_size(size)),
286                description: format!(
287                    "Git LFS cached files ({} files). Can be pruned if not actively using LFS.",
288                    file_count
289                ),
290                path: lfs_path,
291                potential_savings: size,
292                fix_command: Some("git lfs prune".to_string()),
293                risk: RiskLevel::Low,
294            });
295        }
296
297        Ok(recommendations)
298    }
299}
300
301/// Format bytes as human-readable size
302fn format_size(bytes: u64) -> String {
303    super::format_size(bytes)
304}
305
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Creates an empty, uniquely named scratch directory under the system
    /// temp dir. Each test passes its own `label` so parallel tests do not
    /// share a directory.
    fn scratch_dir(label: &str) -> PathBuf {
        let dir = std::env::temp_dir().join(format!(
            "null_e_git_{}_{}",
            label,
            std::process::id()
        ));
        // Leftovers from an aborted earlier run are removed first.
        let _ = fs::remove_dir_all(&dir);
        fs::create_dir_all(&dir).expect("create scratch dir");
        dir
    }

    #[test]
    fn test_git_analyzer_creation() {
        let analyzer = GitAnalyzer::new();
        assert_eq!(analyzer.min_git_size, 100_000_000);
        assert_eq!(analyzer.min_loose_objects, 1000);
    }

    #[test]
    fn test_count_loose_objects() {
        let objects = scratch_dir("loose");
        fs::create_dir_all(objects.join("ab")).unwrap();
        fs::create_dir_all(objects.join("pack")).unwrap();
        fs::create_dir_all(objects.join("info")).unwrap();
        fs::write(objects.join("ab").join("cdef01"), b"12345").unwrap();
        fs::write(objects.join("ab").join("cdef02"), b"123").unwrap();
        // Pack files must not be counted as loose objects.
        fs::write(objects.join("pack").join("pack-1.pack"), b"ignored").unwrap();

        let analyzer = GitAnalyzer::new();
        assert_eq!(analyzer.count_loose_objects(&objects), (2, 8));
        assert_eq!(
            analyzer.count_loose_objects(&objects.join("does-not-exist")),
            (0, 0)
        );

        let _ = fs::remove_dir_all(&objects);
    }

    #[test]
    fn test_count_packs() {
        let pack_dir = scratch_dir("packs");
        fs::write(pack_dir.join("pack-1.pack"), b"1234").unwrap();
        // Index files sit next to packs but are not packs themselves.
        fs::write(pack_dir.join("pack-1.idx"), b"12").unwrap();

        let analyzer = GitAnalyzer::new();
        assert_eq!(analyzer.count_packs(&pack_dir), (1, 4));
        assert_eq!(
            analyzer.count_packs(&pack_dir.join("does-not-exist")),
            (0, 0)
        );

        let _ = fs::remove_dir_all(&pack_dir);
    }

    #[test]
    fn test_find_git_repos() {
        let root = scratch_dir("find");
        fs::create_dir_all(root.join("proj").join(".git")).unwrap();
        fs::create_dir_all(root.join("not_a_repo")).unwrap();

        let repos = GitAnalyzer::new()
            .find_git_repos(&root, 3)
            .ok()
            .unwrap_or_default();
        assert_eq!(repos, vec![root.join("proj")]);

        let _ = fs::remove_dir_all(&root);
    }

    #[test]
    fn test_analyze_repo_without_git_dir() {
        let root = scratch_dir("nogit");

        let count = GitAnalyzer::new()
            .analyze_repo(&root)
            .ok()
            .map(|recommendations| recommendations.len());
        assert_eq!(count, Some(0));

        let _ = fs::remove_dir_all(&root);
    }

    /// Smoke test only: the outcome depends on the directory the tests run
    /// in, so it just checks that a shallow scan completes without panicking.
    #[test]
    fn test_git_scan() {
        let analyzer = GitAnalyzer::new();
        // Scan current directory with low depth
        if let Ok(recommendations) = analyzer.scan(Path::new("."), 3) {
            println!("Found {} git recommendations", recommendations.len());
            for rec in &recommendations {
                println!("  {} - {}", rec.title, rec.description);
            }
        }
    }
}
327}