scribe_scanner/
git_integration.rs

1//! Git integration for enhanced file discovery and status tracking.
2//!
3//! This module provides comprehensive Git integration capabilities including:
4//! - Fast file discovery using `git ls-files`
5//! - File status tracking (modified, staged, untracked)
6//! - Commit history and blame information
7//! - Repository statistics and health metrics
8
9use scribe_core::{Result, ScribeError, GitStatus, GitFileStatus};
10use std::path::{Path, PathBuf};
11use std::process::Command;
12use std::collections::{HashMap, HashSet};
13use std::time::{SystemTime, UNIX_EPOCH};
14use std::cell::RefCell;
15use serde::{Serialize, Deserialize};
16use tokio::process::Command as AsyncCommand;
17
18/// Git repository integration handler
19#[derive(Debug)]
20pub struct GitIntegrator {
21    repo_path: PathBuf,
22    git_available: bool,
23    cache: GitCache,
24}
25
26/// Git file information
27#[derive(Debug, Clone, Serialize, Deserialize)]
28pub struct GitFileInfo {
29    pub path: PathBuf,
30    pub status: GitFileStatus,
31    pub last_commit: Option<GitCommitInfo>,
32    pub blame_info: Option<GitBlameInfo>,
33    pub changes_count: usize,
34    pub additions: usize,
35    pub deletions: usize,
36}
37
38/// Git commit information
39#[derive(Debug, Clone, Serialize, Deserialize)]
40pub struct GitCommitInfo {
41    pub hash: String,
42    pub author: String,
43    pub email: String,
44    pub timestamp: u64,
45    pub message: String,
46    pub files_changed: usize,
47}
48
49/// Git blame information for a file
50#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct GitBlameInfo {
52    pub lines: Vec<GitBlameLine>,
53    pub contributors: HashMap<String, usize>, // author -> line count
54    pub last_modified: u64,
55    pub age_distribution: AgeDistribution,
56}
57
58/// Individual line blame information
59#[derive(Debug, Clone, Serialize, Deserialize)]
60pub struct GitBlameLine {
61    pub line_number: usize,
62    pub commit_hash: String,
63    pub author: String,
64    pub timestamp: u64,
65    pub content: String,
66}
67
68/// Age distribution of code lines
69#[derive(Debug, Clone, Serialize, Deserialize)]
70pub struct AgeDistribution {
71    pub recent: usize,    // < 1 month
72    pub moderate: usize,  // 1-6 months
73    pub old: usize,       // 6-12 months
74    pub ancient: usize,   // > 1 year
75}
76
77/// Git repository statistics
78#[derive(Debug, Clone, Serialize, Deserialize)]
79pub struct GitRepositoryStats {
80    pub total_commits: usize,
81    pub contributors: Vec<ContributorStats>,
82    pub branches: Vec<String>,
83    pub tags: Vec<String>,
84    pub file_types: HashMap<String, usize>,
85    pub activity_timeline: Vec<ActivityPeriod>,
86    pub repository_health: RepositoryHealth,
87}
88
89/// Contributor statistics
90#[derive(Debug, Clone, Serialize, Deserialize)]
91pub struct ContributorStats {
92    pub name: String,
93    pub email: String,
94    pub commits: usize,
95    pub lines_added: usize,
96    pub lines_deleted: usize,
97    pub files_modified: usize,
98    pub first_commit: u64,
99    pub last_commit: u64,
100}
101
102/// Activity period statistics
103#[derive(Debug, Clone, Serialize, Deserialize)]
104pub struct ActivityPeriod {
105    pub period: String, // e.g., "2024-01", "2024-W15"
106    pub commits: usize,
107    pub lines_changed: usize,
108    pub files_touched: usize,
109    pub contributors: HashSet<String>,
110}
111
112/// Repository health metrics
113#[derive(Debug, Clone, Serialize, Deserialize)]
114pub struct RepositoryHealth {
115    pub commit_frequency: f64,        // commits per day
116    pub contributor_diversity: f64,   // number of active contributors
117    pub code_churn: f64,             // lines changed / lines total
118    pub documentation_ratio: f64,     // docs files / code files
119    pub test_coverage_estimate: f64,  // test files / code files
120    pub branch_health: BranchHealth,
121}
122
123/// Branch health information
124#[derive(Debug, Clone, Serialize, Deserialize)]
125pub struct BranchHealth {
126    pub main_branch: String,
127    pub active_branches: usize,
128    pub stale_branches: usize,
129    pub merge_conflicts_risk: f64,
130}
131
132/// Git operations cache for performance
133#[derive(Debug)]
134struct GitCache {
135    file_statuses: RefCell<HashMap<PathBuf, GitFileStatus>>,
136    commit_cache: RefCell<HashMap<String, GitCommitInfo>>,
137    blame_cache: RefCell<HashMap<PathBuf, GitBlameInfo>>,
138    files_discovered: RefCell<usize>,
139    cache_timestamp: RefCell<Option<SystemTime>>,
140    cache_ttl: std::time::Duration,
141}
142
143impl Default for GitCache {
144    fn default() -> Self {
145        Self {
146            file_statuses: RefCell::new(HashMap::new()),
147            commit_cache: RefCell::new(HashMap::new()),
148            blame_cache: RefCell::new(HashMap::new()),
149            files_discovered: RefCell::new(0),
150            cache_timestamp: RefCell::new(None),
151            cache_ttl: std::time::Duration::from_secs(300),
152        }
153    }
154}
155
156impl GitIntegrator {
157    /// Create a new Git integrator for the given repository path
158    pub fn new<P: AsRef<Path>>(repo_path: P) -> Result<Self> {
159        let repo_path = repo_path.as_ref().to_path_buf();
160        
161        // Verify this is a Git repository
162        let git_dir = repo_path.join(".git");
163        if !git_dir.exists() {
164            return Err(ScribeError::git("Not a git repository".to_string()));
165        }
166
167        // Check if git command is available
168        let git_available = Command::new("git")
169            .arg("--version")
170            .output()
171            .map(|output| output.status.success())
172            .unwrap_or(false);
173
174        if !git_available {
175            log::warn!("Git command not available, falling back to filesystem scanning");
176        }
177
178        Ok(Self {
179            repo_path,
180            git_available,
181            cache: GitCache {
182                cache_ttl: std::time::Duration::from_secs(300), // 5 minutes
183                ..Default::default()
184            },
185        })
186    }
187
188    /// List all tracked files in the repository
189    pub async fn list_tracked_files(&self) -> Result<Vec<PathBuf>> {
190        if !self.git_available {
191            return Err(ScribeError::git("Git not available".to_string()));
192        }
193
194        let output = AsyncCommand::new("git")
195            .arg("ls-files")
196            .arg("-z") // null-separated output for safety
197            .current_dir(&self.repo_path)
198            .output()
199            .await
200            .map_err(|e| ScribeError::git(format!("Failed to run git ls-files: {}", e)))?;
201
202        if !output.status.success() {
203            let stderr = String::from_utf8_lossy(&output.stderr);
204            return Err(ScribeError::git(format!("git ls-files failed: {}", stderr)));
205        }
206
207        let stdout = String::from_utf8_lossy(&output.stdout);
208        let files: Vec<PathBuf> = stdout
209            .split('\0')
210            .filter(|s| !s.is_empty())
211            .map(|s| self.repo_path.join(s))
212            .collect();
213
214        // Update cache
215        *self.cache.files_discovered.borrow_mut() = files.len();
216        *self.cache.cache_timestamp.borrow_mut() = Some(SystemTime::now());
217
218        log::debug!("Git discovered {} tracked files", files.len());
219        Ok(files)
220    }
221
222    /// Get detailed file information including git status
223    pub async fn get_file_info(&self, file_path: &Path) -> Result<GitFileInfo> {
224        // Check cache first
225        if let Some(cached_status) = self.cache.file_statuses.borrow().get(file_path) {
226            if self.is_cache_valid() {
227                return Ok(GitFileInfo {
228                    path: file_path.to_path_buf(),
229                    status: cached_status.clone(),
230                    last_commit: None, // Would need to implement commit lookup
231                    blame_info: self.cache.blame_cache.borrow().get(file_path).cloned(),
232                    changes_count: 0,
233                    additions: 0,
234                    deletions: 0,
235                });
236            }
237        }
238
239        let status = self.get_file_status(file_path).await?;
240        let last_commit = self.get_last_commit_for_file(file_path).await.ok();
241        let blame_info = self.get_blame_info(file_path).await.ok();
242
243        // Get file change statistics
244        let (changes_count, additions, deletions) = self.get_file_change_stats(file_path).await
245            .unwrap_or((0, 0, 0));
246
247        // Cache the status and update timestamp
248        self.cache.file_statuses.borrow_mut().insert(file_path.to_path_buf(), status.clone());
249        *self.cache.cache_timestamp.borrow_mut() = Some(SystemTime::now());
250
251        Ok(GitFileInfo {
252            path: file_path.to_path_buf(),
253            status,
254            last_commit,
255            blame_info,
256            changes_count,
257            additions,
258            deletions,
259        })
260    }
261
262    /// Get the current status of a file
263    async fn get_file_status(&self, file_path: &Path) -> Result<GitFileStatus> {
264        if !self.git_available {
265            return Ok(GitFileStatus::Untracked);
266        }
267
268        let relative_path = file_path.strip_prefix(&self.repo_path)
269            .map_err(|_| ScribeError::git("File not in repository".to_string()))?;
270
271        let output = AsyncCommand::new("git")
272            .arg("status")
273            .arg("--porcelain")
274            .arg(relative_path)
275            .current_dir(&self.repo_path)
276            .output()
277            .await
278            .map_err(|e| ScribeError::git(format!("Failed to get file status: {}", e)))?;
279
280        if !output.status.success() {
281            return Ok(GitFileStatus::Unmodified);
282        }
283
284        let stdout = String::from_utf8_lossy(&output.stdout);
285        let status = if stdout.is_empty() {
286            GitFileStatus::Unmodified
287        } else {
288            let status_code = stdout.chars().take(2).collect::<String>();
289            match status_code.as_str() {
290                " M" => GitFileStatus::Modified,
291                "M " => GitFileStatus::Modified,
292                "MM" => GitFileStatus::Modified, // Modified after staging
293                "A " => GitFileStatus::Added,
294                "D " => GitFileStatus::Deleted,
295                "R " => GitFileStatus::Renamed,
296                "C " => GitFileStatus::Copied,
297                "??" => GitFileStatus::Untracked,
298                "!!" => GitFileStatus::Ignored,
299                _ => GitFileStatus::Unmodified,
300            }
301        };
302
303        Ok(status)
304    }
305
306    /// Get the last commit information for a file
307    async fn get_last_commit_for_file(&self, file_path: &Path) -> Result<GitCommitInfo> {
308        if !self.git_available {
309            return Err(ScribeError::git("Git not available".to_string()));
310        }
311
312        let relative_path = file_path.strip_prefix(&self.repo_path)
313            .map_err(|_| ScribeError::git("File not in repository".to_string()))?;
314
315        let output = AsyncCommand::new("git")
316            .arg("log")
317            .arg("-1")
318            .arg("--pretty=format:%H|%an|%ae|%at|%s|%H") // hash|author|email|timestamp|subject|hash_again
319            .arg("--")
320            .arg(relative_path)
321            .current_dir(&self.repo_path)
322            .output()
323            .await
324            .map_err(|e| ScribeError::git(format!("Failed to get commit info: {}", e)))?;
325
326        if !output.status.success() {
327            let stderr = String::from_utf8_lossy(&output.stderr);
328            return Err(ScribeError::git(format!("git log failed: {}", stderr)));
329        }
330
331        let stdout = String::from_utf8_lossy(&output.stdout);
332        let parts: Vec<&str> = stdout.trim().splitn(6, '|').collect();
333        
334        if parts.len() < 5 {
335            return Err(ScribeError::git("Invalid git log output".to_string()));
336        }
337
338        let timestamp = parts[3].parse::<u64>()
339            .map_err(|_| ScribeError::git("Invalid timestamp".to_string()))?;
340
341        Ok(GitCommitInfo {
342            hash: parts[0].to_string(),
343            author: parts[1].to_string(),
344            email: parts[2].to_string(),
345            timestamp,
346            message: parts[4].to_string(),
347            files_changed: 1, // Would need additional command to get accurate count
348        })
349    }
350
351    /// Get blame information for a file
352    async fn get_blame_info(&self, file_path: &Path) -> Result<GitBlameInfo> {
353        if !self.git_available {
354            return Err(ScribeError::git("Git not available".to_string()));
355        }
356
357        // Check cache first
358        if let Some(cached_blame) = self.cache.blame_cache.borrow().get(file_path) {
359            if self.is_cache_valid() {
360                return Ok(cached_blame.clone());
361            }
362        }
363
364        let relative_path = file_path.strip_prefix(&self.repo_path)
365            .map_err(|_| ScribeError::git("File not in repository".to_string()))?;
366
367        let output = AsyncCommand::new("git")
368            .arg("blame")
369            .arg("--porcelain")
370            .arg(relative_path)
371            .current_dir(&self.repo_path)
372            .output()
373            .await
374            .map_err(|e| ScribeError::git(format!("Failed to get blame info: {}", e)))?;
375
376        if !output.status.success() {
377            let stderr = String::from_utf8_lossy(&output.stderr);
378            return Err(ScribeError::git(format!("git blame failed: {}", stderr)));
379        }
380
381        let stdout = String::from_utf8_lossy(&output.stdout);
382        let blame_info = self.parse_blame_output(&stdout)?;
383
384        Ok(blame_info)
385    }
386
387    /// Parse git blame porcelain output
388    fn parse_blame_output(&self, blame_output: &str) -> Result<GitBlameInfo> {
389        let mut lines = Vec::new();
390        let mut contributors = HashMap::new();
391        let mut last_modified = 0u64;
392        
393        let blame_lines: Vec<&str> = blame_output.lines().collect();
394        let mut i = 0;
395
396        while i < blame_lines.len() {
397            let line = blame_lines[i];
398            if line.is_empty() {
399                i += 1;
400                continue;
401            }
402
403            // Parse commit hash and line number from first line
404            let parts: Vec<&str> = line.split_whitespace().collect();
405            if parts.len() < 3 {
406                i += 1;
407                continue;
408            }
409
410            let commit_hash = parts[0].to_string();
411            let line_number = parts[2].parse::<usize>().unwrap_or(0);
412
413            // Parse additional information
414            let mut author = String::new();
415            let mut timestamp = 0u64;
416            let mut content = String::new();
417            
418            i += 1;
419            while i < blame_lines.len() {
420                let info_line = blame_lines[i];
421                if info_line.starts_with("author ") {
422                    author = info_line[7..].to_string();
423                } else if info_line.starts_with("author-time ") {
424                    timestamp = info_line[12..].parse().unwrap_or(0);
425                    last_modified = last_modified.max(timestamp);
426                } else if info_line.starts_with('\t') {
427                    content = info_line[1..].to_string();
428                    break;
429                }
430                i += 1;
431            }
432
433            // Count lines per author
434            *contributors.entry(author.clone()).or_insert(0) += 1;
435
436            lines.push(GitBlameLine {
437                line_number,
438                commit_hash,
439                author,
440                timestamp,
441                content,
442            });
443
444            i += 1;
445        }
446
447        // Calculate age distribution
448        let now = SystemTime::now()
449            .duration_since(UNIX_EPOCH)
450            .unwrap()
451            .as_secs();
452        
453        let mut age_distribution = AgeDistribution {
454            recent: 0,
455            moderate: 0,
456            old: 0,
457            ancient: 0,
458        };
459
460        for line in &lines {
461            let age_seconds = now.saturating_sub(line.timestamp);
462            let age_days = age_seconds / 86400; // seconds per day
463
464            match age_days {
465                0..=30 => age_distribution.recent += 1,
466                31..=180 => age_distribution.moderate += 1,
467                181..=365 => age_distribution.old += 1,
468                _ => age_distribution.ancient += 1,
469            }
470        }
471
472        Ok(GitBlameInfo {
473            lines,
474            contributors,
475            last_modified,
476            age_distribution,
477        })
478    }
479
480    /// Get file change statistics (additions/deletions count)
481    async fn get_file_change_stats(&self, file_path: &Path) -> Result<(usize, usize, usize)> {
482        if !self.git_available {
483            return Err(ScribeError::git("Git not available".to_string()));
484        }
485
486        let relative_path = file_path.strip_prefix(&self.repo_path)
487            .map_err(|_| ScribeError::git("File not in repository".to_string()))?;
488
489        let output = AsyncCommand::new("git")
490            .arg("log")
491            .arg("--numstat")
492            .arg("--pretty=format:")
493            .arg("--")
494            .arg(relative_path)
495            .current_dir(&self.repo_path)
496            .output()
497            .await
498            .map_err(|e| ScribeError::git(format!("Failed to get change stats: {}", e)))?;
499
500        if !output.status.success() {
501            return Ok((0, 0, 0));
502        }
503
504        let stdout = String::from_utf8_lossy(&output.stdout);
505        let mut total_changes = 0;
506        let mut total_additions = 0;
507        let mut total_deletions = 0;
508
509        for line in stdout.lines() {
510            if line.trim().is_empty() {
511                continue;
512            }
513
514            let parts: Vec<&str> = line.split_whitespace().collect();
515            if parts.len() >= 2 {
516                if let (Ok(additions), Ok(deletions)) = (parts[0].parse::<usize>(), parts[1].parse::<usize>()) {
517                    total_additions += additions;
518                    total_deletions += deletions;
519                    total_changes += 1;
520                }
521            }
522        }
523
524        Ok((total_changes, total_additions, total_deletions))
525    }
526
527    /// Get comprehensive repository statistics
528    pub async fn get_repository_stats(&self) -> Result<GitRepositoryStats> {
529        if !self.git_available {
530            return Err(ScribeError::git("Git not available".to_string()));
531        }
532
533        let (total_commits, contributors) = self.get_contributor_stats().await?;
534        let branches = self.get_branches().await?;
535        let tags = self.get_tags().await?;
536        let file_types = self.analyze_file_types().await?;
537        let activity_timeline = self.get_activity_timeline().await?;
538        let repository_health = self.calculate_repository_health(&contributors, &activity_timeline).await?;
539
540        Ok(GitRepositoryStats {
541            total_commits,
542            contributors,
543            branches,
544            tags,
545            file_types,
546            activity_timeline,
547            repository_health,
548        })
549    }
550
551    /// Get contributor statistics
552    async fn get_contributor_stats(&self) -> Result<(usize, Vec<ContributorStats>)> {
553        let output = AsyncCommand::new("git")
554            .arg("shortlog")
555            .arg("-sne")
556            .arg("--all")
557            .current_dir(&self.repo_path)
558            .output()
559            .await
560            .map_err(|e| ScribeError::git(format!("Failed to get contributors: {}", e)))?;
561
562        if !output.status.success() {
563            return Ok((0, vec![]));
564        }
565
566        let stdout = String::from_utf8_lossy(&output.stdout);
567        let mut contributors = Vec::new();
568        let mut total_commits = 0;
569
570        for line in stdout.lines() {
571            if let Some((count_str, name_email)) = line.trim().split_once('\t') {
572                if let Ok(commits) = count_str.trim().parse::<usize>() {
573                    total_commits += commits;
574                    
575                    // Parse name and email
576                    let (name, email) = if let Some((n, e)) = name_email.rsplit_once('<') {
577                        let email = e.trim_end_matches('>');
578                        (n.trim().to_string(), email.to_string())
579                    } else {
580                        (name_email.to_string(), String::new())
581                    };
582
583                    // Get additional stats for this contributor
584                    let (lines_added, lines_deleted, files_modified, first_commit, last_commit) = 
585                        self.get_detailed_contributor_stats(&email).await.unwrap_or((0, 0, 0, 0, 0));
586
587                    contributors.push(ContributorStats {
588                        name,
589                        email,
590                        commits,
591                        lines_added,
592                        lines_deleted,
593                        files_modified,
594                        first_commit,
595                        last_commit,
596                    });
597                }
598            }
599        }
600
601        // Sort by commit count descending
602        contributors.sort_by(|a, b| b.commits.cmp(&a.commits));
603
604        Ok((total_commits, contributors))
605    }
606
607    /// Get detailed statistics for a specific contributor
608    async fn get_detailed_contributor_stats(&self, email: &str) -> Result<(usize, usize, usize, u64, u64)> {
609        let output = AsyncCommand::new("git")
610            .arg("log")
611            .arg("--author")
612            .arg(email)
613            .arg("--numstat")
614            .arg("--pretty=format:%at")
615            .current_dir(&self.repo_path)
616            .output()
617            .await
618            .map_err(|e| ScribeError::git(format!("Failed to get detailed stats: {}", e)))?;
619
620        if !output.status.success() {
621            return Ok((0, 0, 0, 0, 0));
622        }
623
624        let stdout = String::from_utf8_lossy(&output.stdout);
625        let mut lines_added = 0;
626        let mut lines_deleted = 0;
627        let mut files_modified = 0;
628        let mut timestamps = Vec::new();
629
630        for line in stdout.lines() {
631            if line.trim().is_empty() {
632                continue;
633            }
634
635            // Check if it's a timestamp line
636            if let Ok(timestamp) = line.parse::<u64>() {
637                timestamps.push(timestamp);
638                continue;
639            }
640
641            // Check if it's a numstat line
642            let parts: Vec<&str> = line.split_whitespace().collect();
643            if parts.len() >= 3 {
644                if let (Ok(added), Ok(deleted)) = (parts[0].parse::<usize>(), parts[1].parse::<usize>()) {
645                    lines_added += added;
646                    lines_deleted += deleted;
647                    files_modified += 1;
648                }
649            }
650        }
651
652        let first_commit = timestamps.iter().min().copied().unwrap_or(0);
653        let last_commit = timestamps.iter().max().copied().unwrap_or(0);
654
655        Ok((lines_added, lines_deleted, files_modified, first_commit, last_commit))
656    }
657
658    /// Get list of branches
659    async fn get_branches(&self) -> Result<Vec<String>> {
660        let output = AsyncCommand::new("git")
661            .arg("branch")
662            .arg("-a")
663            .current_dir(&self.repo_path)
664            .output()
665            .await
666            .map_err(|e| ScribeError::git(format!("Failed to get branches: {}", e)))?;
667
668        if !output.status.success() {
669            return Ok(vec![]);
670        }
671
672        let stdout = String::from_utf8_lossy(&output.stdout);
673        let branches = stdout
674            .lines()
675            .map(|line| line.trim_start_matches("* ").trim())
676            .filter(|line| !line.is_empty())
677            .map(|line| line.to_string())
678            .collect();
679
680        Ok(branches)
681    }
682
683    /// Get list of tags
684    async fn get_tags(&self) -> Result<Vec<String>> {
685        let output = AsyncCommand::new("git")
686            .arg("tag")
687            .current_dir(&self.repo_path)
688            .output()
689            .await
690            .map_err(|e| ScribeError::git(format!("Failed to get tags: {}", e)))?;
691
692        if !output.status.success() {
693            return Ok(vec![]);
694        }
695
696        let stdout = String::from_utf8_lossy(&output.stdout);
697        let tags = stdout
698            .lines()
699            .filter(|line| !line.trim().is_empty())
700            .map(|line| line.trim().to_string())
701            .collect();
702
703        Ok(tags)
704    }
705
706    /// Analyze file types in the repository
707    async fn analyze_file_types(&self) -> Result<HashMap<String, usize>> {
708        let files = self.list_tracked_files().await?;
709        let mut file_types = HashMap::new();
710
711        for file in files {
712            if let Some(extension) = file.extension().and_then(|ext| ext.to_str()) {
713                *file_types.entry(extension.to_string()).or_insert(0) += 1;
714            } else {
715                *file_types.entry("no_extension".to_string()).or_insert(0) += 1;
716            }
717        }
718
719        Ok(file_types)
720    }
721
722    /// Get activity timeline
723    async fn get_activity_timeline(&self) -> Result<Vec<ActivityPeriod>> {
724        // This would implement more sophisticated timeline analysis
725        // For now, returning empty vector as placeholder
726        Ok(vec![])
727    }
728
729    /// Calculate repository health metrics
730    async fn calculate_repository_health(
731        &self,
732        contributors: &[ContributorStats],
733        activity_timeline: &[ActivityPeriod],
734    ) -> Result<RepositoryHealth> {
735        // Calculate basic health metrics
736        let commit_frequency = if !activity_timeline.is_empty() {
737            let total_commits: usize = activity_timeline.iter().map(|p| p.commits).sum();
738            total_commits as f64 / activity_timeline.len() as f64
739        } else {
740            0.0
741        };
742
743        let contributor_diversity = contributors.len() as f64;
744        
745        // Basic code churn calculation (would need more sophisticated analysis)
746        let total_added: usize = contributors.iter().map(|c| c.lines_added).sum();
747        let total_deleted: usize = contributors.iter().map(|c| c.lines_deleted).sum();
748        let code_churn = if total_added > 0 {
749            total_deleted as f64 / total_added as f64
750        } else {
751            0.0
752        };
753
754        // Placeholder values for other metrics
755        let documentation_ratio = 0.0;
756        let test_coverage_estimate = 0.0;
757
758        let branch_health = BranchHealth {
759            main_branch: "main".to_string(),
760            active_branches: 1,
761            stale_branches: 0,
762            merge_conflicts_risk: 0.0,
763        };
764
765        Ok(RepositoryHealth {
766            commit_frequency,
767            contributor_diversity,
768            code_churn,
769            documentation_ratio,
770            test_coverage_estimate,
771            branch_health,
772        })
773    }
774
775    /// Check if cache is still valid
776    fn is_cache_valid(&self) -> bool {
777        if let Some(cache_time) = *self.cache.cache_timestamp.borrow() {
778            SystemTime::now()
779                .duration_since(cache_time)
780                .map(|duration| duration < self.cache.cache_ttl)
781                .unwrap_or(false)
782        } else {
783            false
784        }
785    }
786
787    /// Clear all caches
788    pub fn clear_cache(&self) {
789        self.cache.file_statuses.borrow_mut().clear();
790        self.cache.commit_cache.borrow_mut().clear();
791        self.cache.blame_cache.borrow_mut().clear();
792        *self.cache.cache_timestamp.borrow_mut() = None;
793    }
794
795    /// Get number of files discovered through git
796    pub fn files_discovered(&self) -> usize {
797        *self.cache.files_discovered.borrow()
798    }
799
800    /// Check if git is available
801    pub fn is_git_available(&self) -> bool {
802        self.git_available
803    }
804
805    /// Get repository root path
806    pub fn repo_path(&self) -> &Path {
807        &self.repo_path
808    }
809}
810
811impl Default for AgeDistribution {
812    fn default() -> Self {
813        Self {
814            recent: 0,
815            moderate: 0,
816            old: 0,
817            ancient: 0,
818        }
819    }
820}
821
822#[cfg(test)]
823mod tests {
824    use super::*;
825    use tempfile::TempDir;
826    use std::fs;
827    use std::process::Command;
828
829    async fn create_test_git_repo() -> Result<TempDir> {
830        let temp_dir = TempDir::new().unwrap();
831        let repo_path = temp_dir.path();
832
833        // Initialize git repo
834        let output = Command::new("git")
835            .arg("init")
836            .current_dir(repo_path)
837            .output();
838
839        if output.is_err() || !output.unwrap().status.success() {
840            // Skip tests if git is not available
841            return Err(ScribeError::git("Git not available for testing".to_string()));
842        }
843
844        // Configure git for testing
845        Command::new("git")
846            .args(&["config", "user.name", "Test User"])
847            .current_dir(repo_path)
848            .output()
849            .unwrap();
850
851        Command::new("git")
852            .args(&["config", "user.email", "test@example.com"])
853            .current_dir(repo_path)
854            .output()
855            .unwrap();
856
857        // Create and commit a test file
858        let test_file = repo_path.join("test.rs");
859        fs::write(&test_file, "fn main() { println!(\"Hello, world!\"); }").unwrap();
860
861        Command::new("git")
862            .args(&["add", "test.rs"])
863            .current_dir(repo_path)
864            .output()
865            .unwrap();
866
867        Command::new("git")
868            .args(&["commit", "-m", "Initial commit"])
869            .current_dir(repo_path)
870            .output()
871            .unwrap();
872
873        Ok(temp_dir)
874    }
875
876    #[tokio::test]
877    async fn test_git_integrator_creation() {
878        if let Ok(temp_dir) = create_test_git_repo().await {
879            let integrator = GitIntegrator::new(temp_dir.path()).unwrap();
880            assert!(integrator.is_git_available());
881            assert_eq!(integrator.repo_path(), temp_dir.path());
882        }
883    }
884
885    #[tokio::test]
886    async fn test_list_tracked_files() {
887        if let Ok(temp_dir) = create_test_git_repo().await {
888            let integrator = GitIntegrator::new(temp_dir.path()).unwrap();
889            let files = integrator.list_tracked_files().await.unwrap();
890            
891            assert_eq!(files.len(), 1);
892            assert!(files[0].file_name().unwrap() == "test.rs");
893            assert_eq!(integrator.files_discovered(), 1);
894        }
895    }
896
897    #[tokio::test]
898    async fn test_get_file_info() {
899        if let Ok(temp_dir) = create_test_git_repo().await {
900            let integrator = GitIntegrator::new(temp_dir.path()).unwrap();
901            let test_file = temp_dir.path().join("test.rs");
902            
903            let file_info = integrator.get_file_info(&test_file).await.unwrap();
904            
905            assert_eq!(file_info.path, test_file);
906            assert_eq!(file_info.status, GitFileStatus::Unmodified);
907            assert!(file_info.last_commit.is_some());
908        }
909    }
910
911    #[tokio::test]
912    async fn test_get_repository_stats() {
913        if let Ok(temp_dir) = create_test_git_repo().await {
914            let integrator = GitIntegrator::new(temp_dir.path()).unwrap();
915            let stats = integrator.get_repository_stats().await.unwrap();
916            
917            assert!(stats.total_commits >= 1);
918            assert!(!stats.contributors.is_empty());
919            assert!(stats.contributors[0].name == "Test User");
920            assert!(stats.file_types.contains_key("rs"));
921        }
922    }
923
924    #[tokio::test]
925    async fn test_file_status_detection() {
926        if let Ok(temp_dir) = create_test_git_repo().await {
927            let integrator = GitIntegrator::new(temp_dir.path()).unwrap();
928            let test_file = temp_dir.path().join("test.rs");
929            
930            // File should be tracked initially
931            let status = integrator.get_file_status(&test_file).await.unwrap();
932            assert_eq!(status, GitFileStatus::Unmodified);
933            
934            // Modify the file
935            fs::write(&test_file, "fn main() { println!(\"Modified!\"); }").unwrap();
936            
937            let status = integrator.get_file_status(&test_file).await.unwrap();
938            assert_eq!(status, GitFileStatus::Modified);
939            
940            // Create untracked file
941            let new_file = temp_dir.path().join("untracked.rs");
942            fs::write(&new_file, "// untracked").unwrap();
943            
944            let status = integrator.get_file_status(&new_file).await.unwrap();
945            assert_eq!(status, GitFileStatus::Untracked);
946        }
947    }
948
949    #[tokio::test]
950    async fn test_blame_info() {
951        if let Ok(temp_dir) = create_test_git_repo().await {
952            let integrator = GitIntegrator::new(temp_dir.path()).unwrap();
953            let test_file = temp_dir.path().join("test.rs");
954            
955            let blame_info = integrator.get_blame_info(&test_file).await.unwrap();
956            
957            assert_eq!(blame_info.lines.len(), 1);
958            assert!(!blame_info.contributors.is_empty());
959            assert!(blame_info.contributors.contains_key("Test User"));
960            assert!(blame_info.last_modified > 0);
961        }
962    }
963
964    #[test]
965    fn test_age_distribution_calculation() {
966        let now = SystemTime::now()
967            .duration_since(UNIX_EPOCH)
968            .unwrap()
969            .as_secs();
970        
971        let mut age_dist = AgeDistribution::default();
972        
973        // Simulate line ages
974        let recent_timestamp = now - (15 * 24 * 3600); // 15 days ago
975        let moderate_timestamp = now - (90 * 24 * 3600); // 90 days ago
976        let old_timestamp = now - (300 * 24 * 3600); // 300 days ago
977        let ancient_timestamp = now - (400 * 24 * 3600); // 400 days ago
978        
979        let timestamps = vec![recent_timestamp, moderate_timestamp, old_timestamp, ancient_timestamp];
980        
981        for timestamp in timestamps {
982            let age_seconds = now.saturating_sub(timestamp);
983            let age_days = age_seconds / 86400;
984            
985            match age_days {
986                0..=30 => age_dist.recent += 1,
987                31..=180 => age_dist.moderate += 1,
988                181..=365 => age_dist.old += 1,
989                _ => age_dist.ancient += 1,
990            }
991        }
992        
993        assert_eq!(age_dist.recent, 1);
994        assert_eq!(age_dist.moderate, 1);
995        assert_eq!(age_dist.old, 1);
996        assert_eq!(age_dist.ancient, 1);
997    }
998
999    #[tokio::test]
1000    async fn test_cache_functionality() {
1001        if let Ok(temp_dir) = create_test_git_repo().await {
1002            let mut integrator = GitIntegrator::new(temp_dir.path()).unwrap();
1003            let test_file = temp_dir.path().join("test.rs");
1004            
1005            // First call should populate cache
1006            let _ = integrator.get_file_info(&test_file).await.unwrap();
1007            assert!(integrator.is_cache_valid());
1008            
1009            // Clear cache
1010            integrator.clear_cache();
1011            assert!(!integrator.is_cache_valid());
1012        }
1013    }
1014}