cargo_coupling/
volatility.rs

1//! Git history analysis for volatility measurement
2//!
3//! Analyzes git log to determine how frequently files change.
4//! Optimized for large repositories using streaming and git path filtering.
5
6use std::collections::HashMap;
7use std::io::{BufRead, BufReader};
8use std::path::Path;
9use std::process::{Command, Stdio};
10
11use thiserror::Error;
12
13use crate::metrics::Volatility;
14
15/// Errors that can occur during volatility analysis
16#[derive(Error, Debug)]
17pub enum VolatilityError {
18    #[error("Failed to execute git command: {0}")]
19    GitCommand(#[from] std::io::Error),
20
21    #[error("Invalid UTF-8 in git output: {0}")]
22    InvalidUtf8(#[from] std::string::FromUtf8Error),
23
24    #[error("Not a git repository")]
25    NotGitRepo,
26}
27
/// Volatility analyzer backed by git history.
///
/// Holds a per-file change counter together with the look-back window that
/// is passed to `git log`. The derived `Default` yields an empty map and a
/// period of `0` months.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct VolatilityAnalyzer {
    /// File path (as printed by git) -> number of times the file changed.
    pub file_changes: HashMap<String, usize>,
    /// Length of the analysis window, in months.
    pub period_months: usize,
}
36
37impl VolatilityAnalyzer {
38    /// Create a new volatility analyzer
39    pub fn new(period_months: usize) -> Self {
40        Self {
41            file_changes: HashMap::new(),
42            period_months,
43        }
44    }
45
46    /// Analyze git history for a repository (optimized version)
47    ///
48    /// Optimizations applied:
49    /// 1. Use `-- "*.rs"` to filter .rs files at git level
50    /// 2. Use streaming with BufReader instead of loading all into memory
51    /// 3. Use `--diff-filter=AMRC` to skip deleted files
52    pub fn analyze(&mut self, repo_path: &Path) -> Result<(), VolatilityError> {
53        // Check if it's a git repo
54        let git_check = Command::new("git")
55            .args(["rev-parse", "--git-dir"])
56            .current_dir(repo_path)
57            .stderr(Stdio::null())
58            .output()?;
59
60        if !git_check.status.success() {
61            return Err(VolatilityError::NotGitRepo);
62        }
63
64        // Optimized: use --diff-filter and path spec to reduce output
65        // --diff-filter=AMRC: Added, Modified, Renamed, Copied (skip Deleted)
66        let mut child = Command::new("git")
67            .args([
68                "log",
69                "--pretty=format:",
70                "--name-only",
71                "--diff-filter=AMRC",
72                &format!("--since={} months ago", self.period_months),
73                "--",
74                "*.rs",
75            ])
76            .current_dir(repo_path)
77            .stdout(Stdio::piped())
78            .stderr(Stdio::null())
79            .spawn()?;
80
81        // Stream processing with BufReader
82        if let Some(stdout) = child.stdout.take() {
83            let reader = BufReader::with_capacity(64 * 1024, stdout); // 64KB buffer
84
85            for line in reader.lines() {
86                let line = match line {
87                    Ok(l) => l,
88                    Err(_) => continue,
89                };
90
91                let line = line.trim();
92                if !line.is_empty() && line.ends_with(".rs") {
93                    *self.file_changes.entry(line.to_string()).or_insert(0) += 1;
94                }
95            }
96        }
97
98        // Wait for git to finish
99        let _ = child.wait();
100
101        Ok(())
102    }
103
104    /// Get volatility level for a file
105    pub fn get_volatility(&self, file_path: &str) -> Volatility {
106        let count = self.file_changes.get(file_path).copied().unwrap_or(0);
107        Volatility::from_count(count)
108    }
109
110    /// Get change count for a file
111    pub fn get_change_count(&self, file_path: &str) -> usize {
112        self.file_changes.get(file_path).copied().unwrap_or(0)
113    }
114
115    /// Get all high volatility files
116    pub fn high_volatility_files(&self) -> Vec<(&String, usize)> {
117        self.file_changes
118            .iter()
119            .filter(|&(_, count)| *count > 10)
120            .map(|(path, count)| (path, *count))
121            .collect()
122    }
123
124    /// Get volatility statistics
125    pub fn statistics(&self) -> VolatilityStats {
126        if self.file_changes.is_empty() {
127            return VolatilityStats::default();
128        }
129
130        let counts: Vec<usize> = self.file_changes.values().copied().collect();
131        let total: usize = counts.iter().sum();
132        let max = counts.iter().max().copied().unwrap_or(0);
133        let min = counts.iter().min().copied().unwrap_or(0);
134        let avg = total as f64 / counts.len() as f64;
135
136        let low_count = counts.iter().filter(|&&c| c <= 2).count();
137        let medium_count = counts.iter().filter(|&&c| c > 2 && c <= 10).count();
138        let high_count = counts.iter().filter(|&&c| c > 10).count();
139
140        VolatilityStats {
141            total_files: counts.len(),
142            total_changes: total,
143            max_changes: max,
144            min_changes: min,
145            avg_changes: avg,
146            low_volatility_count: low_count,
147            medium_volatility_count: medium_count,
148            high_volatility_count: high_count,
149        }
150    }
151}
152
/// Aggregate change statistics across all files tracked by an analyzer.
///
/// The derived `Default` is all zeros, which is also what
/// `VolatilityAnalyzer::statistics` returns when no file has been recorded.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct VolatilityStats {
    /// Number of distinct files with a recorded change count.
    pub total_files: usize,
    /// Sum of the change counts of all files.
    pub total_changes: usize,
    /// Largest change count of any single file.
    pub max_changes: usize,
    /// Smallest change count of any single file.
    pub min_changes: usize,
    /// Mean change count per file (`total_changes / total_files`).
    pub avg_changes: f64,
    /// Number of files with at most 2 changes.
    pub low_volatility_count: usize,
    /// Number of files with 3 to 10 changes (inclusive).
    pub medium_volatility_count: usize,
    /// Number of files with more than 10 changes.
    pub high_volatility_count: usize,
}
165
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an analyzer pre-populated with the given `(path, count)` pairs,
    /// bypassing git entirely.
    fn analyzer_with(entries: &[(&str, usize)]) -> VolatilityAnalyzer {
        let mut analyzer = VolatilityAnalyzer::new(6);
        for &(path, count) in entries {
            analyzer.file_changes.insert(path.to_string(), count);
        }
        analyzer
    }

    #[test]
    fn test_volatility_classification() {
        let analyzer = analyzer_with(&[("stable.rs", 1), ("moderate.rs", 5), ("volatile.rs", 15)]);

        assert_eq!(analyzer.get_volatility("stable.rs"), Volatility::Low);
        assert_eq!(analyzer.get_volatility("moderate.rs"), Volatility::Medium);
        assert_eq!(analyzer.get_volatility("volatile.rs"), Volatility::High);
        // Files never seen in history are classified from a count of zero.
        assert_eq!(analyzer.get_volatility("unknown.rs"), Volatility::Low);
    }

    #[test]
    fn test_change_count_lookup() {
        let analyzer = analyzer_with(&[("known.rs", 7)]);

        assert_eq!(analyzer.get_change_count("known.rs"), 7);
        assert_eq!(analyzer.get_change_count("unknown.rs"), 0);
    }

    #[test]
    fn test_high_volatility_files() {
        let analyzer = analyzer_with(&[
            ("stable.rs", 2),
            ("volatile.rs", 15),
            ("very_volatile.rs", 25),
        ]);

        let high_vol = analyzer.high_volatility_files();
        assert_eq!(high_vol.len(), 2);
    }

    #[test]
    fn test_high_volatility_threshold_is_exclusive() {
        let analyzer = analyzer_with(&[("ten.rs", 10), ("eleven.rs", 11)]);

        let high_vol = analyzer.high_volatility_files();
        assert_eq!(high_vol.len(), 1);
        assert_eq!(high_vol[0].0.as_str(), "eleven.rs");
        assert_eq!(high_vol[0].1, 11);
    }

    #[test]
    fn test_statistics() {
        let analyzer = analyzer_with(&[("a.rs", 1), ("b.rs", 5), ("c.rs", 15)]);

        let stats = analyzer.statistics();
        assert_eq!(stats.total_files, 3);
        assert_eq!(stats.total_changes, 21);
        assert_eq!(stats.max_changes, 15);
        assert_eq!(stats.min_changes, 1);
        assert!((stats.avg_changes - 7.0).abs() < 1e-12);
        assert_eq!(stats.low_volatility_count, 1);
        assert_eq!(stats.medium_volatility_count, 1);
        assert_eq!(stats.high_volatility_count, 1);
    }

    #[test]
    fn test_statistics_empty() {
        let stats = VolatilityAnalyzer::new(6).statistics();

        assert_eq!(stats.total_files, 0);
        assert_eq!(stats.total_changes, 0);
        assert_eq!(stats.max_changes, 0);
        assert_eq!(stats.min_changes, 0);
        assert_eq!(stats.avg_changes, 0.0);
        assert_eq!(stats.low_volatility_count, 0);
        assert_eq!(stats.medium_volatility_count, 0);
        assert_eq!(stats.high_volatility_count, 0);
    }

    #[test]
    fn test_statistics_bucket_boundaries() {
        // 2 is the last low count, 3 and 10 are medium, 11 is the first high.
        let analyzer = analyzer_with(&[("a.rs", 2), ("b.rs", 3), ("c.rs", 10), ("d.rs", 11)]);

        let stats = analyzer.statistics();
        assert_eq!(stats.low_volatility_count, 1);
        assert_eq!(stats.medium_volatility_count, 2);
        assert_eq!(stats.high_volatility_count, 1);
    }
}