1use std::collections::HashMap;
7use std::io::{BufRead, BufReader};
8use std::path::Path;
9use std::process::{Command, Stdio};
10
11use thiserror::Error;
12
13use crate::metrics::Volatility;
14
15#[derive(Error, Debug)]
17pub enum VolatilityError {
18 #[error("Failed to execute git command: {0}")]
19 GitCommand(#[from] std::io::Error),
20
21 #[error("Invalid UTF-8 in git output: {0}")]
22 InvalidUtf8(#[from] std::string::FromUtf8Error),
23
24 #[error("Not a git repository")]
25 NotGitRepo,
26}
27
/// Collects per-file change counts from git history.
///
/// The `Default` value has an empty count table and a look-back window of
/// zero months.
#[derive(Default, Debug)]
pub struct VolatilityAnalyzer {
    /// Number of times each `.rs` path was listed by `git log`, keyed by the
    /// path string as git printed it (after trimming whitespace).
    pub file_changes: HashMap<String, usize>,
    /// Look-back window in months, passed to git as `--since=<N> months ago`.
    pub period_months: usize,
}
36
37impl VolatilityAnalyzer {
38 pub fn new(period_months: usize) -> Self {
40 Self {
41 file_changes: HashMap::new(),
42 period_months,
43 }
44 }
45
46 pub fn analyze(&mut self, repo_path: &Path) -> Result<(), VolatilityError> {
53 let git_check = Command::new("git")
55 .args(["rev-parse", "--git-dir"])
56 .current_dir(repo_path)
57 .stderr(Stdio::null())
58 .output()?;
59
60 if !git_check.status.success() {
61 return Err(VolatilityError::NotGitRepo);
62 }
63
64 let mut child = Command::new("git")
67 .args([
68 "log",
69 "--pretty=format:",
70 "--name-only",
71 "--diff-filter=AMRC",
72 &format!("--since={} months ago", self.period_months),
73 "--",
74 "*.rs",
75 ])
76 .current_dir(repo_path)
77 .stdout(Stdio::piped())
78 .stderr(Stdio::null())
79 .spawn()?;
80
81 if let Some(stdout) = child.stdout.take() {
83 let reader = BufReader::with_capacity(64 * 1024, stdout); for line in reader.lines() {
86 let line = match line {
87 Ok(l) => l,
88 Err(_) => continue,
89 };
90
91 let line = line.trim();
92 if !line.is_empty() && line.ends_with(".rs") {
93 *self.file_changes.entry(line.to_string()).or_insert(0) += 1;
94 }
95 }
96 }
97
98 let _ = child.wait();
100
101 Ok(())
102 }
103
104 pub fn get_volatility(&self, file_path: &str) -> Volatility {
106 let count = self.file_changes.get(file_path).copied().unwrap_or(0);
107 Volatility::from_count(count)
108 }
109
110 pub fn get_change_count(&self, file_path: &str) -> usize {
112 self.file_changes.get(file_path).copied().unwrap_or(0)
113 }
114
115 pub fn high_volatility_files(&self) -> Vec<(&String, usize)> {
117 self.file_changes
118 .iter()
119 .filter(|&(_, count)| *count > 10)
120 .map(|(path, count)| (path, *count))
121 .collect()
122 }
123
124 pub fn analyze_temporal_coupling(
131 &self,
132 repo_path: &Path,
133 ) -> Result<Vec<TemporalCoupling>, VolatilityError> {
134 let mut child = Command::new("git")
136 .args([
137 "log",
138 "--pretty=format:__COMMIT__",
139 "--name-only",
140 "--diff-filter=AMRC",
141 &format!("--since={} months ago", self.period_months),
142 "--",
143 "*.rs",
144 ])
145 .current_dir(repo_path)
146 .stdout(Stdio::piped())
147 .stderr(Stdio::null())
148 .spawn()?;
149
150 let mut commits: Vec<Vec<String>> = Vec::new();
151 let mut current_files: Vec<String> = Vec::new();
152
153 if let Some(stdout) = child.stdout.take() {
154 let reader = BufReader::with_capacity(64 * 1024, stdout);
155 for line in reader.lines() {
156 let line = match line {
157 Ok(l) => l,
158 Err(_) => continue,
159 };
160 let trimmed = line.trim();
161 if trimmed == "__COMMIT__" {
162 if current_files.len() >= 2 {
163 commits.push(std::mem::take(&mut current_files));
164 } else {
165 current_files.clear();
166 }
167 } else if !trimmed.is_empty() && trimmed.ends_with(".rs") {
168 current_files.push(trimmed.to_string());
169 }
170 }
171 if current_files.len() >= 2 {
173 commits.push(current_files);
174 }
175 }
176
177 let _ = child.wait();
178
179 const MAX_FILES_PER_COMMIT: usize = 50;
183 let mut pair_counts: HashMap<(String, String), usize> = HashMap::new();
184 for files in &commits {
185 if files.len() > MAX_FILES_PER_COMMIT {
186 continue;
187 }
188 for i in 0..files.len() {
189 for j in (i + 1)..files.len() {
190 let (a, b) = if files[i] < files[j] {
191 (files[i].clone(), files[j].clone())
192 } else {
193 (files[j].clone(), files[i].clone())
194 };
195 *pair_counts.entry((a, b)).or_default() += 1;
196 }
197 }
198 }
199
200 let mut result: Vec<TemporalCoupling> = pair_counts
202 .into_iter()
203 .filter(|(_, count)| *count >= 3)
204 .map(|((file_a, file_b), count)| {
205 let total_a = self.file_changes.get(&file_a).copied().unwrap_or(1);
206 let total_b = self.file_changes.get(&file_b).copied().unwrap_or(1);
207 let coupling_ratio = count as f64 / total_a.min(total_b).max(1) as f64;
208 TemporalCoupling {
209 file_a,
210 file_b,
211 co_change_count: count,
212 coupling_ratio: coupling_ratio.min(1.0),
213 }
214 })
215 .collect();
216
217 result.sort_by(|a, b| {
218 b.co_change_count.cmp(&a.co_change_count).then(
219 b.coupling_ratio
220 .partial_cmp(&a.coupling_ratio)
221 .unwrap_or(std::cmp::Ordering::Equal),
222 )
223 });
224 Ok(result)
225 }
226
227 pub fn statistics(&self) -> VolatilityStats {
229 if self.file_changes.is_empty() {
230 return VolatilityStats::default();
231 }
232
233 let counts: Vec<usize> = self.file_changes.values().copied().collect();
234 let total: usize = counts.iter().sum();
235 let max = counts.iter().max().copied().unwrap_or(0);
236 let min = counts.iter().min().copied().unwrap_or(0);
237 let avg = total as f64 / counts.len() as f64;
238
239 let low_count = counts.iter().filter(|&&c| c <= 2).count();
240 let medium_count = counts.iter().filter(|&&c| c > 2 && c <= 10).count();
241 let high_count = counts.iter().filter(|&&c| c > 10).count();
242
243 VolatilityStats {
244 total_files: counts.len(),
245 total_changes: total,
246 max_changes: max,
247 min_changes: min,
248 avg_changes: avg,
249 low_volatility_count: low_count,
250 medium_volatility_count: medium_count,
251 high_volatility_count: high_count,
252 }
253 }
254}
255
/// Two files that repeatedly appear in the same commits.
#[derive(Clone, Debug)]
pub struct TemporalCoupling {
    /// First file of the pair (the lexicographically smaller path when
    /// produced by `VolatilityAnalyzer::analyze_temporal_coupling`).
    pub file_a: String,
    /// Second file of the pair.
    pub file_b: String,
    /// Number of commits in which both files were listed.
    pub co_change_count: usize,
    /// Shared commits relative to the less frequently changed file; the
    /// analyzer caps this at 1.0.
    pub coupling_ratio: f64,
}

impl TemporalCoupling {
    /// Ratio at or above which a pair is considered strongly coupled.
    const STRONG_THRESHOLD: f64 = 0.5;

    /// Returns `true` when `coupling_ratio` is at least 0.5.
    pub fn is_strong(&self) -> bool {
        let ratio = self.coupling_ratio;
        ratio >= Self::STRONG_THRESHOLD
    }
}
278
/// Aggregate figures over a table of per-file change counts.
///
/// The `Default` value has every field set to zero.
#[derive(Default, Debug)]
pub struct VolatilityStats {
    /// Number of files with a recorded change count.
    pub total_files: usize,
    /// Sum of all recorded change counts.
    pub total_changes: usize,
    /// Highest change count of any single file.
    pub max_changes: usize,
    /// Lowest change count of any single file.
    pub min_changes: usize,
    /// Mean change count per file.
    pub avg_changes: f64,
    /// Number of files in the low-volatility bucket.
    pub low_volatility_count: usize,
    /// Number of files in the medium-volatility bucket.
    pub medium_volatility_count: usize,
    /// Number of files in the high-volatility bucket.
    pub high_volatility_count: usize,
}
291
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an analyzer (6-month window) pre-loaded with the given counts,
    /// so tests do not need a real git repository.
    fn analyzer_with(counts: &[(&str, usize)]) -> VolatilityAnalyzer {
        let mut analyzer = VolatilityAnalyzer::new(6);
        analyzer
            .file_changes
            .extend(counts.iter().map(|&(path, count)| (path.to_string(), count)));
        analyzer
    }

    /// Builds a coupling between two fixed file names.
    fn coupling(co_change_count: usize, coupling_ratio: f64) -> TemporalCoupling {
        TemporalCoupling {
            file_a: "a.rs".to_string(),
            file_b: "b.rs".to_string(),
            co_change_count,
            coupling_ratio,
        }
    }

    #[test]
    fn test_volatility_classification() {
        let analyzer = analyzer_with(&[("stable.rs", 1), ("moderate.rs", 5), ("volatile.rs", 15)]);

        assert_eq!(analyzer.get_volatility("stable.rs"), Volatility::Low);
        assert_eq!(analyzer.get_volatility("moderate.rs"), Volatility::Medium);
        assert_eq!(analyzer.get_volatility("volatile.rs"), Volatility::High);
        // Files that were never recorded are classified from a count of 0.
        assert_eq!(analyzer.get_volatility("unknown.rs"), Volatility::Low);
    }

    #[test]
    fn test_change_count_defaults_to_zero() {
        let analyzer = analyzer_with(&[("known.rs", 4)]);

        assert_eq!(analyzer.get_change_count("known.rs"), 4);
        assert_eq!(analyzer.get_change_count("unknown.rs"), 0);
    }

    #[test]
    fn test_high_volatility_files() {
        let analyzer = analyzer_with(&[
            ("stable.rs", 2),
            ("borderline.rs", 10),
            ("volatile.rs", 15),
            ("very_volatile.rs", 25),
        ]);

        let high_vol = analyzer.high_volatility_files();
        assert_eq!(high_vol.len(), 2);

        // Check which files came back, not just how many; order is unspecified.
        let mut names: Vec<&str> = high_vol.iter().map(|(path, _)| path.as_str()).collect();
        names.sort_unstable();
        assert_eq!(names, ["very_volatile.rs", "volatile.rs"]);
        assert!(high_vol.iter().all(|&(_, count)| count > 10));
    }

    #[test]
    fn test_statistics() {
        let analyzer = analyzer_with(&[("a.rs", 1), ("b.rs", 5), ("c.rs", 15)]);

        let stats = analyzer.statistics();
        assert_eq!(stats.total_files, 3);
        assert_eq!(stats.total_changes, 21);
        assert_eq!(stats.max_changes, 15);
        assert_eq!(stats.min_changes, 1);
        assert!((stats.avg_changes - 7.0).abs() < f64::EPSILON);
        assert_eq!(stats.low_volatility_count, 1);
        assert_eq!(stats.medium_volatility_count, 1);
        assert_eq!(stats.high_volatility_count, 1);
    }

    #[test]
    fn test_statistics_empty() {
        let stats = VolatilityAnalyzer::new(6).statistics();

        assert_eq!(stats.total_files, 0);
        assert_eq!(stats.total_changes, 0);
        assert_eq!(stats.max_changes, 0);
        assert_eq!(stats.min_changes, 0);
        assert_eq!(stats.avg_changes, 0.0);
        assert_eq!(stats.low_volatility_count, 0);
        assert_eq!(stats.medium_volatility_count, 0);
        assert_eq!(stats.high_volatility_count, 0);
    }

    #[test]
    fn test_temporal_coupling_is_strong() {
        assert!(coupling(10, 0.8).is_strong());
        // The threshold itself counts as strong.
        assert!(coupling(5, 0.5).is_strong());
        assert!(!coupling(3, 0.3).is_strong());
    }
}