Skip to main content

codelens_engine/
coupling.rs

1use crate::project::ProjectRoot;
2use anyhow::Result;
3use serde::Serialize;
4use std::collections::HashMap;
5use std::process::Command;
6
7#[derive(Debug, Clone, Serialize)]
8pub struct CouplingEntry {
9    pub file_a: String,
10    pub file_b: String,
11    pub co_changes: usize,
12    pub total_changes_a: usize,
13    pub total_changes_b: usize,
14    pub strength: f64,
15}
16
17/// Analyze git history to find files that frequently change together.
18pub fn get_change_coupling(
19    project: &ProjectRoot,
20    months: usize,
21    min_strength: f64,
22    min_commits: usize,
23    max_results: usize,
24) -> Result<Vec<CouplingEntry>> {
25    let since = format!("{months} months ago");
26    let output = Command::new("git")
27        .args([
28            "log",
29            "--name-only",
30            "--pretty=format:---COMMIT---",
31            &format!("--since={since}"),
32        ])
33        .current_dir(project.as_path())
34        .output();
35
36    let output = match output {
37        Ok(out) => out,
38        Err(_) => return Ok(Vec::new()),
39    };
40
41    if !output.status.success() {
42        return Ok(Vec::new());
43    }
44
45    let text = String::from_utf8_lossy(&output.stdout);
46    let commits = parse_commits(&text);
47
48    // Count individual file changes and co-changes
49    let mut total_changes: HashMap<String, usize> = HashMap::new();
50    let mut co_changes: HashMap<(String, String), usize> = HashMap::new();
51
52    for files in &commits {
53        for file in files {
54            *total_changes.entry(file.clone()).or_insert(0) += 1;
55        }
56        // For all pairs in this commit (sorted to avoid double-counting)
57        for i in 0..files.len() {
58            for j in (i + 1)..files.len() {
59                let a = files[i].clone();
60                let b = files[j].clone();
61                let key = if a <= b { (a, b) } else { (b, a) };
62                *co_changes.entry(key).or_insert(0) += 1;
63            }
64        }
65    }
66
67    let mut entries: Vec<CouplingEntry> = co_changes
68        .into_iter()
69        .filter_map(|((file_a, file_b), count)| {
70            if count < min_commits {
71                return None;
72            }
73            let total_a = *total_changes.get(&file_a).unwrap_or(&1);
74            let total_b = *total_changes.get(&file_b).unwrap_or(&1);
75            // Jaccard-like: co_changes / max(total_a, total_b).
76            // max() avoids inflating scores for rarely-changed files.
77            let strength = count as f64 / total_a.max(total_b) as f64;
78            if strength < min_strength {
79                return None;
80            }
81            Some(CouplingEntry {
82                file_a,
83                file_b,
84                co_changes: count,
85                total_changes_a: total_a,
86                total_changes_b: total_b,
87                strength,
88            })
89        })
90        .collect();
91
92    entries.sort_by(|a, b| {
93        b.strength
94            .total_cmp(&a.strength)
95            .then(a.file_a.cmp(&b.file_a))
96    });
97
98    if max_results > 0 && entries.len() > max_results {
99        entries.truncate(max_results);
100    }
101
102    Ok(entries)
103}
104
/// Splits `git log --name-only --pretty=format:---COMMIT---` output into one
/// list of file paths per commit.
///
/// Each line is trimmed; blank lines are ignored. A `---COMMIT---` line closes
/// the group collected so far, and groups with no files are skipped.
fn parse_commits(text: &str) -> Vec<Vec<String>> {
    let mut groups: Vec<Vec<String>> = Vec::new();
    let mut pending: Vec<String> = Vec::new();

    let meaningful_lines = text.lines().map(str::trim).filter(|l| !l.is_empty());
    for entry in meaningful_lines {
        match entry {
            "---COMMIT---" => {
                // Marker: flush whatever the previous commit accumulated.
                if !pending.is_empty() {
                    groups.push(std::mem::take(&mut pending));
                }
            }
            path => pending.push(path.to_owned()),
        }
    }

    // The last commit has no trailing marker, so flush it explicitly.
    if !pending.is_empty() {
        groups.push(pending);
    }

    groups
}
125
#[cfg(test)]
mod tests {
    use super::*;

    /// Two marker-delimited commits come back as two file lists, in log order.
    #[test]
    fn parses_commits_correctly() {
        let log = "---COMMIT---\nfoo.rs\nbar.rs\n---COMMIT---\nfoo.rs\nbaz.rs\n";
        let parsed = parse_commits(log);
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0], vec!["foo.rs", "bar.rs"]);
        assert_eq!(parsed[1], vec!["foo.rs", "baz.rs"]);
    }

    /// No git output means no commits.
    #[test]
    fn empty_git_output_returns_empty() {
        assert!(parse_commits("").is_empty());
    }

    /// Re-runs the counting that get_change_coupling performs on a small,
    /// hand-written history and checks the resulting strength.
    #[test]
    fn calculates_coupling_from_parsed_commits() {
        // a.rs changes in all four commits, three of them together with b.rs.
        let history: Vec<Vec<String>> = [
            ["a.rs", "b.rs"],
            ["a.rs", "b.rs"],
            ["a.rs", "b.rs"],
            ["a.rs", "c.rs"],
        ]
        .iter()
        .map(|pair| pair.iter().map(|name| (*name).to_owned()).collect())
        .collect();

        let mut per_file: HashMap<String, usize> = HashMap::new();
        let mut per_pair: HashMap<(String, String), usize> = HashMap::new();

        for changed in &history {
            for name in changed {
                *per_file.entry(name.clone()).or_default() += 1;
            }
            for (idx, left) in changed.iter().enumerate() {
                for right in &changed[idx + 1..] {
                    // Canonical (smaller, larger) key so each pair is counted once.
                    let key = if left <= right {
                        (left.clone(), right.clone())
                    } else {
                        (right.clone(), left.clone())
                    };
                    *per_pair.entry(key).or_default() += 1;
                }
            }
        }

        let together = per_pair[&("a.rs".to_owned(), "b.rs".to_owned())];
        assert_eq!(together, 3);
        assert_eq!(per_file["a.rs"], 4);

        let denominator = per_file["a.rs"].max(per_file["b.rs"]);
        let strength = together as f64 / denominator as f64;
        assert!((strength - 0.75).abs() < 1e-9);
    }
}