argus_gitpulse/
coupling.rs1use std::collections::HashMap;
7
8use argus_core::ArgusError;
9use serde::{Deserialize, Serialize};
10
11use crate::mining::CommitInfo;
12
13#[derive(Debug, Clone, Serialize, Deserialize)]
31#[serde(rename_all = "camelCase")]
32pub struct CoupledPair {
33 pub file_a: String,
35 pub file_b: String,
37 pub co_changes: u32,
39 pub coupling_degree: f64,
41 pub changes_a: u32,
43 pub changes_b: u32,
45}
46
47pub fn detect_coupling(
80 commits: &[CommitInfo],
81 min_coupling: f64,
82 min_co_changes: u32,
83) -> Result<Vec<CoupledPair>, ArgusError> {
84 let mut file_changes: HashMap<String, u32> = HashMap::new();
86 let mut co_changes: HashMap<(String, String), u32> = HashMap::new();
88
89 for commit in commits {
90 let files: Vec<&str> = commit
91 .files_changed
92 .iter()
93 .map(|f| f.path.as_str())
94 .collect();
95 let unique_files: Vec<&str> = {
96 let mut seen = std::collections::HashSet::new();
97 let mut unique = Vec::new();
98 for f in &files {
99 if seen.insert(*f) {
100 unique.push(*f);
101 }
102 }
103 unique
104 };
105
106 for file in &unique_files {
108 *file_changes.entry((*file).to_string()).or_default() += 1;
109 }
110
111 for i in 0..unique_files.len() {
113 for j in (i + 1)..unique_files.len() {
114 let key = normalize_pair(unique_files[i], unique_files[j]);
115 *co_changes.entry(key).or_default() += 1;
116 }
117 }
118 }
119
120 let mut pairs = Vec::new();
122 for ((file_a, file_b), co_count) in &co_changes {
123 if *co_count < min_co_changes {
124 continue;
125 }
126
127 let changes_a = file_changes.get(file_a).copied().unwrap_or(0);
128 let changes_b = file_changes.get(file_b).copied().unwrap_or(0);
129 let max_changes = changes_a.max(changes_b);
130
131 if max_changes == 0 {
132 continue;
133 }
134
135 let coupling_degree = *co_count as f64 / max_changes as f64;
136
137 if coupling_degree < min_coupling {
138 continue;
139 }
140
141 pairs.push(CoupledPair {
142 file_a: file_a.clone(),
143 file_b: file_b.clone(),
144 co_changes: *co_count,
145 coupling_degree,
146 changes_a,
147 changes_b,
148 });
149 }
150
151 pairs.sort_by(|a, b| {
152 b.coupling_degree
153 .partial_cmp(&a.coupling_degree)
154 .unwrap_or(std::cmp::Ordering::Equal)
155 });
156
157 Ok(pairs)
158}
159
/// Returns the two paths as an owned tuple with the lexicographically smaller
/// one first, so `(a, b)` and `(b, a)` produce the same map key.
fn normalize_pair(a: &str, b: &str) -> (String, String) {
    let (first, second) = if a <= b { (a, b) } else { (b, a) };
    (first.to_string(), second.to_string())
}
167
#[cfg(test)]
mod tests {
    use super::*;
    use crate::mining::{ChangeStatus, FileChange};

    /// Builds a commit with fixed metadata that modifies the given paths.
    fn make_commit(files: Vec<&str>) -> CommitInfo {
        CommitInfo {
            hash: "abc".into(),
            author: "alice".into(),
            email: "alice@example.com".into(),
            timestamp: 1000,
            message: "test".into(),
            files_changed: files
                .into_iter()
                .map(|path| FileChange {
                    path: path.into(),
                    lines_added: 5,
                    lines_deleted: 2,
                    status: ChangeStatus::Modified,
                })
                .collect(),
        }
    }

    #[test]
    fn files_always_changed_together_have_coupling_1() {
        let commits = vec![
            make_commit(vec!["a.rs", "b.rs"]),
            make_commit(vec!["a.rs", "b.rs"]),
            make_commit(vec!["a.rs", "b.rs"]),
        ];

        let pairs = detect_coupling(&commits, 0.0, 1).unwrap();
        assert_eq!(pairs.len(), 1);
        assert!((pairs[0].coupling_degree - 1.0).abs() < f64::EPSILON);
        assert_eq!(pairs[0].co_changes, 3);
    }

    #[test]
    fn files_never_changed_together_not_in_results() {
        let commits = vec![make_commit(vec!["a.rs"]), make_commit(vec!["b.rs"])];

        let pairs = detect_coupling(&commits, 0.0, 1).unwrap();
        assert!(pairs.is_empty());
    }

    #[test]
    fn min_coupling_filter_works() {
        let commits = vec![
            make_commit(vec!["a.rs", "b.rs"]),
            make_commit(vec!["a.rs"]),
            make_commit(vec!["a.rs"]),
        ];

        // a.rs changed 3 times, co-changed with b.rs once: degree is 1/3.
        let pairs_low = detect_coupling(&commits, 0.3, 1).unwrap();
        assert_eq!(pairs_low.len(), 1);

        let pairs_high = detect_coupling(&commits, 0.5, 1).unwrap();
        assert!(pairs_high.is_empty());
    }

    #[test]
    fn min_co_changes_filter_works() {
        let commits = vec![make_commit(vec!["a.rs", "b.rs"])];

        let pairs = detect_coupling(&commits, 0.0, 2).unwrap();
        assert!(pairs.is_empty(), "need at least 2 co-changes");

        let pairs = detect_coupling(&commits, 0.0, 1).unwrap();
        assert_eq!(pairs.len(), 1);
    }

    #[test]
    fn pair_normalization_treats_ab_same_as_ba() {
        let commits = vec![
            make_commit(vec!["z.rs", "a.rs"]),
            make_commit(vec!["a.rs", "z.rs"]),
        ];

        let pairs = detect_coupling(&commits, 0.0, 1).unwrap();
        assert_eq!(pairs.len(), 1);
        assert_eq!(pairs[0].file_a, "a.rs");
        assert_eq!(pairs[0].file_b, "z.rs");
        assert_eq!(pairs[0].co_changes, 2);
    }

    #[test]
    fn duplicate_paths_in_one_commit_are_counted_once() {
        let commits = vec![make_commit(vec!["a.rs", "a.rs", "b.rs"])];

        let pairs = detect_coupling(&commits, 0.0, 1).unwrap();
        assert_eq!(pairs.len(), 1);
        assert_eq!(pairs[0].co_changes, 1);
        assert_eq!(pairs[0].changes_a, 1);
        assert_eq!(pairs[0].changes_b, 1);
    }

    #[test]
    fn results_are_sorted_by_descending_coupling() {
        let commits = vec![
            make_commit(vec!["a.rs", "b.rs"]),
            make_commit(vec!["a.rs", "b.rs"]),
            make_commit(vec!["a.rs", "c.rs"]),
        ];

        // a.rs: 3 changes. (a, b) co-changed twice -> 2/3; (a, c) once -> 1/3.
        let pairs = detect_coupling(&commits, 0.0, 1).unwrap();
        assert_eq!(pairs.len(), 2);
        assert_eq!(pairs[0].file_b, "b.rs");
        assert_eq!(pairs[1].file_b, "c.rs");
        assert!(pairs[0].coupling_degree > pairs[1].coupling_degree);
    }
}