Skip to main content

argus_gitpulse/
coupling.rs

1//! Temporal coupling detection.
2//!
3//! Identifies pairs of files that frequently change together in commits,
4//! which may indicate hidden dependencies or architectural coupling.
5
6use std::collections::HashMap;
7
8use argus_core::ArgusError;
9use serde::{Deserialize, Serialize};
10
11use crate::mining::CommitInfo;
12
13/// A pair of files that frequently change together.
14///
15/// # Examples
16///
17/// ```
18/// use argus_gitpulse::coupling::CoupledPair;
19///
20/// let pair = CoupledPair {
21///     file_a: "src/auth.rs".into(),
22///     file_b: "src/session.rs".into(),
23///     co_changes: 15,
24///     coupling_degree: 0.75,
25///     changes_a: 20,
26///     changes_b: 18,
27/// };
28/// assert!(pair.coupling_degree > 0.5);
29/// ```
30#[derive(Debug, Clone, Serialize, Deserialize)]
31#[serde(rename_all = "camelCase")]
32pub struct CoupledPair {
33    /// First file in the pair (lexicographically smaller).
34    pub file_a: String,
35    /// Second file in the pair.
36    pub file_b: String,
37    /// Number of commits touching both files.
38    pub co_changes: u32,
39    /// `co_changes / max(changes_a, changes_b)`.
40    pub coupling_degree: f64,
41    /// Total commits touching file_a.
42    pub changes_a: u32,
43    /// Total commits touching file_b.
44    pub changes_b: u32,
45}
46
47/// Detect temporal coupling between files.
48///
49/// Returns coupled pairs sorted by `coupling_degree` descending.
50/// Only returns pairs where `coupling_degree >= min_coupling`
51/// and `co_changes >= min_co_changes`.
52///
53/// # Errors
54///
55/// Returns [`ArgusError`] on processing failure.
56///
57/// # Examples
58///
59/// ```
60/// use argus_gitpulse::coupling::detect_coupling;
61/// use argus_gitpulse::mining::{CommitInfo, FileChange, ChangeStatus};
62///
63/// let commits = vec![
64///     CommitInfo {
65///         hash: "abc".into(),
66///         author: "alice".into(),
67///         email: "alice@example.com".into(),
68///         timestamp: 1000,
69///         message: "change".into(),
70///         files_changed: vec![
71///             FileChange { path: "a.rs".into(), lines_added: 5, lines_deleted: 0, status: ChangeStatus::Modified },
72///             FileChange { path: "b.rs".into(), lines_added: 3, lines_deleted: 0, status: ChangeStatus::Modified },
73///         ],
74///     },
75/// ];
76/// let pairs = detect_coupling(&commits, 0.0, 1).unwrap();
77/// assert_eq!(pairs.len(), 1);
78/// ```
79pub fn detect_coupling(
80    commits: &[CommitInfo],
81    min_coupling: f64,
82    min_co_changes: u32,
83) -> Result<Vec<CoupledPair>, ArgusError> {
84    // Count per-file changes
85    let mut file_changes: HashMap<String, u32> = HashMap::new();
86    // Count co-changes for pairs (normalized key: lexicographic order)
87    let mut co_changes: HashMap<(String, String), u32> = HashMap::new();
88
89    for commit in commits {
90        let files: Vec<&str> = commit
91            .files_changed
92            .iter()
93            .map(|f| f.path.as_str())
94            .collect();
95        let unique_files: Vec<&str> = {
96            let mut seen = std::collections::HashSet::new();
97            let mut unique = Vec::new();
98            for f in &files {
99                if seen.insert(*f) {
100                    unique.push(*f);
101                }
102            }
103            unique
104        };
105
106        // Count individual file changes
107        for file in &unique_files {
108            *file_changes.entry((*file).to_string()).or_default() += 1;
109        }
110
111        // Count co-changes for every pair
112        for i in 0..unique_files.len() {
113            for j in (i + 1)..unique_files.len() {
114                let key = normalize_pair(unique_files[i], unique_files[j]);
115                *co_changes.entry(key).or_default() += 1;
116            }
117        }
118    }
119
120    // Build coupled pairs
121    let mut pairs = Vec::new();
122    for ((file_a, file_b), co_count) in &co_changes {
123        if *co_count < min_co_changes {
124            continue;
125        }
126
127        let changes_a = file_changes.get(file_a).copied().unwrap_or(0);
128        let changes_b = file_changes.get(file_b).copied().unwrap_or(0);
129        let max_changes = changes_a.max(changes_b);
130
131        if max_changes == 0 {
132            continue;
133        }
134
135        let coupling_degree = *co_count as f64 / max_changes as f64;
136
137        if coupling_degree < min_coupling {
138            continue;
139        }
140
141        pairs.push(CoupledPair {
142            file_a: file_a.clone(),
143            file_b: file_b.clone(),
144            co_changes: *co_count,
145            coupling_degree,
146            changes_a,
147            changes_b,
148        });
149    }
150
151    pairs.sort_by(|a, b| {
152        b.coupling_degree
153            .partial_cmp(&a.coupling_degree)
154            .unwrap_or(std::cmp::Ordering::Equal)
155    });
156
157    Ok(pairs)
158}
159
/// Build the canonical map key for an unordered pair of paths.
///
/// Returns both paths as owned strings with the smaller one (by `str`
/// ordering) first, so `(a, b)` and `(b, a)` produce the same key.
fn normalize_pair(a: &str, b: &str) -> (String, String) {
    // Decide the order on the borrowed values, then allocate once per side.
    let (smaller, larger) = if b < a { (b, a) } else { (a, b) };
    (smaller.to_owned(), larger.to_owned())
}
167
#[cfg(test)]
mod tests {
    use super::*;
    use crate::mining::{ChangeStatus, FileChange};

    /// Builds a commit with fixed metadata that modifies every path in `files`.
    fn make_commit(files: Vec<&str>) -> CommitInfo {
        let files_changed = files
            .into_iter()
            .map(|path| FileChange {
                path: path.into(),
                lines_added: 5,
                lines_deleted: 2,
                status: ChangeStatus::Modified,
            })
            .collect();

        CommitInfo {
            hash: "abc".into(),
            author: "alice".into(),
            email: "alice@example.com".into(),
            timestamp: 1000,
            message: "test".into(),
            files_changed,
        }
    }

    #[test]
    fn files_always_changed_together_have_coupling_1() {
        // Three identical commits, each touching both files.
        let commits: Vec<CommitInfo> = (0..3)
            .map(|_| make_commit(vec!["a.rs", "b.rs"]))
            .collect();

        let pairs = detect_coupling(&commits, 0.0, 1).unwrap();
        assert_eq!(pairs.len(), 1);

        let only = &pairs[0];
        assert!((only.coupling_degree - 1.0).abs() < f64::EPSILON);
        assert_eq!(only.co_changes, 3);
    }

    #[test]
    fn files_never_changed_together_not_in_results() {
        let first = make_commit(vec!["a.rs"]);
        let second = make_commit(vec!["b.rs"]);
        let commits = vec![first, second];

        let pairs = detect_coupling(&commits, 0.0, 1).unwrap();
        assert!(pairs.is_empty());
    }

    #[test]
    fn min_coupling_filter_works() {
        // a.rs changes three times, only once together with b.rs:
        // coupling = 1/3 = 0.33
        let commits = vec![
            make_commit(vec!["a.rs", "b.rs"]),
            make_commit(vec!["a.rs"]),
            make_commit(vec!["a.rs"]),
        ];

        let below_degree = detect_coupling(&commits, 0.3, 1).unwrap();
        assert_eq!(below_degree.len(), 1);

        let above_degree = detect_coupling(&commits, 0.5, 1).unwrap();
        assert!(above_degree.is_empty());
    }

    #[test]
    fn min_co_changes_filter_works() {
        let commits = vec![make_commit(vec!["a.rs", "b.rs"])];

        let with_two_required = detect_coupling(&commits, 0.0, 2).unwrap();
        assert!(with_two_required.is_empty(), "need at least 2 co-changes");

        let with_one_required = detect_coupling(&commits, 0.0, 1).unwrap();
        assert_eq!(with_one_required.len(), 1);
    }

    #[test]
    fn pair_normalization_treats_ab_same_as_ba() {
        // The same two files listed in opposite orders must land on one pair.
        let commits = vec![
            make_commit(vec!["z.rs", "a.rs"]),
            make_commit(vec!["a.rs", "z.rs"]),
        ];

        let pairs = detect_coupling(&commits, 0.0, 1).unwrap();
        assert_eq!(pairs.len(), 1);

        // Normalized order: a.rs sorts before z.rs.
        let pair = &pairs[0];
        assert_eq!(pair.file_a, "a.rs");
        assert_eq!(pair.file_b, "z.rs");
        assert_eq!(pair.co_changes, 2);
    }
}
256}