Skip to main content

argus_gitpulse/
hotspots.rs

1//! Churn × complexity hotspot detection.
2//!
3//! Identifies files with high churn and high complexity (LoC) that are
4//! likely sources of bugs, following the Tornhill methodology.
5
6use std::collections::{HashMap, HashSet};
7use std::path::Path;
8
9use argus_core::ArgusError;
10use serde::{Deserialize, Serialize};
11
12use crate::mining::CommitInfo;
13
14/// A hotspot — a file with high churn and high complexity.
15///
16/// # Examples
17///
18/// ```
19/// use argus_gitpulse::hotspots::Hotspot;
20///
21/// let h = Hotspot {
22///     path: "src/main.rs".into(),
23///     revisions: 10,
24///     total_churn: 500,
25///     relative_churn: 2.5,
26///     current_loc: 200,
27///     score: 0.85,
28///     last_modified: 1700000000,
29///     authors: 3,
30/// };
31/// assert!(h.score > 0.0 && h.score <= 1.0);
32/// ```
33#[derive(Debug, Clone, Serialize, Deserialize)]
34#[serde(rename_all = "camelCase")]
35pub struct Hotspot {
36    /// File path relative to repo root.
37    pub path: String,
38    /// Number of commits touching this file.
39    pub revisions: u32,
40    /// Total lines added + deleted across all commits.
41    pub total_churn: u64,
42    /// `total_churn / current_loc`.
43    pub relative_churn: f64,
44    /// Current lines of code in the file.
45    pub current_loc: u64,
46    /// Normalized hotspot score (0.0–1.0).
47    pub score: f64,
48    /// Unix timestamp of most recent change.
49    pub last_modified: i64,
50    /// Number of distinct authors.
51    pub authors: u32,
52}
53
54/// Detect hotspots from commit history.
55///
56/// Returns hotspots sorted by score descending. Only includes files
57/// that still exist on disk at `repo_path`.
58///
59/// # Errors
60///
61/// Returns [`ArgusError::Git`] on filesystem errors.
62///
63/// # Examples
64///
65/// ```no_run
66/// use std::path::Path;
67/// use argus_gitpulse::hotspots::detect_hotspots;
68/// use argus_gitpulse::mining::{mine_history, MiningOptions};
69///
70/// let commits = mine_history(Path::new("."), &MiningOptions::default()).unwrap();
71/// let hotspots = detect_hotspots(Path::new("."), &commits).unwrap();
72/// for h in hotspots.iter().take(5) {
73///     println!("{}: score={:.2}, revisions={}", h.path, h.score, h.revisions);
74/// }
75/// ```
76pub fn detect_hotspots(
77    repo_path: &Path,
78    commits: &[CommitInfo],
79) -> Result<Vec<Hotspot>, ArgusError> {
80    if commits.is_empty() {
81        return Ok(Vec::new());
82    }
83
84    // Accumulate per-file stats
85    let mut revisions: HashMap<String, u32> = HashMap::new();
86    let mut churn: HashMap<String, u64> = HashMap::new();
87    let mut authors: HashMap<String, HashSet<String>> = HashMap::new();
88    let mut last_modified: HashMap<String, i64> = HashMap::new();
89
90    for commit in commits {
91        for file in &commit.files_changed {
92            *revisions.entry(file.path.clone()).or_default() += 1;
93            *churn.entry(file.path.clone()).or_default() += file.lines_added + file.lines_deleted;
94            authors
95                .entry(file.path.clone())
96                .or_default()
97                .insert(commit.author.clone());
98            let entry = last_modified.entry(file.path.clone()).or_insert(0);
99            if commit.timestamp > *entry {
100                *entry = commit.timestamp;
101            }
102        }
103    }
104
105    // Build hotspots, only for files that exist on disk
106    let mut hotspots = Vec::new();
107    for (path, rev_count) in &revisions {
108        let full_path = repo_path.join(path);
109        let Some(loc) = count_lines(&full_path) else {
110            continue;
111        };
112
113        let total_churn = churn.get(path).copied().unwrap_or(0);
114        let relative_churn = if loc > 0 {
115            total_churn as f64 / loc as f64
116        } else {
117            0.0
118        };
119        let author_count = authors.get(path).map_or(0, |s| s.len() as u32);
120        let last_mod = last_modified.get(path).copied().unwrap_or(0);
121
122        hotspots.push(Hotspot {
123            path: path.clone(),
124            revisions: *rev_count,
125            total_churn,
126            relative_churn,
127            current_loc: loc,
128            score: 0.0, // computed below
129            last_modified: last_mod,
130            authors: author_count,
131        });
132    }
133
134    if hotspots.is_empty() {
135        return Ok(hotspots);
136    }
137
138    // Normalize and compute scores
139    let max_revisions = hotspots.iter().map(|h| h.revisions).max().unwrap_or(1) as f64;
140    let max_relative_churn = hotspots
141        .iter()
142        .map(|h| h.relative_churn)
143        .fold(0.0f64, f64::max)
144        .max(1.0);
145    let max_loc = hotspots.iter().map(|h| h.current_loc).max().unwrap_or(1) as f64;
146
147    for hotspot in &mut hotspots {
148        let norm_revisions = hotspot.revisions as f64 / max_revisions;
149        let norm_churn = hotspot.relative_churn / max_relative_churn;
150        let norm_loc = hotspot.current_loc as f64 / max_loc;
151
152        hotspot.score = norm_revisions * 0.5 + norm_churn * 0.3 + norm_loc * 0.2;
153    }
154
155    hotspots.sort_by(|a, b| {
156        b.score
157            .partial_cmp(&a.score)
158            .unwrap_or(std::cmp::Ordering::Equal)
159    });
160
161    Ok(hotspots)
162}
163
/// Count the lines of the text file at `path`.
///
/// Returns `None` when the file cannot be read into a `String`, e.g. because
/// it does not exist or its contents are not valid UTF-8.
fn count_lines(path: &Path) -> Option<u64> {
    match std::fs::read_to_string(path) {
        Ok(text) => Some(text.lines().count() as u64),
        Err(_) => None,
    }
}
168
#[cfg(test)]
mod tests {
    use super::*;
    use crate::mining::{ChangeStatus, FileChange};
    use std::fs;
    use std::path::PathBuf;

    /// Scratch directory standing in for a repository checkout.
    ///
    /// Tests write their own fixture files here so they do not depend on the
    /// current working directory, on a `.git` folder being present, or on
    /// particular files existing in the real repository. The directory is
    /// removed when the value is dropped.
    struct ScratchRepo {
        root: PathBuf,
    }

    impl ScratchRepo {
        /// Create an empty scratch directory; `label` keeps concurrently
        /// running tests from sharing a directory.
        fn new(label: &str) -> Self {
            let root = std::env::temp_dir().join(format!(
                "argus_gitpulse_hotspots_{label}_{}",
                std::process::id()
            ));
            // Clear leftovers from an earlier aborted run, if any.
            let _ = fs::remove_dir_all(&root);
            fs::create_dir_all(&root).expect("create scratch repo directory");
            Self { root }
        }

        /// Write a file with exactly `line_count` lines at `relative`.
        fn write_lines(&self, relative: &str, line_count: usize) {
            let full = self.root.join(relative);
            if let Some(parent) = full.parent() {
                fs::create_dir_all(parent).expect("create fixture parent directory");
            }
            fs::write(&full, "line\n".repeat(line_count)).expect("write fixture file");
        }
    }

    impl Drop for ScratchRepo {
        fn drop(&mut self) {
            // Best-effort cleanup; a leftover temp dir must not fail a test.
            let _ = fs::remove_dir_all(&self.root);
        }
    }

    fn make_commit(author: &str, timestamp: i64, files: Vec<(&str, u64, u64)>) -> CommitInfo {
        CommitInfo {
            hash: format!("hash_{timestamp}"),
            author: author.into(),
            email: format!("{author}@example.com"),
            timestamp,
            message: "test commit".into(),
            files_changed: files
                .into_iter()
                .map(|(path, added, deleted)| FileChange {
                    path: path.into(),
                    lines_added: added,
                    lines_deleted: deleted,
                    status: ChangeStatus::Modified,
                })
                .collect(),
        }
    }

    #[test]
    fn high_churn_file_gets_high_score() {
        let repo = ScratchRepo::new("high_churn");
        repo.write_lines("src/main.rs", 100);
        repo.write_lines("Cargo.toml", 20);

        let commits = vec![
            make_commit("alice", 1000, vec![("src/main.rs", 100, 50)]),
            make_commit("alice", 2000, vec![("src/main.rs", 80, 40)]),
            make_commit("alice", 3000, vec![("src/main.rs", 60, 30)]),
            make_commit("bob", 4000, vec![("Cargo.toml", 1, 0)]),
        ];

        let hotspots = detect_hotspots(&repo.root, &commits).unwrap();
        assert_eq!(hotspots.len(), 2);

        let main_h = hotspots
            .iter()
            .find(|h| h.path == "src/main.rs")
            .expect("src/main.rs should be reported");
        let cargo_h = hotspots
            .iter()
            .find(|h| h.path == "Cargo.toml")
            .expect("Cargo.toml should be reported");

        assert!(
            main_h.score > cargo_h.score,
            "main.rs ({:.4}) should score higher than Cargo.toml ({:.4})",
            main_h.score,
            cargo_h.score,
        );
        assert_eq!(
            hotspots[0].path, "src/main.rs",
            "results should be sorted by score descending"
        );

        // The aggregated statistics themselves should be exact.
        assert_eq!(main_h.revisions, 3);
        assert_eq!(main_h.total_churn, 360);
        assert_eq!(main_h.current_loc, 100);
        assert_eq!(main_h.last_modified, 3000);
        assert_eq!(main_h.authors, 1);
    }

    #[test]
    fn deleted_files_are_excluded() {
        let repo = ScratchRepo::new("deleted_files");
        repo.write_lines("kept.rs", 10);

        let commits = vec![
            make_commit("alice", 1000, vec![("nonexistent_file_xyz.rs", 100, 50)]),
            make_commit("alice", 2000, vec![("kept.rs", 5, 1)]),
        ];

        let hotspots = detect_hotspots(&repo.root, &commits).unwrap();
        let found = hotspots.iter().any(|h| h.path == "nonexistent_file_xyz.rs");
        assert!(!found, "nonexistent files should be excluded");
        assert_eq!(hotspots.len(), 1);
        assert_eq!(hotspots[0].path, "kept.rs");
    }

    #[test]
    fn scores_are_in_valid_range() {
        let repo = ScratchRepo::new("score_range");
        repo.write_lines("src/main.rs", 50);
        repo.write_lines("Cargo.toml", 10);
        repo.write_lines("empty.txt", 0);

        let commits = vec![
            make_commit("alice", 1000, vec![("src/main.rs", 50, 20)]),
            make_commit("bob", 2000, vec![("Cargo.toml", 5, 2)]),
            make_commit("carol", 3000, vec![("empty.txt", 3, 3)]),
        ];

        let hotspots = detect_hotspots(&repo.root, &commits).unwrap();
        assert_eq!(hotspots.len(), 3);
        for h in &hotspots {
            assert!(
                h.score >= 0.0 && h.score <= 1.0,
                "score {} is out of range for {}",
                h.score,
                h.path,
            );
        }
    }

    #[test]
    fn empty_commits_dont_crash() {
        let repo = ScratchRepo::new("empty_commits");
        let hotspots = detect_hotspots(&repo.root, &[]).unwrap();
        assert!(hotspots.is_empty());
    }
}