1use std::collections::{HashMap, HashSet};
7use std::path::Path;
8
9use argus_core::ArgusError;
10use serde::{Deserialize, Serialize};
11
12use crate::mining::CommitInfo;
13
14#[derive(Debug, Clone, Serialize, Deserialize)]
34#[serde(rename_all = "camelCase")]
35pub struct Hotspot {
36 pub path: String,
38 pub revisions: u32,
40 pub total_churn: u64,
42 pub relative_churn: f64,
44 pub current_loc: u64,
46 pub score: f64,
48 pub last_modified: i64,
50 pub authors: u32,
52}
53
54pub fn detect_hotspots(
77 repo_path: &Path,
78 commits: &[CommitInfo],
79) -> Result<Vec<Hotspot>, ArgusError> {
80 if commits.is_empty() {
81 return Ok(Vec::new());
82 }
83
84 let mut revisions: HashMap<String, u32> = HashMap::new();
86 let mut churn: HashMap<String, u64> = HashMap::new();
87 let mut authors: HashMap<String, HashSet<String>> = HashMap::new();
88 let mut last_modified: HashMap<String, i64> = HashMap::new();
89
90 for commit in commits {
91 for file in &commit.files_changed {
92 *revisions.entry(file.path.clone()).or_default() += 1;
93 *churn.entry(file.path.clone()).or_default() += file.lines_added + file.lines_deleted;
94 authors
95 .entry(file.path.clone())
96 .or_default()
97 .insert(commit.author.clone());
98 let entry = last_modified.entry(file.path.clone()).or_insert(0);
99 if commit.timestamp > *entry {
100 *entry = commit.timestamp;
101 }
102 }
103 }
104
105 let mut hotspots = Vec::new();
107 for (path, rev_count) in &revisions {
108 let full_path = repo_path.join(path);
109 let Some(loc) = count_lines(&full_path) else {
110 continue;
111 };
112
113 let total_churn = churn.get(path).copied().unwrap_or(0);
114 let relative_churn = if loc > 0 {
115 total_churn as f64 / loc as f64
116 } else {
117 0.0
118 };
119 let author_count = authors.get(path).map_or(0, |s| s.len() as u32);
120 let last_mod = last_modified.get(path).copied().unwrap_or(0);
121
122 hotspots.push(Hotspot {
123 path: path.clone(),
124 revisions: *rev_count,
125 total_churn,
126 relative_churn,
127 current_loc: loc,
128 score: 0.0, last_modified: last_mod,
130 authors: author_count,
131 });
132 }
133
134 if hotspots.is_empty() {
135 return Ok(hotspots);
136 }
137
138 let max_revisions = hotspots.iter().map(|h| h.revisions).max().unwrap_or(1) as f64;
140 let max_relative_churn = hotspots
141 .iter()
142 .map(|h| h.relative_churn)
143 .fold(0.0f64, f64::max)
144 .max(1.0);
145 let max_loc = hotspots.iter().map(|h| h.current_loc).max().unwrap_or(1) as f64;
146
147 for hotspot in &mut hotspots {
148 let norm_revisions = hotspot.revisions as f64 / max_revisions;
149 let norm_churn = hotspot.relative_churn / max_relative_churn;
150 let norm_loc = hotspot.current_loc as f64 / max_loc;
151
152 hotspot.score = norm_revisions * 0.5 + norm_churn * 0.3 + norm_loc * 0.2;
153 }
154
155 hotspots.sort_by(|a, b| {
156 b.score
157 .partial_cmp(&a.score)
158 .unwrap_or(std::cmp::Ordering::Equal)
159 });
160
161 Ok(hotspots)
162}
163
/// Returns the number of lines in the file at `path`.
///
/// Yields `None` when the file cannot be read into a `String`, which covers
/// both a missing/unreadable file and contents that are not valid UTF-8.
fn count_lines(path: &Path) -> Option<u64> {
    match std::fs::read_to_string(path) {
        Ok(text) => Some(text.lines().count() as u64),
        Err(_) => None,
    }
}
168
#[cfg(test)]
mod tests {
    use super::*;
    use crate::mining::{ChangeStatus, FileChange};
    use std::fs;
    use std::path::PathBuf;

    /// Scratch directory holding fixture files; removed again on drop.
    ///
    /// The tests build their own tree instead of inspecting the surrounding
    /// checkout, so they neither need a `.git` directory nor depend on which
    /// files happen to exist in the real repository.
    struct TempRepo {
        root: PathBuf,
    }

    impl TempRepo {
        /// Creates a fresh directory containing each `(relative_path, line_count)`
        /// file. `label` must be unique per test because tests run in parallel.
        fn new(label: &str, files: &[(&str, usize)]) -> Self {
            let root = std::env::temp_dir().join(format!(
                "argus_hotspots_{}_{label}",
                std::process::id()
            ));
            // Clear leftovers from an earlier aborted run that reused this pid.
            let _ = fs::remove_dir_all(&root);
            fs::create_dir_all(&root).expect("create fixture root");

            for &(relative, line_count) in files {
                let target = root.join(relative);
                if let Some(parent) = target.parent() {
                    fs::create_dir_all(parent).expect("create fixture subdirectory");
                }
                fs::write(&target, "line\n".repeat(line_count)).expect("write fixture file");
            }

            Self { root }
        }
    }

    impl Drop for TempRepo {
        fn drop(&mut self) {
            // Best-effort cleanup; a leftover temp directory is harmless.
            let _ = fs::remove_dir_all(&self.root);
        }
    }

    /// Builds a commit by `author` at `timestamp` touching `files`, given as
    /// `(path, lines_added, lines_deleted)` tuples.
    fn make_commit(author: &str, timestamp: i64, files: Vec<(&str, u64, u64)>) -> CommitInfo {
        CommitInfo {
            hash: format!("hash_{timestamp}"),
            author: author.into(),
            email: format!("{author}@example.com"),
            timestamp,
            message: "test commit".into(),
            files_changed: files
                .into_iter()
                .map(|(path, added, deleted)| FileChange {
                    path: path.into(),
                    lines_added: added,
                    lines_deleted: deleted,
                    status: ChangeStatus::Modified,
                })
                .collect(),
        }
    }

    #[test]
    fn high_churn_file_gets_high_score() {
        let repo = TempRepo::new("high_churn", &[("src/main.rs", 10), ("Cargo.toml", 5)]);
        let commits = vec![
            make_commit("alice", 1000, vec![("src/main.rs", 100, 50)]),
            make_commit("alice", 2000, vec![("src/main.rs", 80, 40)]),
            make_commit("alice", 3000, vec![("src/main.rs", 60, 30)]),
            make_commit("bob", 4000, vec![("Cargo.toml", 1, 0)]),
        ];

        let hotspots = detect_hotspots(&repo.root, &commits).unwrap();
        assert_eq!(hotspots.len(), 2);

        let main_h = hotspots
            .iter()
            .find(|h| h.path == "src/main.rs")
            .expect("src/main.rs should be reported");
        let cargo_h = hotspots
            .iter()
            .find(|h| h.path == "Cargo.toml")
            .expect("Cargo.toml should be reported");

        assert!(
            main_h.score > cargo_h.score,
            "main.rs ({:.4}) should score higher than Cargo.toml ({:.4})",
            main_h.score,
            cargo_h.score,
        );
        // Results are sorted hottest-first.
        assert_eq!(hotspots[0].path, "src/main.rs");
    }

    #[test]
    fn aggregates_history_per_file() {
        let repo = TempRepo::new("aggregates", &[("lib.rs", 4)]);
        let commits = vec![
            make_commit("alice", 1000, vec![("lib.rs", 10, 5)]),
            make_commit("bob", 3000, vec![("lib.rs", 1, 1)]),
            make_commit("alice", 2000, vec![("lib.rs", 2, 0)]),
        ];

        let hotspots = detect_hotspots(&repo.root, &commits).unwrap();
        assert_eq!(hotspots.len(), 1);

        let spot = &hotspots[0];
        assert_eq!(spot.path, "lib.rs");
        assert_eq!(spot.revisions, 3);
        assert_eq!(spot.total_churn, 19);
        assert_eq!(spot.current_loc, 4);
        assert_eq!(spot.authors, 2);
        assert_eq!(spot.last_modified, 3000);
        assert!((spot.relative_churn - 4.75).abs() < 1e-12);
    }

    #[test]
    fn deleted_files_are_excluded() {
        let repo = TempRepo::new("deleted_files", &[("present.rs", 3)]);
        let commits = vec![make_commit(
            "alice",
            1000,
            vec![("nonexistent_file_xyz.rs", 100, 50)],
        )];

        let hotspots = detect_hotspots(&repo.root, &commits).unwrap();
        let found = hotspots.iter().any(|h| h.path == "nonexistent_file_xyz.rs");
        assert!(!found, "nonexistent files should be excluded");
        assert!(hotspots.is_empty());
    }

    #[test]
    fn scores_are_in_valid_range() {
        let repo = TempRepo::new("score_range", &[("src/main.rs", 10), ("Cargo.toml", 5)]);
        let commits = vec![
            make_commit("alice", 1000, vec![("src/main.rs", 50, 20)]),
            make_commit("bob", 2000, vec![("Cargo.toml", 5, 2)]),
        ];

        let hotspots = detect_hotspots(&repo.root, &commits).unwrap();
        // Guard against the loop below passing vacuously.
        assert_eq!(hotspots.len(), 2);
        for h in &hotspots {
            assert!(
                h.score >= 0.0 && h.score <= 1.0,
                "score {} is out of range for {}",
                h.score,
                h.path,
            );
        }
    }

    #[test]
    fn empty_commits_dont_crash() {
        let repo = TempRepo::new("empty_commits", &[]);
        let hotspots = detect_hotspots(&repo.root, &[]).unwrap();
        assert!(hotspots.is_empty());
    }
}