1use std::collections::HashMap;
8
9use argus_core::ArgusError;
10use serde::{Deserialize, Serialize};
11
12use crate::mining::CommitInfo;
13
/// Authorship statistics for a single file, as populated by `analyze_ownership`.
///
/// Field names are serialized in camelCase.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct FileOwnership {
    /// Path of the file, taken from the commits' changed-file entries.
    pub path: String,
    /// Sum of the per-author commit counts for this file.
    pub total_commits: u32,
    /// One entry per distinct (name, email) pair, sorted by commit count, highest first.
    pub authors: Vec<AuthorContribution>,
    /// Number of authors whose share of this file's commits is strictly above 10%.
    pub bus_factor: u32,
    /// Share of this file's commits made by its most active author (0.0–1.0).
    pub dominant_author_ratio: f64,
    /// `true` when `dominant_author_ratio` is strictly above 0.80.
    pub is_knowledge_silo: bool,
}
47
/// One author's contribution to a single file.
///
/// Field names are serialized in camelCase.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct AuthorContribution {
    /// Author name as recorded on the commits.
    pub name: String,
    /// Author email as recorded on the commits.
    pub email: String,
    /// Number of commits by this author that touched the file.
    pub commits: u32,
    /// `commits` divided by the file's total commit count (0.0–1.0).
    pub ratio: f64,
}
75
/// Project-wide ownership report produced by `analyze_ownership`.
///
/// Field names are serialized in camelCase.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct OwnershipSummary {
    /// Number of entries in `files`.
    pub total_files: usize,
    /// Number of files with exactly one author entry.
    pub single_author_files: usize,
    /// Number of files flagged as knowledge silos (dominant author above 80%).
    pub knowledge_silos: usize,
    /// Result of the greedy author-removal estimate over all files.
    pub project_bus_factor: u32,
    /// Per-file statistics, sorted by dominant author ratio, highest first.
    pub files: Vec<FileOwnership>,
}
106
107pub fn analyze_ownership(commits: &[CommitInfo]) -> Result<OwnershipSummary, ArgusError> {
135 let mut file_authors: HashMap<String, HashMap<(String, String), u32>> = HashMap::new();
138
139 for commit in commits {
140 let author_key = (commit.author.clone(), commit.email.clone());
141 for file in &commit.files_changed {
142 *file_authors
143 .entry(file.path.clone())
144 .or_default()
145 .entry(author_key.clone())
146 .or_default() += 1;
147 }
148 }
149
150 let mut files = Vec::new();
151 let mut single_author_files = 0usize;
152 let mut knowledge_silos = 0usize;
153
154 for (path, author_map) in &file_authors {
155 let total_commits: u32 = author_map.values().sum();
156 if total_commits == 0 {
157 continue;
158 }
159
160 let mut author_contribs: Vec<AuthorContribution> = Vec::new();
161 let mut max_commits = 0u32;
162
163 for ((name, email), count) in author_map {
164 let ratio = *count as f64 / total_commits as f64;
165 if *count > max_commits {
166 max_commits = *count;
167 }
168 author_contribs.push(AuthorContribution {
169 name: name.clone(),
170 email: email.clone(),
171 commits: *count,
172 ratio,
173 });
174 }
175
176 author_contribs.sort_by(|a, b| b.commits.cmp(&a.commits));
178
179 let dominant_author_ratio = max_commits as f64 / total_commits as f64;
180 let bus_factor = author_contribs.iter().filter(|a| a.ratio > 0.10).count() as u32;
181 let is_silo = dominant_author_ratio > 0.80;
182
183 if author_contribs.len() == 1 {
184 single_author_files += 1;
185 }
186 if is_silo {
187 knowledge_silos += 1;
188 }
189
190 files.push(FileOwnership {
191 path: path.clone(),
192 total_commits,
193 authors: author_contribs,
194 bus_factor,
195 dominant_author_ratio,
196 is_knowledge_silo: is_silo,
197 });
198 }
199
200 files.sort_by(|a, b| {
202 b.dominant_author_ratio
203 .partial_cmp(&a.dominant_author_ratio)
204 .unwrap_or(std::cmp::Ordering::Equal)
205 });
206
207 let project_bus_factor = compute_project_bus_factor(&files);
208
209 Ok(OwnershipSummary {
210 total_files: files.len(),
211 single_author_files,
212 knowledge_silos,
213 project_bus_factor,
214 files,
215 })
216}
217
218fn compute_project_bus_factor(files: &[FileOwnership]) -> u32 {
223 if files.is_empty() {
224 return 0;
225 }
226
227 let mut all_authors: HashMap<String, u32> = HashMap::new();
229 for file in files {
230 for author in &file.authors {
231 *all_authors.entry(author.email.clone()).or_default() += 1;
232 }
233 }
234
235 let mut sorted_authors: Vec<(String, u32)> = all_authors.into_iter().collect();
237 sorted_authors.sort_by(|a, b| b.1.cmp(&a.1));
238
239 let total_files = files.len();
240 let threshold = total_files / 2;
241 let mut removed_authors: std::collections::HashSet<String> = std::collections::HashSet::new();
242 let mut removals = 0u32;
243
244 for (author_email, _) in &sorted_authors {
245 removed_authors.insert(author_email.clone());
246 removals += 1;
247
248 let mut orphaned = 0usize;
250 for file in files {
251 let has_significant_author = file
252 .authors
253 .iter()
254 .any(|a| a.ratio > 0.10 && !removed_authors.contains(&a.email));
255 if !has_significant_author {
256 orphaned += 1;
257 }
258 }
259
260 if orphaned > threshold {
261 return removals;
262 }
263 }
264
265 removals
266}
267
#[cfg(test)]
mod tests {
    use super::*;
    use crate::mining::{ChangeStatus, FileChange};

    /// Builds a fixture commit by `author`/`email` that modifies every path in `files`.
    fn make_commit(author: &str, email: &str, files: Vec<&str>) -> CommitInfo {
        let files_changed = files
            .iter()
            .map(|&path| FileChange {
                path: path.into(),
                lines_added: 5,
                lines_deleted: 2,
                status: ChangeStatus::Modified,
            })
            .collect();

        CommitInfo {
            hash: "abc".into(),
            author: author.into(),
            email: email.into(),
            timestamp: 1000,
            message: "test".into(),
            files_changed,
        }
    }

    /// Approximate float equality used by the ratio assertions.
    fn approx_eq(actual: f64, expected: f64) -> bool {
        (actual - expected).abs() < f64::EPSILON
    }

    #[test]
    fn single_author_file_is_knowledge_silo() {
        // Three commits, all by the same author, all on the same file.
        let commits: Vec<CommitInfo> = (0..3)
            .map(|_| make_commit("alice", "alice@example.com", vec!["main.rs"]))
            .collect();

        let summary = analyze_ownership(&commits).unwrap();
        assert_eq!(summary.total_files, 1);
        assert_eq!(summary.single_author_files, 1);
        assert_eq!(summary.knowledge_silos, 1);

        let file = &summary.files[0];
        assert_eq!(file.bus_factor, 1);
        assert!(file.is_knowledge_silo);
        assert!(approx_eq(file.dominant_author_ratio, 1.0));
    }

    #[test]
    fn five_equal_authors_not_a_silo() {
        // One commit each from five distinct authors on the same file.
        let commits: Vec<CommitInfo> = ["alice", "bob", "carol", "dave", "eve"]
            .iter()
            .map(|name| make_commit(name, &format!("{}@e.com", name), vec!["main.rs"]))
            .collect();

        let summary = analyze_ownership(&commits).unwrap();
        let file = &summary.files[0];
        assert_eq!(file.bus_factor, 5);
        assert!(!file.is_knowledge_silo);
        assert!(approx_eq(file.dominant_author_ratio, 0.2));
    }

    #[test]
    fn dominant_author_ratio_calculation() {
        // Alice: 3 of 4 commits -> 0.75, which is below the 0.80 silo cut-off.
        let mut commits: Vec<CommitInfo> = (0..3)
            .map(|_| make_commit("alice", "alice@e.com", vec!["main.rs"]))
            .collect();
        commits.push(make_commit("bob", "bob@e.com", vec!["main.rs"]));

        let summary = analyze_ownership(&commits).unwrap();
        let file = &summary.files[0];
        assert!(approx_eq(file.dominant_author_ratio, 0.75));
        assert!(!file.is_knowledge_silo);
    }

    #[test]
    fn project_bus_factor_calculation() {
        // Three files with one exclusive author each: threshold is 3 / 2 = 1,
        // so two removals are needed before more than one file is orphaned.
        let commits: Vec<CommitInfo> = [
            ("alice", "alice@e.com", "file1.rs"),
            ("bob", "bob@e.com", "file2.rs"),
            ("carol", "carol@e.com", "file3.rs"),
        ]
        .iter()
        .map(|&(name, email, path)| make_commit(name, email, vec![path]))
        .collect();

        let summary = analyze_ownership(&commits).unwrap();
        assert_eq!(summary.project_bus_factor, 2);
    }
}