Skip to main content

argus_gitpulse/
ownership.rs

//! Knowledge silo and bus factor analysis.
//!
//! Analyzes code ownership distribution across a project to identify
//! knowledge silos (files dominated by a single author) and compute
//! the project bus factor.
6
7use std::collections::HashMap;
8
9use argus_core::ArgusError;
10use serde::{Deserialize, Serialize};
11
12use crate::mining::CommitInfo;
13
14/// Ownership metrics for a single file.
15///
16/// # Examples
17///
18/// ```
19/// use argus_gitpulse::ownership::FileOwnership;
20///
21/// let ownership = FileOwnership {
22///     path: "src/main.rs".into(),
23///     total_commits: 20,
24///     authors: vec![],
25///     bus_factor: 3,
26///     dominant_author_ratio: 0.45,
27///     is_knowledge_silo: false,
28/// };
29/// assert!(!ownership.is_knowledge_silo);
30/// ```
31#[derive(Debug, Clone, Serialize, Deserialize)]
32#[serde(rename_all = "camelCase")]
33pub struct FileOwnership {
34    /// File path relative to repo root.
35    pub path: String,
36    /// Total commits touching this file.
37    pub total_commits: u32,
38    /// Per-author contribution breakdown.
39    pub authors: Vec<AuthorContribution>,
40    /// Number of authors with >10% contribution.
41    pub bus_factor: u32,
42    /// `max(author_commits) / total_commits`.
43    pub dominant_author_ratio: f64,
44    /// Whether `dominant_author_ratio > 0.80`.
45    pub is_knowledge_silo: bool,
46}
47
48/// Per-author contribution to a file.
49///
50/// # Examples
51///
52/// ```
53/// use argus_gitpulse::ownership::AuthorContribution;
54///
55/// let contrib = AuthorContribution {
56///     name: "alice".into(),
57///     email: "alice@example.com".into(),
58///     commits: 15,
59///     ratio: 0.75,
60/// };
61/// assert!(contrib.ratio > 0.5);
62/// ```
63#[derive(Debug, Clone, Serialize, Deserialize)]
64#[serde(rename_all = "camelCase")]
65pub struct AuthorContribution {
66    /// Author name.
67    pub name: String,
68    /// Author email.
69    pub email: String,
70    /// Number of commits by this author.
71    pub commits: u32,
72    /// `commits / total_commits` for this file.
73    pub ratio: f64,
74}
75
76/// Summary of knowledge distribution across the project.
77///
78/// # Examples
79///
80/// ```
81/// use argus_gitpulse::ownership::OwnershipSummary;
82///
83/// let summary = OwnershipSummary {
84///     total_files: 50,
85///     single_author_files: 10,
86///     knowledge_silos: 15,
87///     project_bus_factor: 2,
88///     files: vec![],
89/// };
90/// assert_eq!(summary.total_files, 50);
91/// ```
92#[derive(Debug, Clone, Serialize, Deserialize)]
93#[serde(rename_all = "camelCase")]
94pub struct OwnershipSummary {
95    /// Total files analyzed.
96    pub total_files: usize,
97    /// Files with only one author.
98    pub single_author_files: usize,
99    /// Files where one author has >80% of commits.
100    pub knowledge_silos: usize,
101    /// Minimum authors to remove to orphan >50% of files.
102    pub project_bus_factor: u32,
103    /// Per-file ownership data.
104    pub files: Vec<FileOwnership>,
105}
106
107/// Analyze code ownership and knowledge distribution.
108///
109/// # Errors
110///
111/// Returns [`ArgusError`] on processing failure.
112///
113/// # Examples
114///
115/// ```
116/// use argus_gitpulse::ownership::analyze_ownership;
117/// use argus_gitpulse::mining::{CommitInfo, FileChange, ChangeStatus};
118///
119/// let commits = vec![
120///     CommitInfo {
121///         hash: "abc".into(),
122///         author: "alice".into(),
123///         email: "alice@example.com".into(),
124///         timestamp: 1000,
125///         message: "init".into(),
126///         files_changed: vec![
127///             FileChange { path: "main.rs".into(), lines_added: 50, lines_deleted: 0, status: ChangeStatus::Added },
128///         ],
129///     },
130/// ];
131/// let summary = analyze_ownership(&commits).unwrap();
132/// assert_eq!(summary.total_files, 1);
133/// ```
134pub fn analyze_ownership(commits: &[CommitInfo]) -> Result<OwnershipSummary, ArgusError> {
135    // Accumulate per-file, per-author commit counts
136    // Key: file path, Value: map of (author_name, email) -> commit count
137    let mut file_authors: HashMap<String, HashMap<(String, String), u32>> = HashMap::new();
138
139    for commit in commits {
140        let author_key = (commit.author.clone(), commit.email.clone());
141        for file in &commit.files_changed {
142            *file_authors
143                .entry(file.path.clone())
144                .or_default()
145                .entry(author_key.clone())
146                .or_default() += 1;
147        }
148    }
149
150    let mut files = Vec::new();
151    let mut single_author_files = 0usize;
152    let mut knowledge_silos = 0usize;
153
154    for (path, author_map) in &file_authors {
155        let total_commits: u32 = author_map.values().sum();
156        if total_commits == 0 {
157            continue;
158        }
159
160        let mut author_contribs: Vec<AuthorContribution> = Vec::new();
161        let mut max_commits = 0u32;
162
163        for ((name, email), count) in author_map {
164            let ratio = *count as f64 / total_commits as f64;
165            if *count > max_commits {
166                max_commits = *count;
167            }
168            author_contribs.push(AuthorContribution {
169                name: name.clone(),
170                email: email.clone(),
171                commits: *count,
172                ratio,
173            });
174        }
175
176        // Sort authors by commits descending
177        author_contribs.sort_by(|a, b| b.commits.cmp(&a.commits));
178
179        let dominant_author_ratio = max_commits as f64 / total_commits as f64;
180        let bus_factor = author_contribs.iter().filter(|a| a.ratio > 0.10).count() as u32;
181        let is_silo = dominant_author_ratio > 0.80;
182
183        if author_contribs.len() == 1 {
184            single_author_files += 1;
185        }
186        if is_silo {
187            knowledge_silos += 1;
188        }
189
190        files.push(FileOwnership {
191            path: path.clone(),
192            total_commits,
193            authors: author_contribs,
194            bus_factor,
195            dominant_author_ratio,
196            is_knowledge_silo: is_silo,
197        });
198    }
199
200    // Sort by dominant_author_ratio descending (silos first)
201    files.sort_by(|a, b| {
202        b.dominant_author_ratio
203            .partial_cmp(&a.dominant_author_ratio)
204            .unwrap_or(std::cmp::Ordering::Equal)
205    });
206
207    let project_bus_factor = compute_project_bus_factor(&files);
208
209    Ok(OwnershipSummary {
210        total_files: files.len(),
211        single_author_files,
212        knowledge_silos,
213        project_bus_factor,
214        files,
215    })
216}
217
218/// Compute the project bus factor.
219///
220/// Iteratively remove the top contributor until >50% of files lose
221/// all "significant" authors (those with >10% ratio).
222fn compute_project_bus_factor(files: &[FileOwnership]) -> u32 {
223    if files.is_empty() {
224        return 0;
225    }
226
227    // Collect all unique authors across all files
228    let mut all_authors: HashMap<String, u32> = HashMap::new();
229    for file in files {
230        for author in &file.authors {
231            *all_authors.entry(author.email.clone()).or_default() += 1;
232        }
233    }
234
235    // Sort authors by number of files they contribute to (descending)
236    let mut sorted_authors: Vec<(String, u32)> = all_authors.into_iter().collect();
237    sorted_authors.sort_by(|a, b| b.1.cmp(&a.1));
238
239    let total_files = files.len();
240    let threshold = total_files / 2;
241    let mut removed_authors: std::collections::HashSet<String> = std::collections::HashSet::new();
242    let mut removals = 0u32;
243
244    for (author_email, _) in &sorted_authors {
245        removed_authors.insert(author_email.clone());
246        removals += 1;
247
248        // Count files that have lost all significant authors
249        let mut orphaned = 0usize;
250        for file in files {
251            let has_significant_author = file
252                .authors
253                .iter()
254                .any(|a| a.ratio > 0.10 && !removed_authors.contains(&a.email));
255            if !has_significant_author {
256                orphaned += 1;
257            }
258        }
259
260        if orphaned > threshold {
261            return removals;
262        }
263    }
264
265    removals
266}
267
#[cfg(test)]
mod tests {
    use super::*;
    use crate::mining::{ChangeStatus, FileChange};

    /// Build a commit by `author` / `email` that modifies every path in `files`.
    fn make_commit(author: &str, email: &str, files: Vec<&str>) -> CommitInfo {
        let mut files_changed = Vec::new();
        for path in files {
            files_changed.push(FileChange {
                path: path.into(),
                lines_added: 5,
                lines_deleted: 2,
                status: ChangeStatus::Modified,
            });
        }

        CommitInfo {
            hash: "abc".into(),
            author: author.into(),
            email: email.into(),
            timestamp: 1000,
            message: "test".into(),
            files_changed,
        }
    }

    /// `true` when `actual` is within `f64::EPSILON` of `expected`.
    fn is_close(actual: f64, expected: f64) -> bool {
        (actual - expected).abs() < f64::EPSILON
    }

    #[test]
    fn single_author_file_is_knowledge_silo() {
        // Three commits to the same file, all by alice.
        let commits: Vec<CommitInfo> = (0..3)
            .map(|_| make_commit("alice", "alice@example.com", vec!["main.rs"]))
            .collect();

        let summary = analyze_ownership(&commits).unwrap();
        assert_eq!(summary.total_files, 1);
        assert_eq!(summary.single_author_files, 1);
        assert_eq!(summary.knowledge_silos, 1);

        let only_file = &summary.files[0];
        assert_eq!(only_file.bus_factor, 1);
        assert!(only_file.is_knowledge_silo);
        assert!(is_close(only_file.dominant_author_ratio, 1.0));
    }

    #[test]
    fn five_equal_authors_not_a_silo() {
        // One commit each from five different authors.
        let people = [
            ("alice", "alice@e.com"),
            ("bob", "bob@e.com"),
            ("carol", "carol@e.com"),
            ("dave", "dave@e.com"),
            ("eve", "eve@e.com"),
        ];
        let commits: Vec<CommitInfo> = people
            .iter()
            .map(|&(name, email)| make_commit(name, email, vec!["main.rs"]))
            .collect();

        let summary = analyze_ownership(&commits).unwrap();
        let shared_file = &summary.files[0];
        assert_eq!(shared_file.bus_factor, 5);
        assert!(!shared_file.is_knowledge_silo);
        assert!(is_close(shared_file.dominant_author_ratio, 0.2));
    }

    #[test]
    fn dominant_author_ratio_calculation() {
        let mut commits: Vec<CommitInfo> = (0..3)
            .map(|_| make_commit("alice", "alice@e.com", vec!["main.rs"]))
            .collect();
        commits.push(make_commit("bob", "bob@e.com", vec!["main.rs"]));

        let summary = analyze_ownership(&commits).unwrap();
        let file = &summary.files[0];
        // alice holds 3 of the 4 commits: 0.75, which is below the 0.80 silo cut-off.
        assert!(is_close(file.dominant_author_ratio, 0.75));
        assert!(!file.is_knowledge_silo);
    }

    #[test]
    fn project_bus_factor_calculation() {
        // Each of the three files has exactly one, distinct, owner.
        let commits = vec![
            make_commit("alice", "alice@e.com", vec!["file1.rs"]),
            make_commit("bob", "bob@e.com", vec!["file2.rs"]),
            make_commit("carol", "carol@e.com", vec!["file3.rs"]),
        ];

        let summary = analyze_ownership(&commits).unwrap();
        // Losing any two owners orphans 2 of 3 files, which is more than 50%.
        assert_eq!(summary.project_bus_factor, 2);
    }
}