1use std::collections::HashMap;
2use std::path::Path;
3
4use git2::{Repository, Sort};
5
/// Owners of a single file, as `(author name, author e-mail, share)` tuples.
///
/// `share` is the fraction of the file's blamed lines attributed to that e-mail.
type OwnerList = Vec<(String, String, f64)>;

/// Co-changing file pairs, as `(path_a, path_b, score)` tuples.
///
/// `score` is the number of commits touching both paths, divided by the highest
/// such count observed for any pair.
type CoChangeList = Vec<(String, String, f64)>;

/// Result of analysing a repository's Git history.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct GitAnalysis {
    /// Per-file churn: recent commit count normalized by the busiest file's count.
    pub file_churn: HashMap<String, f64>,
    /// Per-file list of the authors owning the largest share of blamed lines.
    pub file_owners: HashMap<String, OwnerList>,
    /// Pairs of files that were repeatedly changed in the same commit.
    pub co_changes: CoChangeList,
}
14
15pub fn analyze_repo(repo_path: &Path, file_paths: &[String]) -> anyhow::Result<GitAnalysis> {
16 let repo = Repository::open(repo_path)?;
17
18 let (file_churn, co_changes) = compute_churn_and_co_changes(&repo)?;
19 let file_owners = compute_blame(&repo, file_paths)?;
20
21 Ok(GitAnalysis {
22 file_churn,
23 file_owners,
24 co_changes,
25 })
26}
27
28fn compute_churn_and_co_changes(
29 repo: &Repository,
30) -> anyhow::Result<(HashMap<String, f64>, CoChangeList)> {
31 let churn_cutoff = chrono::Utc::now().timestamp() - 90 * 86400;
33 let co_change_cutoff = chrono::Utc::now().timestamp() - 365 * 86400;
34
35 let mut commit_counts: HashMap<String, u32> = HashMap::new();
36 let mut pair_counts: HashMap<(String, String), u32> = HashMap::new();
37 let mut max_churn: u32 = 0;
38 let mut max_co: u32 = 0;
39
40 let mut revwalk = repo.revwalk()?;
41 revwalk.push_head()?;
42 revwalk.set_sorting(Sort::TIME)?;
43
44 for oid_result in revwalk {
45 let oid = match oid_result {
46 Ok(o) => o,
47 Err(_) => continue,
48 };
49 let commit = match repo.find_commit(oid) {
50 Ok(c) => c,
51 Err(_) => continue,
52 };
53
54 let commit_ts = commit.time().seconds();
55 if commit_ts < co_change_cutoff {
56 break;
57 }
58
59 let commit_tree = match commit.tree() {
60 Ok(t) => t,
61 Err(_) => continue,
62 };
63
64 let mut parent_tree = None;
65 if let Ok(parent) = commit.parent(0) {
66 if let Ok(tree) = parent.tree() {
67 parent_tree = Some(tree);
68 }
69 }
70
71 let diff =
72 repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&commit_tree), None)?;
73
74 let mut changed_files: Vec<String> = Vec::new();
75
76 diff.foreach(
77 &mut |delta, _| {
78 if let Some(path) = delta.new_file().path() {
79 if let Some(s) = path.to_str() {
80 changed_files.push(s.to_string());
81 }
82 }
83 true
84 },
85 None,
86 None,
87 None,
88 )?;
89
90 changed_files.sort();
91 changed_files.dedup();
92
93 if commit_ts >= churn_cutoff {
95 for file in &changed_files {
96 let count = commit_counts.entry(file.clone()).or_insert(0);
97 *count += 1;
98 if *count > max_churn {
99 max_churn = *count;
100 }
101 }
102 }
103
104 for i in 0..changed_files.len() {
105 for j in (i + 1)..changed_files.len() {
106 let pair = (changed_files[i].clone(), changed_files[j].clone());
107 let count = pair_counts.entry(pair).or_insert(0);
108 *count += 1;
109 if *count > max_co {
110 max_co = *count;
111 }
112 }
113 }
114 }
115
116 let mut churn_map = HashMap::new();
117 if max_churn > 0 {
118 for (file, count) in commit_counts {
119 churn_map.insert(file, count as f64 / max_churn as f64);
120 }
121 }
122
123 let mut co_results: CoChangeList = Vec::new();
124 let min_co_count = 2u32;
125 if max_co > 0 {
126 for ((a, b), count) in pair_counts {
127 if count >= min_co_count {
128 co_results.push((a, b, count as f64 / max_co as f64));
129 }
130 }
131 }
132
133 Ok((churn_map, co_results))
134}
135
136fn compute_blame(
137 repo: &Repository,
138 file_paths: &[String],
139) -> anyhow::Result<HashMap<String, OwnerList>> {
140 let mut owners: HashMap<String, OwnerList> = HashMap::new();
141
142 for file_path in file_paths {
143 let blame = match repo.blame_file(std::path::Path::new(file_path), None) {
144 Ok(b) => b,
145 Err(e) => {
146 tracing::debug!("blame failed for {}: {}", file_path, e);
147 continue;
148 }
149 };
150
151 let mut author_lines: HashMap<String, (String, u32)> = HashMap::new();
152 let mut total_lines: u32 = 0;
153
154 for hunk in blame.iter() {
155 let sig = hunk.final_signature();
156 let name = sig.name().unwrap_or("unknown").to_string();
157 let email = sig.email().unwrap_or("unknown").to_string();
158 let lines = hunk.lines_in_hunk() as u32;
159
160 let key = email.clone();
161 let entry = author_lines.entry(key).or_insert((name, 0));
162 entry.1 += lines;
163 total_lines += lines;
164 }
165
166 if total_lines > 0 {
167 let mut file_owners: Vec<(String, String, f64)> = author_lines
168 .into_iter()
169 .map(|(email, (name, lines))| {
170 (name, email, lines as f64 / total_lines as f64)
171 })
172 .collect();
173 file_owners
174 .sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));
175 file_owners.truncate(3);
176 owners.insert(file_path.clone(), file_owners);
177 }
178 }
179
180 Ok(owners)
181}