1use std::collections::HashMap;
2use std::path::Path;
3
4use git2::{Repository, Sort};
5
/// Ownership entries for one file, each `(author name, author email, share)`.
///
/// `share` is the fraction of the file's blamed lines attributed to that
/// author, as produced by `compute_blame`.
type OwnerList = Vec<(String, String, f64)>;

/// Co-change entries, each `(file a, file b, score)`.
///
/// `score` is the number of commits touching both files divided by the
/// highest such count of any pair, as produced by
/// `compute_churn_and_co_changes`.
type CoChangeList = Vec<(String, String, f64)>;

/// Aggregated results of a git history analysis of one repository.
#[derive(Debug, Clone, Default)]
pub struct GitAnalysis {
    /// File path -> churn score: the file's recent commit count divided by
    /// the commit count of the most frequently changed file.
    pub file_churn: HashMap<String, f64>,
    /// File path -> top owners of that file, largest share first.
    pub file_owners: HashMap<String, OwnerList>,
    /// Pairs of files that were changed together in the same commits.
    pub co_changes: CoChangeList,
}
18
19pub fn analyze_repo(repo_path: &Path, file_paths: &[String]) -> anyhow::Result<GitAnalysis> {
25 let repo = Repository::open(repo_path)?;
26
27 let (file_churn, co_changes) = compute_churn_and_co_changes(&repo)?;
28 let file_owners = compute_blame(&repo, file_paths)?;
29
30 Ok(GitAnalysis {
31 file_churn,
32 file_owners,
33 co_changes,
34 })
35}
36
37fn compute_churn_and_co_changes(
38 repo: &Repository,
39) -> anyhow::Result<(HashMap<String, f64>, CoChangeList)> {
40 let churn_cutoff = chrono::Utc::now().timestamp() - 90 * 86400;
42 let co_change_cutoff = chrono::Utc::now().timestamp() - 365 * 86400;
43
44 let mut commit_counts: HashMap<String, u32> = HashMap::new();
45 let mut pair_counts: HashMap<(String, String), u32> = HashMap::new();
46 let mut max_churn: u32 = 0;
47 let mut max_co: u32 = 0;
48
49 let mut revwalk = repo.revwalk()?;
50 revwalk.push_head()?;
51 revwalk.set_sorting(Sort::TIME)?;
52
53 for oid_result in revwalk {
54 let oid = match oid_result {
55 Ok(o) => o,
56 Err(_) => continue,
57 };
58 let commit = match repo.find_commit(oid) {
59 Ok(c) => c,
60 Err(_) => continue,
61 };
62
63 let commit_ts = commit.time().seconds();
64 if commit_ts < co_change_cutoff {
65 break;
66 }
67
68 let commit_tree = match commit.tree() {
69 Ok(t) => t,
70 Err(_) => continue,
71 };
72
73 let mut parent_tree = None;
74 if let Ok(parent) = commit.parent(0) {
75 if let Ok(tree) = parent.tree() {
76 parent_tree = Some(tree);
77 }
78 }
79
80 let diff = repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&commit_tree), None)?;
81
82 let mut changed_files: Vec<String> = Vec::new();
83
84 diff.foreach(
85 &mut |delta, _| {
86 if let Some(path) = delta.new_file().path() {
87 if let Some(s) = path.to_str() {
88 changed_files.push(s.to_string());
89 }
90 }
91 true
92 },
93 None,
94 None,
95 None,
96 )?;
97
98 changed_files.sort();
99 changed_files.dedup();
100
101 if commit_ts >= churn_cutoff {
103 for file in &changed_files {
104 let count = commit_counts.entry(file.clone()).or_insert(0);
105 *count += 1;
106 if *count > max_churn {
107 max_churn = *count;
108 }
109 }
110 }
111
112 for i in 0..changed_files.len() {
113 for j in (i + 1)..changed_files.len() {
114 let pair = (changed_files[i].clone(), changed_files[j].clone());
115 let count = pair_counts.entry(pair).or_insert(0);
116 *count += 1;
117 if *count > max_co {
118 max_co = *count;
119 }
120 }
121 }
122 }
123
124 let mut churn_map = HashMap::new();
125 if max_churn > 0 {
126 for (file, count) in commit_counts {
127 churn_map.insert(file, count as f64 / max_churn as f64);
128 }
129 }
130
131 let mut co_results: CoChangeList = Vec::new();
132 let min_co_count = 2u32;
133 if max_co > 0 {
134 for ((a, b), count) in pair_counts {
135 if count >= min_co_count {
136 co_results.push((a, b, count as f64 / max_co as f64));
137 }
138 }
139 }
140
141 Ok((churn_map, co_results))
142}
143
144fn compute_blame(
145 repo: &Repository,
146 file_paths: &[String],
147) -> anyhow::Result<HashMap<String, OwnerList>> {
148 let mut owners: HashMap<String, OwnerList> = HashMap::new();
149
150 for file_path in file_paths {
151 let blame = match repo.blame_file(std::path::Path::new(file_path), None) {
152 Ok(b) => b,
153 Err(e) => {
154 tracing::debug!("blame failed for {}: {}", file_path, e);
155 continue;
156 }
157 };
158
159 let mut author_lines: HashMap<String, (String, u32)> = HashMap::new();
160 let mut total_lines: u32 = 0;
161
162 for hunk in blame.iter() {
163 let sig = hunk.final_signature();
164 let name = sig.name().unwrap_or("unknown").to_string();
165 let email = sig.email().unwrap_or("unknown").to_string();
166 let lines = hunk.lines_in_hunk() as u32;
167
168 let key = email.clone();
169 let entry = author_lines.entry(key).or_insert((name, 0));
170 entry.1 += lines;
171 total_lines += lines;
172 }
173
174 if total_lines > 0 {
175 let mut file_owners: Vec<(String, String, f64)> = author_lines
176 .into_iter()
177 .map(|(email, (name, lines))| (name, email, lines as f64 / total_lines as f64))
178 .collect();
179 file_owners.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));
180 file_owners.truncate(3);
181 owners.insert(file_path.clone(), file_owners);
182 }
183 }
184
185 Ok(owners)
186}