1use self::metrics::GitMetrics;
2use self::sig::Sig;
3use crate::cli::MyRegex;
4use anyhow::Result;
5use gix::bstr::BString;
6use gix::bstr::ByteSlice;
7use gix::diff::Options;
8use gix::diff::tree_with_rewrites::Change;
9use gix::prelude::ObjectIdExt;
10use gix::revision::walk::Sorting;
11use gix::traverse::commit::simple::CommitTimeOrder;
12use gix::{Commit, ObjectId};
13use std::collections::HashMap;
14use std::sync::Arc;
15use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
16use std::sync::mpsc::{Sender, channel};
17use std::thread::JoinHandle;
18
19pub mod metrics;
20pub mod sig;
21
22pub fn traverse_commit_graph(
23 repo: &gix::Repository,
24 no_bots: Option<MyRegex>,
25 churn_pool_size: Option<usize>,
26 no_merges: bool,
27) -> Result<GitMetrics> {
28 let mut time_of_most_recent_commit = None;
29 let mut time_of_first_commit = None;
30 let mut number_of_commits_by_signature: HashMap<Sig, usize> = HashMap::new();
31 let mailmap = repo.open_mailmap();
32 let is_traversal_complete = Arc::new(AtomicBool::default());
33 let total_number_of_commits = Arc::new(AtomicUsize::default());
34
35 let commit_graph = repo.commit_graph().ok();
36 let can_use_commit_graph = commit_graph.is_some();
37
38 let commit_iter = repo
39 .head_commit()?
40 .id()
41 .ancestors()
42 .sorting(Sorting::ByCommitTime(CommitTimeOrder::NewestFirst))
43 .use_commit_graph(can_use_commit_graph)
44 .with_commit_graph(commit_graph)
45 .all()?;
46
47 let (churn_thread, churn_tx) = get_churn_channel(
50 repo,
51 &mailmap,
52 no_bots.clone(),
53 &is_traversal_complete,
54 &total_number_of_commits,
55 churn_pool_size,
56 );
57
58 let mut count = 0;
59 for commit in commit_iter {
60 let commit = commit?;
61 {
62 if no_merges && commit.parent_ids.len() > 1 {
63 continue;
64 }
65
66 update_signature_counts(
67 &commit.object()?,
68 &mailmap,
69 no_bots.as_ref(),
70 &mut number_of_commits_by_signature,
71 )?;
72
73 churn_tx.send(commit.id)?;
74
75 let commit_time = gix::date::Time::new(
76 commit
77 .commit_time
78 .expect("sorting by time yields this field as part of traversal"),
79 0,
80 );
81 time_of_most_recent_commit.get_or_insert(commit_time);
82 time_of_first_commit = commit_time.into();
83
84 count += 1;
85 }
86 }
87
88 total_number_of_commits.store(count, Ordering::SeqCst);
89 is_traversal_complete.store(true, Ordering::SeqCst);
90
91 drop(churn_tx);
92
93 let (number_of_commits_by_file_path, churn_pool_size) =
94 churn_thread.join().expect("never panics")?;
95
96 let git_metrics = GitMetrics::new(
97 number_of_commits_by_signature,
98 number_of_commits_by_file_path,
99 churn_pool_size,
100 time_of_first_commit,
101 time_of_most_recent_commit,
102 );
103
104 Ok(git_metrics)
105}
106
107type NumberOfCommitsByFilepath = HashMap<BString, usize>;
108type ChurnPair = (NumberOfCommitsByFilepath, usize);
109
110fn get_churn_channel(
111 repo: &gix::Repository,
112 mailmap: &gix::mailmap::Snapshot,
113 bot_regex_pattern: Option<MyRegex>,
114 is_traversal_complete: &Arc<AtomicBool>,
115 total_number_of_commits: &Arc<AtomicUsize>,
116 churn_pool_size: Option<usize>,
117) -> (JoinHandle<Result<ChurnPair>>, Sender<ObjectId>) {
118 let (tx, rx) = channel::<gix::hash::ObjectId>();
119 let thread = std::thread::spawn({
120 let repo = repo.clone();
121 let mailmap = mailmap.clone();
122 let bot_regex_pattern = bot_regex_pattern.clone();
123 let is_traversal_complete = is_traversal_complete.clone();
124 let total_number_of_commits = total_number_of_commits.clone();
125 move || -> Result<_> {
126 let mut number_of_commits_by_file_path = NumberOfCommitsByFilepath::new();
127 let mut diffs_computed = 0;
128 while let Ok(commit_id) = rx.recv() {
129 let commit = repo.find_object(commit_id)?.into_commit();
130 if is_bot_commit(&commit, &mailmap, bot_regex_pattern.as_ref())? {
131 continue;
132 }
133 compute_diff_with_parent(&mut number_of_commits_by_file_path, &commit, &repo)?;
134 diffs_computed += 1;
135 if should_break(
136 is_traversal_complete.load(Ordering::Relaxed),
137 total_number_of_commits.load(Ordering::Relaxed),
138 churn_pool_size,
139 diffs_computed,
140 ) {
141 break;
142 }
143 }
144
145 Ok((number_of_commits_by_file_path, diffs_computed))
146 }
147 });
148
149 (thread, tx)
150}
151
/// Decides whether the churn loop may stop before its channel is drained.
///
/// Never stops while the traversal is still running. Once it has finished, stops immediately
/// when no pool size was requested; otherwise stops as soon as the number of computed diffs
/// reaches the requested pool size, capped at the total number of commits.
fn should_break(
    is_traversal_complete: bool,
    total_number_of_commits: usize,
    churn_pool_size_opt: Option<usize>,
    diffs_computed: usize,
) -> bool {
    match churn_pool_size_opt {
        _ if !is_traversal_complete => false,
        None => true,
        Some(pool_size) => diffs_computed >= pool_size.min(total_number_of_commits),
    }
}
166
167fn update_signature_counts(
168 commit: &gix::Commit,
169 mailmap: &gix::mailmap::Snapshot,
170 bot_regex_pattern: Option<&MyRegex>,
171 number_of_commits_by_signature: &mut HashMap<Sig, usize>,
172) -> Result<()> {
173 let sig = mailmap.resolve(commit.author()?);
174 if !is_bot(&sig.name, bot_regex_pattern) {
175 *number_of_commits_by_signature
176 .entry(sig.into())
177 .or_insert(0) += 1;
178 }
179 Ok(())
180}
181
182fn compute_diff_with_parent(
183 change_map: &mut HashMap<BString, usize>,
184 commit: &Commit,
185 repo: &gix::Repository,
186) -> Result<()> {
187 let mut parents = commit.parent_ids();
188 let parents = (
189 parents
190 .next()
191 .and_then(|parent_id| parent_id.object().ok()?.into_commit().tree_id().ok())
192 .unwrap_or_else(|| gix::hash::ObjectId::empty_tree(repo.object_hash()).attach(repo)),
193 parents.next(),
194 );
195
196 if let (parent_tree_id, None) = parents {
197 let old_tree = parent_tree_id.object()?.into_tree();
198 let new_tree = commit.tree()?;
199 let changes =
200 repo.diff_tree_to_tree(&old_tree, &new_tree, Options::default().with_rewrites(None))?;
201 for change in &changes {
202 let is_file_change = match change {
203 Change::Addition { entry_mode, .. } | Change::Modification { entry_mode, .. } => {
204 entry_mode.is_blob()
205 }
206 Change::Deletion { .. } | Change::Rewrite { .. } => false,
207 };
208 if is_file_change {
209 let path = change.location();
210 *change_map.entry(path.to_owned()).or_insert(0) += 1;
211 }
212 }
213 }
214
215 Ok(())
216}
217
218fn is_bot_commit(
219 commit: &Commit,
220 mailmap: &gix::mailmap::Snapshot,
221 bot_regex_pattern: Option<&MyRegex>,
222) -> Result<bool> {
223 if bot_regex_pattern.is_some() {
224 let sig = mailmap.resolve(commit.author()?);
225 Ok(is_bot(&sig.name, bot_regex_pattern))
226 } else {
227 Ok(false)
228 }
229}
230
231fn is_bot(author_name: &BString, bot_regex_pattern: Option<&MyRegex>) -> bool {
232 bot_regex_pattern.is_some_and(|regex| regex.0.is_match(author_name.to_str_lossy().as_ref()))
233}
234
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cli::NO_BOTS_DEFAULT_REGEX_PATTERN;
    use rstest::rstest;
    use std::str::FromStr;

    // The default bot pattern must flag bot-like author names and leave ordinary ones alone.
    #[rstest]
    #[case("John Doe", false)]
    #[case("dependabot[bot]", true)]
    #[case("foo bot", true)]
    #[case("foo-bot", true)]
    #[case("bot", false)]
    fn test_is_bot(#[case] author_name: &str, #[case] expected: bool) -> Result<()> {
        let pattern = MyRegex::from_str(NO_BOTS_DEFAULT_REGEX_PATTERN)?;
        let name = BString::from(author_name);
        assert_eq!(is_bot(&name, Some(&pattern)), expected);
        Ok(())
    }

    // Columns: traversal finished, total commits, requested pool size, diffs computed, expected.
    #[rstest]
    #[case(false, 10, Some(5), 5, false)]
    #[case(true, 10, Some(5), 5, true)]
    #[case(true, 10, Some(8), 5, false)]
    #[case(true, 10, Some(20), 10, true)]
    #[case(true, 10, None, 5, true)]
    fn test_should_break(
        #[case] traversal_finished: bool,
        #[case] commit_total: usize,
        #[case] pool_size: Option<usize>,
        #[case] diffs_done: usize,
        #[case] expected: bool,
    ) {
        assert_eq!(
            should_break(traversal_finished, commit_total, pool_size, diffs_done),
            expected
        );
    }
}