Skip to main content

onefetch/info/git/
mod.rs

1use self::metrics::GitMetrics;
2use self::sig::Sig;
3use crate::cli::MyRegex;
4use anyhow::Result;
5use gix::bstr::BString;
6use gix::bstr::ByteSlice;
7use gix::diff::Options;
8use gix::diff::tree_with_rewrites::Change;
9use gix::prelude::ObjectIdExt;
10use gix::revision::walk::Sorting;
11use gix::traverse::commit::simple::CommitTimeOrder;
12use gix::{Commit, ObjectId};
13use std::collections::HashMap;
14use std::sync::Arc;
15use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
16use std::sync::mpsc::{Sender, channel};
17use std::thread::JoinHandle;
18
19pub mod metrics;
20pub mod sig;
21
22pub fn traverse_commit_graph(
23    repo: &gix::Repository,
24    no_bots: Option<MyRegex>,
25    churn_pool_size: Option<usize>,
26    no_merges: bool,
27) -> Result<GitMetrics> {
28    let mut time_of_most_recent_commit = None;
29    let mut time_of_first_commit = None;
30    let mut number_of_commits_by_signature: HashMap<Sig, usize> = HashMap::new();
31    let mailmap = repo.open_mailmap();
32    let is_traversal_complete = Arc::new(AtomicBool::default());
33    let total_number_of_commits = Arc::new(AtomicUsize::default());
34
35    let commit_graph = repo.commit_graph().ok();
36    let can_use_commit_graph = commit_graph.is_some();
37
38    let commit_iter = repo
39        .head_commit()?
40        .id()
41        .ancestors()
42        .sorting(Sorting::ByCommitTime(CommitTimeOrder::NewestFirst))
43        .use_commit_graph(can_use_commit_graph)
44        .with_commit_graph(commit_graph)
45        .all()?;
46
47    // Best-effort strategy for Churn computation: keep computing churn while traversal runs;
48    // it stops once traversal is done and churn_pool_size is reached (if provided).
49    let (churn_thread, churn_tx) = get_churn_channel(
50        repo,
51        &mailmap,
52        no_bots.clone(),
53        &is_traversal_complete,
54        &total_number_of_commits,
55        churn_pool_size,
56    );
57
58    let mut count = 0;
59    for commit in commit_iter {
60        let commit = commit?;
61        {
62            if no_merges && commit.parent_ids.len() > 1 {
63                continue;
64            }
65
66            update_signature_counts(
67                &commit.object()?,
68                &mailmap,
69                no_bots.as_ref(),
70                &mut number_of_commits_by_signature,
71            )?;
72
73            churn_tx.send(commit.id)?;
74
75            let commit_time = gix::date::Time::new(
76                commit
77                    .commit_time
78                    .expect("sorting by time yields this field as part of traversal"),
79                0,
80            );
81            time_of_most_recent_commit.get_or_insert(commit_time);
82            time_of_first_commit = commit_time.into();
83
84            count += 1;
85        }
86    }
87
88    total_number_of_commits.store(count, Ordering::SeqCst);
89    is_traversal_complete.store(true, Ordering::SeqCst);
90
91    drop(churn_tx);
92
93    let (number_of_commits_by_file_path, churn_pool_size) =
94        churn_thread.join().expect("never panics")?;
95
96    let git_metrics = GitMetrics::new(
97        number_of_commits_by_signature,
98        number_of_commits_by_file_path,
99        churn_pool_size,
100        time_of_first_commit,
101        time_of_most_recent_commit,
102    );
103
104    Ok(git_metrics)
105}
106
107type NumberOfCommitsByFilepath = HashMap<BString, usize>;
108type ChurnPair = (NumberOfCommitsByFilepath, usize);
109
110fn get_churn_channel(
111    repo: &gix::Repository,
112    mailmap: &gix::mailmap::Snapshot,
113    bot_regex_pattern: Option<MyRegex>,
114    is_traversal_complete: &Arc<AtomicBool>,
115    total_number_of_commits: &Arc<AtomicUsize>,
116    churn_pool_size: Option<usize>,
117) -> (JoinHandle<Result<ChurnPair>>, Sender<ObjectId>) {
118    let (tx, rx) = channel::<gix::hash::ObjectId>();
119    let thread = std::thread::spawn({
120        let repo = repo.clone();
121        let mailmap = mailmap.clone();
122        let bot_regex_pattern = bot_regex_pattern.clone();
123        let is_traversal_complete = is_traversal_complete.clone();
124        let total_number_of_commits = total_number_of_commits.clone();
125        move || -> Result<_> {
126            let mut number_of_commits_by_file_path = NumberOfCommitsByFilepath::new();
127            let mut diffs_computed = 0;
128            while let Ok(commit_id) = rx.recv() {
129                let commit = repo.find_object(commit_id)?.into_commit();
130                if is_bot_commit(&commit, &mailmap, bot_regex_pattern.as_ref())? {
131                    continue;
132                }
133                compute_diff_with_parent(&mut number_of_commits_by_file_path, &commit, &repo)?;
134                diffs_computed += 1;
135                if should_break(
136                    is_traversal_complete.load(Ordering::Relaxed),
137                    total_number_of_commits.load(Ordering::Relaxed),
138                    churn_pool_size,
139                    diffs_computed,
140                ) {
141                    break;
142                }
143            }
144
145            Ok((number_of_commits_by_file_path, diffs_computed))
146        }
147    });
148
149    (thread, tx)
150}
151
/// Decides whether the churn computation may stop.
///
/// It never stops while the traversal is still running. Once the traversal is complete it
/// stops right away when no pool size was requested; otherwise it stops as soon as
/// `diffs_computed` reaches the pool size, capped at `total_number_of_commits`.
fn should_break(
    is_traversal_complete: bool,
    total_number_of_commits: usize,
    churn_pool_size_opt: Option<usize>,
    diffs_computed: usize,
) -> bool {
    match (is_traversal_complete, churn_pool_size_opt) {
        // Traversal still running: more commits may arrive, keep going.
        (false, _) => false,
        // No target requested: nothing left to wait for.
        (true, None) => true,
        // The target cannot exceed the number of commits that actually exist.
        (true, Some(requested_pool_size)) => {
            let target = requested_pool_size.min(total_number_of_commits);
            diffs_computed >= target
        }
    }
}
166
167fn update_signature_counts(
168    commit: &gix::Commit,
169    mailmap: &gix::mailmap::Snapshot,
170    bot_regex_pattern: Option<&MyRegex>,
171    number_of_commits_by_signature: &mut HashMap<Sig, usize>,
172) -> Result<()> {
173    let sig = mailmap.resolve(commit.author()?);
174    if !is_bot(&sig.name, bot_regex_pattern) {
175        *number_of_commits_by_signature
176            .entry(sig.into())
177            .or_insert(0) += 1;
178    }
179    Ok(())
180}
181
182fn compute_diff_with_parent(
183    change_map: &mut HashMap<BString, usize>,
184    commit: &Commit,
185    repo: &gix::Repository,
186) -> Result<()> {
187    let mut parents = commit.parent_ids();
188    let parents = (
189        parents
190            .next()
191            .and_then(|parent_id| parent_id.object().ok()?.into_commit().tree_id().ok())
192            .unwrap_or_else(|| gix::hash::ObjectId::empty_tree(repo.object_hash()).attach(repo)),
193        parents.next(),
194    );
195
196    if let (parent_tree_id, None) = parents {
197        let old_tree = parent_tree_id.object()?.into_tree();
198        let new_tree = commit.tree()?;
199        let changes =
200            repo.diff_tree_to_tree(&old_tree, &new_tree, Options::default().with_rewrites(None))?;
201        for change in &changes {
202            let is_file_change = match change {
203                Change::Addition { entry_mode, .. } | Change::Modification { entry_mode, .. } => {
204                    entry_mode.is_blob()
205                }
206                Change::Deletion { .. } | Change::Rewrite { .. } => false,
207            };
208            if is_file_change {
209                let path = change.location();
210                *change_map.entry(path.to_owned()).or_insert(0) += 1;
211            }
212        }
213    }
214
215    Ok(())
216}
217
218fn is_bot_commit(
219    commit: &Commit,
220    mailmap: &gix::mailmap::Snapshot,
221    bot_regex_pattern: Option<&MyRegex>,
222) -> Result<bool> {
223    if bot_regex_pattern.is_some() {
224        let sig = mailmap.resolve(commit.author()?);
225        Ok(is_bot(&sig.name, bot_regex_pattern))
226    } else {
227        Ok(false)
228    }
229}
230
231fn is_bot(author_name: &BString, bot_regex_pattern: Option<&MyRegex>) -> bool {
232    bot_regex_pattern.is_some_and(|regex| regex.0.is_match(author_name.to_str_lossy().as_ref()))
233}
234
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cli::NO_BOTS_DEFAULT_REGEX_PATTERN;
    use rstest::rstest;
    use std::str::FromStr;

    /// Checks `is_bot` against the default bot pattern for a handful of author names.
    #[rstest]
    #[case("John Doe", false)]
    #[case("dependabot[bot]", true)]
    #[case("foo bot", true)]
    #[case("foo-bot", true)]
    #[case("bot", false)]
    fn test_is_bot(#[case] author_name: &str, #[case] expected: bool) -> Result<()> {
        let from_str = MyRegex::from_str(NO_BOTS_DEFAULT_REGEX_PATTERN);
        let no_bots: Option<MyRegex> = Some(from_str?);
        assert_eq!(is_bot(&author_name.into(), no_bots.as_ref()), expected);
        Ok(())
    }

    /// Checks `should_break` for every combination of traversal state and pool size.
    #[rstest]
    // Traversal still running: never break, whatever the pool size.
    #[case(false, 10, Some(5), 5, false)]
    #[case(false, 10, None, 5, false)]
    // Traversal complete and the requested pool size is reached.
    #[case(true, 10, Some(5), 5, true)]
    // Traversal complete but the requested pool size is not reached yet.
    #[case(true, 10, Some(8), 5, false)]
    // Requested pool size larger than the history: capped at the commit total.
    #[case(true, 10, Some(20), 10, true)]
    // No pool size requested: break as soon as the traversal is complete.
    #[case(true, 10, None, 5, true)]
    // No commits at all: the cap is zero, so break immediately.
    #[case(true, 0, Some(5), 0, true)]
    fn test_should_break(
        #[case] has_commit_graph_traversal_ended: bool,
        #[case] total_number_of_commits: usize,
        #[case] churn_pool_size_opt: Option<usize>,
        #[case] number_of_diffs_computed: usize,
        #[case] expected: bool,
    ) {
        let result = should_break(
            has_commit_graph_traversal_ended,
            total_number_of_commits,
            churn_pool_size_opt,
            number_of_diffs_computed,
        );

        assert_eq!(result, expected);
    }
}