gitoxide_core/hours/
mod.rs

1use std::{collections::BTreeSet, io, path::Path, time::Instant};
2
3use anyhow::bail;
4use gix::{
5    actor,
6    bstr::{BStr, ByteSlice},
7    prelude::*,
8    progress, Count, NestedProgress, Progress,
9};
10
/// Options that tune how the hours estimation gathers its data and what it reports.
pub struct Context<W> {
    /// If set, leave out GitHub bots, which are recognized by the `[bot]` search string.
    pub ignore_bots: bool,
    /// If set, print personally identifiable information (names and email addresses) ahead of the summary.
    pub show_pii: bool,
    /// If set, count how many files were added, removed and modified (renames are not tracked).
    pub file_stats: bool,
    /// If set, count how many lines in files were added, removed and modified (renames are not tracked).
    pub line_stats: bool,
    /// The amount of threads to use. If unset, use all cores, if 0 use all physical cores.
    pub threads: Option<usize>,
    /// If set, do not unify identities by name and email. The same author may then show up several times
    /// if they committed under different names or email addresses.
    pub omit_unify_identities: bool,
    /// The destination that receives all of our output.
    pub out: W,
}
29
30/// Estimate the hours it takes to produce the content of the repository in `_working_dir_`, with `_refname_` for
31/// the start of the commit graph traversal.
32///
33/// * `_working_dir_` - The directory containing a '.git/' folder.
34/// * `_refname_` - The name of the ref like 'main' or 'master' at which to start iterating the commit graph.
35/// * `_progress_` - A way to provide progress and performance information
36pub fn estimate<W, P>(
37    working_dir: &Path,
38    rev_spec: &BStr,
39    mut progress: P,
40    Context {
41        show_pii,
42        ignore_bots,
43        file_stats,
44        line_stats,
45        omit_unify_identities,
46        threads,
47        mut out,
48    }: Context<W>,
49) -> anyhow::Result<()>
50where
51    W: io::Write,
52    P: NestedProgress,
53{
54    let repo = gix::discover(working_dir)?;
55    let commit_id = repo.rev_parse_single(rev_spec)?.detach();
56    let mut string_heap = BTreeSet::<&'static [u8]>::new();
57    let needs_stats = file_stats || line_stats;
58    let threads = gix::features::parallel::num_threads(threads);
59
60    let (commit_authors, stats, is_shallow, skipped_merge_commits) = {
61        std::thread::scope(|scope| -> anyhow::Result<_> {
62            let start = Instant::now();
63            let (tx, rx) = std::sync::mpsc::channel::<(u32, Vec<u8>)>();
64            let mailmap = repo.open_mailmap();
65
66            let extract_signatures = scope.spawn(move || -> anyhow::Result<Vec<_>> {
67                let mut out = Vec::new();
68                for (commit_idx, commit_data) in rx {
69                    if let Ok(author) = gix::objs::CommitRefIter::from_bytes(&commit_data)
70                        .author()
71                        .map(|author| mailmap.resolve_cow(author.trim()))
72                    {
73                        let mut string_ref = |s: &[u8]| -> &'static BStr {
74                            match string_heap.get(s) {
75                                Some(n) => n.as_bstr(),
76                                None => {
77                                    let sv: Vec<u8> = s.to_owned();
78                                    string_heap.insert(Box::leak(sv.into_boxed_slice()));
79                                    (*string_heap.get(s).expect("present")).as_ref()
80                                }
81                            }
82                        };
83                        let name = string_ref(author.name.as_ref());
84                        let email = string_ref(author.email.as_ref());
85
86                        out.push((
87                            commit_idx,
88                            actor::SignatureRef {
89                                name,
90                                email,
91                                time: author.time,
92                            },
93                        ));
94                    }
95                }
96                out.shrink_to_fit();
97                out.sort_by(|a, b| {
98                    a.1.email
99                        .cmp(b.1.email)
100                        .then(a.1.time.seconds.cmp(&b.1.time.seconds).reverse())
101                });
102                Ok(out)
103            });
104
105            let (stats_progresses, stats_counters) = needs_stats
106                .then(|| {
107                    let mut sp = progress.add_child("extract stats");
108                    sp.init(None, progress::count("commits"));
109                    let sc = sp.counter();
110
111                    let mut cp = progress.add_child("find changes");
112                    cp.init(None, progress::count("modified files"));
113                    let cc = cp.counter();
114
115                    let mut lp = progress.add_child("find changes");
116                    lp.init(None, progress::count("diff lines"));
117                    let lc = lp.counter();
118
119                    (Some((sp, cp, lp)), Some((sc, cc, lc)))
120                })
121                .unwrap_or_default();
122
123            let mut progress = progress.add_child("traverse commit graph");
124            progress.init(None, progress::count("commits"));
125
126            let (tx_tree_id, stat_threads) = needs_stats
127                .then(|| {
128                    let (tx, threads) = spawn_tree_delta_threads(
129                        scope,
130                        threads,
131                        line_stats,
132                        repo.clone(),
133                        stats_counters.clone().expect("counters are set"),
134                    );
135                    (Some(tx), threads)
136                })
137                .unwrap_or_default();
138
139            let mut commit_idx = 0_u32;
140            let mut skipped_merge_commits = 0;
141            const CHUNK_SIZE: usize = 50;
142            let mut chunk = Vec::with_capacity(CHUNK_SIZE);
143            let mut commit_iter = commit_id.ancestors(&repo.objects);
144            let mut is_shallow = false;
145            while let Some(c) = commit_iter.next() {
146                progress.inc();
147                if gix::interrupt::is_triggered() {
148                    bail!("Cancelled by user");
149                }
150                match c {
151                    Ok(c) => {
152                        tx.send((commit_idx, commit_iter.commit_data().to_owned())).ok();
153                        let tree_delta_info = tx_tree_id.as_ref().and_then(|tx| {
154                            let mut parents = c.parent_ids.into_iter();
155                            parents
156                                .next()
157                                .map(|first_parent| (tx, Some(first_parent), c.id.to_owned()))
158                                .filter(|_| {
159                                    if parents.next().is_some() {
160                                        skipped_merge_commits += 1;
161                                        false
162                                    } else {
163                                        true
164                                    }
165                                })
166                        });
167                        if let Some((tx_tree, first_parent, commit)) = tree_delta_info {
168                            if chunk.len() == CHUNK_SIZE {
169                                tx_tree
170                                    .send(std::mem::replace(&mut chunk, Vec::with_capacity(CHUNK_SIZE)))
171                                    .ok();
172                            } else {
173                                chunk.push((commit_idx, first_parent, commit));
174                            }
175                        }
176                        commit_idx += 1;
177                    }
178                    Err(gix::traverse::commit::simple::Error::Find { .. }) => {
179                        is_shallow = true;
180                        break;
181                    }
182                    Err(err) => return Err(err.into()),
183                };
184            }
185            if let Some(tx) = tx_tree_id {
186                tx.send(chunk).ok();
187            }
188            drop(tx);
189            progress.show_throughput(start);
190            drop(progress);
191
192            let stats_by_commit_idx = match stats_progresses {
193                Some((mut stat_progress, change_progress, line_progress)) => {
194                    stat_progress.set_max(Some(commit_idx as usize - skipped_merge_commits));
195                    let mut stats = Vec::new();
196                    for handle in stat_threads {
197                        stats.extend(handle.join().expect("no panic")?);
198                        if gix::interrupt::is_triggered() {
199                            bail!("Cancelled by user");
200                        }
201                    }
202                    stats.sort_by_key(|t| t.0);
203                    stat_progress.show_throughput(start);
204                    change_progress.show_throughput(start);
205                    line_progress.show_throughput(start);
206                    stats
207                }
208                None => Vec::new(),
209            };
210
211            Ok((
212                extract_signatures.join().expect("no panic")?,
213                stats_by_commit_idx,
214                is_shallow,
215                skipped_merge_commits,
216            ))
217        })?
218    };
219
220    if commit_authors.is_empty() {
221        bail!("No commits to process");
222    }
223
224    let start = Instant::now();
225    let mut current_email = &commit_authors[0].1.email;
226    let mut slice_start = 0;
227    let mut results_by_hours = Vec::new();
228    let mut ignored_bot_commits = 0_u32;
229    for (idx, (_, elm)) in commit_authors.iter().enumerate() {
230        if elm.email != *current_email {
231            let estimate = estimate_hours(&commit_authors[slice_start..idx], &stats);
232            slice_start = idx;
233            current_email = &elm.email;
234            if ignore_bots && estimate.name.contains_str(b"[bot]") {
235                ignored_bot_commits += estimate.num_commits;
236                continue;
237            }
238            results_by_hours.push(estimate);
239        }
240    }
241    if let Some(commits) = commit_authors.get(slice_start..) {
242        results_by_hours.push(estimate_hours(commits, &stats));
243    }
244
245    let num_authors = results_by_hours.len();
246    let mut results_by_hours = if !omit_unify_identities {
247        deduplicate_identities(&results_by_hours)
248    } else {
249        results_by_hours
250            .iter()
251            .fold(Vec::with_capacity(results_by_hours.len()), |mut acc, e| {
252                acc.push(e.into());
253                acc
254            })
255    };
256    let elapsed = start.elapsed();
257    progress.done(format!(
258        "Extracted and organized data from {} commits in {:?} ({:0.0} commits/s)",
259        commit_authors.len(),
260        elapsed,
261        commit_authors.len() as f32 / elapsed.as_secs_f32()
262    ));
263
264    let num_unique_authors = results_by_hours.len();
265    let (total_hours, total_commits, total_files, total_lines) = results_by_hours
266        .iter()
267        .map(|e| (e.hours, e.num_commits, e.files, e.lines))
268        .reduce(|a, b| (a.0 + b.0, a.1 + b.1, a.2.clone().added(&b.2), a.3.clone().added(&b.3)))
269        .expect("at least one commit at this point");
270    if show_pii {
271        results_by_hours.sort_by(|a, b| a.hours.partial_cmp(&b.hours).unwrap_or(std::cmp::Ordering::Equal));
272        for entry in &results_by_hours {
273            entry.write_to(
274                total_hours,
275                file_stats.then_some(total_files),
276                line_stats.then_some(total_lines),
277                &mut out,
278            )?;
279            writeln!(out)?;
280        }
281    }
282    writeln!(
283        out,
284        "total hours: {:.02}\ntotal 8h days: {:.02}\ntotal commits = {}{}\ntotal authors: {}",
285        total_hours,
286        total_hours / HOURS_PER_WORKDAY,
287        total_commits,
288        is_shallow.then_some(" (shallow)").unwrap_or_default(),
289        num_authors
290    )?;
291    if file_stats {
292        writeln!(
293            out,
294            "total files added/removed/modified/remaining: {}/{}/{}/{}",
295            total_files.added,
296            total_files.removed,
297            total_files.modified,
298            total_files.added - total_files.removed
299        )?;
300    }
301    if line_stats {
302        writeln!(
303            out,
304            "total lines added/removed/remaining: {}/{}/{}",
305            total_lines.added,
306            total_lines.removed,
307            total_lines.added - total_lines.removed
308        )?;
309    }
310    if !omit_unify_identities {
311        writeln!(
312            out,
313            "total unique authors: {} ({:.02}% duplication)",
314            num_unique_authors,
315            (1.0 - (num_unique_authors as f32 / num_authors as f32)) * 100.0
316        )?;
317    }
318    if ignored_bot_commits != 0 {
319        writeln!(out, "commits by bots: {ignored_bot_commits}")?;
320    }
321    if needs_stats && skipped_merge_commits != 0 {
322        writeln!(out, "stats omitted for {skipped_merge_commits} merge commits")?;
323    }
324    assert_eq!(
325        total_commits,
326        commit_authors.len() as u32 - ignored_bot_commits,
327        "need to get all commits"
328    );
329    Ok(())
330}
331
332mod core;
333use self::core::{deduplicate_identities, estimate_hours, HOURS_PER_WORKDAY};
334
335mod util;
336use util::{CommitIdx, FileStats, LineStats, WorkByEmail, WorkByPerson};
337
338use crate::hours::core::spawn_tree_delta_threads;