gitoxide_core/hours/
mod.rs

1use std::{collections::BTreeSet, io, path::Path, time::Instant};
2
3use anyhow::bail;
4use gix::{
5    bstr::{BStr, ByteSlice},
6    prelude::*,
7    progress, Count, NestedProgress, Progress,
8};
9
/// Options that tune how [`estimate()`] gathers and reports its data.
pub struct Context<W> {
    /// When set, authors whose name contains the `[bot]` marker (as used by GitHub bots) are left out
    /// of the results and their commits are reported separately.
    pub ignore_bots: bool,
    /// When set, a per-author breakdown is written before the summary. This reveals personally
    /// identifiable information such as names and email addresses.
    pub show_pii: bool,
    /// When set, count how many files were added, removed and modified (no rename tracking).
    pub file_stats: bool,
    /// When set, count how many lines were added and removed in files (no rename tracking).
    pub line_stats: bool,
    /// How many threads to use. If unset, use all cores, if 0 use all physical cores.
    pub threads: Option<usize>,
    /// When set, identities are *not* unified by name and email, which can cause the same author to
    /// appear multiple times if they committed under different names or email addresses.
    pub omit_unify_identities: bool,
    /// The destination that receives the textual report.
    pub out: W,
}
28
29pub struct SignatureRef<'a> {
30    name: &'a BStr,
31    email: &'a BStr,
32    time: gix::date::Time,
33}
34
35impl SignatureRef<'_> {
36    fn seconds(&self) -> gix::date::SecondsSinceUnixEpoch {
37        self.time.seconds
38    }
39}
40
41/// Estimate the hours it takes to produce the content of the repository in `_working_dir_`, with `_refname_` for
42/// the start of the commit graph traversal.
43///
44/// * `_working_dir_` - The directory containing a '.git/' folder.
45/// * `_refname_` - The name of the ref like 'main' or 'master' at which to start iterating the commit graph.
46/// * `_progress_` - A way to provide progress and performance information
47pub fn estimate<W, P>(
48    working_dir: &Path,
49    rev_spec: &BStr,
50    mut progress: P,
51    Context {
52        show_pii,
53        ignore_bots,
54        file_stats,
55        line_stats,
56        omit_unify_identities,
57        threads,
58        mut out,
59    }: Context<W>,
60) -> anyhow::Result<()>
61where
62    W: io::Write,
63    P: NestedProgress,
64{
65    let repo = gix::discover(working_dir)?;
66    let commit_id = repo.rev_parse_single(rev_spec)?.detach();
67    let mut string_heap = BTreeSet::<&'static [u8]>::new();
68    let needs_stats = file_stats || line_stats;
69    let threads = gix::features::parallel::num_threads(threads);
70
71    let (commit_authors, stats, is_shallow, skipped_merge_commits) = {
72        std::thread::scope(|scope| -> anyhow::Result<_> {
73            let start = Instant::now();
74            let (tx, rx) = std::sync::mpsc::channel::<(u32, Vec<u8>)>();
75            let mailmap = repo.open_mailmap();
76
77            let extract_signatures = scope.spawn(move || -> anyhow::Result<Vec<_>> {
78                let mut out = Vec::new();
79                for (commit_idx, commit_data) in rx {
80                    if let Ok(author) = gix::objs::CommitRefIter::from_bytes(&commit_data)
81                        .author()
82                        .map(|author| mailmap.resolve_cow(author.trim()))
83                    {
84                        let mut string_ref = |s: &[u8]| -> &'static BStr {
85                            match string_heap.get(s) {
86                                Some(n) => n.as_bstr(),
87                                None => {
88                                    let sv: Vec<u8> = s.to_owned();
89                                    string_heap.insert(Box::leak(sv.into_boxed_slice()));
90                                    (*string_heap.get(s).expect("present")).as_ref()
91                                }
92                            }
93                        };
94                        let name = string_ref(author.name.as_ref());
95                        let email = string_ref(author.email.as_ref());
96
97                        out.push((
98                            commit_idx,
99                            SignatureRef {
100                                name,
101                                email,
102                                time: author.time,
103                            },
104                        ));
105                    }
106                }
107                out.shrink_to_fit();
108                out.sort_by(|a, b| {
109                    a.1.email
110                        .cmp(b.1.email)
111                        .then(a.1.seconds().cmp(&b.1.seconds()).reverse())
112                });
113                Ok(out)
114            });
115
116            let (stats_progresses, stats_counters) = if needs_stats {
117                {
118                    let mut sp = progress.add_child("extract stats");
119                    sp.init(None, progress::count("commits"));
120                    let sc = sp.counter();
121
122                    let mut cp = progress.add_child("find changes");
123                    cp.init(None, progress::count("modified files"));
124                    let cc = cp.counter();
125
126                    let mut lp = progress.add_child("find changes");
127                    lp.init(None, progress::count("diff lines"));
128                    let lc = lp.counter();
129
130                    (Some((sp, cp, lp)), Some((sc, cc, lc)))
131                }
132            } else {
133                Default::default()
134            };
135
136            let mut progress = progress.add_child("traverse commit graph");
137            progress.init(None, progress::count("commits"));
138
139            let (tx_tree_id, stat_threads) = if needs_stats {
140                {
141                    let (tx, threads) = spawn_tree_delta_threads(
142                        scope,
143                        threads,
144                        line_stats,
145                        repo.clone(),
146                        stats_counters.clone().expect("counters are set"),
147                    );
148                    (Some(tx), threads)
149                }
150            } else {
151                Default::default()
152            };
153
154            let mut commit_idx = 0_u32;
155            let mut skipped_merge_commits = 0;
156            const CHUNK_SIZE: usize = 50;
157            let mut chunk = Vec::with_capacity(CHUNK_SIZE);
158            let mut commit_iter = commit_id.ancestors(&repo.objects);
159            let mut is_shallow = false;
160            while let Some(c) = commit_iter.next() {
161                progress.inc();
162                if gix::interrupt::is_triggered() {
163                    bail!("Cancelled by user");
164                }
165                match c {
166                    Ok(c) => {
167                        tx.send((commit_idx, commit_iter.commit_data().to_owned())).ok();
168                        let tree_delta_info = tx_tree_id.as_ref().and_then(|tx| {
169                            let mut parents = c.parent_ids.into_iter();
170                            parents
171                                .next()
172                                .map(|first_parent| (tx, Some(first_parent), c.id.to_owned()))
173                                .filter(|_| {
174                                    if parents.next().is_some() {
175                                        skipped_merge_commits += 1;
176                                        false
177                                    } else {
178                                        true
179                                    }
180                                })
181                        });
182                        if let Some((tx_tree, first_parent, commit)) = tree_delta_info {
183                            if chunk.len() == CHUNK_SIZE {
184                                tx_tree
185                                    .send(std::mem::replace(&mut chunk, Vec::with_capacity(CHUNK_SIZE)))
186                                    .ok();
187                            } else {
188                                chunk.push((commit_idx, first_parent, commit));
189                            }
190                        }
191                        commit_idx += 1;
192                    }
193                    Err(gix::traverse::commit::simple::Error::Find { .. }) => {
194                        is_shallow = true;
195                        break;
196                    }
197                    Err(err) => return Err(err.into()),
198                }
199            }
200            if let Some(tx) = tx_tree_id {
201                tx.send(chunk).ok();
202            }
203            drop(tx);
204            progress.show_throughput(start);
205            drop(progress);
206
207            let stats_by_commit_idx = match stats_progresses {
208                Some((mut stat_progress, change_progress, line_progress)) => {
209                    stat_progress.set_max(Some(commit_idx as usize - skipped_merge_commits));
210                    let mut stats = Vec::new();
211                    for handle in stat_threads {
212                        stats.extend(handle.join().expect("no panic")?);
213                        if gix::interrupt::is_triggered() {
214                            bail!("Cancelled by user");
215                        }
216                    }
217                    stats.sort_by_key(|t| t.0);
218                    stat_progress.show_throughput(start);
219                    change_progress.show_throughput(start);
220                    line_progress.show_throughput(start);
221                    stats
222                }
223                None => Vec::new(),
224            };
225
226            Ok((
227                extract_signatures.join().expect("no panic")?,
228                stats_by_commit_idx,
229                is_shallow,
230                skipped_merge_commits,
231            ))
232        })?
233    };
234
235    if commit_authors.is_empty() {
236        bail!("No commits to process");
237    }
238
239    let start = Instant::now();
240    let mut current_email = &commit_authors[0].1.email;
241    let mut slice_start = 0;
242    let mut results_by_hours = Vec::new();
243    let mut ignored_bot_commits = 0_u32;
244    for (idx, (_, elm)) in commit_authors.iter().enumerate() {
245        if elm.email != *current_email {
246            let estimate = estimate_hours(&commit_authors[slice_start..idx], &stats);
247            slice_start = idx;
248            current_email = &elm.email;
249            if ignore_bots && estimate.name.contains_str(b"[bot]") {
250                ignored_bot_commits += estimate.num_commits;
251                continue;
252            }
253            results_by_hours.push(estimate);
254        }
255    }
256    if let Some(commits) = commit_authors.get(slice_start..) {
257        results_by_hours.push(estimate_hours(commits, &stats));
258    }
259
260    let num_authors = results_by_hours.len();
261    let mut results_by_hours = if !omit_unify_identities {
262        deduplicate_identities(&results_by_hours)
263    } else {
264        results_by_hours
265            .iter()
266            .fold(Vec::with_capacity(results_by_hours.len()), |mut acc, e| {
267                acc.push(e.into());
268                acc
269            })
270    };
271    let elapsed = start.elapsed();
272    progress.done(format!(
273        "Extracted and organized data from {} commits in {:?} ({:0.0} commits/s)",
274        commit_authors.len(),
275        elapsed,
276        commit_authors.len() as f32 / elapsed.as_secs_f32()
277    ));
278
279    let num_unique_authors = results_by_hours.len();
280    let (total_hours, total_commits, total_files, total_lines) = results_by_hours
281        .iter()
282        .map(|e| (e.hours, e.num_commits, e.files, e.lines))
283        .reduce(|a, b| (a.0 + b.0, a.1 + b.1, a.2.clone().added(&b.2), a.3.clone().added(&b.3)))
284        .expect("at least one commit at this point");
285    if show_pii {
286        results_by_hours.sort_by(|a, b| a.hours.partial_cmp(&b.hours).unwrap_or(std::cmp::Ordering::Equal));
287        for entry in &results_by_hours {
288            entry.write_to(
289                total_hours,
290                file_stats.then_some(total_files),
291                line_stats.then_some(total_lines),
292                &mut out,
293            )?;
294            writeln!(out)?;
295        }
296    }
297    writeln!(
298        out,
299        "total hours: {:.02}\ntotal 8h days: {:.02}\ntotal commits = {}{}\ntotal authors: {}",
300        total_hours,
301        total_hours / HOURS_PER_WORKDAY,
302        total_commits,
303        if is_shallow { " (shallow)" } else { Default::default() },
304        num_authors
305    )?;
306    if file_stats {
307        writeln!(
308            out,
309            "total files added/removed/modified/remaining: {}/{}/{}/{}",
310            total_files.added,
311            total_files.removed,
312            total_files.modified,
313            total_files.added - total_files.removed
314        )?;
315    }
316    if line_stats {
317        writeln!(
318            out,
319            "total lines added/removed/remaining: {}/{}/{}",
320            total_lines.added,
321            total_lines.removed,
322            total_lines.added - total_lines.removed
323        )?;
324    }
325    if !omit_unify_identities {
326        writeln!(
327            out,
328            "total unique authors: {} ({:.02}% duplication)",
329            num_unique_authors,
330            (1.0 - (num_unique_authors as f32 / num_authors as f32)) * 100.0
331        )?;
332    }
333    if ignored_bot_commits != 0 {
334        writeln!(out, "commits by bots: {ignored_bot_commits}")?;
335    }
336    if needs_stats && skipped_merge_commits != 0 {
337        writeln!(out, "stats omitted for {skipped_merge_commits} merge commits")?;
338    }
339    assert_eq!(
340        total_commits,
341        commit_authors.len() as u32 - ignored_bot_commits,
342        "need to get all commits"
343    );
344    Ok(())
345}
346
347mod core;
348use self::core::{deduplicate_identities, estimate_hours, HOURS_PER_WORKDAY};
349
350mod util;
351use util::{CommitIdx, FileStats, LineStats, WorkByEmail, WorkByPerson};
352
353use crate::hours::core::spawn_tree_delta_threads;