gitoxide_core/hours/
mod.rs

1use std::{collections::BTreeSet, io, path::Path, time::Instant};
2
3use anyhow::bail;
4use gix::{
5    bstr::{BStr, ByteSlice},
6    prelude::*,
7    progress, Count, NestedProgress, Progress,
8};
9
/// Additional configuration for the hours estimation functionality.
///
/// `W` is the type of the output stream the report is written to; [`estimate()`] requires it to
/// implement `std::io::Write`.
#[derive(Debug)]
pub struct Context<W> {
    /// Ignore github bots which match the `[bot]` search string.
    pub ignore_bots: bool,
    /// Show personally identifiable information before the summary. Includes names and email addresses.
    pub show_pii: bool,
    /// Collect how many files have been added, removed and modified (without rename tracking).
    pub file_stats: bool,
    /// Collect how many lines in files have been added, removed and modified (without rename tracking).
    pub line_stats: bool,
    /// The amount of threads to use. If unset, use all cores, if 0 use all physical cores.
    pub threads: Option<usize>,
    /// Omit unifying identities by name and email, which can lead to the same author appearing multiple times
    /// due to using different names or email addresses.
    pub omit_unify_identities: bool,
    /// Where to write our output to.
    pub out: W,
}
28
29pub struct SignatureRef<'a> {
30    name: &'a BStr,
31    email: &'a BStr,
32    time: gix::date::Time,
33}
34
35impl SignatureRef<'_> {
36    fn seconds(&self) -> gix::date::SecondsSinceUnixEpoch {
37        self.time.seconds
38    }
39}
40
41/// Estimate the hours it takes to produce the content of the repository in `_working_dir_`, with `_refname_` for
42/// the start of the commit graph traversal.
43///
44/// * `_working_dir_` - The directory containing a '.git/' folder.
45/// * `_refname_` - The name of the ref like 'main' or 'master' at which to start iterating the commit graph.
46/// * `_progress_` - A way to provide progress and performance information
47pub fn estimate<W, P>(
48    working_dir: &Path,
49    rev_spec: &BStr,
50    mut progress: P,
51    Context {
52        show_pii,
53        ignore_bots,
54        file_stats,
55        line_stats,
56        omit_unify_identities,
57        threads,
58        mut out,
59    }: Context<W>,
60) -> anyhow::Result<()>
61where
62    W: io::Write,
63    P: NestedProgress,
64{
65    let repo = gix::discover(working_dir)?;
66    let commit_id = repo.rev_parse_single(rev_spec)?.detach();
67    let mut string_heap = BTreeSet::<&'static [u8]>::new();
68    let needs_stats = file_stats || line_stats;
69    let threads = gix::features::parallel::num_threads(threads);
70
71    let (commit_authors, stats, is_shallow, skipped_merge_commits) = {
72        std::thread::scope(|scope| -> anyhow::Result<_> {
73            let start = Instant::now();
74            let (tx, rx) = std::sync::mpsc::channel::<(u32, Vec<u8>)>();
75            let mailmap = repo.open_mailmap();
76
77            let extract_signatures = scope.spawn(move || -> anyhow::Result<Vec<_>> {
78                let mut out = Vec::new();
79                for (commit_idx, commit_data) in rx {
80                    if let Ok(author) = gix::objs::CommitRefIter::from_bytes(&commit_data)
81                        .author()
82                        .map(|author| mailmap.resolve_cow(author.trim()))
83                    {
84                        let mut string_ref = |s: &[u8]| -> &'static BStr {
85                            match string_heap.get(s) {
86                                Some(n) => n.as_bstr(),
87                                None => {
88                                    let sv: Vec<u8> = s.to_owned();
89                                    string_heap.insert(Box::leak(sv.into_boxed_slice()));
90                                    (*string_heap.get(s).expect("present")).as_ref()
91                                }
92                            }
93                        };
94                        let name = string_ref(author.name.as_ref());
95                        let email = string_ref(author.email.as_ref());
96
97                        out.push((
98                            commit_idx,
99                            SignatureRef {
100                                name,
101                                email,
102                                time: author.time,
103                            },
104                        ));
105                    }
106                }
107                out.shrink_to_fit();
108                out.sort_by(|a, b| {
109                    a.1.email
110                        .cmp(b.1.email)
111                        .then(a.1.seconds().cmp(&b.1.seconds()).reverse())
112                });
113                Ok(out)
114            });
115
116            let (stats_progresses, stats_counters) = needs_stats
117                .then(|| {
118                    let mut sp = progress.add_child("extract stats");
119                    sp.init(None, progress::count("commits"));
120                    let sc = sp.counter();
121
122                    let mut cp = progress.add_child("find changes");
123                    cp.init(None, progress::count("modified files"));
124                    let cc = cp.counter();
125
126                    let mut lp = progress.add_child("find changes");
127                    lp.init(None, progress::count("diff lines"));
128                    let lc = lp.counter();
129
130                    (Some((sp, cp, lp)), Some((sc, cc, lc)))
131                })
132                .unwrap_or_default();
133
134            let mut progress = progress.add_child("traverse commit graph");
135            progress.init(None, progress::count("commits"));
136
137            let (tx_tree_id, stat_threads) = needs_stats
138                .then(|| {
139                    let (tx, threads) = spawn_tree_delta_threads(
140                        scope,
141                        threads,
142                        line_stats,
143                        repo.clone(),
144                        stats_counters.clone().expect("counters are set"),
145                    );
146                    (Some(tx), threads)
147                })
148                .unwrap_or_default();
149
150            let mut commit_idx = 0_u32;
151            let mut skipped_merge_commits = 0;
152            const CHUNK_SIZE: usize = 50;
153            let mut chunk = Vec::with_capacity(CHUNK_SIZE);
154            let mut commit_iter = commit_id.ancestors(&repo.objects);
155            let mut is_shallow = false;
156            while let Some(c) = commit_iter.next() {
157                progress.inc();
158                if gix::interrupt::is_triggered() {
159                    bail!("Cancelled by user");
160                }
161                match c {
162                    Ok(c) => {
163                        tx.send((commit_idx, commit_iter.commit_data().to_owned())).ok();
164                        let tree_delta_info = tx_tree_id.as_ref().and_then(|tx| {
165                            let mut parents = c.parent_ids.into_iter();
166                            parents
167                                .next()
168                                .map(|first_parent| (tx, Some(first_parent), c.id.to_owned()))
169                                .filter(|_| {
170                                    if parents.next().is_some() {
171                                        skipped_merge_commits += 1;
172                                        false
173                                    } else {
174                                        true
175                                    }
176                                })
177                        });
178                        if let Some((tx_tree, first_parent, commit)) = tree_delta_info {
179                            if chunk.len() == CHUNK_SIZE {
180                                tx_tree
181                                    .send(std::mem::replace(&mut chunk, Vec::with_capacity(CHUNK_SIZE)))
182                                    .ok();
183                            } else {
184                                chunk.push((commit_idx, first_parent, commit));
185                            }
186                        }
187                        commit_idx += 1;
188                    }
189                    Err(gix::traverse::commit::simple::Error::Find { .. }) => {
190                        is_shallow = true;
191                        break;
192                    }
193                    Err(err) => return Err(err.into()),
194                }
195            }
196            if let Some(tx) = tx_tree_id {
197                tx.send(chunk).ok();
198            }
199            drop(tx);
200            progress.show_throughput(start);
201            drop(progress);
202
203            let stats_by_commit_idx = match stats_progresses {
204                Some((mut stat_progress, change_progress, line_progress)) => {
205                    stat_progress.set_max(Some(commit_idx as usize - skipped_merge_commits));
206                    let mut stats = Vec::new();
207                    for handle in stat_threads {
208                        stats.extend(handle.join().expect("no panic")?);
209                        if gix::interrupt::is_triggered() {
210                            bail!("Cancelled by user");
211                        }
212                    }
213                    stats.sort_by_key(|t| t.0);
214                    stat_progress.show_throughput(start);
215                    change_progress.show_throughput(start);
216                    line_progress.show_throughput(start);
217                    stats
218                }
219                None => Vec::new(),
220            };
221
222            Ok((
223                extract_signatures.join().expect("no panic")?,
224                stats_by_commit_idx,
225                is_shallow,
226                skipped_merge_commits,
227            ))
228        })?
229    };
230
231    if commit_authors.is_empty() {
232        bail!("No commits to process");
233    }
234
235    let start = Instant::now();
236    let mut current_email = &commit_authors[0].1.email;
237    let mut slice_start = 0;
238    let mut results_by_hours = Vec::new();
239    let mut ignored_bot_commits = 0_u32;
240    for (idx, (_, elm)) in commit_authors.iter().enumerate() {
241        if elm.email != *current_email {
242            let estimate = estimate_hours(&commit_authors[slice_start..idx], &stats);
243            slice_start = idx;
244            current_email = &elm.email;
245            if ignore_bots && estimate.name.contains_str(b"[bot]") {
246                ignored_bot_commits += estimate.num_commits;
247                continue;
248            }
249            results_by_hours.push(estimate);
250        }
251    }
252    if let Some(commits) = commit_authors.get(slice_start..) {
253        results_by_hours.push(estimate_hours(commits, &stats));
254    }
255
256    let num_authors = results_by_hours.len();
257    let mut results_by_hours = if !omit_unify_identities {
258        deduplicate_identities(&results_by_hours)
259    } else {
260        results_by_hours
261            .iter()
262            .fold(Vec::with_capacity(results_by_hours.len()), |mut acc, e| {
263                acc.push(e.into());
264                acc
265            })
266    };
267    let elapsed = start.elapsed();
268    progress.done(format!(
269        "Extracted and organized data from {} commits in {:?} ({:0.0} commits/s)",
270        commit_authors.len(),
271        elapsed,
272        commit_authors.len() as f32 / elapsed.as_secs_f32()
273    ));
274
275    let num_unique_authors = results_by_hours.len();
276    let (total_hours, total_commits, total_files, total_lines) = results_by_hours
277        .iter()
278        .map(|e| (e.hours, e.num_commits, e.files, e.lines))
279        .reduce(|a, b| (a.0 + b.0, a.1 + b.1, a.2.clone().added(&b.2), a.3.clone().added(&b.3)))
280        .expect("at least one commit at this point");
281    if show_pii {
282        results_by_hours.sort_by(|a, b| a.hours.partial_cmp(&b.hours).unwrap_or(std::cmp::Ordering::Equal));
283        for entry in &results_by_hours {
284            entry.write_to(
285                total_hours,
286                file_stats.then_some(total_files),
287                line_stats.then_some(total_lines),
288                &mut out,
289            )?;
290            writeln!(out)?;
291        }
292    }
293    writeln!(
294        out,
295        "total hours: {:.02}\ntotal 8h days: {:.02}\ntotal commits = {}{}\ntotal authors: {}",
296        total_hours,
297        total_hours / HOURS_PER_WORKDAY,
298        total_commits,
299        is_shallow.then_some(" (shallow)").unwrap_or_default(),
300        num_authors
301    )?;
302    if file_stats {
303        writeln!(
304            out,
305            "total files added/removed/modified/remaining: {}/{}/{}/{}",
306            total_files.added,
307            total_files.removed,
308            total_files.modified,
309            total_files.added - total_files.removed
310        )?;
311    }
312    if line_stats {
313        writeln!(
314            out,
315            "total lines added/removed/remaining: {}/{}/{}",
316            total_lines.added,
317            total_lines.removed,
318            total_lines.added - total_lines.removed
319        )?;
320    }
321    if !omit_unify_identities {
322        writeln!(
323            out,
324            "total unique authors: {} ({:.02}% duplication)",
325            num_unique_authors,
326            (1.0 - (num_unique_authors as f32 / num_authors as f32)) * 100.0
327        )?;
328    }
329    if ignored_bot_commits != 0 {
330        writeln!(out, "commits by bots: {ignored_bot_commits}")?;
331    }
332    if needs_stats && skipped_merge_commits != 0 {
333        writeln!(out, "stats omitted for {skipped_merge_commits} merge commits")?;
334    }
335    assert_eq!(
336        total_commits,
337        commit_authors.len() as u32 - ignored_bot_commits,
338        "need to get all commits"
339    );
340    Ok(())
341}
342
343mod core;
344use self::core::{deduplicate_identities, estimate_hours, HOURS_PER_WORKDAY};
345
346mod util;
347use util::{CommitIdx, FileStats, LineStats, WorkByEmail, WorkByPerson};
348
349use crate::hours::core::spawn_tree_delta_threads;