1use std::{collections::BTreeSet, io, path::Path, time::Instant};
2
3use anyhow::bail;
4use gix::{
5 bstr::{BStr, ByteSlice},
6 prelude::*,
7 progress, Count, NestedProgress, Progress,
8};
9
/// Options controlling a run of [`estimate`], along with the output stream.
pub struct Context<W> {
    /// If `true`, exclude authors whose name contains `[bot]` from the results.
    pub ignore_bots: bool,
    /// If `true`, print a per-author breakdown (names and email addresses).
    pub show_pii: bool,
    /// If `true`, collect and print per-file statistics (added/removed/modified files).
    pub file_stats: bool,
    /// If `true`, collect and print per-line statistics (added/removed lines).
    pub line_stats: bool,
    /// The number of threads to use for stats extraction; `None` lets `gix` pick a default.
    pub threads: Option<usize>,
    /// If `true`, do not merge multiple identities (distinct name/email pairs) of the same person.
    pub omit_unify_identities: bool,
    /// Where the report is written.
    pub out: W,
}
28
/// An author signature borrowing interned name and email strings.
pub struct SignatureRef<'a> {
    /// The author's name.
    name: &'a BStr,
    /// The author's email address.
    email: &'a BStr,
    /// The commit's author time.
    time: gix::date::Time,
}
34
35impl SignatureRef<'_> {
36 fn seconds(&self) -> gix::date::SecondsSinceUnixEpoch {
37 self.time.seconds
38 }
39}
40
41pub fn estimate<W, P>(
48 working_dir: &Path,
49 rev_spec: &BStr,
50 mut progress: P,
51 Context {
52 show_pii,
53 ignore_bots,
54 file_stats,
55 line_stats,
56 omit_unify_identities,
57 threads,
58 mut out,
59 }: Context<W>,
60) -> anyhow::Result<()>
61where
62 W: io::Write,
63 P: NestedProgress,
64{
65 let repo = gix::discover(working_dir)?;
66 let commit_id = repo.rev_parse_single(rev_spec)?.detach();
67 let mut string_heap = BTreeSet::<&'static [u8]>::new();
68 let needs_stats = file_stats || line_stats;
69 let threads = gix::features::parallel::num_threads(threads);
70
71 let (commit_authors, stats, is_shallow, skipped_merge_commits) = {
72 std::thread::scope(|scope| -> anyhow::Result<_> {
73 let start = Instant::now();
74 let (tx, rx) = std::sync::mpsc::channel::<(u32, Vec<u8>)>();
75 let mailmap = repo.open_mailmap();
76
77 let extract_signatures = scope.spawn(move || -> anyhow::Result<Vec<_>> {
78 let mut out = Vec::new();
79 for (commit_idx, commit_data) in rx {
80 if let Ok(author) = gix::objs::CommitRefIter::from_bytes(&commit_data)
81 .author()
82 .map(|author| mailmap.resolve_cow(author.trim()))
83 {
84 let mut string_ref = |s: &[u8]| -> &'static BStr {
85 match string_heap.get(s) {
86 Some(n) => n.as_bstr(),
87 None => {
88 let sv: Vec<u8> = s.to_owned();
89 string_heap.insert(Box::leak(sv.into_boxed_slice()));
90 (*string_heap.get(s).expect("present")).as_ref()
91 }
92 }
93 };
94 let name = string_ref(author.name.as_ref());
95 let email = string_ref(author.email.as_ref());
96
97 out.push((
98 commit_idx,
99 SignatureRef {
100 name,
101 email,
102 time: author.time,
103 },
104 ));
105 }
106 }
107 out.shrink_to_fit();
108 out.sort_by(|a, b| {
109 a.1.email
110 .cmp(b.1.email)
111 .then(a.1.seconds().cmp(&b.1.seconds()).reverse())
112 });
113 Ok(out)
114 });
115
116 let (stats_progresses, stats_counters) = if needs_stats {
117 {
118 let mut sp = progress.add_child("extract stats");
119 sp.init(None, progress::count("commits"));
120 let sc = sp.counter();
121
122 let mut cp = progress.add_child("find changes");
123 cp.init(None, progress::count("modified files"));
124 let cc = cp.counter();
125
126 let mut lp = progress.add_child("find changes");
127 lp.init(None, progress::count("diff lines"));
128 let lc = lp.counter();
129
130 (Some((sp, cp, lp)), Some((sc, cc, lc)))
131 }
132 } else {
133 Default::default()
134 };
135
136 let mut progress = progress.add_child("traverse commit graph");
137 progress.init(None, progress::count("commits"));
138
139 let (tx_tree_id, stat_threads) = if needs_stats {
140 {
141 let (tx, threads) = spawn_tree_delta_threads(
142 scope,
143 threads,
144 line_stats,
145 repo.clone(),
146 stats_counters.clone().expect("counters are set"),
147 );
148 (Some(tx), threads)
149 }
150 } else {
151 Default::default()
152 };
153
154 let mut commit_idx = 0_u32;
155 let mut skipped_merge_commits = 0;
156 const CHUNK_SIZE: usize = 50;
157 let mut chunk = Vec::with_capacity(CHUNK_SIZE);
158 let mut commit_iter = commit_id.ancestors(&repo.objects);
159 let mut is_shallow = false;
160 while let Some(c) = commit_iter.next() {
161 progress.inc();
162 if gix::interrupt::is_triggered() {
163 bail!("Cancelled by user");
164 }
165 match c {
166 Ok(c) => {
167 tx.send((commit_idx, commit_iter.commit_data().to_owned())).ok();
168 let tree_delta_info = tx_tree_id.as_ref().and_then(|tx| {
169 let mut parents = c.parent_ids.into_iter();
170 parents
171 .next()
172 .map(|first_parent| (tx, Some(first_parent), c.id.to_owned()))
173 .filter(|_| {
174 if parents.next().is_some() {
175 skipped_merge_commits += 1;
176 false
177 } else {
178 true
179 }
180 })
181 });
182 if let Some((tx_tree, first_parent, commit)) = tree_delta_info {
183 if chunk.len() == CHUNK_SIZE {
184 tx_tree
185 .send(std::mem::replace(&mut chunk, Vec::with_capacity(CHUNK_SIZE)))
186 .ok();
187 } else {
188 chunk.push((commit_idx, first_parent, commit));
189 }
190 }
191 commit_idx += 1;
192 }
193 Err(gix::traverse::commit::simple::Error::Find { .. }) => {
194 is_shallow = true;
195 break;
196 }
197 Err(err) => return Err(err.into()),
198 }
199 }
200 if let Some(tx) = tx_tree_id {
201 tx.send(chunk).ok();
202 }
203 drop(tx);
204 progress.show_throughput(start);
205 drop(progress);
206
207 let stats_by_commit_idx = match stats_progresses {
208 Some((mut stat_progress, change_progress, line_progress)) => {
209 stat_progress.set_max(Some(commit_idx as usize - skipped_merge_commits));
210 let mut stats = Vec::new();
211 for handle in stat_threads {
212 stats.extend(handle.join().expect("no panic")?);
213 if gix::interrupt::is_triggered() {
214 bail!("Cancelled by user");
215 }
216 }
217 stats.sort_by_key(|t| t.0);
218 stat_progress.show_throughput(start);
219 change_progress.show_throughput(start);
220 line_progress.show_throughput(start);
221 stats
222 }
223 None => Vec::new(),
224 };
225
226 Ok((
227 extract_signatures.join().expect("no panic")?,
228 stats_by_commit_idx,
229 is_shallow,
230 skipped_merge_commits,
231 ))
232 })?
233 };
234
235 if commit_authors.is_empty() {
236 bail!("No commits to process");
237 }
238
239 let start = Instant::now();
240 let mut current_email = &commit_authors[0].1.email;
241 let mut slice_start = 0;
242 let mut results_by_hours = Vec::new();
243 let mut ignored_bot_commits = 0_u32;
244 for (idx, (_, elm)) in commit_authors.iter().enumerate() {
245 if elm.email != *current_email {
246 let estimate = estimate_hours(&commit_authors[slice_start..idx], &stats);
247 slice_start = idx;
248 current_email = &elm.email;
249 if ignore_bots && estimate.name.contains_str(b"[bot]") {
250 ignored_bot_commits += estimate.num_commits;
251 continue;
252 }
253 results_by_hours.push(estimate);
254 }
255 }
256 if let Some(commits) = commit_authors.get(slice_start..) {
257 results_by_hours.push(estimate_hours(commits, &stats));
258 }
259
260 let num_authors = results_by_hours.len();
261 let mut results_by_hours = if !omit_unify_identities {
262 deduplicate_identities(&results_by_hours)
263 } else {
264 results_by_hours
265 .iter()
266 .fold(Vec::with_capacity(results_by_hours.len()), |mut acc, e| {
267 acc.push(e.into());
268 acc
269 })
270 };
271 let elapsed = start.elapsed();
272 progress.done(format!(
273 "Extracted and organized data from {} commits in {:?} ({:0.0} commits/s)",
274 commit_authors.len(),
275 elapsed,
276 commit_authors.len() as f32 / elapsed.as_secs_f32()
277 ));
278
279 let num_unique_authors = results_by_hours.len();
280 let (total_hours, total_commits, total_files, total_lines) = results_by_hours
281 .iter()
282 .map(|e| (e.hours, e.num_commits, e.files, e.lines))
283 .reduce(|a, b| (a.0 + b.0, a.1 + b.1, a.2.clone().added(&b.2), a.3.clone().added(&b.3)))
284 .expect("at least one commit at this point");
285 if show_pii {
286 results_by_hours.sort_by(|a, b| a.hours.partial_cmp(&b.hours).unwrap_or(std::cmp::Ordering::Equal));
287 for entry in &results_by_hours {
288 entry.write_to(
289 total_hours,
290 file_stats.then_some(total_files),
291 line_stats.then_some(total_lines),
292 &mut out,
293 )?;
294 writeln!(out)?;
295 }
296 }
297 writeln!(
298 out,
299 "total hours: {:.02}\ntotal 8h days: {:.02}\ntotal commits = {}{}\ntotal authors: {}",
300 total_hours,
301 total_hours / HOURS_PER_WORKDAY,
302 total_commits,
303 if is_shallow { " (shallow)" } else { Default::default() },
304 num_authors
305 )?;
306 if file_stats {
307 writeln!(
308 out,
309 "total files added/removed/modified/remaining: {}/{}/{}/{}",
310 total_files.added,
311 total_files.removed,
312 total_files.modified,
313 total_files.added - total_files.removed
314 )?;
315 }
316 if line_stats {
317 writeln!(
318 out,
319 "total lines added/removed/remaining: {}/{}/{}",
320 total_lines.added,
321 total_lines.removed,
322 total_lines.added - total_lines.removed
323 )?;
324 }
325 if !omit_unify_identities {
326 writeln!(
327 out,
328 "total unique authors: {} ({:.02}% duplication)",
329 num_unique_authors,
330 (1.0 - (num_unique_authors as f32 / num_authors as f32)) * 100.0
331 )?;
332 }
333 if ignored_bot_commits != 0 {
334 writeln!(out, "commits by bots: {ignored_bot_commits}")?;
335 }
336 if needs_stats && skipped_merge_commits != 0 {
337 writeln!(out, "stats omitted for {skipped_merge_commits} merge commits")?;
338 }
339 assert_eq!(
340 total_commits,
341 commit_authors.len() as u32 - ignored_bot_commits,
342 "need to get all commits"
343 );
344 Ok(())
345}
346
// Estimation logic: hour computation per author group and identity deduplication.
mod core;
use self::core::{deduplicate_identities, estimate_hours, HOURS_PER_WORKDAY};

// Supporting data types for per-commit and per-author bookkeeping.
mod util;
use util::{CommitIdx, FileStats, LineStats, WorkByEmail, WorkByPerson};

use crate::hours::core::spawn_tree_delta_threads;