1use std::{collections::BTreeSet, io, path::Path, time::Instant};
2
3use anyhow::bail;
4use gix::{
5 bstr::{BStr, ByteSlice},
6 prelude::*,
7 progress, Count, NestedProgress, Progress,
8};
9
/// Options controlling the hours estimation, plus the stream the report is written to.
pub struct Context<W> {
    /// If `true`, exclude authors whose name contains `[bot]` from the results.
    pub ignore_bots: bool,
    /// If `true`, print per-author entries (names/emails) in addition to the totals.
    pub show_pii: bool,
    /// If `true`, gather and print file-level change statistics (added/removed/modified).
    pub file_stats: bool,
    /// If `true`, gather and print line-level diff statistics (added/removed).
    pub line_stats: bool,
    /// Desired number of threads for statistics gathering; `None` lets
    /// `gix::features::parallel::num_threads` choose a default.
    pub threads: Option<usize>,
    /// If `true`, skip merging multiple identities (name/email variants) of the same person.
    pub omit_unify_identities: bool,
    /// The writer that receives the final report.
    pub out: W,
}
28
/// An author signature whose name and email borrow from an interned string heap,
/// along with the commit time.
pub struct SignatureRef<'a> {
    // Author name, interned (see `string_heap` in `estimate`).
    name: &'a BStr,
    // Author email, interned; used as the grouping key when slicing commits per author.
    email: &'a BStr,
    // Commit timestamp of the signature.
    time: gix::date::Time,
}
34
35impl SignatureRef<'_> {
36 fn seconds(&self) -> gix::date::SecondsSinceUnixEpoch {
37 self.time.seconds
38 }
39}
40
41pub fn estimate<W, P>(
48 working_dir: &Path,
49 rev_spec: &BStr,
50 mut progress: P,
51 Context {
52 show_pii,
53 ignore_bots,
54 file_stats,
55 line_stats,
56 omit_unify_identities,
57 threads,
58 mut out,
59 }: Context<W>,
60) -> anyhow::Result<()>
61where
62 W: io::Write,
63 P: NestedProgress,
64{
65 let repo = gix::discover(working_dir)?;
66 let commit_id = repo.rev_parse_single(rev_spec)?.detach();
67 let mut string_heap = BTreeSet::<&'static [u8]>::new();
68 let needs_stats = file_stats || line_stats;
69 let threads = gix::features::parallel::num_threads(threads);
70
71 let (commit_authors, stats, is_shallow, skipped_merge_commits) = {
72 std::thread::scope(|scope| -> anyhow::Result<_> {
73 let start = Instant::now();
74 let (tx, rx) = std::sync::mpsc::channel::<(u32, Vec<u8>)>();
75 let mailmap = repo.open_mailmap();
76
77 let extract_signatures = scope.spawn(move || -> anyhow::Result<Vec<_>> {
78 let mut out = Vec::new();
79 for (commit_idx, commit_data) in rx {
80 if let Ok(author) = gix::objs::CommitRefIter::from_bytes(&commit_data)
81 .author()
82 .map(|author| mailmap.resolve_cow(author.trim()))
83 {
84 let mut string_ref = |s: &[u8]| -> &'static BStr {
85 match string_heap.get(s) {
86 Some(n) => n.as_bstr(),
87 None => {
88 let sv: Vec<u8> = s.to_owned();
89 string_heap.insert(Box::leak(sv.into_boxed_slice()));
90 (*string_heap.get(s).expect("present")).as_ref()
91 }
92 }
93 };
94 let name = string_ref(author.name.as_ref());
95 let email = string_ref(author.email.as_ref());
96
97 out.push((
98 commit_idx,
99 SignatureRef {
100 name,
101 email,
102 time: author.time,
103 },
104 ));
105 }
106 }
107 out.shrink_to_fit();
108 out.sort_by(|a, b| {
109 a.1.email
110 .cmp(b.1.email)
111 .then(a.1.seconds().cmp(&b.1.seconds()).reverse())
112 });
113 Ok(out)
114 });
115
116 let (stats_progresses, stats_counters) = needs_stats
117 .then(|| {
118 let mut sp = progress.add_child("extract stats");
119 sp.init(None, progress::count("commits"));
120 let sc = sp.counter();
121
122 let mut cp = progress.add_child("find changes");
123 cp.init(None, progress::count("modified files"));
124 let cc = cp.counter();
125
126 let mut lp = progress.add_child("find changes");
127 lp.init(None, progress::count("diff lines"));
128 let lc = lp.counter();
129
130 (Some((sp, cp, lp)), Some((sc, cc, lc)))
131 })
132 .unwrap_or_default();
133
134 let mut progress = progress.add_child("traverse commit graph");
135 progress.init(None, progress::count("commits"));
136
137 let (tx_tree_id, stat_threads) = needs_stats
138 .then(|| {
139 let (tx, threads) = spawn_tree_delta_threads(
140 scope,
141 threads,
142 line_stats,
143 repo.clone(),
144 stats_counters.clone().expect("counters are set"),
145 );
146 (Some(tx), threads)
147 })
148 .unwrap_or_default();
149
150 let mut commit_idx = 0_u32;
151 let mut skipped_merge_commits = 0;
152 const CHUNK_SIZE: usize = 50;
153 let mut chunk = Vec::with_capacity(CHUNK_SIZE);
154 let mut commit_iter = commit_id.ancestors(&repo.objects);
155 let mut is_shallow = false;
156 while let Some(c) = commit_iter.next() {
157 progress.inc();
158 if gix::interrupt::is_triggered() {
159 bail!("Cancelled by user");
160 }
161 match c {
162 Ok(c) => {
163 tx.send((commit_idx, commit_iter.commit_data().to_owned())).ok();
164 let tree_delta_info = tx_tree_id.as_ref().and_then(|tx| {
165 let mut parents = c.parent_ids.into_iter();
166 parents
167 .next()
168 .map(|first_parent| (tx, Some(first_parent), c.id.to_owned()))
169 .filter(|_| {
170 if parents.next().is_some() {
171 skipped_merge_commits += 1;
172 false
173 } else {
174 true
175 }
176 })
177 });
178 if let Some((tx_tree, first_parent, commit)) = tree_delta_info {
179 if chunk.len() == CHUNK_SIZE {
180 tx_tree
181 .send(std::mem::replace(&mut chunk, Vec::with_capacity(CHUNK_SIZE)))
182 .ok();
183 } else {
184 chunk.push((commit_idx, first_parent, commit));
185 }
186 }
187 commit_idx += 1;
188 }
189 Err(gix::traverse::commit::simple::Error::Find { .. }) => {
190 is_shallow = true;
191 break;
192 }
193 Err(err) => return Err(err.into()),
194 }
195 }
196 if let Some(tx) = tx_tree_id {
197 tx.send(chunk).ok();
198 }
199 drop(tx);
200 progress.show_throughput(start);
201 drop(progress);
202
203 let stats_by_commit_idx = match stats_progresses {
204 Some((mut stat_progress, change_progress, line_progress)) => {
205 stat_progress.set_max(Some(commit_idx as usize - skipped_merge_commits));
206 let mut stats = Vec::new();
207 for handle in stat_threads {
208 stats.extend(handle.join().expect("no panic")?);
209 if gix::interrupt::is_triggered() {
210 bail!("Cancelled by user");
211 }
212 }
213 stats.sort_by_key(|t| t.0);
214 stat_progress.show_throughput(start);
215 change_progress.show_throughput(start);
216 line_progress.show_throughput(start);
217 stats
218 }
219 None => Vec::new(),
220 };
221
222 Ok((
223 extract_signatures.join().expect("no panic")?,
224 stats_by_commit_idx,
225 is_shallow,
226 skipped_merge_commits,
227 ))
228 })?
229 };
230
231 if commit_authors.is_empty() {
232 bail!("No commits to process");
233 }
234
235 let start = Instant::now();
236 let mut current_email = &commit_authors[0].1.email;
237 let mut slice_start = 0;
238 let mut results_by_hours = Vec::new();
239 let mut ignored_bot_commits = 0_u32;
240 for (idx, (_, elm)) in commit_authors.iter().enumerate() {
241 if elm.email != *current_email {
242 let estimate = estimate_hours(&commit_authors[slice_start..idx], &stats);
243 slice_start = idx;
244 current_email = &elm.email;
245 if ignore_bots && estimate.name.contains_str(b"[bot]") {
246 ignored_bot_commits += estimate.num_commits;
247 continue;
248 }
249 results_by_hours.push(estimate);
250 }
251 }
252 if let Some(commits) = commit_authors.get(slice_start..) {
253 results_by_hours.push(estimate_hours(commits, &stats));
254 }
255
256 let num_authors = results_by_hours.len();
257 let mut results_by_hours = if !omit_unify_identities {
258 deduplicate_identities(&results_by_hours)
259 } else {
260 results_by_hours
261 .iter()
262 .fold(Vec::with_capacity(results_by_hours.len()), |mut acc, e| {
263 acc.push(e.into());
264 acc
265 })
266 };
267 let elapsed = start.elapsed();
268 progress.done(format!(
269 "Extracted and organized data from {} commits in {:?} ({:0.0} commits/s)",
270 commit_authors.len(),
271 elapsed,
272 commit_authors.len() as f32 / elapsed.as_secs_f32()
273 ));
274
275 let num_unique_authors = results_by_hours.len();
276 let (total_hours, total_commits, total_files, total_lines) = results_by_hours
277 .iter()
278 .map(|e| (e.hours, e.num_commits, e.files, e.lines))
279 .reduce(|a, b| (a.0 + b.0, a.1 + b.1, a.2.clone().added(&b.2), a.3.clone().added(&b.3)))
280 .expect("at least one commit at this point");
281 if show_pii {
282 results_by_hours.sort_by(|a, b| a.hours.partial_cmp(&b.hours).unwrap_or(std::cmp::Ordering::Equal));
283 for entry in &results_by_hours {
284 entry.write_to(
285 total_hours,
286 file_stats.then_some(total_files),
287 line_stats.then_some(total_lines),
288 &mut out,
289 )?;
290 writeln!(out)?;
291 }
292 }
293 writeln!(
294 out,
295 "total hours: {:.02}\ntotal 8h days: {:.02}\ntotal commits = {}{}\ntotal authors: {}",
296 total_hours,
297 total_hours / HOURS_PER_WORKDAY,
298 total_commits,
299 is_shallow.then_some(" (shallow)").unwrap_or_default(),
300 num_authors
301 )?;
302 if file_stats {
303 writeln!(
304 out,
305 "total files added/removed/modified/remaining: {}/{}/{}/{}",
306 total_files.added,
307 total_files.removed,
308 total_files.modified,
309 total_files.added - total_files.removed
310 )?;
311 }
312 if line_stats {
313 writeln!(
314 out,
315 "total lines added/removed/remaining: {}/{}/{}",
316 total_lines.added,
317 total_lines.removed,
318 total_lines.added - total_lines.removed
319 )?;
320 }
321 if !omit_unify_identities {
322 writeln!(
323 out,
324 "total unique authors: {} ({:.02}% duplication)",
325 num_unique_authors,
326 (1.0 - (num_unique_authors as f32 / num_authors as f32)) * 100.0
327 )?;
328 }
329 if ignored_bot_commits != 0 {
330 writeln!(out, "commits by bots: {ignored_bot_commits}")?;
331 }
332 if needs_stats && skipped_merge_commits != 0 {
333 writeln!(out, "stats omitted for {skipped_merge_commits} merge commits")?;
334 }
335 assert_eq!(
336 total_commits,
337 commit_authors.len() as u32 - ignored_bot_commits,
338 "need to get all commits"
339 );
340 Ok(())
341}
342
343mod core;
344use self::core::{deduplicate_identities, estimate_hours, HOURS_PER_WORKDAY};
345
346mod util;
347use util::{CommitIdx, FileStats, LineStats, WorkByEmail, WorkByPerson};
348
349use crate::hours::core::spawn_tree_delta_threads;