1use std::{collections::BTreeSet, io, path::Path, time::Instant};
2
3use anyhow::bail;
4use gix::{
5 actor,
6 bstr::{BStr, ByteSlice},
7 prelude::*,
8 progress, Count, NestedProgress, Progress,
9};
10
/// Options controlling the hours estimation, plus the stream the report is written to.
pub struct Context<W> {
    /// If `true`, exclude authors whose name contains `[bot]` from the per-author results
    /// (their commit count is still reported separately).
    pub ignore_bots: bool,
    /// If `true`, print a per-author breakdown, which includes names and email addresses.
    pub show_pii: bool,
    /// If `true`, collect and print file-level statistics (files added/removed/modified).
    pub file_stats: bool,
    /// If `true`, collect and print line-level statistics (lines added/removed).
    pub line_stats: bool,
    /// The desired number of threads for stat collection; passed through
    /// `gix::features::parallel::num_threads`, so `None` picks a default — TODO confirm "all cores".
    pub threads: Option<usize>,
    /// If `true`, skip the identity-deduplication pass that merges multiple
    /// email/name combinations belonging to the same person.
    pub omit_unify_identities: bool,
    /// Where the textual report is written.
    pub out: W,
}
29
30pub fn estimate<W, P>(
37 working_dir: &Path,
38 rev_spec: &BStr,
39 mut progress: P,
40 Context {
41 show_pii,
42 ignore_bots,
43 file_stats,
44 line_stats,
45 omit_unify_identities,
46 threads,
47 mut out,
48 }: Context<W>,
49) -> anyhow::Result<()>
50where
51 W: io::Write,
52 P: NestedProgress,
53{
54 let repo = gix::discover(working_dir)?;
55 let commit_id = repo.rev_parse_single(rev_spec)?.detach();
56 let mut string_heap = BTreeSet::<&'static [u8]>::new();
57 let needs_stats = file_stats || line_stats;
58 let threads = gix::features::parallel::num_threads(threads);
59
60 let (commit_authors, stats, is_shallow, skipped_merge_commits) = {
61 std::thread::scope(|scope| -> anyhow::Result<_> {
62 let start = Instant::now();
63 let (tx, rx) = std::sync::mpsc::channel::<(u32, Vec<u8>)>();
64 let mailmap = repo.open_mailmap();
65
66 let extract_signatures = scope.spawn(move || -> anyhow::Result<Vec<_>> {
67 let mut out = Vec::new();
68 for (commit_idx, commit_data) in rx {
69 if let Ok(author) = gix::objs::CommitRefIter::from_bytes(&commit_data)
70 .author()
71 .map(|author| mailmap.resolve_cow(author.trim()))
72 {
73 let mut string_ref = |s: &[u8]| -> &'static BStr {
74 match string_heap.get(s) {
75 Some(n) => n.as_bstr(),
76 None => {
77 let sv: Vec<u8> = s.to_owned();
78 string_heap.insert(Box::leak(sv.into_boxed_slice()));
79 (*string_heap.get(s).expect("present")).as_ref()
80 }
81 }
82 };
83 let name = string_ref(author.name.as_ref());
84 let email = string_ref(author.email.as_ref());
85
86 out.push((
87 commit_idx,
88 actor::SignatureRef {
89 name,
90 email,
91 time: author.time,
92 },
93 ));
94 }
95 }
96 out.shrink_to_fit();
97 out.sort_by(|a, b| {
98 a.1.email
99 .cmp(b.1.email)
100 .then(a.1.time.seconds.cmp(&b.1.time.seconds).reverse())
101 });
102 Ok(out)
103 });
104
105 let (stats_progresses, stats_counters) = needs_stats
106 .then(|| {
107 let mut sp = progress.add_child("extract stats");
108 sp.init(None, progress::count("commits"));
109 let sc = sp.counter();
110
111 let mut cp = progress.add_child("find changes");
112 cp.init(None, progress::count("modified files"));
113 let cc = cp.counter();
114
115 let mut lp = progress.add_child("find changes");
116 lp.init(None, progress::count("diff lines"));
117 let lc = lp.counter();
118
119 (Some((sp, cp, lp)), Some((sc, cc, lc)))
120 })
121 .unwrap_or_default();
122
123 let mut progress = progress.add_child("traverse commit graph");
124 progress.init(None, progress::count("commits"));
125
126 let (tx_tree_id, stat_threads) = needs_stats
127 .then(|| {
128 let (tx, threads) = spawn_tree_delta_threads(
129 scope,
130 threads,
131 line_stats,
132 repo.clone(),
133 stats_counters.clone().expect("counters are set"),
134 );
135 (Some(tx), threads)
136 })
137 .unwrap_or_default();
138
139 let mut commit_idx = 0_u32;
140 let mut skipped_merge_commits = 0;
141 const CHUNK_SIZE: usize = 50;
142 let mut chunk = Vec::with_capacity(CHUNK_SIZE);
143 let mut commit_iter = commit_id.ancestors(&repo.objects);
144 let mut is_shallow = false;
145 while let Some(c) = commit_iter.next() {
146 progress.inc();
147 if gix::interrupt::is_triggered() {
148 bail!("Cancelled by user");
149 }
150 match c {
151 Ok(c) => {
152 tx.send((commit_idx, commit_iter.commit_data().to_owned())).ok();
153 let tree_delta_info = tx_tree_id.as_ref().and_then(|tx| {
154 let mut parents = c.parent_ids.into_iter();
155 parents
156 .next()
157 .map(|first_parent| (tx, Some(first_parent), c.id.to_owned()))
158 .filter(|_| {
159 if parents.next().is_some() {
160 skipped_merge_commits += 1;
161 false
162 } else {
163 true
164 }
165 })
166 });
167 if let Some((tx_tree, first_parent, commit)) = tree_delta_info {
168 if chunk.len() == CHUNK_SIZE {
169 tx_tree
170 .send(std::mem::replace(&mut chunk, Vec::with_capacity(CHUNK_SIZE)))
171 .ok();
172 } else {
173 chunk.push((commit_idx, first_parent, commit));
174 }
175 }
176 commit_idx += 1;
177 }
178 Err(gix::traverse::commit::simple::Error::Find { .. }) => {
179 is_shallow = true;
180 break;
181 }
182 Err(err) => return Err(err.into()),
183 };
184 }
185 if let Some(tx) = tx_tree_id {
186 tx.send(chunk).ok();
187 }
188 drop(tx);
189 progress.show_throughput(start);
190 drop(progress);
191
192 let stats_by_commit_idx = match stats_progresses {
193 Some((mut stat_progress, change_progress, line_progress)) => {
194 stat_progress.set_max(Some(commit_idx as usize - skipped_merge_commits));
195 let mut stats = Vec::new();
196 for handle in stat_threads {
197 stats.extend(handle.join().expect("no panic")?);
198 if gix::interrupt::is_triggered() {
199 bail!("Cancelled by user");
200 }
201 }
202 stats.sort_by_key(|t| t.0);
203 stat_progress.show_throughput(start);
204 change_progress.show_throughput(start);
205 line_progress.show_throughput(start);
206 stats
207 }
208 None => Vec::new(),
209 };
210
211 Ok((
212 extract_signatures.join().expect("no panic")?,
213 stats_by_commit_idx,
214 is_shallow,
215 skipped_merge_commits,
216 ))
217 })?
218 };
219
220 if commit_authors.is_empty() {
221 bail!("No commits to process");
222 }
223
224 let start = Instant::now();
225 let mut current_email = &commit_authors[0].1.email;
226 let mut slice_start = 0;
227 let mut results_by_hours = Vec::new();
228 let mut ignored_bot_commits = 0_u32;
229 for (idx, (_, elm)) in commit_authors.iter().enumerate() {
230 if elm.email != *current_email {
231 let estimate = estimate_hours(&commit_authors[slice_start..idx], &stats);
232 slice_start = idx;
233 current_email = &elm.email;
234 if ignore_bots && estimate.name.contains_str(b"[bot]") {
235 ignored_bot_commits += estimate.num_commits;
236 continue;
237 }
238 results_by_hours.push(estimate);
239 }
240 }
241 if let Some(commits) = commit_authors.get(slice_start..) {
242 results_by_hours.push(estimate_hours(commits, &stats));
243 }
244
245 let num_authors = results_by_hours.len();
246 let mut results_by_hours = if !omit_unify_identities {
247 deduplicate_identities(&results_by_hours)
248 } else {
249 results_by_hours
250 .iter()
251 .fold(Vec::with_capacity(results_by_hours.len()), |mut acc, e| {
252 acc.push(e.into());
253 acc
254 })
255 };
256 let elapsed = start.elapsed();
257 progress.done(format!(
258 "Extracted and organized data from {} commits in {:?} ({:0.0} commits/s)",
259 commit_authors.len(),
260 elapsed,
261 commit_authors.len() as f32 / elapsed.as_secs_f32()
262 ));
263
264 let num_unique_authors = results_by_hours.len();
265 let (total_hours, total_commits, total_files, total_lines) = results_by_hours
266 .iter()
267 .map(|e| (e.hours, e.num_commits, e.files, e.lines))
268 .reduce(|a, b| (a.0 + b.0, a.1 + b.1, a.2.clone().added(&b.2), a.3.clone().added(&b.3)))
269 .expect("at least one commit at this point");
270 if show_pii {
271 results_by_hours.sort_by(|a, b| a.hours.partial_cmp(&b.hours).unwrap_or(std::cmp::Ordering::Equal));
272 for entry in &results_by_hours {
273 entry.write_to(
274 total_hours,
275 file_stats.then_some(total_files),
276 line_stats.then_some(total_lines),
277 &mut out,
278 )?;
279 writeln!(out)?;
280 }
281 }
282 writeln!(
283 out,
284 "total hours: {:.02}\ntotal 8h days: {:.02}\ntotal commits = {}{}\ntotal authors: {}",
285 total_hours,
286 total_hours / HOURS_PER_WORKDAY,
287 total_commits,
288 is_shallow.then_some(" (shallow)").unwrap_or_default(),
289 num_authors
290 )?;
291 if file_stats {
292 writeln!(
293 out,
294 "total files added/removed/modified/remaining: {}/{}/{}/{}",
295 total_files.added,
296 total_files.removed,
297 total_files.modified,
298 total_files.added - total_files.removed
299 )?;
300 }
301 if line_stats {
302 writeln!(
303 out,
304 "total lines added/removed/remaining: {}/{}/{}",
305 total_lines.added,
306 total_lines.removed,
307 total_lines.added - total_lines.removed
308 )?;
309 }
310 if !omit_unify_identities {
311 writeln!(
312 out,
313 "total unique authors: {} ({:.02}% duplication)",
314 num_unique_authors,
315 (1.0 - (num_unique_authors as f32 / num_authors as f32)) * 100.0
316 )?;
317 }
318 if ignored_bot_commits != 0 {
319 writeln!(out, "commits by bots: {ignored_bot_commits}")?;
320 }
321 if needs_stats && skipped_merge_commits != 0 {
322 writeln!(out, "stats omitted for {skipped_merge_commits} merge commits")?;
323 }
324 assert_eq!(
325 total_commits,
326 commit_authors.len() as u32 - ignored_bot_commits,
327 "need to get all commits"
328 );
329 Ok(())
330}
331
332mod core;
333use self::core::{deduplicate_identities, estimate_hours, HOURS_PER_WORKDAY};
334
335mod util;
336use util::{CommitIdx, FileStats, LineStats, WorkByEmail, WorkByPerson};
337
338use crate::hours::core::spawn_tree_delta_threads;