git_stats/logic/
aggregate.rs1use std::collections::HashMap;
2
3use crate::model::{Author, CommitMeta, DiffStat, Review, Stat};
4
/// One commit's contribution to the per-author statistics.
///
/// This is the input unit of [`aggregate`]: commits whose `author_key` values
/// are equal are merged into a single `Stat` row.
#[derive(Debug, Clone)]
pub struct CommitStat {
    /// Grouping key for the commit's author; `aggregate` compares it verbatim.
    pub author_key: String,
    /// Diff counters for the commit (`files`, `insertions`, `deletions`).
    pub diff: DiffStat,
}
11
/// Trailer tokens that count as a review credit in [`aggregate_reviews`].
///
/// Trailer tokens are matched against these with an ASCII case-insensitive
/// comparison, so the entries are written in lowercase.
const REVIEW_TOKENS: [&str; 3] = ["acked-by", "tested-by", "reviewed-by"];
14
15#[must_use]
17pub fn author_key(author: &Author, email: bool) -> String {
18 if email {
19 format!("{} <{}>", author.name, author.email)
20 } else {
21 author.name.clone()
22 }
23}
24
25#[must_use]
28pub fn aggregate(commits: &[CommitStat]) -> Vec<Stat> {
29 let mut stats: Vec<Stat> = Vec::new();
30 let mut index: HashMap<&str, usize> = HashMap::new();
31 for c in commits {
32 let i = *index.entry(c.author_key.as_str()).or_insert_with(|| {
33 stats.push(Stat {
34 author: c.author_key.clone(),
35 commits: 0,
36 num_files: 0,
37 insertions: 0,
38 deletions: 0,
39 net: 0,
40 });
41 stats.len() - 1
42 });
43 let s = &mut stats[i];
44 s.commits = s.commits.saturating_add(1);
45 s.num_files = s.num_files.saturating_add(c.diff.files);
46 s.insertions = s.insertions.saturating_add(c.diff.insertions);
47 s.deletions = s.deletions.saturating_add(c.diff.deletions);
48 }
49 for s in &mut stats {
50 s.net = net(s.insertions, s.deletions);
51 }
52 stats
53}
54
55#[must_use]
57pub fn compute_totals(stats: &[Stat]) -> Stat {
58 let mut total = Stat {
59 author: "Total".to_string(),
60 commits: 0,
61 num_files: 0,
62 insertions: 0,
63 deletions: 0,
64 net: 0,
65 };
66 for s in stats {
67 total.commits = total.commits.saturating_add(s.commits);
68 total.num_files = total.num_files.saturating_add(s.num_files);
69 total.insertions = total.insertions.saturating_add(s.insertions);
70 total.deletions = total.deletions.saturating_add(s.deletions);
71 }
72 total.net = net(total.insertions, total.deletions);
73 total
74}
75
76#[must_use]
81pub fn aggregate_reviews<'a>(
82 metas: impl IntoIterator<Item = &'a CommitMeta>,
83 email: bool,
84) -> Vec<Review> {
85 let mut reviews: Vec<Review> = Vec::new();
86 let mut index: HashMap<String, usize> = HashMap::new();
87 for m in metas {
88 let mut credited: Vec<String> = Vec::new();
89 for t in &m.trailers {
90 if !REVIEW_TOKENS
91 .iter()
92 .any(|token| t.token.eq_ignore_ascii_case(token))
93 {
94 continue;
95 }
96 let key = reviewer_key(&t.value, email);
97 if credited.contains(&key) {
98 continue;
99 }
100 credited.push(key.clone());
101 let i = *index.entry(key.clone()).or_insert_with(|| {
102 reviews.push(Review {
103 author: key.clone(),
104 commits: 0,
105 });
106 reviews.len() - 1
107 });
108 reviews[i].commits = reviews[i].commits.saturating_add(1);
109 }
110 }
111 reviews.sort_by_key(|r| std::cmp::Reverse(r.commits));
112 reviews
113}
114
/// Derives the grouping key for a reviewer from a trailer value such as
/// `"Ada Lovelace <ada@example.org>"`.
///
/// Surrounding whitespace is always trimmed. With `email` set the whole
/// trimmed value is the key. Otherwise the key is the text before the first
/// `" <"`, trimmed again, or the whole trimmed value when no `" <"` occurs.
fn reviewer_key(value: &str, email: bool) -> String {
    let trimmed = value.trim();
    let key = if email {
        trimmed
    } else {
        // Cut at the first " <" so multi-word names survive intact.
        trimmed
            .split_once(" <")
            .map_or(trimmed, |(name, _)| name.trim())
    };
    key.to_string()
}
127
/// Returns `insertions - deletions` as a signed value, saturated to the
/// symmetric range `[-i64::MAX, i64::MAX]`.
///
/// The subtraction is done in `u64` on the larger-minus-smaller pair and only
/// the resulting gap is converted, so the answer is exact whenever the true
/// difference fits in the output range — including when one or both counters
/// exceed `i64::MAX`. Clamping each operand before subtracting would instead
/// collapse such inputs (for example two large counters that differ by 5
/// would report a net of 0).
///
/// The lower bound is `-i64::MAX` rather than `i64::MIN`, so negating the
/// result or taking its absolute value can never overflow.
fn net(insertions: u64, deletions: u64) -> i64 {
    if insertions >= deletions {
        i64::try_from(insertions - deletions).unwrap_or(i64::MAX)
    } else {
        // The gap is non-negative, so negating a successfully converted value is safe.
        i64::try_from(deletions - insertions).map_or(-i64::MAX, |gap| -gap)
    }
}
134
// Property-based tests for the aggregation helpers, driven by the `hegel`
// generators.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::DiffStat;
    use hegel::generators;
    use std::collections::{BTreeMap, BTreeSet};

    // Generator for a list of commits spread over five fixed author names.
    //
    // Each diff counter is drawn as a `u32` and widened to `u64`, so a sum
    // over the (at most 200, going by `max_value(200)`) commits stays far
    // below `i64::MAX`; the `try_from(..).unwrap()` calls in the tests below
    // therefore cannot fail on this data.
    #[hegel::composite]
    fn commit_list(tc: hegel::TestCase) -> Vec<CommitStat> {
        const NAMES: [&str; 5] = ["Ada Lovelace", "Grace Hopper", "Bob", "Carol Shaw", "Don"];
        let n = tc.draw(generators::integers::<usize>().max_value(200));
        let mut commits = Vec::with_capacity(n);
        for _ in 0..n {
            // NOTE(review): assumes `max_value` is an inclusive bound, which
            // keeps the index inside `NAMES` — confirm against hegel's docs.
            let who = tc.draw(generators::integers::<usize>().max_value(NAMES.len() - 1));
            commits.push(CommitStat {
                author_key: NAMES[who].to_string(),
                diff: DiffStat {
                    insertions: u64::from(tc.draw(generators::integers::<u32>())),
                    deletions: u64::from(tc.draw(generators::integers::<u32>())),
                    files: u64::from(tc.draw(generators::integers::<u32>())),
                },
            });
        }
        commits
    }

    // Reduces rows to an author-keyed map of their counters, so two results
    // can be compared without regard to row order.
    fn fingerprint(stats: &[Stat]) -> BTreeMap<String, (u64, u64, u64, u64, i64)> {
        stats
            .iter()
            .map(|s| {
                (
                    s.author.clone(),
                    (s.commits, s.num_files, s.insertions, s.deletions, s.net),
                )
            })
            .collect()
    }

    // Totals of the aggregated rows must equal sums taken directly over the
    // raw commits.
    #[hegel::test]
    fn totals_match_independent_sums(tc: hegel::TestCase) {
        let commits = tc.draw(commit_list());
        let totals = compute_totals(&aggregate(&commits));

        let exp_ins: u64 = commits.iter().map(|c| c.diff.insertions).sum();
        let exp_del: u64 = commits.iter().map(|c| c.diff.deletions).sum();

        assert_eq!(totals.commits, u64::try_from(commits.len()).unwrap());
        assert_eq!(totals.insertions, exp_ins);
        assert_eq!(totals.deletions, exp_del);
        assert_eq!(totals.num_files, commits.iter().map(|c| c.diff.files).sum());
        assert_eq!(
            totals.net,
            i64::try_from(exp_ins).unwrap() - i64::try_from(exp_del).unwrap()
        );
    }

    // Every aggregated row's `net` must equal its own insertions minus
    // deletions.
    #[hegel::test]
    fn per_stat_net_is_insertions_minus_deletions(tc: hegel::TestCase) {
        let commits = tc.draw(commit_list());
        for s in aggregate(&commits) {
            assert_eq!(
                s.net,
                i64::try_from(s.insertions).unwrap() - i64::try_from(s.deletions).unwrap()
            );
        }
    }

    // `aggregate` must emit exactly one row per distinct author key.
    #[hegel::test]
    fn one_row_per_distinct_author(tc: hegel::TestCase) {
        let commits = tc.draw(commit_list());
        let distinct: BTreeSet<&str> = commits.iter().map(|c| c.author_key.as_str()).collect();
        assert_eq!(aggregate(&commits).len(), distinct.len());
    }

    // Reversing the commit order may reorder rows but must not change any
    // author's counters; `fingerprint` hides the row order.
    #[hegel::test]
    fn aggregation_is_order_independent(tc: hegel::TestCase) {
        let commits = tc.draw(commit_list());
        let forward = aggregate(&commits);
        let mut reversed = commits;
        reversed.reverse();
        let backward = aggregate(&reversed);
        assert_eq!(fingerprint(&forward), fingerprint(&backward));
    }

    // Names of one to four space-separated words must come back unchanged
    // from `reviewer_key`, with or without a trailing `<address>`.
    #[hegel::test]
    fn reviewer_key_handles_multiword_names(tc: hegel::TestCase) {
        let name = tc.draw(generators::from_regex(r"[A-Za-z]+( [A-Za-z]+){0,3}").fullmatch(true));
        if tc.draw(generators::booleans()) {
            let email = tc.draw(generators::from_regex(r"[a-z]+@[a-z]+\.[a-z]+").fullmatch(true));
            let value = format!("{name} <{email}>");
            assert_eq!(reviewer_key(&value, false), name);
            assert_eq!(reviewer_key(&value, true), value);
        } else {
            // No address part: both modes return the bare name.
            assert_eq!(reviewer_key(&name, false), name);
            assert_eq!(reviewer_key(&name, true), name);
        }
    }

    // Smoke test over the full `u64` range for a single author: the result
    // is discarded, so the only thing checked is that nothing panics.
    #[hegel::test]
    fn aggregate_never_panics_on_boundary_values(tc: hegel::TestCase) {
        let n = tc.draw(generators::integers::<usize>().max_value(20));
        let mut commits = Vec::with_capacity(n);
        for _ in 0..n {
            commits.push(CommitStat {
                author_key: "boundary".to_string(),
                diff: DiffStat {
                    insertions: tc.draw(generators::integers::<u64>()),
                    deletions: tc.draw(generators::integers::<u64>()),
                    files: tc.draw(generators::integers::<u64>()),
                },
            });
        }
        let _ = compute_totals(&aggregate(&commits));
    }
}