Skip to main content

git_stats/logic/
aggregate.rs

1use std::collections::HashMap;
2
3use crate::model::{Author, CommitMeta, DiffStat, Review, Stat};
4
/// One commit's contribution to the aggregate: its grouping key and numstat.
///
/// `aggregate` folds a slice of these into one `Stat` row per distinct
/// `author_key`.
#[derive(Debug, Clone)]
pub struct CommitStat {
    /// Grouping key: commits that share the same key are summed into one row.
    pub author_key: String,
    /// The file, insertion and deletion counts recorded for this commit.
    pub diff: DiffStat,
}
11
/// Trailer tokens (lowercased) that count as a review/test sign-off.
///
/// `aggregate_reviews` compares each trailer's token against these entries
/// with `eq_ignore_ascii_case`, so the casing used in the commit message does
/// not matter.
const REVIEW_TOKENS: [&str; 3] = ["acked-by", "tested-by", "reviewed-by"];
14
15/// The author display and grouping key: `Name <email>` when `email`, else `Name`.
16#[must_use]
17pub fn author_key(author: &Author, email: bool) -> String {
18    if email {
19        format!("{} <{}>", author.name, author.email)
20    } else {
21        author.name.clone()
22    }
23}
24
25/// Aggregate per-commit stats into per-author totals, returned in first-seen
26/// order (callers sort afterwards). Sums saturate rather than overflow.
27#[must_use]
28pub fn aggregate(commits: &[CommitStat]) -> Vec<Stat> {
29    let mut stats: Vec<Stat> = Vec::new();
30    let mut index: HashMap<&str, usize> = HashMap::new();
31    for c in commits {
32        let i = *index.entry(c.author_key.as_str()).or_insert_with(|| {
33            stats.push(Stat {
34                author: c.author_key.clone(),
35                commits: 0,
36                num_files: 0,
37                insertions: 0,
38                deletions: 0,
39                net: 0,
40            });
41            stats.len() - 1
42        });
43        let s = &mut stats[i];
44        s.commits = s.commits.saturating_add(1);
45        s.num_files = s.num_files.saturating_add(c.diff.files);
46        s.insertions = s.insertions.saturating_add(c.diff.insertions);
47        s.deletions = s.deletions.saturating_add(c.diff.deletions);
48    }
49    for s in &mut stats {
50        s.net = net(s.insertions, s.deletions);
51    }
52    stats
53}
54
55/// Sum per-author rows into a single "Total" row.
56#[must_use]
57pub fn compute_totals(stats: &[Stat]) -> Stat {
58    let mut total = Stat {
59        author: "Total".to_string(),
60        commits: 0,
61        num_files: 0,
62        insertions: 0,
63        deletions: 0,
64        net: 0,
65    };
66    for s in stats {
67        total.commits = total.commits.saturating_add(s.commits);
68        total.num_files = total.num_files.saturating_add(s.num_files);
69        total.insertions = total.insertions.saturating_add(s.insertions);
70        total.deletions = total.deletions.saturating_add(s.deletions);
71    }
72    total.net = net(total.insertions, total.deletions);
73    total
74}
75
76/// Count, per reviewer, the commits they signed off on via Acked-by / Tested-by
77/// / Reviewed-by trailers. A reviewer is credited at most once per commit even
78/// if they appear in several of those trailers. Returned in descending commit
79/// count, ties broken by first-seen order.
80#[must_use]
81pub fn aggregate_reviews<'a>(
82    metas: impl IntoIterator<Item = &'a CommitMeta>,
83    email: bool,
84) -> Vec<Review> {
85    let mut reviews: Vec<Review> = Vec::new();
86    let mut index: HashMap<String, usize> = HashMap::new();
87    for m in metas {
88        let mut credited: Vec<String> = Vec::new();
89        for t in &m.trailers {
90            if !REVIEW_TOKENS
91                .iter()
92                .any(|token| t.token.eq_ignore_ascii_case(token))
93            {
94                continue;
95            }
96            let key = reviewer_key(&t.value, email);
97            if credited.contains(&key) {
98                continue;
99            }
100            credited.push(key.clone());
101            let i = *index.entry(key.clone()).or_insert_with(|| {
102                reviews.push(Review {
103                    author: key.clone(),
104                    commits: 0,
105                });
106                reviews.len() - 1
107            });
108            reviews[i].commits = reviews[i].commits.saturating_add(1);
109        }
110    }
111    reviews.sort_by_key(|r| std::cmp::Reverse(r.commits));
112    reviews
113}
114
/// Derive the reviewer key from a trailer value.
///
/// Surrounding whitespace is always stripped. With `email` set, the remaining
/// text is used verbatim. Otherwise everything from the first ` <` onwards is
/// dropped and the name is trimmed again; a value without ` <` is returned
/// whole.
fn reviewer_key(value: &str, email: bool) -> String {
    let trimmed = value.trim();
    let kept = if email {
        trimmed
    } else {
        trimmed
            .split_once(" <")
            .map_or(trimmed, |(name, _)| name.trim())
    };
    kept.to_string()
}
127
/// Signed line delta: insertions minus deletions.
///
/// Each operand is first clamped to `i64::MAX`, so both are non-negative
/// `i64` values and the subtraction can never overflow or panic.
fn net(insertions: u64, deletions: u64) -> i64 {
    let clamp = |count: u64| i64::try_from(count).unwrap_or(i64::MAX);
    clamp(insertions) - clamp(deletions)
}
134
/// Property tests for the aggregation helpers, driven by `hegel` generators.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::DiffStat;
    use hegel::generators;
    use std::collections::{BTreeMap, BTreeSet};

    /// A small author pool forces grouping; the names include multi-word
    /// entries so grouping is exercised with spaces. `u32`-range diffs keep the
    /// sums of up to 200 commits comfortably inside `u64` and `i64`, so the
    /// test's own arithmetic cannot overflow before the code under test runs.
    #[hegel::composite]
    fn commit_list(tc: hegel::TestCase) -> Vec<CommitStat> {
        const NAMES: [&str; 5] = ["Ada Lovelace", "Grace Hopper", "Bob", "Carol Shaw", "Don"];
        let n = tc.draw(generators::integers::<usize>().max_value(200));
        let mut commits = Vec::with_capacity(n);
        for _ in 0..n {
            let who = tc.draw(generators::integers::<usize>().max_value(NAMES.len() - 1));
            commits.push(CommitStat {
                author_key: NAMES[who].to_string(),
                diff: DiffStat {
                    insertions: u64::from(tc.draw(generators::integers::<u32>())),
                    deletions: u64::from(tc.draw(generators::integers::<u32>())),
                    files: u64::from(tc.draw(generators::integers::<u32>())),
                },
            });
        }
        commits
    }

    /// Order-insensitive view of a stats list: author mapped to
    /// `(commits, num_files, insertions, deletions, net)`. Collecting into a
    /// `BTreeMap` discards row order so two results can be compared by content.
    fn fingerprint(stats: &[Stat]) -> BTreeMap<String, (u64, u64, u64, u64, i64)> {
        stats
            .iter()
            .map(|s| {
                (
                    s.author.clone(),
                    (s.commits, s.num_files, s.insertions, s.deletions, s.net),
                )
            })
            .collect()
    }

    /// The "Total" row built from the aggregated rows must equal sums computed
    /// directly from the raw commits, including the derived `net`.
    #[hegel::test]
    fn totals_match_independent_sums(tc: hegel::TestCase) {
        let commits = tc.draw(commit_list());
        let totals = compute_totals(&aggregate(&commits));

        let exp_ins: u64 = commits.iter().map(|c| c.diff.insertions).sum();
        let exp_del: u64 = commits.iter().map(|c| c.diff.deletions).sum();

        assert_eq!(totals.commits, u64::try_from(commits.len()).unwrap());
        assert_eq!(totals.insertions, exp_ins);
        assert_eq!(totals.deletions, exp_del);
        assert_eq!(totals.num_files, commits.iter().map(|c| c.diff.files).sum());
        assert_eq!(
            totals.net,
            i64::try_from(exp_ins).unwrap() - i64::try_from(exp_del).unwrap()
        );
    }

    /// Every aggregated row's `net` equals its own insertions minus deletions.
    #[hegel::test]
    fn per_stat_net_is_insertions_minus_deletions(tc: hegel::TestCase) {
        let commits = tc.draw(commit_list());
        for s in aggregate(&commits) {
            assert_eq!(
                s.net,
                i64::try_from(s.insertions).unwrap() - i64::try_from(s.deletions).unwrap()
            );
        }
    }

    /// `aggregate` emits exactly one row for each distinct author key.
    #[hegel::test]
    fn one_row_per_distinct_author(tc: hegel::TestCase) {
        let commits = tc.draw(commit_list());
        let distinct: BTreeSet<&str> = commits.iter().map(|c| c.author_key.as_str()).collect();
        assert_eq!(aggregate(&commits).len(), distinct.len());
    }

    /// Reversing the commit list changes row order at most: compared through
    /// `fingerprint`, the per-author totals are identical.
    #[hegel::test]
    fn aggregation_is_order_independent(tc: hegel::TestCase) {
        let commits = tc.draw(commit_list());
        let forward = aggregate(&commits);
        let mut reversed = commits;
        reversed.reverse();
        let backward = aggregate(&reversed);
        assert_eq!(fingerprint(&forward), fingerprint(&backward));
    }

    /// `reviewer_key` keeps a full multi-word name (spaces and all). With an
    /// email present and `email=false` it strips the ` <email>`; with `email=true`
    /// it keeps the whole value. When the trailer carries only a bare name (no
    /// ` <email>`) both modes return the name unchanged. The name generator
    /// produces 1-4 words so the ` <` split is exercised against names that
    /// themselves contain spaces, and `with_email` exercises both branches.
    #[hegel::test]
    fn reviewer_key_handles_multiword_names(tc: hegel::TestCase) {
        let name = tc.draw(generators::from_regex(r"[A-Za-z]+( [A-Za-z]+){0,3}").fullmatch(true));
        if tc.draw(generators::booleans()) {
            let email = tc.draw(generators::from_regex(r"[a-z]+@[a-z]+\.[a-z]+").fullmatch(true));
            let value = format!("{name} <{email}>");
            assert_eq!(reviewer_key(&value, false), name);
            assert_eq!(reviewer_key(&value, true), value);
        } else {
            // No email in the trailer: both modes return the name unchanged.
            assert_eq!(reviewer_key(&name, false), name);
            assert_eq!(reviewer_key(&name, true), name);
        }
    }

    /// Aggregation must stay panic-free even when counts approach `u64::MAX`.
    /// The saturating sums and the clamped `net` exist precisely for this, so
    /// the generator draws the full `u64` range (including the boundaries).
    #[hegel::test]
    fn aggregate_never_panics_on_boundary_values(tc: hegel::TestCase) {
        let n = tc.draw(generators::integers::<usize>().max_value(20));
        let mut commits = Vec::with_capacity(n);
        for _ in 0..n {
            commits.push(CommitStat {
                // A single shared key funnels every commit into one row, so the
                // sums are pushed towards saturation as quickly as possible.
                author_key: "boundary".to_string(),
                diff: DiffStat {
                    insertions: tc.draw(generators::integers::<u64>()),
                    deletions: tc.draw(generators::integers::<u64>()),
                    files: tc.draw(generators::integers::<u64>()),
                },
            });
        }
        // Neither call may panic; net is clamped, sums saturate.
        let _ = compute_totals(&aggregate(&commits));
    }
}