Skip to main content

git_stats/logic/
filter.rs

1use regex::Regex;
2
3use crate::model::CommitMeta;
4
5/// Compile author regex patterns, surfacing a clear error on a bad pattern.
6///
7/// # Errors
8///
9/// Returns the underlying [`regex::Error`] if any pattern is not a valid regex.
10pub fn compile_authors(patterns: &[String]) -> Result<Vec<Regex>, regex::Error> {
11    patterns.iter().map(|p| Regex::new(p)).collect()
12}
13
14/// Indices of the commits to keep after author and date filtering.
15///
16/// A commit is kept when it matches at least one author pattern (or there are
17/// none) and its committer time lies within `[since, until]` (each bound
18/// optional). Mirrors `git log --author/--since/--until`.
19pub fn keep_indices<'a>(
20    metas: impl IntoIterator<Item = &'a CommitMeta>,
21    authors: &[Regex],
22    since: Option<i64>,
23    until: Option<i64>,
24) -> Vec<usize> {
25    metas
26        .into_iter()
27        .enumerate()
28        .filter(|(_, m)| {
29            let author_ok = authors.is_empty() || {
30                let ident = format!("{} <{}>", m.author.name, m.author.email);
31                authors.iter().any(|re| re.is_match(&ident))
32            };
33            author_ok
34                && since.is_none_or(|s| m.time_seconds >= s)
35                && until.is_none_or(|u| m.time_seconds <= u)
36        })
37        .map(|(i, _)| i)
38        .collect()
39}
40
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::Author;
    use hegel::generators;

    /// Author names the generator samples from. Multi-word names exercise
    /// filtering against idents that contain spaces; some still begin with
    /// "a" so the `^a` test keeps a mix.
    const NAMES: [&str; 4] = ["ada lovelace", "amy pond", "bob ross", "carol kane"];

    /// The `"name <email>"` string that author patterns are matched against.
    fn ident_of(meta: &CommitMeta) -> String {
        format!("{} <{}>", meta.author.name, meta.author.email)
    }

    /// Reference check for the inclusive `[since, until]` window.
    fn in_window(time: i64, since: Option<i64>, until: Option<i64>) -> bool {
        since.is_none_or(|s| time >= s) && until.is_none_or(|u| time <= u)
    }

    /// Generates up to 100 commits with a sampled author and an arbitrary time.
    #[hegel::composite]
    fn commit_metas(tc: hegel::TestCase) -> Vec<CommitMeta> {
        let n = tc.draw(generators::integers::<usize>().max_value(100));
        let mut metas = Vec::with_capacity(n);
        for _ in 0..n {
            // Draw order (name, then time) is kept stable on purpose.
            let name = tc.draw(generators::sampled_from(NAMES.to_vec()));
            let time_seconds = tc.draw(generators::integers::<i64>());
            let author = Author {
                name: name.to_string(),
                email: format!("{name}@example.com"),
            };
            metas.push(CommitMeta {
                author,
                time_seconds,
                trailers: Vec::new(),
            });
        }
        metas
    }

    /// Without any filter, every index comes back in order.
    #[hegel::test]
    fn no_filters_keeps_everything(tc: hegel::TestCase) {
        let metas = tc.draw(commit_metas());
        let kept = keep_indices(metas.iter(), &[], None, None);
        let expected: Vec<usize> = (0..metas.len()).collect();
        assert_eq!(kept, expected);
    }

    /// Returned indices are valid positions and strictly increasing.
    #[hegel::test]
    fn kept_indices_are_sorted_and_in_range(tc: hegel::TestCase) {
        let metas = tc.draw(commit_metas());
        let since = tc.draw(generators::optional(generators::integers::<i64>()));
        let until = tc.draw(generators::optional(generators::integers::<i64>()));
        let kept = keep_indices(metas.iter(), &[], since, until);
        let len = metas.len();
        assert!(kept.iter().all(|&index| index < len));
        assert!(kept.windows(2).all(|pair| pair[0] < pair[1]));
    }

    /// A commit is kept exactly when its committer time lies in `[since, until]`.
    #[hegel::test]
    fn date_window_membership(tc: hegel::TestCase) {
        let metas = tc.draw(commit_metas());
        let since = tc.draw(generators::optional(generators::integers::<i64>()));
        let until = tc.draw(generators::optional(generators::integers::<i64>()));
        let kept = keep_indices(metas.iter(), &[], since, until);
        for (index, meta) in metas.iter().enumerate() {
            let expected = in_window(meta.time_seconds, since, until);
            assert_eq!(kept.contains(&index), expected);
        }
    }

    /// With one author pattern, kept commits are exactly those whose ident matches.
    #[hegel::test]
    fn author_filter_matches_ident(tc: hegel::TestCase) {
        let metas = tc.draw(commit_metas());
        let patterns = [Regex::new("^a").unwrap()];
        let kept = keep_indices(metas.iter(), &patterns, None, None);
        for (index, meta) in metas.iter().enumerate() {
            let expected = patterns[0].is_match(&ident_of(meta));
            assert_eq!(kept.contains(&index), expected);
        }
    }
}