1pub mod github;
25pub mod html;
26pub mod identity;
27pub mod model;
28pub mod repo;
29pub mod svg;
30
31use anyhow::{bail, Result};
32
33pub use model::{Contributor, RepoMeta};
34
/// Options controlling a single [`analyze`] run.
///
/// `Debug` is derived so a configuration can be logged or shown in assertion
/// failures.
#[derive(Clone, Debug)]
pub struct Config {
    /// Branch handed to `repo::prepare` and `repo::read_commits`; `None`
    /// leaves the choice to those functions.
    pub branch: Option<String>,
    /// Optional lower bound forwarded to `repo::read_commits`.
    pub since: Option<String>,
    /// Optional upper bound forwarded to `repo::read_commits`.
    pub until: Option<String>,
    /// Forwarded to `repo::read_commits` as its "no merges" flag.
    pub no_merges: bool,
    /// Overrides the repository display name in the resulting `RepoMeta`.
    pub title: Option<String>,
    /// Case-insensitive substrings; a contributor whose name or login
    /// contains any of them is dropped from the result.
    pub exclude: Vec<String>,
    /// String pairs forwarded to `identity::build_contributors`.
    pub groups: Vec<(String, String)>,
    /// Identity overrides forwarded to `identity::apply_identity_file`
    /// (skipped when empty).
    pub identities: Vec<Vec<String>>,
    /// Enables token lookup, GitHub enrichment and avatar fetching.
    pub use_github: bool,
    /// When `false`, affiliations are cleared after GitHub enrichment.
    pub detect_affiliation: bool,
    /// Forwarded to `identity::cluster_commits`.
    pub merge_names: bool,
    /// Together with `use_github`, enables contributor and owner avatars.
    pub embed_avatars: bool,
    /// Size forwarded to `github::embed_avatars`.
    pub avatar_size: u32,
    /// Prints progress messages to stderr.
    pub verbose: bool,
}
68
69impl Default for Config {
70 fn default() -> Self {
71 Config {
72 branch: None,
73 since: None,
74 until: None,
75 no_merges: false,
76 title: None,
77 exclude: Vec::new(),
78 groups: Vec::new(),
79 identities: Vec::new(),
80 use_github: true,
81 detect_affiliation: true,
82 merge_names: true,
83 embed_avatars: true,
84 avatar_size: 64,
85 verbose: false,
86 }
87 }
88}
89
90pub struct Analysis {
93 pub contributors: Vec<Contributor>,
94 pub meta: RepoMeta,
95}
96
/// Ordering applied by [`sort`].
///
/// `Debug` is derived so a key can be printed in logs and assertion messages.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Sort {
    /// Ascending by `first`; ties broken by commit count, highest first.
    First,
    /// Descending by `last`; ties broken by commit count, highest first.
    Last,
    /// Descending by commit count.
    Commits,
    /// Descending by `last - first`.
    Duration,
    /// Ascending by lowercased name.
    Name,
}
111
112pub fn sort(rows: &mut [Contributor], key: Sort) {
114 match key {
115 Sort::First => rows.sort_by(|a, b| a.first.cmp(&b.first).then(b.commits.cmp(&a.commits))),
116 Sort::Last => rows.sort_by(|a, b| b.last.cmp(&a.last).then(b.commits.cmp(&a.commits))),
117 Sort::Commits => rows.sort_by_key(|c| std::cmp::Reverse(c.commits)),
118 Sort::Duration => rows.sort_by_key(|c| std::cmp::Reverse(c.last - c.first)),
119 Sort::Name => rows.sort_by_key(|a| a.name.to_lowercase()),
120 }
121}
122
123pub fn analyze(input: &str, cfg: &Config) -> Result<Analysis> {
126 macro_rules! log {
127 ($($arg:tt)*) => { if cfg.verbose { eprintln!($($arg)*); } };
128 }
129
130 let prepared = repo::prepare(input, cfg.branch.as_deref())?;
131 log!(
132 "→ repository: {} (branch {})",
133 prepared.display_name,
134 prepared.branch
135 );
136
137 let commits = repo::read_commits(
138 &prepared,
139 cfg.branch.as_deref(),
140 cfg.since.as_deref(),
141 cfg.until.as_deref(),
142 cfg.no_merges,
143 )?;
144 if commits.is_empty() {
145 bail!("no commits found");
146 }
147 log!(
148 "→ {} commits from {} distinct author emails",
149 model::thousands(commits.len() as u64),
150 distinct_emails(&commits)
151 );
152
153 let mut clusters = identity::cluster_commits(&commits, cfg.merge_names);
154
155 let client = github::GhClient::new(if cfg.use_github {
156 github::find_token()
157 } else {
158 None
159 });
160 if cfg.use_github {
161 if let Some(slug) = &prepared.slug {
162 log!("→ enriching from GitHub ({slug})");
163 github::enrich_clusters(&mut clusters, &commits, slug, &client, cfg.verbose);
164 clusters = identity::merge_by_login(clusters);
165 github::fetch_profiles(&mut clusters, &client, cfg.verbose);
166 if !cfg.detect_affiliation {
167 for cl in clusters.iter_mut() {
168 cl.affiliation = None;
169 }
170 }
171 } else {
172 log!("→ not a GitHub repo, skipping enrichment");
173 }
174 }
175
176 if !cfg.identities.is_empty() {
177 clusters = identity::apply_identity_file(clusters, &cfg.identities);
178 log!("→ applied {} identity overrides", cfg.identities.len());
179 }
180
181 let mut contributors = identity::build_contributors(&clusters, &commits, &cfg.groups);
182
183 let n_groups = canonicalize_groups(&mut contributors);
184 if n_groups > 0 {
185 log!("→ {n_groups} distinct affiliations/groups");
186 }
187
188 if !cfg.exclude.is_empty() {
189 contributors.retain(|c| {
190 !cfg.exclude.iter().any(|pat| {
191 let p = pat.to_lowercase();
192 c.name.to_lowercase().contains(&p)
193 || c.login
194 .as_deref()
195 .is_some_and(|l| l.to_lowercase().contains(&p))
196 })
197 });
198 }
199
200 log!(
201 "→ merged to {} contributors ({} bots)",
202 contributors.len(),
203 contributors.iter().filter(|c| c.bot).count()
204 );
205
206 if cfg.embed_avatars && cfg.use_github {
207 github::embed_avatars(&mut contributors, &client, cfg.avatar_size, cfg.verbose);
208 }
209
210 let owner_avatar = if cfg.use_github && cfg.embed_avatars {
212 prepared
213 .slug
214 .as_deref()
215 .and_then(|s| s.split('/').next())
216 .and_then(|owner| github::fetch_avatar(&client, owner, 48))
217 } else {
218 None
219 };
220
221 let first = contributors.iter().map(|c| c.first).min().unwrap_or(0);
222 let last = contributors.iter().map(|c| c.last).max().unwrap_or(0);
223 let meta = RepoMeta {
224 name: cfg
225 .title
226 .clone()
227 .unwrap_or_else(|| prepared.display_name.clone()),
228 url: prepared.url.clone(),
229 slug: prepared.slug.clone(),
230 branch: prepared.branch.clone(),
231 first,
232 last,
233 total_commits: commits.len() as u64,
234 total_contributors: contributors.iter().filter(|c| !c.bot).count(),
235 generated: chrono::Utc::now().format("%Y-%m-%d").to_string(),
236 owner_avatar,
237 };
238
239 Ok(Analysis { contributors, meta })
240}
241
242fn distinct_emails(commits: &[model::Commit]) -> usize {
243 let mut e: Vec<&str> = commits.iter().map(|c| c.email.as_str()).collect();
244 e.sort_unstable();
245 e.dedup();
246 e.len()
247}
248
249fn canonicalize_groups(contributors: &mut [Contributor]) -> usize {
254 use std::collections::HashMap;
255 let alnum_key = |g: &str| -> String {
256 let lower = g.to_lowercase();
257 let trimmed = lower.strip_prefix("the ").unwrap_or(&lower);
258 trimmed.chars().filter(|c| c.is_alphanumeric()).collect()
259 };
260
261 let mut variants: HashMap<String, usize> = HashMap::new();
262 for c in contributors.iter() {
263 if let Some(g) = &c.group {
264 *variants.entry(g.clone()).or_default() += 1;
265 }
266 }
267
268 let mut keys: Vec<String> = variants.keys().map(|g| alnum_key(g)).collect();
269 keys.sort();
270 keys.dedup();
271 let resolve = |key: &str| -> String {
272 keys.iter()
273 .filter(|k| k.len() >= 6 && key.starts_with(*k))
274 .min_by_key(|k| k.len())
275 .map(|k| k.to_string())
276 .unwrap_or_else(|| key.to_string())
277 };
278
279 let mut best: HashMap<String, (&String, usize)> = HashMap::new();
280 for (g, n) in &variants {
281 let cluster = resolve(&alnum_key(g));
282 let score = |g: &str, n: usize| {
283 n * 4
284 + usize::from(g.contains(' ')) * 2
285 + usize::from(g.chars().any(|c| c.is_uppercase()))
286 };
287 let entry = best.entry(cluster).or_insert((g, *n));
288 if score(g, *n) > score(entry.0, entry.1) {
289 *entry = (g, *n);
290 }
291 }
292
293 let display: HashMap<String, String> = best
294 .iter()
295 .map(|(k, (g, _))| (k.clone(), (*g).clone()))
296 .collect();
297 for c in contributors.iter_mut() {
298 if let Some(g) = &c.group {
299 c.group = display
300 .get(&resolve(&alnum_key(g)))
301 .cloned()
302 .or(c.group.clone());
303 }
304 }
305 display.len()
306}