1pub mod cache;
25pub mod github;
26pub mod html;
27pub mod identity;
28pub mod model;
29pub mod repo;
30pub mod svg;
31pub mod theme;
32
33use anyhow::{bail, Result};
34use std::sync::atomic::{AtomicUsize, Ordering};
35use std::sync::Mutex;
36
37pub use model::{Contributor, RepoMeta};
38
/// Upper bound on the number of worker threads `analyze_many` spawns to read
/// source histories concurrently (fewer are used when there are fewer sources).
const READ_THREADS: usize = 8;
41
/// Options controlling a single analysis run (see `analyze` / `analyze_many`).
#[derive(Clone)]
pub struct Config {
    /// Branch handed to `repo::prepare` and `repo::read_commits` for every
    /// source; `None` leaves the choice to those helpers.
    pub branch: Option<String>,
    /// Lower bound copied into `model::CommitFilter::since`.
    /// NOTE(review): accepted date format is defined by the `repo` module — confirm.
    pub since: Option<String>,
    /// Upper bound copied into `model::CommitFilter::until`.
    pub until: Option<String>,
    /// Copied into `model::CommitFilter::no_merges`; presumably drops merge commits.
    pub no_merges: bool,
    /// Overrides the automatically derived report name (`RepoMeta::name`).
    pub title: Option<String>,
    /// Case-insensitive substrings; a contributor is dropped when its name or
    /// login contains any of them.
    pub exclude: Vec<String>,
    /// Grouping rules forwarded to `identity::build_contributors`. Their group
    /// names are treated as manual and are not merged with look-alike names.
    pub groups: Vec<model::GroupRule>,
    /// `(canonical, variants)` pairs: any group matching the canonical name or
    /// one of the variants (case-insensitively) is renamed to the canonical
    /// spelling. Canonical names are treated as manual groups.
    pub group_aliases: Vec<(String, Vec<String>)>,
    /// Identity overrides forwarded to `identity::apply_identity_file`.
    /// NOTE(review): each inner list presumably names the aliases of one person — confirm.
    pub identities: Vec<Vec<String>>,
    /// Enables every GitHub-backed step: token lookup, listing an owner's
    /// repositories, cluster enrichment, avatars and the repository description.
    pub use_github: bool,
    /// When `false`, affiliations attached during GitHub enrichment are cleared.
    pub detect_affiliation: bool,
    /// Forwarded to `identity::cluster_commits`.
    pub merge_names: bool,
    /// Forwarded to `identity::build_contributors`.
    pub count_coauthors: bool,
    /// Fetch contributor avatars and the owner avatar (only when `use_github` is set).
    pub embed_avatars: bool,
    /// Size passed to `github::embed_avatars`; presumably in pixels.
    pub avatar_size: u32,
    /// Passed to `cache::Caches::load`; presumably forces cached data to be ignored.
    pub refresh: bool,
    /// Print progress messages to stderr.
    pub verbose: bool,
}
84
85impl Default for Config {
86 fn default() -> Self {
87 Config {
88 branch: None,
89 since: None,
90 until: None,
91 no_merges: false,
92 title: None,
93 exclude: Vec::new(),
94 groups: Vec::new(),
95 group_aliases: Vec::new(),
96 identities: Vec::new(),
97 use_github: true,
98 detect_affiliation: true,
99 merge_names: true,
100 count_coauthors: true,
101 embed_avatars: true,
102 avatar_size: 64,
103 refresh: false,
104 verbose: false,
105 }
106 }
107}
108
/// Result of `analyze` / `analyze_many`.
pub struct Analysis {
    /// Contributors remaining after identity merging, grouping and the
    /// `Config::exclude` filter.
    pub contributors: Vec<Contributor>,
    /// Summary information about the analyzed source(s).
    pub meta: RepoMeta,
}
115
/// Ordering applied by [`sort`].
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum Sort {
    /// Earliest `first` value first; ties broken by higher commit count.
    First,
    /// Latest `last` value first; ties broken by higher commit count.
    Last,
    /// Highest commit count first.
    Commits,
    /// Largest `last - first` span first.
    Duration,
    /// Ascending by lower-cased name.
    Name,
}
130
131pub fn sort(rows: &mut [Contributor], key: Sort) {
133 match key {
134 Sort::First => rows.sort_by(|a, b| a.first.cmp(&b.first).then(b.commits.cmp(&a.commits))),
135 Sort::Last => rows.sort_by(|a, b| b.last.cmp(&a.last).then(b.commits.cmp(&a.commits))),
136 Sort::Commits => rows.sort_by_key(|c| std::cmp::Reverse(c.commits)),
137 Sort::Duration => rows.sort_by_key(|c| std::cmp::Reverse(c.last - c.first)),
138 Sort::Name => rows.sort_by_key(|a| a.name.to_lowercase()),
139 }
140}
141
142pub fn analyze(input: &str, cfg: &Config) -> Result<Analysis> {
146 analyze_many(std::slice::from_ref(&input), cfg)
147}
148
/// Analyzes one or more sources and merges them into a single [`Analysis`].
///
/// Steps, in order:
/// 1. Inputs that `repo::looks_like_owner` accepts are expanded into that
///    owner's repositories (from the cache, or listed through GitHub); every
///    other input is used as a source as-is.
/// 2. Each source is prepared with `repo::prepare`, then the histories are read
///    by up to `READ_THREADS` worker threads.
/// 3. Commits are concatenated in source order; a commit whose SHA was already
///    seen in an earlier source is dropped.
/// 4. Commits are clustered into identities, optionally enriched from GitHub,
///    adjusted by `cfg.identities`, and turned into contributors.
/// 5. Group aliases and group-name canonicalization are applied, then the
///    `cfg.exclude` filter.
/// 6. Avatars, owner avatar and description are fetched when enabled, the
///    [`RepoMeta`] is assembled and the caches are saved.
///
/// Progress messages are written to stderr only when `cfg.verbose` is set.
///
/// # Errors
///
/// Fails when `inputs` is empty; when an owner-like input is given while
/// `cfg.use_github` is off; when the only input is an owner with no
/// repositories; when no source is usable; when the only source fails to
/// prepare or read; or when no commits are found. With several sources, a
/// source that fails to prepare or read is skipped instead (the warning is only
/// visible in verbose mode).
pub fn analyze_many(inputs: &[&str], cfg: &Config) -> Result<Analysis> {
    // Verbose-only logging to stderr.
    macro_rules! log {
        ($($arg:tt)*) => { if cfg.verbose { eprintln!($($arg)*); } };
    }
    if inputs.is_empty() {
        bail!("no repository sources given");
    }

    // No token is looked up when GitHub access is disabled.
    let client = github::GhClient::new(if cfg.use_github {
        github::find_token()
    } else {
        None
    });
    let now = chrono::Utc::now().timestamp();
    let mut caches = cache::Caches::load(cfg.refresh, now);

    // Expand owner-like inputs into their repositories; keep other inputs verbatim.
    let mut sources: Vec<String> = Vec::new();
    for input in inputs {
        if repo::looks_like_owner(input) {
            if !cfg.use_github {
                bail!("'{input}' looks like an org/user, but listing its repositories needs GitHub access (remove --no-github, or pass owner/repo slugs)");
            }
            let (slugs, cached) = match caches.org_repos(input) {
                Some(repos) => (repos, true),
                None => {
                    log!("→ listing repositories for '{input}'");
                    let fetched = client.list_owner_repos(input);
                    // An empty listing is not cached, so it is requested again next run.
                    if !fetched.is_empty() {
                        caches.put_org_repos((*input).to_string(), fetched.clone());
                    }
                    (fetched, false)
                }
            };
            if slugs.is_empty() {
                // A lone owner with nothing to analyze is an error; among
                // several inputs it is only worth a warning.
                if inputs.len() == 1 {
                    bail!("no repositories found for org/user '{input}' (it may not exist or has no non-fork repos)");
                }
                log!(" warning: no repositories found for '{input}'");
            } else {
                log!(
                    " {} repositories{}",
                    slugs.len(),
                    if cached { " (cached)" } else { "" }
                );
                sources.extend(slugs);
            }
        } else {
            sources.push((*input).to_string());
        }
    }
    if sources.is_empty() {
        bail!("no usable repository sources");
    }

    // Prepare every source; a failure is fatal only when it is the sole source.
    let mut prepared: Vec<repo::PreparedRepo> = Vec::new();
    for input in &sources {
        match repo::prepare(input, cfg.branch.as_deref()) {
            Ok(p) => prepared.push(p),
            Err(e) if sources.len() > 1 => log!(" warning: skipping source '{input}' ({e})"),
            Err(e) => return Err(e),
        }
    }
    if prepared.is_empty() {
        bail!("no usable repository sources");
    }
    // Indexed like `prepared`; `Commit::src` set below refers to the same index.
    let source_slugs: Vec<Option<String>> = prepared.iter().map(|p| p.slug.clone()).collect();
    for p in &prepared {
        log!("→ source: {} (branch {})", p.display_name, p.branch);
    }

    let filter = model::CommitFilter {
        since: cfg.since.clone(),
        until: cfg.until.clone(),
        no_merges: cfg.no_merges,
    };
    let branch = cfg.branch.as_deref();

    // One result slot per source. Workers claim the next unread index from the
    // shared counter and store the outcome in the matching slot.
    let outcomes: Vec<Mutex<Option<Result<SourceRead>>>> =
        (0..prepared.len()).map(|_| Mutex::new(None)).collect();
    let cursor = AtomicUsize::new(0);
    std::thread::scope(|s| {
        for _ in 0..READ_THREADS.min(prepared.len()) {
            s.spawn(|| loop {
                let i = cursor.fetch_add(1, Ordering::Relaxed);
                // Running past the end means every source has been claimed.
                let Some(p) = prepared.get(i) else { break };
                let r = read_source(p, &caches, &filter, branch);
                *outcomes[i].lock().unwrap() = Some(r);
            });
        }
    });

    // Merge the per-source results in source order, keeping the first
    // occurrence of each SHA.
    let mut commits: Vec<model::Commit> = Vec::new();
    let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
    let mut duplicates = 0u64;
    let mut cached_sources = 0usize;
    for (i, (p, slot)) in prepared.iter().zip(outcomes).enumerate() {
        let read = match slot.into_inner().unwrap() {
            Some(Ok(r)) => r,
            Some(Err(e)) if prepared.len() > 1 => {
                log!(" warning: skipping {} ({e})", p.display_name);
                continue;
            }
            Some(Err(e)) => return Err(e),
            None => continue,
        };
        if read.from_cache {
            cached_sources += 1;
        }
        for mut c in read.commits {
            if !seen.insert(c.sha.clone()) {
                duplicates += 1;
                continue;
            }
            // Record which prepared source the commit came from.
            c.src = i as u32;
            commits.push(c);
        }
    }
    if commits.is_empty() {
        bail!("no commits found");
    }
    if cached_sources > 0 {
        log!(
            "→ reused cached history for {cached_sources}/{} sources",
            prepared.len()
        );
    }
    if prepared.len() > 1 {
        log!(
            "→ {} commits from {} sources ({} duplicate commits dropped), {} distinct author emails",
            model::thousands(commits.len() as u64),
            prepared.len(),
            model::thousands(duplicates),
            distinct_emails(&commits)
        );
    } else {
        log!(
            "→ {} commits from {} distinct author emails",
            model::thousands(commits.len() as u64),
            distinct_emails(&commits)
        );
    }

    let mut clusters = identity::cluster_commits(&commits, cfg.merge_names);

    // GitHub enrichment needs at least one source with a slug.
    let any_slug = source_slugs.iter().any(|s| s.is_some());
    if cfg.use_github {
        if any_slug {
            log!("→ enriching from GitHub");
            github::enrich_clusters(
                &mut clusters,
                &commits,
                &source_slugs,
                &client,
                &mut caches,
                cfg.verbose,
            );
            clusters = identity::merge_by_login(clusters);
            github::fetch_profiles(&mut clusters, &client, &mut caches, cfg.verbose);
            // Affiliation detection is switched off by clearing the field
            // after the profile fetch.
            if !cfg.detect_affiliation {
                for cl in clusters.iter_mut() {
                    cl.affiliation = None;
                }
            }
        } else {
            log!("→ no GitHub sources, skipping enrichment");
        }
    }

    // Manual identity overrides are applied after the GitHub steps above.
    if !cfg.identities.is_empty() {
        clusters = identity::apply_identity_file(clusters, &cfg.identities);
        log!("→ applied {} identity overrides", cfg.identities.len());
    }

    let mut contributors =
        identity::build_contributors(&clusters, &commits, &cfg.groups, cfg.count_coauthors);

    apply_group_aliases(&mut contributors, &cfg.group_aliases);

    // Names from group rules and alias targets are "manual": they keep their
    // exact spelling during canonicalization.
    let mut manual_groups: std::collections::HashSet<String> =
        cfg.groups.iter().map(|r| r.group.clone()).collect();
    manual_groups.extend(cfg.group_aliases.iter().map(|(canon, _)| canon.clone()));
    let n_groups = canonicalize_groups(&mut contributors, &manual_groups);
    if n_groups > 0 {
        log!("→ {n_groups} distinct affiliations/groups");
    }

    // Drop contributors whose name or login contains an exclude pattern
    // (case-insensitive substring match).
    if !cfg.exclude.is_empty() {
        contributors.retain(|c| {
            !cfg.exclude.iter().any(|pat| {
                let p = pat.to_lowercase();
                c.name.to_lowercase().contains(&p)
                    || c.login
                        .as_deref()
                        .is_some_and(|l| l.to_lowercase().contains(&p))
            })
        });
    }

    log!(
        "→ merged to {} contributors ({} bots)",
        contributors.len(),
        contributors.iter().filter(|c| c.bot).count()
    );

    if cfg.embed_avatars && cfg.use_github {
        github::embed_avatars(
            &mut contributors,
            &client,
            &mut caches,
            cfg.avatar_size,
            cfg.verbose,
        );
    }

    // URL, slug, branch and description are only reported for a single source.
    let single = if prepared.len() == 1 {
        Some(&prepared[0])
    } else {
        None
    };

    // `Some` only when every source has a slug and all slugs share one owner.
    let owner = common_owner(&prepared);

    let owner_avatar = if cfg.use_github && cfg.embed_avatars {
        owner
            .as_deref()
            .and_then(|owner| github::fetch_avatar(&client, &mut caches, owner, 48))
    } else {
        None
    };

    let description = if cfg.use_github {
        single
            .and_then(|p| p.slug.as_deref())
            .and_then(|slug| github::fetch_repo_description(&client, slug))
    } else {
        None
    };

    // Default name: the single source's display name, else the shared owner,
    // else a joined list of source names.
    let default_name = match (single, &owner) {
        (Some(p), _) => p.display_name.clone(),
        (None, Some(owner)) => owner.clone(),
        (None, None) => combined_name(&prepared),
    };
    let branch = match single {
        Some(p) => p.branch.clone(),
        None => "combined".to_string(),
    };

    // Overall range across the remaining contributors; 0 when none are left.
    let first = contributors.iter().map(|c| c.first).min().unwrap_or(0);
    let last = contributors.iter().map(|c| c.last).max().unwrap_or(0);
    let meta = RepoMeta {
        name: cfg.title.clone().unwrap_or(default_name),
        url: single.and_then(|p| p.url.clone()),
        slug: single.and_then(|p| p.slug.clone()),
        branch,
        first,
        last,
        // Counts every deduplicated commit, including those of excluded contributors.
        total_commits: commits.len() as u64,
        // Bots are not counted as contributors.
        total_contributors: contributors.iter().filter(|c| !c.bot).count(),
        generated: chrono::Utc::now().format("%Y-%m-%d").to_string(),
        owner_avatar,
        description,
    };

    caches.save();
    Ok(Analysis { contributors, meta })
}
447
/// Outcome of reading one source's history (see `read_source`).
struct SourceRead {
    /// Commits of the source; `src` is filled in later by the caller.
    commits: Vec<model::Commit>,
    /// `true` when the commits came from the cache instead of `repo::read_commits`.
    from_cache: bool,
}
452
453fn read_source(
457 p: &repo::PreparedRepo,
458 caches: &cache::Caches,
459 filter: &model::CommitFilter,
460 branch: Option<&str>,
461) -> Result<SourceRead> {
462 let key = source_cache_key(p);
463 let remote = repo::remote_tip(p);
466 let tip = remote.clone().or_else(|| repo::local_tip(p));
467
468 if let Some(tip) = &tip {
469 if let Some(cached) = caches.commits(&key, tip, filter) {
470 let commits = cached
471 .into_iter()
472 .map(|c| model::Commit {
473 sha: c.sha,
474 ts: c.ts,
475 name: c.name,
476 email: c.email,
477 coauthors: c.coauthors,
478 src: 0,
479 })
480 .collect();
481 return Ok(SourceRead {
482 commits,
483 from_cache: true,
484 });
485 }
486 }
487
488 let local = repo::local_tip(p);
491 if p.is_remote && remote.is_some() && remote != local {
492 repo::fetch(p);
493 }
494 let commits = repo::read_commits(p, branch, filter)?;
495 if let Some(tip) = repo::local_tip(p) {
498 let cached = commits
499 .iter()
500 .map(|c| cache::CachedCommit {
501 sha: c.sha.clone(),
502 ts: c.ts,
503 name: c.name.clone(),
504 email: c.email.clone(),
505 coauthors: c.coauthors.clone(),
506 })
507 .collect();
508 caches.put_commits(&key, &tip, filter, cached);
509 }
510 Ok(SourceRead {
511 commits,
512 from_cache: false,
513 })
514}
515
516fn source_cache_key(p: &repo::PreparedRepo) -> String {
518 let base = p.slug.as_deref().unwrap_or(&p.display_name);
519 repo::sanitize(&format!("{base}__{}", p.branch))
520}
521
522fn combined_name(prepared: &[repo::PreparedRepo]) -> String {
525 let names: Vec<&str> = prepared.iter().map(|p| p.display_name.as_str()).collect();
526 match names.len() {
527 0 => "repositories".to_string(),
528 1..=3 => names.join(" + "),
529 n => format!("{} + {} more", names[..2].join(" + "), n - 2),
530 }
531}
532
533fn common_owner(prepared: &[repo::PreparedRepo]) -> Option<String> {
537 let mut owner: Option<String> = None;
538 for p in prepared {
539 let o = p.slug.as_deref()?.split('/').next()?.to_string();
540 match &owner {
541 Some(prev) if *prev != o => return None,
542 _ => owner = Some(o),
543 }
544 }
545 owner
546}
547
548fn distinct_emails(commits: &[model::Commit]) -> usize {
549 let mut e: Vec<&str> = commits.iter().map(|c| c.email.as_str()).collect();
550 e.sort_unstable();
551 e.dedup();
552 e.len()
553}
554
555fn apply_group_aliases(contributors: &mut [Contributor], aliases: &[(String, Vec<String>)]) {
562 if aliases.is_empty() {
563 return;
564 }
565 let mut map: std::collections::HashMap<String, String> = std::collections::HashMap::new();
566 for (canon, variants) in aliases {
567 map.insert(canon.to_lowercase(), canon.clone());
568 for v in variants {
569 map.insert(v.to_lowercase(), canon.clone());
570 }
571 }
572 let canon = |g: &str| map.get(&g.to_lowercase()).cloned();
573 for c in contributors.iter_mut() {
574 if let Some(g) = &c.group {
575 if let Some(cn) = canon(g) {
576 c.group = Some(cn);
577 }
578 }
579 if let Some(mg) = &mut c.month_groups {
580 for slot in mg.iter_mut().flatten() {
581 if let Some(cn) = canon(slot) {
582 *slot = cn;
583 }
584 }
585 }
586 }
587}
588
589fn canonicalize_groups(
590 contributors: &mut [Contributor],
591 manual: &std::collections::HashSet<String>,
592) -> usize {
593 use std::collections::HashMap;
594 let alnum_key = |g: &str| -> String {
595 let lower = g.to_lowercase();
596 let trimmed = lower.strip_prefix("the ").unwrap_or(&lower);
597 trimmed.chars().filter(|c| c.is_alphanumeric()).collect()
598 };
599
600 let mut variants: HashMap<String, usize> = HashMap::new();
601 for c in contributors.iter() {
602 if let Some(g) = &c.group {
603 *variants.entry(g.clone()).or_default() += 1;
604 }
605 }
606
607 let mut keys: Vec<String> = variants
611 .keys()
612 .filter(|g| !manual.contains(*g))
613 .map(|g| alnum_key(g))
614 .collect();
615 keys.sort();
616 keys.dedup();
617 let resolve = |key: &str| -> String {
618 keys.iter()
619 .filter(|k| k.len() >= 6 && key.starts_with(*k))
620 .min_by_key(|k| k.len())
621 .map(|k| k.to_string())
622 .unwrap_or_else(|| key.to_string())
623 };
624 let cluster_of = |g: &str| -> String {
625 if manual.contains(g) {
626 format!("\u{0}{g}")
627 } else {
628 resolve(&alnum_key(g))
629 }
630 };
631
632 let mut best: HashMap<String, (&String, usize)> = HashMap::new();
633 for (g, n) in &variants {
634 let cluster = cluster_of(g);
635 let score = |g: &str, n: usize| {
636 n * 4
637 + usize::from(g.contains(' ')) * 2
638 + usize::from(g.chars().any(|c| c.is_uppercase()))
639 };
640 let entry = best.entry(cluster).or_insert((g, *n));
641 if score(g, *n) > score(entry.0, entry.1) {
642 *entry = (g, *n);
643 }
644 }
645
646 let display: HashMap<String, String> = best
647 .iter()
648 .map(|(k, (g, _))| (k.clone(), (*g).clone()))
649 .collect();
650 for c in contributors.iter_mut() {
651 if let Some(g) = &c.group {
652 c.group = display.get(&cluster_of(g)).cloned().or(c.group.clone());
653 }
654 }
655 display.len()
656}