1pub mod cache;
25pub mod github;
26pub mod html;
27pub mod identity;
28pub mod model;
29pub mod progress;
30pub mod repo;
31pub mod svg;
32pub mod theme;
33
34use anyhow::{bail, Result};
35use std::io::IsTerminal;
36use std::sync::atomic::{AtomicUsize, Ordering};
37use std::sync::Mutex;
38
39pub use model::{Contributor, RepoMeta};
40
/// Upper bound on the number of worker threads `analyze_many` spawns to read
/// source histories in parallel (fewer are spawned when there are fewer sources).
const READ_THREADS: usize = 8;
43
/// Options controlling one analysis run (see `analyze` / `analyze_many`).
#[derive(Clone)]
pub struct Config {
    /// Branch handed to `repo::prepare` and `repo::read_commits`; `None` passes no
    /// explicit branch and leaves the choice to those helpers.
    pub branch: Option<String>,
    /// Copied into `model::CommitFilter::since`.
    /// NOTE(review): accepted date syntax is defined by the commit reader — not visible here.
    pub since: Option<String>,
    /// Copied into `model::CommitFilter::until` (same caveat as `since`).
    pub until: Option<String>,
    /// Copied into `model::CommitFilter::no_merges`; presumably skips merge commits — confirm
    /// against `repo::read_commits`.
    pub no_merges: bool,
    /// Overrides the report name; when `None` a name is derived from the sources.
    pub title: Option<String>,
    /// Case-insensitive substrings: a contributor whose name or login contains any of
    /// them is removed from the result.
    pub exclude: Vec<String>,
    /// Repositories to drop when an owner input is expanded into its repositories. Each
    /// entry is trimmed and compared case-insensitively against the full slug and
    /// against the slug's last path segment.
    pub exclude_repos: Vec<String>,
    /// Group rules forwarded to `identity::build_contributors`. Their group names are
    /// also treated as "manual" and kept out of automatic spelling merges.
    pub groups: Vec<model::GroupRule>,
    /// `(canonical, variants)` pairs: a group equal (ignoring case) to the canonical name
    /// or to any variant is rewritten to the canonical name. Canonical names are also
    /// treated as "manual" groups.
    pub group_aliases: Vec<(String, Vec<String>)>,
    /// Identity overrides forwarded to `identity::apply_identity_file` (skipped when empty).
    /// NOTE(review): presumably each inner list names identities to merge — confirm there.
    pub identities: Vec<Vec<String>>,
    /// Forwarded to `identity::build_contributors`; the meaning of each pair is defined there.
    pub forced_names: Vec<(String, String)>,
    /// When `false`, no token is looked up, owner (org/user) inputs are rejected, and
    /// GitHub enrichment, avatars and the repository description are all skipped.
    pub use_github: bool,
    /// When `false`, affiliations set during GitHub enrichment are cleared again.
    pub detect_affiliation: bool,
    /// Forwarded to `identity::cluster_commits`.
    pub merge_names: bool,
    /// Forwarded to `identity::build_contributors`.
    pub count_coauthors: bool,
    /// When `true` (and `use_github` is set), contributor avatars and the common owner's
    /// avatar are fetched.
    pub embed_avatars: bool,
    /// Size passed to `github::embed_avatars` (presumably pixels — confirm).
    pub avatar_size: u32,
    /// Passed to `cache::Caches::load`; presumably forces cached data to be ignored — confirm.
    pub refresh: bool,
    /// Enables progress messages on stderr, and progress bars when several sources are
    /// processed and stderr is a terminal.
    pub verbose: bool,
}
94
95impl Default for Config {
96 fn default() -> Self {
97 Config {
98 branch: None,
99 since: None,
100 until: None,
101 no_merges: false,
102 title: None,
103 exclude: Vec::new(),
104 exclude_repos: Vec::new(),
105 groups: Vec::new(),
106 group_aliases: Vec::new(),
107 identities: Vec::new(),
108 forced_names: Vec::new(),
109 use_github: true,
110 detect_affiliation: true,
111 merge_names: true,
112 count_coauthors: true,
113 embed_avatars: true,
114 avatar_size: 64,
115 refresh: false,
116 verbose: false,
117 }
118 }
119}
120
/// Result of `analyze` / `analyze_many`.
pub struct Analysis {
    /// Contributors that remain after identity merging, grouping and exclusion.
    pub contributors: Vec<Contributor>,
    /// Summary information about the analyzed source(s): name, branch, time span, totals, releases.
    pub meta: RepoMeta,
}
127
/// Ordering applied to contributor rows by [`sort`].
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Sort {
    /// Ascending `first` value; ties put the contributor with more commits first.
    First,
    /// Descending `last` value; ties put the contributor with more commits first.
    Last,
    /// Descending commit count.
    Commits,
    /// Descending `last - first` span.
    Duration,
    /// Ascending name, compared after lowercasing.
    Name,
}
142
143pub fn sort(rows: &mut [Contributor], key: Sort) {
145 match key {
146 Sort::First => rows.sort_by(|a, b| a.first.cmp(&b.first).then(b.commits.cmp(&a.commits))),
147 Sort::Last => rows.sort_by(|a, b| b.last.cmp(&a.last).then(b.commits.cmp(&a.commits))),
148 Sort::Commits => rows.sort_by_key(|c| std::cmp::Reverse(c.commits)),
149 Sort::Duration => rows.sort_by_key(|c| std::cmp::Reverse(c.last - c.first)),
150 Sort::Name => rows.sort_by_key(|a| a.name.to_lowercase()),
151 }
152}
153
154pub fn analyze(input: &str, cfg: &Config) -> Result<Analysis> {
158 analyze_many(std::slice::from_ref(&input), cfg)
159}
160
/// Reports whether `slug` is covered by one of the `excludes` patterns.
///
/// A pattern matches when, after trimming and lowercasing, it equals either the
/// whole lowercased slug or the part of the slug after its last `/`. Patterns
/// that are blank after trimming never match.
fn repo_excluded(slug: &str, excludes: &[String]) -> bool {
    let slug_lower = slug.to_lowercase();
    // Text after the last '/', or the whole slug when it has no '/'.
    let repo_name = match slug_lower.rfind('/') {
        Some(pos) => &slug_lower[pos + 1..],
        None => slug_lower.as_str(),
    };
    for pattern in excludes {
        let pattern = pattern.trim().to_lowercase();
        if pattern.is_empty() {
            continue;
        }
        if pattern == slug_lower || pattern == repo_name {
            return true;
        }
    }
    false
}
176
/// Analyzes one or more sources and merges them into a single [`Analysis`].
///
/// Steps, in order:
/// 1. Expand inputs that look like an owner (org/user) into that owner's repositories,
///    using the cache when possible and honouring `cfg.exclude_repos`.
/// 2. Prepare every source via `repo::prepare`, then read their histories on up to
///    `READ_THREADS` worker threads.
/// 3. Concatenate commits, dropping any SHA already seen in an earlier source.
/// 4. Cluster identities, optionally enrich them from GitHub, apply identity overrides,
///    build contributors, normalize group names and apply `cfg.exclude`.
/// 5. Collect avatars, description, releases and summary metadata, then save the caches.
///
/// # Errors
/// Fails when `inputs` is empty; when an owner input is given while `cfg.use_github` is
/// `false`; when the only input is an owner with no repositories; when no source can be
/// used; when no commits are found; and when the single source of a one-source run
/// fails to prepare or read. With several sources a failing source is skipped instead
/// (with a warning in verbose mode).
pub fn analyze_many(inputs: &[&str], cfg: &Config) -> Result<Analysis> {
    // Prints to stderr only in verbose mode.
    macro_rules! log {
        ($($arg:tt)*) => { if cfg.verbose { eprintln!($($arg)*); } };
    }
    if inputs.is_empty() {
        bail!("no repository sources given");
    }

    // Without GitHub access the client is built with no token.
    let client = github::GhClient::new(if cfg.use_github {
        github::find_token()
    } else {
        None
    });
    let now = chrono::Utc::now().timestamp();
    let mut caches = cache::Caches::load(cfg.refresh, now);

    // Expand owner inputs into repository slugs; other inputs pass through unchanged.
    let mut sources: Vec<String> = Vec::new();
    for input in inputs {
        if repo::looks_like_owner(input) {
            if !cfg.use_github {
                bail!("'{input}' looks like an org/user, but listing its repositories needs GitHub access (remove --no-github, or pass owner/repo slugs)");
            }
            let (slugs, cached) = match caches.org_repos(input) {
                Some(repos) => (repos, true),
                None => {
                    log!("→ listing repositories for '{input}'");
                    let fetched = client.list_owner_repos(input);
                    // Empty listings are not cached, so the next run asks again.
                    if !fetched.is_empty() {
                        caches.put_org_repos((*input).to_string(), fetched.clone());
                    }
                    (fetched, false)
                }
            };
            if slugs.is_empty() {
                // A lone owner with nothing to analyze is fatal; among several inputs it
                // is only worth a warning.
                if inputs.len() == 1 {
                    bail!("no repositories found for org/user '{input}' (it may not exist or has no non-fork repos)");
                }
                log!(" warning: no repositories found for '{input}'");
            } else {
                let before = slugs.len();
                let kept: Vec<String> = slugs
                    .into_iter()
                    .filter(|s| !repo_excluded(s, &cfg.exclude_repos))
                    .collect();
                let excluded = before - kept.len();
                log!(
                    " {} repositories{}{}",
                    kept.len(),
                    if cached { " (cached)" } else { "" },
                    if excluded > 0 {
                        format!(", {excluded} excluded")
                    } else {
                        String::new()
                    }
                );
                sources.extend(kept);
            }
        } else {
            sources.push((*input).to_string());
        }
    }
    if sources.is_empty() {
        bail!("no usable repository sources");
    }

    let multi = sources.len() > 1;
    // Progress bars only for verbose multi-source runs on an interactive stderr.
    let show_bars = cfg.verbose && multi && std::io::stderr().is_terminal();
    let mut prepared: Vec<repo::PreparedRepo> = Vec::new();
    let clone_bar = progress::bar("cloning repositories", sources.len(), show_bars);
    for input in &sources {
        // NOTE(review): the third argument to `repo::prepare` is `show_bars`; its exact
        // effect is defined in `repo` — confirm there.
        match repo::prepare(input, cfg.branch.as_deref(), show_bars) {
            Ok(p) => prepared.push(p),
            // With several sources a failing one is skipped; the warning is printed
            // through `suspend` so it is emitted via the progress bar's hook.
            Err(e) if multi => {
                clone_bar.suspend(|| log!(" warning: skipping source '{input}' ({e})"))
            }
            Err(e) => return Err(e),
        }
        clone_bar.inc(1);
    }
    clone_bar.finish_and_clear();
    if prepared.is_empty() {
        bail!("no usable repository sources");
    }
    // Slugs indexed like `prepared`; commits later refer to their source by this index.
    let source_slugs: Vec<Option<String>> = prepared.iter().map(|p| p.slug.clone()).collect();
    if !show_bars {
        for p in &prepared {
            log!("→ source: {} (branch {})", p.display_name, p.branch);
        }
    }

    let filter = model::CommitFilter {
        since: cfg.since.clone(),
        until: cfg.until.clone(),
        no_merges: cfg.no_merges,
    };
    let branch = cfg.branch.as_deref();

    // One result slot per prepared source; each worker fills the slot of the index it claimed.
    let outcomes: Vec<Mutex<Option<Result<SourceRead>>>> =
        (0..prepared.len()).map(|_| Mutex::new(None)).collect();
    // Shared counter from which workers claim the next unread source index.
    let cursor = AtomicUsize::new(0);
    let read_bar = progress::bar(
        "reading history",
        prepared.len(),
        show_bars && prepared.len() > 1,
    );
    std::thread::scope(|s| {
        for _ in 0..READ_THREADS.min(prepared.len()) {
            s.spawn(|| loop {
                let i = cursor.fetch_add(1, Ordering::Relaxed);
                // Indices past the end mean every source has been claimed.
                let Some(p) = prepared.get(i) else { break };
                let r = read_source(p, &caches, &filter, branch);
                *outcomes[i].lock().unwrap() = Some(r);
                read_bar.inc(1);
            });
        }
    });
    read_bar.finish_and_clear();

    // Merge per-source commits in source order, keeping the first occurrence of each SHA.
    let mut commits: Vec<model::Commit> = Vec::new();
    let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
    let mut duplicates = 0u64;
    let mut cached_sources = 0usize;
    for (i, (p, slot)) in prepared.iter().zip(outcomes).enumerate() {
        let read = match slot.into_inner().unwrap() {
            Some(Ok(r)) => r,
            // Read failures are tolerated only when other sources remain.
            Some(Err(e)) if prepared.len() > 1 => {
                log!(" warning: skipping {} ({e})", p.display_name);
                continue;
            }
            Some(Err(e)) => return Err(e),
            None => continue,
        };
        if read.from_cache {
            cached_sources += 1;
        }
        for mut c in read.commits {
            if !seen.insert(c.sha.clone()) {
                duplicates += 1;
                continue;
            }
            // Tag the commit with the index of the source it came from.
            c.src = i as u32;
            commits.push(c);
        }
    }
    if commits.is_empty() {
        bail!("no commits found");
    }
    if cached_sources > 0 {
        log!(
            "→ reused cached history for {cached_sources}/{} sources",
            prepared.len()
        );
    }
    if prepared.len() > 1 {
        log!(
            "→ {} commits from {} sources ({} duplicate commits dropped), {} distinct author emails",
            model::thousands(commits.len() as u64),
            prepared.len(),
            model::thousands(duplicates),
            distinct_emails(&commits)
        );
    } else {
        log!(
            "→ {} commits from {} distinct author emails",
            model::thousands(commits.len() as u64),
            distinct_emails(&commits)
        );
    }

    let mut clusters = identity::cluster_commits(&commits, cfg.merge_names);

    // GitHub enrichment needs at least one source with a known slug.
    let any_slug = source_slugs.iter().any(|s| s.is_some());
    if cfg.use_github {
        if any_slug {
            log!("→ enriching from GitHub");
            github::enrich_clusters(
                &mut clusters,
                &commits,
                &source_slugs,
                &client,
                &mut caches,
                cfg.verbose,
            );
            clusters = identity::merge_by_login(clusters);
            github::fetch_profiles(&mut clusters, &client, &mut caches, cfg.verbose);
            // Affiliation detection is opt-out: discard whatever enrichment filled in.
            if !cfg.detect_affiliation {
                for cl in clusters.iter_mut() {
                    cl.affiliation = None;
                }
            }
        } else {
            log!("→ no GitHub sources, skipping enrichment");
        }
    }

    if !cfg.identities.is_empty() {
        clusters = identity::apply_identity_file(clusters, &cfg.identities);
        log!("→ applied {} identity overrides", cfg.identities.len());
    }

    let mut contributors = identity::build_contributors(
        &clusters,
        &commits,
        &cfg.groups,
        &cfg.forced_names,
        cfg.count_coauthors,
    );

    // Group-name normalization: explicit aliases first, then the leading "the ",
    // then automatic merging of near-identical spellings.
    apply_group_aliases(&mut contributors, &cfg.group_aliases);

    strip_leading_the(&mut contributors);

    // Names the user configured explicitly (rule groups and alias canonicals) are
    // exempt from automatic merging; they are stored with the leading "the " removed
    // to match what `strip_leading_the` just did to the contributors.
    let mut manual_groups: std::collections::HashSet<String> = cfg
        .groups
        .iter()
        .map(|r| strip_the(&r.group).to_string())
        .collect();
    manual_groups.extend(
        cfg.group_aliases
            .iter()
            .map(|(canon, _)| strip_the(canon).to_string()),
    );
    let n_groups = canonicalize_groups(&mut contributors, &manual_groups);
    if n_groups > 0 {
        log!("→ {n_groups} distinct affiliations/groups");
    }

    // Drop contributors whose name or login contains any exclude pattern (case-insensitive).
    if !cfg.exclude.is_empty() {
        contributors.retain(|c| {
            !cfg.exclude.iter().any(|pat| {
                let p = pat.to_lowercase();
                c.name.to_lowercase().contains(&p)
                    || c.login
                        .as_deref()
                        .is_some_and(|l| l.to_lowercase().contains(&p))
            })
        });
    }

    log!(
        "→ merged to {} contributors ({} bots)",
        contributors.len(),
        contributors.iter().filter(|c| c.bot).count()
    );

    if cfg.embed_avatars && cfg.use_github {
        github::embed_avatars(
            &mut contributors,
            &client,
            &mut caches,
            cfg.avatar_size,
            cfg.verbose,
        );
    }

    // `single` is set only when exactly one source was prepared; URL, slug, branch and
    // description are reported only in that case.
    let single = if prepared.len() == 1 {
        Some(&prepared[0])
    } else {
        None
    };

    // Owner shared by every prepared source, if there is one.
    let owner = common_owner(&prepared);

    let owner_avatar = if cfg.use_github && cfg.embed_avatars {
        owner
            .as_deref()
            .and_then(|owner| github::fetch_avatar(&client, &mut caches, owner, 48))
    } else {
        None
    };

    let description = if cfg.use_github {
        single
            .and_then(|p| p.slug.as_deref())
            .and_then(|slug| github::fetch_repo_description(&client, slug))
    } else {
        None
    };

    // Default report name: the single source's name, else the common owner, else a
    // name combined from the source names.
    let default_name = match (single, &owner) {
        (Some(p), _) => p.display_name.clone(),
        (None, Some(owner)) => owner.clone(),
        (None, None) => combined_name(&prepared),
    };
    let branch = match single {
        Some(p) => p.branch.clone(),
        None => "combined".to_string(),
    };

    // With several sources each release name is prefixed with its source's display name.
    let releases: Vec<model::Release> = if prepared.len() == 1 {
        repo::read_tags(&prepared[0])
    } else {
        prepared
            .iter()
            .flat_map(|p| {
                repo::read_tags(p).into_iter().map(|mut r| {
                    r.name = format!("{} {}", p.display_name, r.name);
                    r
                })
            })
            .collect()
    };
    if !releases.is_empty() {
        log!("→ {} releases", releases.len());
    }

    // Overall span across the remaining contributors; 0 when none are left.
    let first = contributors.iter().map(|c| c.first).min().unwrap_or(0);
    let last = contributors.iter().map(|c| c.last).max().unwrap_or(0);
    let meta = RepoMeta {
        name: cfg.title.clone().unwrap_or(default_name),
        url: single.and_then(|p| p.url.clone()),
        slug: single.and_then(|p| p.slug.clone()),
        branch,
        first,
        last,
        // Counts every deduplicated commit, including those of excluded contributors.
        total_commits: commits.len() as u64,
        // Bots are not counted as contributors.
        total_contributors: contributors.iter().filter(|c| !c.bot).count(),
        generated: chrono::Utc::now().format("%Y-%m-%d").to_string(),
        owner_avatar,
        description,
        releases,
    };

    // Caches are saved only on the success path; the early error returns above skip this.
    caches.save();
    Ok(Analysis { contributors, meta })
}
545
/// Outcome of reading one source's history (see `read_source`).
struct SourceRead {
    /// Commits of the source; `src` is still 0 here and is assigned by the caller.
    commits: Vec<model::Commit>,
    /// `true` when the commits came from the commit cache instead of the repository.
    from_cache: bool,
}
550
551fn read_source(
555 p: &repo::PreparedRepo,
556 caches: &cache::Caches,
557 filter: &model::CommitFilter,
558 branch: Option<&str>,
559) -> Result<SourceRead> {
560 let key = source_cache_key(p);
561 let remote = repo::remote_tip(p);
564 let tip = remote.clone().or_else(|| repo::local_tip(p));
565
566 if let Some(tip) = &tip {
567 if let Some(cached) = caches.commits(&key, tip, filter) {
568 let commits = cached
569 .into_iter()
570 .map(|c| model::Commit {
571 sha: c.sha,
572 ts: c.ts,
573 name: c.name,
574 email: c.email,
575 coauthors: c.coauthors,
576 src: 0,
577 })
578 .collect();
579 return Ok(SourceRead {
580 commits,
581 from_cache: true,
582 });
583 }
584 }
585
586 let local = repo::local_tip(p);
589 if p.is_remote && remote.is_some() && remote != local {
590 repo::fetch(p);
591 }
592 let commits = repo::read_commits(p, branch, filter)?;
593 if let Some(tip) = repo::local_tip(p) {
596 let cached = commits
597 .iter()
598 .map(|c| cache::CachedCommit {
599 sha: c.sha.clone(),
600 ts: c.ts,
601 name: c.name.clone(),
602 email: c.email.clone(),
603 coauthors: c.coauthors.clone(),
604 })
605 .collect();
606 caches.put_commits(&key, &tip, filter, cached);
607 }
608 Ok(SourceRead {
609 commits,
610 from_cache: false,
611 })
612}
613
614fn source_cache_key(p: &repo::PreparedRepo) -> String {
616 let base = p.slug.as_deref().unwrap_or(&p.display_name);
617 repo::sanitize(&format!("{base}__{}", p.branch))
618}
619
620fn combined_name(prepared: &[repo::PreparedRepo]) -> String {
623 let names: Vec<&str> = prepared.iter().map(|p| p.display_name.as_str()).collect();
624 match names.len() {
625 0 => "repositories".to_string(),
626 1..=3 => names.join(" + "),
627 n => format!("{} + {} more", names[..2].join(" + "), n - 2),
628 }
629}
630
631fn common_owner(prepared: &[repo::PreparedRepo]) -> Option<String> {
635 let mut owner: Option<String> = None;
636 for p in prepared {
637 let o = p.slug.as_deref()?.split('/').next()?.to_string();
638 match &owner {
639 Some(prev) if *prev != o => return None,
640 _ => owner = Some(o),
641 }
642 }
643 owner
644}
645
646fn distinct_emails(commits: &[model::Commit]) -> usize {
647 let mut e: Vec<&str> = commits.iter().map(|c| c.email.as_str()).collect();
648 e.sort_unstable();
649 e.dedup();
650 e.len()
651}
652
653fn apply_group_aliases(contributors: &mut [Contributor], aliases: &[(String, Vec<String>)]) {
660 if aliases.is_empty() {
661 return;
662 }
663 let mut map: std::collections::HashMap<String, String> = std::collections::HashMap::new();
664 for (canon, variants) in aliases {
665 map.insert(canon.to_lowercase(), canon.clone());
666 for v in variants {
667 map.insert(v.to_lowercase(), canon.clone());
668 }
669 }
670 let canon = |g: &str| map.get(&g.to_lowercase()).cloned();
671 for c in contributors.iter_mut() {
672 if let Some(g) = &c.group {
673 if let Some(cn) = canon(g) {
674 c.group = Some(cn);
675 }
676 }
677 if let Some(mg) = &mut c.month_groups {
678 for slot in mg.iter_mut().flatten() {
679 if let Some(cn) = canon(slot) {
680 *slot = cn;
681 }
682 }
683 }
684 }
685}
686
/// Removes a leading `"the "` (any ASCII case) and the whitespace that follows it.
///
/// The input is returned unchanged when it does not start with that prefix or when
/// nothing but whitespace would remain after removing it.
fn strip_the(g: &str) -> &str {
    // `get` returns `None` for strings shorter than four bytes and when byte 4 is
    // not a character boundary, so neither case can panic below.
    let Some(prefix) = g.get(..4) else {
        return g;
    };
    if !prefix.eq_ignore_ascii_case("the ") {
        return g;
    }
    match g[4..].trim_start() {
        "" => g,
        remainder => remainder,
    }
}
702
703fn strip_leading_the(contributors: &mut [Contributor]) {
706 for c in contributors.iter_mut() {
707 if let Some(g) = &mut c.group {
708 if strip_the(g).len() != g.len() {
709 *g = strip_the(g).to_string();
710 }
711 }
712 if let Some(mg) = &mut c.month_groups {
713 for slot in mg.iter_mut().flatten() {
714 if strip_the(slot).len() != slot.len() {
715 *slot = strip_the(slot).to_string();
716 }
717 }
718 }
719 }
720}
721
722fn canonicalize_groups(
723 contributors: &mut [Contributor],
724 manual: &std::collections::HashSet<String>,
725) -> usize {
726 use std::collections::HashMap;
727 let alnum_key = |g: &str| -> String {
728 let lower = g.to_lowercase();
729 let trimmed = lower.strip_prefix("the ").unwrap_or(&lower);
730 trimmed.chars().filter(|c| c.is_alphanumeric()).collect()
731 };
732
733 let mut variants: HashMap<String, usize> = HashMap::new();
734 for c in contributors.iter() {
735 if let Some(g) = &c.group {
736 *variants.entry(g.clone()).or_default() += 1;
737 }
738 }
739
740 let mut keys: Vec<String> = variants
744 .keys()
745 .filter(|g| !manual.contains(*g))
746 .map(|g| alnum_key(g))
747 .collect();
748 keys.sort();
749 keys.dedup();
750 let resolve = |key: &str| -> String {
751 keys.iter()
752 .filter(|k| k.len() >= 6 && key.starts_with(*k))
753 .min_by_key(|k| k.len())
754 .map(|k| k.to_string())
755 .unwrap_or_else(|| key.to_string())
756 };
757 let cluster_of = |g: &str| -> String {
758 if manual.contains(g) {
759 format!("\u{0}{g}")
760 } else {
761 resolve(&alnum_key(g))
762 }
763 };
764
765 let mut best: HashMap<String, (&String, usize)> = HashMap::new();
766 for (g, n) in &variants {
767 let cluster = cluster_of(g);
768 let score = |g: &str, n: usize| {
769 n * 4
770 + usize::from(g.contains(' ')) * 2
771 + usize::from(g.chars().any(|c| c.is_uppercase()))
772 };
773 let entry = best.entry(cluster).or_insert((g, *n));
774 if score(g, *n) > score(entry.0, entry.1) {
775 *entry = (g, *n);
776 }
777 }
778
779 let display: HashMap<String, String> = best
780 .iter()
781 .map(|(k, (g, _))| (k.clone(), (*g).clone()))
782 .collect();
783 for c in contributors.iter_mut() {
784 if let Some(g) = &c.group {
785 c.group = display.get(&cluster_of(g)).cloned().or(c.group.clone());
786 }
787 }
788 display.len()
789}