1pub mod cache;
25pub mod github;
26pub mod html;
27pub mod identity;
28pub mod model;
29pub mod repo;
30pub mod svg;
31pub mod theme;
32
33use anyhow::{bail, Result};
34use std::sync::atomic::{AtomicUsize, Ordering};
35use std::sync::Mutex;
36
37pub use model::{Contributor, RepoMeta};
38
/// Upper bound on the worker threads `analyze_many` spawns to read sources
/// concurrently; fewer are started when there are fewer sources than this.
const READ_THREADS: usize = 8;
41
/// Options controlling one analysis run (see `analyze` / `analyze_many`).
#[derive(Clone)]
pub struct Config {
    /// Branch handed to `repo::prepare` and `repo::read_commits`; `None`
    /// passes no explicit branch and leaves the choice to those helpers.
    pub branch: Option<String>,
    /// Copied into `model::CommitFilter::since`.
    /// NOTE(review): presumably a lower date bound; the accepted format is
    /// defined by the commit reader — confirm.
    pub since: Option<String>,
    /// Copied into `model::CommitFilter::until`.
    /// NOTE(review): presumably an upper date bound — confirm.
    pub until: Option<String>,
    /// Copied into `model::CommitFilter::no_merges`.
    /// NOTE(review): presumably skips merge commits — confirm.
    pub no_merges: bool,
    /// When set, replaces the automatically chosen report name in
    /// `RepoMeta::name`.
    pub title: Option<String>,
    /// Case-insensitive substrings; a contributor is dropped when its name or
    /// login contains any of them.
    pub exclude: Vec<String>,
    /// Group rules passed to `identity::build_contributors`. The group names
    /// they carry count as manually defined and are kept out of the automatic
    /// look-alike merging done by `canonicalize_groups`.
    pub groups: Vec<model::GroupRule>,
    /// `(canonical name, variants)` pairs: a group whose name equals the
    /// canonical name or any variant (ignoring case) is renamed to the
    /// canonical name.
    pub group_aliases: Vec<(String, Vec<String>)>,
    /// Identity overrides passed to `identity::apply_identity_file`.
    /// NOTE(review): presumably each inner list names the aliases of one
    /// person — confirm against that function.
    pub identities: Vec<Vec<String>>,
    /// Passed to `identity::build_contributors`.
    /// NOTE(review): presumably `(identity, display name)` pairs — confirm.
    pub forced_names: Vec<(String, String)>,
    /// Master switch for GitHub access: token lookup, org/user repository
    /// listing, cluster enrichment, profiles, avatars and the repository
    /// description are all skipped when this is `false`.
    pub use_github: bool,
    /// When `false`, the `affiliation` of every cluster is cleared after
    /// GitHub enrichment.
    pub detect_affiliation: bool,
    /// Passed to `identity::cluster_commits`.
    /// NOTE(review): presumably merges identities that share a name — confirm.
    pub merge_names: bool,
    /// Passed to `identity::build_contributors`.
    /// NOTE(review): presumably credits co-authors of a commit — confirm.
    pub count_coauthors: bool,
    /// Together with `use_github`, enables `github::embed_avatars` and the
    /// owner-avatar fetch.
    pub embed_avatars: bool,
    /// Size passed to `github::embed_avatars`.
    /// NOTE(review): presumably pixels — confirm.
    pub avatar_size: u32,
    /// Passed to `cache::Caches::load`.
    /// NOTE(review): presumably makes cached data be ignored — confirm.
    pub refresh: bool,
    /// Print progress messages to stderr; also forwarded to the GitHub helpers.
    pub verbose: bool,
}
89
90impl Default for Config {
91 fn default() -> Self {
92 Config {
93 branch: None,
94 since: None,
95 until: None,
96 no_merges: false,
97 title: None,
98 exclude: Vec::new(),
99 groups: Vec::new(),
100 group_aliases: Vec::new(),
101 identities: Vec::new(),
102 forced_names: Vec::new(),
103 use_github: true,
104 detect_affiliation: true,
105 merge_names: true,
106 count_coauthors: true,
107 embed_avatars: true,
108 avatar_size: 64,
109 refresh: false,
110 verbose: false,
111 }
112 }
113}
114
/// Result of an analysis run, as returned by `analyze` / `analyze_many`.
pub struct Analysis {
    /// One row per contributor that survived the exclude filter.
    pub contributors: Vec<Contributor>,
    /// Report-level metadata (name, branch, time span, totals, releases, ...).
    pub meta: RepoMeta,
}
121
/// Ordering applied to contributor rows by [`sort`].
///
/// Derives `Debug` so the chosen key can be logged or used in assertions.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Sort {
    /// Ascending by the `first` field; ties go to the row with more commits.
    First,
    /// Descending by the `last` field; ties go to the row with more commits.
    Last,
    /// Descending by commit count.
    Commits,
    /// Descending by `last - first`.
    Duration,
    /// Ascending by lower-cased name.
    Name,
}
136
137pub fn sort(rows: &mut [Contributor], key: Sort) {
139 match key {
140 Sort::First => rows.sort_by(|a, b| a.first.cmp(&b.first).then(b.commits.cmp(&a.commits))),
141 Sort::Last => rows.sort_by(|a, b| b.last.cmp(&a.last).then(b.commits.cmp(&a.commits))),
142 Sort::Commits => rows.sort_by_key(|c| std::cmp::Reverse(c.commits)),
143 Sort::Duration => rows.sort_by_key(|c| std::cmp::Reverse(c.last - c.first)),
144 Sort::Name => rows.sort_by_key(|a| a.name.to_lowercase()),
145 }
146}
147
148pub fn analyze(input: &str, cfg: &Config) -> Result<Analysis> {
152 analyze_many(std::slice::from_ref(&input), cfg)
153}
154
155pub fn analyze_many(inputs: &[&str], cfg: &Config) -> Result<Analysis> {
161 macro_rules! log {
162 ($($arg:tt)*) => { if cfg.verbose { eprintln!($($arg)*); } };
163 }
164 if inputs.is_empty() {
165 bail!("no repository sources given");
166 }
167
168 let client = github::GhClient::new(if cfg.use_github {
169 github::find_token()
170 } else {
171 None
172 });
173 let now = chrono::Utc::now().timestamp();
174 let mut caches = cache::Caches::load(cfg.refresh, now);
175
176 let mut sources: Vec<String> = Vec::new();
181 for input in inputs {
182 if repo::looks_like_owner(input) {
183 if !cfg.use_github {
184 bail!("'{input}' looks like an org/user, but listing its repositories needs GitHub access (remove --no-github, or pass owner/repo slugs)");
185 }
186 let (slugs, cached) = match caches.org_repos(input) {
187 Some(repos) => (repos, true),
188 None => {
189 log!("→ listing repositories for '{input}'");
190 let fetched = client.list_owner_repos(input);
191 if !fetched.is_empty() {
192 caches.put_org_repos((*input).to_string(), fetched.clone());
193 }
194 (fetched, false)
195 }
196 };
197 if slugs.is_empty() {
198 if inputs.len() == 1 {
199 bail!("no repositories found for org/user '{input}' (it may not exist or has no non-fork repos)");
200 }
201 log!(" warning: no repositories found for '{input}'");
202 } else {
203 log!(
204 " {} repositories{}",
205 slugs.len(),
206 if cached { " (cached)" } else { "" }
207 );
208 sources.extend(slugs);
209 }
210 } else {
211 sources.push((*input).to_string());
212 }
213 }
214 if sources.is_empty() {
215 bail!("no usable repository sources");
216 }
217
218 let mut prepared: Vec<repo::PreparedRepo> = Vec::new();
222 for input in &sources {
223 match repo::prepare(input, cfg.branch.as_deref()) {
224 Ok(p) => prepared.push(p),
225 Err(e) if sources.len() > 1 => log!(" warning: skipping source '{input}' ({e})"),
226 Err(e) => return Err(e),
227 }
228 }
229 if prepared.is_empty() {
230 bail!("no usable repository sources");
231 }
232 let source_slugs: Vec<Option<String>> = prepared.iter().map(|p| p.slug.clone()).collect();
233 for p in &prepared {
234 log!("→ source: {} (branch {})", p.display_name, p.branch);
235 }
236
237 let filter = model::CommitFilter {
238 since: cfg.since.clone(),
239 until: cfg.until.clone(),
240 no_merges: cfg.no_merges,
241 };
242 let branch = cfg.branch.as_deref();
243
244 let outcomes: Vec<Mutex<Option<Result<SourceRead>>>> =
249 (0..prepared.len()).map(|_| Mutex::new(None)).collect();
250 let cursor = AtomicUsize::new(0);
251 std::thread::scope(|s| {
252 for _ in 0..READ_THREADS.min(prepared.len()) {
253 s.spawn(|| loop {
254 let i = cursor.fetch_add(1, Ordering::Relaxed);
255 let Some(p) = prepared.get(i) else { break };
256 let r = read_source(p, &caches, &filter, branch);
257 *outcomes[i].lock().unwrap() = Some(r);
258 });
259 }
260 });
261
262 let mut commits: Vec<model::Commit> = Vec::new();
263 let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
264 let mut duplicates = 0u64;
265 let mut cached_sources = 0usize;
266 for (i, (p, slot)) in prepared.iter().zip(outcomes).enumerate() {
267 let read = match slot.into_inner().unwrap() {
268 Some(Ok(r)) => r,
269 Some(Err(e)) if prepared.len() > 1 => {
270 log!(" warning: skipping {} ({e})", p.display_name);
271 continue;
272 }
273 Some(Err(e)) => return Err(e),
274 None => continue,
275 };
276 if read.from_cache {
277 cached_sources += 1;
278 }
279 for mut c in read.commits {
280 if !seen.insert(c.sha.clone()) {
281 duplicates += 1;
282 continue;
283 }
284 c.src = i as u32;
285 commits.push(c);
286 }
287 }
288 if commits.is_empty() {
289 bail!("no commits found");
290 }
291 if cached_sources > 0 {
292 log!(
293 "→ reused cached history for {cached_sources}/{} sources",
294 prepared.len()
295 );
296 }
297 if prepared.len() > 1 {
298 log!(
299 "→ {} commits from {} sources ({} duplicate commits dropped), {} distinct author emails",
300 model::thousands(commits.len() as u64),
301 prepared.len(),
302 model::thousands(duplicates),
303 distinct_emails(&commits)
304 );
305 } else {
306 log!(
307 "→ {} commits from {} distinct author emails",
308 model::thousands(commits.len() as u64),
309 distinct_emails(&commits)
310 );
311 }
312
313 let mut clusters = identity::cluster_commits(&commits, cfg.merge_names);
314
315 let any_slug = source_slugs.iter().any(|s| s.is_some());
316 if cfg.use_github {
317 if any_slug {
318 log!("→ enriching from GitHub");
319 github::enrich_clusters(
320 &mut clusters,
321 &commits,
322 &source_slugs,
323 &client,
324 &mut caches,
325 cfg.verbose,
326 );
327 clusters = identity::merge_by_login(clusters);
328 github::fetch_profiles(&mut clusters, &client, &mut caches, cfg.verbose);
329 if !cfg.detect_affiliation {
330 for cl in clusters.iter_mut() {
331 cl.affiliation = None;
332 }
333 }
334 } else {
335 log!("→ no GitHub sources, skipping enrichment");
336 }
337 }
338
339 if !cfg.identities.is_empty() {
340 clusters = identity::apply_identity_file(clusters, &cfg.identities);
341 log!("→ applied {} identity overrides", cfg.identities.len());
342 }
343
344 let mut contributors = identity::build_contributors(
345 &clusters,
346 &commits,
347 &cfg.groups,
348 &cfg.forced_names,
349 cfg.count_coauthors,
350 );
351
352 apply_group_aliases(&mut contributors, &cfg.group_aliases);
356
357 strip_leading_the(&mut contributors);
360
361 let mut manual_groups: std::collections::HashSet<String> = cfg
365 .groups
366 .iter()
367 .map(|r| strip_the(&r.group).to_string())
368 .collect();
369 manual_groups.extend(
370 cfg.group_aliases
371 .iter()
372 .map(|(canon, _)| strip_the(canon).to_string()),
373 );
374 let n_groups = canonicalize_groups(&mut contributors, &manual_groups);
375 if n_groups > 0 {
376 log!("→ {n_groups} distinct affiliations/groups");
377 }
378
379 if !cfg.exclude.is_empty() {
380 contributors.retain(|c| {
381 !cfg.exclude.iter().any(|pat| {
382 let p = pat.to_lowercase();
383 c.name.to_lowercase().contains(&p)
384 || c.login
385 .as_deref()
386 .is_some_and(|l| l.to_lowercase().contains(&p))
387 })
388 });
389 }
390
391 log!(
392 "→ merged to {} contributors ({} bots)",
393 contributors.len(),
394 contributors.iter().filter(|c| c.bot).count()
395 );
396
397 if cfg.embed_avatars && cfg.use_github {
398 github::embed_avatars(
399 &mut contributors,
400 &client,
401 &mut caches,
402 cfg.avatar_size,
403 cfg.verbose,
404 );
405 }
406
407 let single = if prepared.len() == 1 {
410 Some(&prepared[0])
411 } else {
412 None
413 };
414
415 let owner = common_owner(&prepared);
418
419 let owner_avatar = if cfg.use_github && cfg.embed_avatars {
422 owner
423 .as_deref()
424 .and_then(|owner| github::fetch_avatar(&client, &mut caches, owner, 48))
425 } else {
426 None
427 };
428
429 let description = if cfg.use_github {
431 single
432 .and_then(|p| p.slug.as_deref())
433 .and_then(|slug| github::fetch_repo_description(&client, slug))
434 } else {
435 None
436 };
437
438 let default_name = match (single, &owner) {
441 (Some(p), _) => p.display_name.clone(),
442 (None, Some(owner)) => owner.clone(),
443 (None, None) => combined_name(&prepared),
444 };
445 let branch = match single {
446 Some(p) => p.branch.clone(),
447 None => "combined".to_string(),
448 };
449
450 let releases: Vec<model::Release> = if prepared.len() == 1 {
455 repo::read_tags(&prepared[0])
456 } else {
457 prepared
458 .iter()
459 .flat_map(|p| {
460 repo::read_tags(p).into_iter().map(|mut r| {
461 r.name = format!("{} {}", p.display_name, r.name);
462 r
463 })
464 })
465 .collect()
466 };
467 if !releases.is_empty() {
468 log!("→ {} releases", releases.len());
469 }
470
471 let first = contributors.iter().map(|c| c.first).min().unwrap_or(0);
472 let last = contributors.iter().map(|c| c.last).max().unwrap_or(0);
473 let meta = RepoMeta {
474 name: cfg.title.clone().unwrap_or(default_name),
475 url: single.and_then(|p| p.url.clone()),
476 slug: single.and_then(|p| p.slug.clone()),
477 branch,
478 first,
479 last,
480 total_commits: commits.len() as u64,
481 total_contributors: contributors.iter().filter(|c| !c.bot).count(),
482 generated: chrono::Utc::now().format("%Y-%m-%d").to_string(),
483 owner_avatar,
484 description,
485 releases,
486 };
487
488 caches.save();
489 Ok(Analysis { contributors, meta })
490}
491
/// Outcome of reading one prepared source (see `read_source`).
struct SourceRead {
    /// Commits of this source; the caller assigns each commit's `src`.
    commits: Vec<model::Commit>,
    /// `true` when the commits came from the commit cache rather than from
    /// `repo::read_commits`.
    from_cache: bool,
}
496
497fn read_source(
501 p: &repo::PreparedRepo,
502 caches: &cache::Caches,
503 filter: &model::CommitFilter,
504 branch: Option<&str>,
505) -> Result<SourceRead> {
506 let key = source_cache_key(p);
507 let remote = repo::remote_tip(p);
510 let tip = remote.clone().or_else(|| repo::local_tip(p));
511
512 if let Some(tip) = &tip {
513 if let Some(cached) = caches.commits(&key, tip, filter) {
514 let commits = cached
515 .into_iter()
516 .map(|c| model::Commit {
517 sha: c.sha,
518 ts: c.ts,
519 name: c.name,
520 email: c.email,
521 coauthors: c.coauthors,
522 src: 0,
523 })
524 .collect();
525 return Ok(SourceRead {
526 commits,
527 from_cache: true,
528 });
529 }
530 }
531
532 let local = repo::local_tip(p);
535 if p.is_remote && remote.is_some() && remote != local {
536 repo::fetch(p);
537 }
538 let commits = repo::read_commits(p, branch, filter)?;
539 if let Some(tip) = repo::local_tip(p) {
542 let cached = commits
543 .iter()
544 .map(|c| cache::CachedCommit {
545 sha: c.sha.clone(),
546 ts: c.ts,
547 name: c.name.clone(),
548 email: c.email.clone(),
549 coauthors: c.coauthors.clone(),
550 })
551 .collect();
552 caches.put_commits(&key, &tip, filter, cached);
553 }
554 Ok(SourceRead {
555 commits,
556 from_cache: false,
557 })
558}
559
560fn source_cache_key(p: &repo::PreparedRepo) -> String {
562 let base = p.slug.as_deref().unwrap_or(&p.display_name);
563 repo::sanitize(&format!("{base}__{}", p.branch))
564}
565
566fn combined_name(prepared: &[repo::PreparedRepo]) -> String {
569 let names: Vec<&str> = prepared.iter().map(|p| p.display_name.as_str()).collect();
570 match names.len() {
571 0 => "repositories".to_string(),
572 1..=3 => names.join(" + "),
573 n => format!("{} + {} more", names[..2].join(" + "), n - 2),
574 }
575}
576
577fn common_owner(prepared: &[repo::PreparedRepo]) -> Option<String> {
581 let mut owner: Option<String> = None;
582 for p in prepared {
583 let o = p.slug.as_deref()?.split('/').next()?.to_string();
584 match &owner {
585 Some(prev) if *prev != o => return None,
586 _ => owner = Some(o),
587 }
588 }
589 owner
590}
591
592fn distinct_emails(commits: &[model::Commit]) -> usize {
593 let mut e: Vec<&str> = commits.iter().map(|c| c.email.as_str()).collect();
594 e.sort_unstable();
595 e.dedup();
596 e.len()
597}
598
599fn apply_group_aliases(contributors: &mut [Contributor], aliases: &[(String, Vec<String>)]) {
606 if aliases.is_empty() {
607 return;
608 }
609 let mut map: std::collections::HashMap<String, String> = std::collections::HashMap::new();
610 for (canon, variants) in aliases {
611 map.insert(canon.to_lowercase(), canon.clone());
612 for v in variants {
613 map.insert(v.to_lowercase(), canon.clone());
614 }
615 }
616 let canon = |g: &str| map.get(&g.to_lowercase()).cloned();
617 for c in contributors.iter_mut() {
618 if let Some(g) = &c.group {
619 if let Some(cn) = canon(g) {
620 c.group = Some(cn);
621 }
622 }
623 if let Some(mg) = &mut c.month_groups {
624 for slot in mg.iter_mut().flatten() {
625 if let Some(cn) = canon(slot) {
626 *slot = cn;
627 }
628 }
629 }
630 }
631}
632
/// Drop a leading `"the "` (any ASCII letter case) from a group name,
/// together with any whitespace that follows it.
///
/// The input is returned unchanged when it does not start with the article,
/// or when nothing would be left after removing it.
fn strip_the(g: &str) -> &str {
    // Shorter than four bytes, or byte 4 falls inside a multi-byte character:
    // it cannot start with the ASCII article.
    if !g.is_char_boundary(4) {
        return g;
    }
    let (article, tail) = g.split_at(4);
    if !article.eq_ignore_ascii_case("the ") {
        return g;
    }
    match tail.trim_start() {
        "" => g,
        name => name,
    }
}
648
649fn strip_leading_the(contributors: &mut [Contributor]) {
652 for c in contributors.iter_mut() {
653 if let Some(g) = &mut c.group {
654 if strip_the(g).len() != g.len() {
655 *g = strip_the(g).to_string();
656 }
657 }
658 if let Some(mg) = &mut c.month_groups {
659 for slot in mg.iter_mut().flatten() {
660 if strip_the(slot).len() != slot.len() {
661 *slot = strip_the(slot).to_string();
662 }
663 }
664 }
665 }
666}
667
668fn canonicalize_groups(
669 contributors: &mut [Contributor],
670 manual: &std::collections::HashSet<String>,
671) -> usize {
672 use std::collections::HashMap;
673 let alnum_key = |g: &str| -> String {
674 let lower = g.to_lowercase();
675 let trimmed = lower.strip_prefix("the ").unwrap_or(&lower);
676 trimmed.chars().filter(|c| c.is_alphanumeric()).collect()
677 };
678
679 let mut variants: HashMap<String, usize> = HashMap::new();
680 for c in contributors.iter() {
681 if let Some(g) = &c.group {
682 *variants.entry(g.clone()).or_default() += 1;
683 }
684 }
685
686 let mut keys: Vec<String> = variants
690 .keys()
691 .filter(|g| !manual.contains(*g))
692 .map(|g| alnum_key(g))
693 .collect();
694 keys.sort();
695 keys.dedup();
696 let resolve = |key: &str| -> String {
697 keys.iter()
698 .filter(|k| k.len() >= 6 && key.starts_with(*k))
699 .min_by_key(|k| k.len())
700 .map(|k| k.to_string())
701 .unwrap_or_else(|| key.to_string())
702 };
703 let cluster_of = |g: &str| -> String {
704 if manual.contains(g) {
705 format!("\u{0}{g}")
706 } else {
707 resolve(&alnum_key(g))
708 }
709 };
710
711 let mut best: HashMap<String, (&String, usize)> = HashMap::new();
712 for (g, n) in &variants {
713 let cluster = cluster_of(g);
714 let score = |g: &str, n: usize| {
715 n * 4
716 + usize::from(g.contains(' ')) * 2
717 + usize::from(g.chars().any(|c| c.is_uppercase()))
718 };
719 let entry = best.entry(cluster).or_insert((g, *n));
720 if score(g, *n) > score(entry.0, entry.1) {
721 *entry = (g, *n);
722 }
723 }
724
725 let display: HashMap<String, String> = best
726 .iter()
727 .map(|(k, (g, _))| (k.clone(), (*g).clone()))
728 .collect();
729 for c in contributors.iter_mut() {
730 if let Some(g) = &c.group {
731 c.group = display.get(&cluster_of(g)).cloned().or(c.group.clone());
732 }
733 }
734 display.len()
735}