pub mod cache;
pub mod github;
pub mod html;
pub mod identity;
pub mod model;
pub mod repo;
pub mod svg;
pub mod theme;
use anyhow::{bail, Result};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Mutex;
pub use model::{Contributor, RepoMeta};
/// Upper bound on the number of worker threads `analyze_many` spawns to read
/// source histories in parallel (fewer are used when there are fewer sources).
const READ_THREADS: usize = 8;
/// Options controlling how `analyze` / `analyze_many` collect and post-process
/// contributor data.
#[derive(Clone)]
pub struct Config {
/// Branch forwarded to `repo::prepare` and `repo::read_commits`; `None` passes
/// no explicit branch and leaves the choice to those helpers.
pub branch: Option<String>,
/// Copied into `model::CommitFilter::since`.
/// NOTE(review): the accepted date format is not visible here — confirm in `repo`.
pub since: Option<String>,
/// Copied into `model::CommitFilter::until` (same caveat as `since`).
pub until: Option<String>,
/// Copied into `model::CommitFilter::no_merges`; presumably skips merge
/// commits — confirm in `repo::read_commits`.
pub no_merges: bool,
/// Overrides the report name (`RepoMeta::name`); when `None`, a name derived
/// from the sources is used.
pub title: Option<String>,
/// Patterns of contributors to drop: a contributor is removed when its name or
/// login contains any pattern, compared case-insensitively.
pub exclude: Vec<String>,
/// Group rules handed to `identity::build_contributors`. Their `group` names are
/// treated as manual groups, which automatic canonicalization never folds.
pub groups: Vec<model::GroupRule>,
/// `(canonical, variants)` pairs: a group matching the canonical name or any
/// variant (case-insensitively) is rewritten to the canonical spelling.
pub group_aliases: Vec<(String, Vec<String>)>,
/// Identity overrides handed to `identity::apply_identity_file`; presumably each
/// inner list names identities that belong to one person — confirm there.
pub identities: Vec<Vec<String>>,
/// Enables GitHub access: token lookup, owner repository listing, cluster
/// enrichment, avatars and the repository description.
pub use_github: bool,
/// When `false`, the affiliation of every cluster is cleared after GitHub
/// enrichment.
pub detect_affiliation: bool,
/// Forwarded to `identity::cluster_commits`.
pub merge_names: bool,
/// Forwarded to `identity::build_contributors`.
pub count_coauthors: bool,
/// When `true` (and `use_github` is set), avatars are fetched for contributors
/// and for the common owner of the sources.
pub embed_avatars: bool,
/// Size passed to `github::embed_avatars` for contributor avatars.
pub avatar_size: u32,
/// Forwarded to `cache::Caches::load`; presumably forces cached data to be
/// ignored — confirm in `cache`.
pub refresh: bool,
/// When `true`, progress messages and warnings are printed to stderr.
pub verbose: bool,
}
impl Default for Config {
fn default() -> Self {
Config {
branch: None,
since: None,
until: None,
no_merges: false,
title: None,
exclude: Vec::new(),
groups: Vec::new(),
group_aliases: Vec::new(),
identities: Vec::new(),
use_github: true,
detect_affiliation: true,
merge_names: true,
count_coauthors: true,
embed_avatars: true,
avatar_size: 64,
refresh: false,
verbose: false,
}
}
}
/// Result of `analyze` / `analyze_many`.
pub struct Analysis {
/// Contributors that remain after identity merging, group canonicalization and
/// the `Config::exclude` filter.
pub contributors: Vec<Contributor>,
/// Summary information about the analyzed source(s) as a whole.
pub meta: RepoMeta,
}
/// Orderings supported by [`sort`].
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum Sort {
/// Ascending by `first`; ties broken by commit count, highest first.
First,
/// Descending by `last`; ties broken by commit count, highest first.
Last,
/// Descending by commit count.
Commits,
/// Descending by `last - first`.
Duration,
/// Ascending by lowercased name.
Name,
}
/// Sorts `rows` in place according to `key`.
///
/// Every ordering uses a stable sort, so rows that compare equal keep their
/// relative order from the input.
pub fn sort(rows: &mut [Contributor], key: Sort) {
    use std::cmp::Reverse;

    match key {
        Sort::First => rows.sort_by(|a, b| a.first.cmp(&b.first).then(b.commits.cmp(&a.commits))),
        Sort::Last => rows.sort_by(|a, b| b.last.cmp(&a.last).then(b.commits.cmp(&a.commits))),
        Sort::Commits => rows.sort_by_key(|c| Reverse(c.commits)),
        Sort::Duration => rows.sort_by_key(|c| Reverse(c.last - c.first)),
        // The key allocates a lowercased `String`; caching computes it once per
        // row instead of on every comparison. The resulting order is unchanged.
        Sort::Name => rows.sort_by_cached_key(|c| c.name.to_lowercase()),
    }
}
/// Analyzes a single source; shorthand for [`analyze_many`] with a one-element
/// input list.
pub fn analyze(input: &str, cfg: &Config) -> Result<Analysis> {
    let sources = [input];
    analyze_many(&sources, cfg)
}
/// Analyzes one or more sources and merges them into a single [`Analysis`].
///
/// The work proceeds in phases:
/// 1. Inputs that `repo::looks_like_owner` accepts are expanded into a list of
///    repositories (from the cache, or from GitHub); other inputs are used as-is.
/// 2. Each source is prepared with `repo::prepare`.
/// 3. Histories are read on up to `READ_THREADS` worker threads.
/// 4. Commits are merged, dropping SHAs already seen in an earlier source.
/// 5. Commits are clustered into identities, optionally enriched from GitHub,
///    and identity overrides are applied.
/// 6. Contributors are built, groups are aliased/canonicalized and the
///    `cfg.exclude` patterns are applied.
/// 7. Avatars, owner avatar and description are fetched (GitHub only) and the
///    `RepoMeta` is assembled; caches are saved on success.
///
/// # Errors
/// Fails when `inputs` is empty, when an owner input is given while
/// `cfg.use_github` is off, when the only input is an owner with no
/// repositories, when no source is usable, or when no commits are found. With a
/// single source, a prepare/read failure is returned; with several sources the
/// failing source is skipped (a warning is printed only when `cfg.verbose`).
pub fn analyze_many(inputs: &[&str], cfg: &Config) -> Result<Analysis> {
// Progress and warning output goes to stderr, and only when `cfg.verbose` is set.
macro_rules! log {
($($arg:tt)*) => { if cfg.verbose { eprintln!($($arg)*); } };
}
if inputs.is_empty() {
bail!("no repository sources given");
}
// A token is only looked up when GitHub access is enabled.
let client = github::GhClient::new(if cfg.use_github {
github::find_token()
} else {
None
});
let now = chrono::Utc::now().timestamp();
let mut caches = cache::Caches::load(cfg.refresh, now);
// Phase 1: expand owner inputs into their repositories.
let mut sources: Vec<String> = Vec::new();
for input in inputs {
if repo::looks_like_owner(input) {
if !cfg.use_github {
bail!("'{input}' looks like an org/user, but listing its repositories needs GitHub access (remove --no-github, or pass owner/repo slugs)");
}
// Prefer a cached repository list; otherwise fetch it, and cache only
// non-empty results.
let (slugs, cached) = match caches.org_repos(input) {
Some(repos) => (repos, true),
None => {
log!("→ listing repositories for '{input}'");
let fetched = client.list_owner_repos(input);
if !fetched.is_empty() {
caches.put_org_repos((*input).to_string(), fetched.clone());
}
(fetched, false)
}
};
if slugs.is_empty() {
// An empty owner is fatal only when it is the sole input.
if inputs.len() == 1 {
bail!("no repositories found for org/user '{input}' (it may not exist or has no non-fork repos)");
}
log!(" warning: no repositories found for '{input}'");
} else {
log!(
" {} repositories{}",
slugs.len(),
if cached { " (cached)" } else { "" }
);
sources.extend(slugs);
}
} else {
sources.push((*input).to_string());
}
}
if sources.is_empty() {
bail!("no usable repository sources");
}
// Phase 2: prepare every source. With several sources a failure is skipped;
// with exactly one it is returned to the caller.
let mut prepared: Vec<repo::PreparedRepo> = Vec::new();
for input in &sources {
match repo::prepare(input, cfg.branch.as_deref()) {
Ok(p) => prepared.push(p),
Err(e) if sources.len() > 1 => log!(" warning: skipping source '{input}' ({e})"),
Err(e) => return Err(e),
}
}
if prepared.is_empty() {
bail!("no usable repository sources");
}
// Slugs in the same order as `prepared`, so a commit's `src` index can be
// used to look up the slug of the source it came from.
let source_slugs: Vec<Option<String>> = prepared.iter().map(|p| p.slug.clone()).collect();
for p in &prepared {
log!("→ source: {} (branch {})", p.display_name, p.branch);
}
let filter = model::CommitFilter {
since: cfg.since.clone(),
until: cfg.until.clone(),
no_merges: cfg.no_merges,
};
let branch = cfg.branch.as_deref();
// Phase 3: read histories in parallel. There is one result slot per source;
// workers claim the next unread index from the shared `cursor` until they run
// past the end of `prepared`.
let outcomes: Vec<Mutex<Option<Result<SourceRead>>>> =
(0..prepared.len()).map(|_| Mutex::new(None)).collect();
let cursor = AtomicUsize::new(0);
std::thread::scope(|s| {
for _ in 0..READ_THREADS.min(prepared.len()) {
s.spawn(|| loop {
let i = cursor.fetch_add(1, Ordering::Relaxed);
let Some(p) = prepared.get(i) else { break };
let r = read_source(p, &caches, &filter, branch);
*outcomes[i].lock().unwrap() = Some(r);
});
}
});
// Phase 4: merge the per-source results in source order, dropping commits
// whose SHA was already seen in an earlier source.
let mut commits: Vec<model::Commit> = Vec::new();
let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
let mut duplicates = 0u64;
let mut cached_sources = 0usize;
for (i, (p, slot)) in prepared.iter().zip(outcomes).enumerate() {
let read = match slot.into_inner().unwrap() {
Some(Ok(r)) => r,
Some(Err(e)) if prepared.len() > 1 => {
log!(" warning: skipping {} ({e})", p.display_name);
continue;
}
Some(Err(e)) => return Err(e),
// The slot was never filled; treat the source as contributing nothing.
None => continue,
};
if read.from_cache {
cached_sources += 1;
}
for mut c in read.commits {
if !seen.insert(c.sha.clone()) {
duplicates += 1;
continue;
}
// Record which prepared source (by index) this commit came from.
c.src = i as u32;
commits.push(c);
}
}
if commits.is_empty() {
bail!("no commits found");
}
if cached_sources > 0 {
log!(
"→ reused cached history for {cached_sources}/{} sources",
prepared.len()
);
}
if prepared.len() > 1 {
log!(
"→ {} commits from {} sources ({} duplicate commits dropped), {} distinct author emails",
model::thousands(commits.len() as u64),
prepared.len(),
model::thousands(duplicates),
distinct_emails(&commits)
);
} else {
log!(
"→ {} commits from {} distinct author emails",
model::thousands(commits.len() as u64),
distinct_emails(&commits)
);
}
// Phase 5: cluster commits into identities; enrich from GitHub only when it
// is enabled and at least one source has a slug.
let mut clusters = identity::cluster_commits(&commits, cfg.merge_names);
let any_slug = source_slugs.iter().any(|s| s.is_some());
if cfg.use_github {
if any_slug {
log!("→ enriching from GitHub");
github::enrich_clusters(
&mut clusters,
&commits,
&source_slugs,
&client,
&mut caches,
cfg.verbose,
);
clusters = identity::merge_by_login(clusters);
github::fetch_profiles(&mut clusters, &client, &mut caches, cfg.verbose);
// Affiliation detection is opt-out: discard whatever enrichment set.
if !cfg.detect_affiliation {
for cl in clusters.iter_mut() {
cl.affiliation = None;
}
}
} else {
log!("→ no GitHub sources, skipping enrichment");
}
}
if !cfg.identities.is_empty() {
clusters = identity::apply_identity_file(clusters, &cfg.identities);
log!("→ applied {} identity overrides", cfg.identities.len());
}
// Phase 6: build contributors, then normalize their group names.
let mut contributors =
identity::build_contributors(&clusters, &commits, &cfg.groups, cfg.count_coauthors);
apply_group_aliases(&mut contributors, &cfg.group_aliases);
// Groups named by a rule or as an alias target are "manual": they are passed
// to `canonicalize_groups` so it keeps them as their own groups.
let mut manual_groups: std::collections::HashSet<String> =
cfg.groups.iter().map(|r| r.group.clone()).collect();
manual_groups.extend(cfg.group_aliases.iter().map(|(canon, _)| canon.clone()));
let n_groups = canonicalize_groups(&mut contributors, &manual_groups);
if n_groups > 0 {
log!("→ {n_groups} distinct affiliations/groups");
}
// Drop contributors whose name or login contains any exclude pattern
// (case-insensitive substring match).
if !cfg.exclude.is_empty() {
contributors.retain(|c| {
!cfg.exclude.iter().any(|pat| {
let p = pat.to_lowercase();
c.name.to_lowercase().contains(&p)
|| c.login
.as_deref()
.is_some_and(|l| l.to_lowercase().contains(&p))
})
});
}
log!(
"→ merged to {} contributors ({} bots)",
contributors.len(),
contributors.iter().filter(|c| c.bot).count()
);
if cfg.embed_avatars && cfg.use_github {
github::embed_avatars(
&mut contributors,
&client,
&mut caches,
cfg.avatar_size,
cfg.verbose,
);
}
// Phase 7: assemble the metadata. URL, slug, branch and description are only
// meaningful when exactly one source was analyzed.
let single = if prepared.len() == 1 {
Some(&prepared[0])
} else {
None
};
let owner = common_owner(&prepared);
let owner_avatar = if cfg.use_github && cfg.embed_avatars {
owner
.as_deref()
.and_then(|owner| github::fetch_avatar(&client, &mut caches, owner, 48))
} else {
None
};
let description = if cfg.use_github {
single
.and_then(|p| p.slug.as_deref())
.and_then(|slug| github::fetch_repo_description(&client, slug))
} else {
None
};
// Name preference: the single source's name, else the shared owner, else a
// combined label built from the source names.
let default_name = match (single, &owner) {
(Some(p), _) => p.display_name.clone(),
(None, Some(owner)) => owner.clone(),
(None, None) => combined_name(&prepared),
};
let branch = match single {
Some(p) => p.branch.clone(),
None => "combined".to_string(),
};
// Overall span across the remaining contributors; 0 when there are none.
let first = contributors.iter().map(|c| c.first).min().unwrap_or(0);
let last = contributors.iter().map(|c| c.last).max().unwrap_or(0);
let meta = RepoMeta {
name: cfg.title.clone().unwrap_or(default_name),
url: single.and_then(|p| p.url.clone()),
slug: single.and_then(|p| p.slug.clone()),
branch,
first,
last,
// Counts every deduplicated commit, including those of excluded contributors.
total_commits: commits.len() as u64,
// Bots are not counted as contributors.
total_contributors: contributors.iter().filter(|c| !c.bot).count(),
generated: chrono::Utc::now().format("%Y-%m-%d").to_string(),
owner_avatar,
description,
};
// Caches are saved only on this success path; early `bail!`s skip it.
caches.save();
Ok(Analysis { contributors, meta })
}
/// Outcome of reading one source's history in `read_source`.
struct SourceRead {
/// Commits of the source; `src` is filled in later by the caller.
commits: Vec<model::Commit>,
/// `true` when the commits came from the commit cache rather than from
/// `repo::read_commits`.
from_cache: bool,
}
/// Loads the commits of one prepared source, using the commit cache when it
/// holds an entry for the source's current tip.
///
/// The cache is probed with the remote tip, or with the local tip when no remote
/// tip is available. On a miss, a remote source whose remote tip differs from the
/// local one is fetched first; the history is then read with
/// `repo::read_commits` and stored in the cache under the resulting local tip.
///
/// # Errors
/// Propagates any error from `repo::read_commits`.
fn read_source(
    p: &repo::PreparedRepo,
    caches: &cache::Caches,
    filter: &model::CommitFilter,
    branch: Option<&str>,
) -> Result<SourceRead> {
    let key = source_cache_key(p);
    let remote = repo::remote_tip(p);

    // Fast path: a cached history recorded for exactly this tip.
    let probe_tip = remote.clone().or_else(|| repo::local_tip(p));
    let hit = probe_tip
        .as_ref()
        .and_then(|tip| caches.commits(&key, tip, filter));
    if let Some(entries) = hit {
        return Ok(SourceRead {
            commits: entries
                .into_iter()
                .map(|entry| model::Commit {
                    sha: entry.sha,
                    ts: entry.ts,
                    name: entry.name,
                    email: entry.email,
                    coauthors: entry.coauthors,
                    // Placeholder; the caller assigns the real source index.
                    src: 0,
                })
                .collect(),
            from_cache: true,
        });
    }

    // Slow path: bring a stale remote checkout up to date before reading.
    let local = repo::local_tip(p);
    let behind_remote = p.is_remote && remote.is_some() && remote != local;
    if behind_remote {
        repo::fetch(p);
    }

    let commits = repo::read_commits(p, branch, filter)?;

    // Store what was read under the tip the checkout has now.
    if let Some(tip) = repo::local_tip(p) {
        let snapshot = commits
            .iter()
            .map(|commit| cache::CachedCommit {
                sha: commit.sha.clone(),
                ts: commit.ts,
                name: commit.name.clone(),
                email: commit.email.clone(),
                coauthors: commit.coauthors.clone(),
            })
            .collect();
        caches.put_commits(&key, &tip, filter, snapshot);
    }

    Ok(SourceRead {
        commits,
        from_cache: false,
    })
}
/// Builds the commit-cache key for a source: its slug (or display name when it
/// has no slug) joined to the branch with `__`, passed through `repo::sanitize`.
fn source_cache_key(p: &repo::PreparedRepo) -> String {
    let base = match &p.slug {
        Some(slug) => slug.as_str(),
        None => p.display_name.as_str(),
    };
    let raw = format!("{base}__{}", p.branch);
    repo::sanitize(&raw)
}
/// Produces a label for a set of sources: up to three display names joined with
/// `" + "`, or the first two followed by a count of the rest. An empty set
/// yields `"repositories"`.
fn combined_name(prepared: &[repo::PreparedRepo]) -> String {
    let names: Vec<&str> = prepared.iter().map(|p| p.display_name.as_str()).collect();

    if names.is_empty() {
        return "repositories".to_string();
    }
    if names.len() <= 3 {
        return names.join(" + ");
    }

    // More than three: spell out the first two and summarize the remainder.
    let (shown, hidden) = names.split_at(2);
    format!("{} + {} more", shown.join(" + "), hidden.len())
}
/// Returns the owner part (the text before the first `/` of the slug) shared by
/// every source, or `None` when there are no sources, any source lacks a slug,
/// or the owners differ.
fn common_owner(prepared: &[repo::PreparedRepo]) -> Option<String> {
    let mut owners = prepared
        .iter()
        .map(|p| p.slug.as_deref().and_then(|slug| slug.split('/').next()));

    // No sources, or a first source without a slug: nothing in common.
    let first = owners.next()??;
    for owner in owners {
        if owner? != first {
            return None;
        }
    }
    Some(first.to_string())
}
/// Counts the distinct author email strings among `commits` (exact,
/// case-sensitive comparison).
fn distinct_emails(commits: &[model::Commit]) -> usize {
    commits
        .iter()
        .map(|c| c.email.as_str())
        .collect::<std::collections::HashSet<&str>>()
        .len()
}
/// Rewrites contributor group names to their canonical spelling.
///
/// Each alias entry is `(canonical, variants)`. A group (the contributor's
/// `group` and every filled `month_groups` slot) that equals the canonical name
/// or one of its variants, ignoring case, is replaced by the canonical name.
/// When a spelling appears in several entries, the last one wins. Groups with no
/// matching entry are left untouched.
fn apply_group_aliases(contributors: &mut [Contributor], aliases: &[(String, Vec<String>)]) {
    if aliases.is_empty() {
        return;
    }

    // Lowercased spelling -> canonical name, filled in entry order so later
    // entries override earlier ones.
    let mut lookup = std::collections::HashMap::<String, &str>::new();
    for (canonical, variants) in aliases {
        lookup.insert(canonical.to_lowercase(), canonical);
        for variant in variants {
            lookup.insert(variant.to_lowercase(), canonical);
        }
    }
    let canonical_for =
        |group: &str| lookup.get(&group.to_lowercase()).map(|name| (*name).to_string());

    for contributor in contributors.iter_mut() {
        let renamed = contributor.group.as_deref().and_then(|g| canonical_for(g));
        if let Some(name) = renamed {
            contributor.group = Some(name);
        }

        if let Some(months) = contributor.month_groups.as_mut() {
            for slot in months.iter_mut().flatten() {
                if let Some(name) = canonical_for(slot) {
                    *slot = name;
                }
            }
        }
    }
}
/// Collapses spelling variants of the same group onto one display name and
/// returns the number of distinct groups that remain.
///
/// Group names are compared through a normalized key: lowercased, a leading
/// `"the "` removed, and everything that is not alphanumeric dropped. A key is
/// additionally folded into the shortest key of at least six bytes that is a
/// prefix of it. Names listed in `manual` are never folded; each one forms its
/// own cluster.
///
/// Within a cluster the displayed spelling is the variant with the highest
/// score: the number of contributors using it weighs most, then whether it
/// contains a space, then whether it contains an uppercase letter. Equal scores
/// are resolved deterministically (shorter spelling first, then lexicographic
/// order) so the outcome never depends on `HashMap` iteration order.
///
/// Only each contributor's `group` is rewritten; contributors without a group
/// are left untouched.
fn canonicalize_groups(
    contributors: &mut [Contributor],
    manual: &std::collections::HashSet<String>,
) -> usize {
    use std::collections::HashMap;

    /// Normalized comparison key for a group name.
    fn alnum_key(g: &str) -> String {
        let lower = g.to_lowercase();
        let trimmed = lower.strip_prefix("the ").unwrap_or(&lower);
        trimmed.chars().filter(|c| c.is_alphanumeric()).collect()
    }

    /// Preference score of spelling `g` used by `n` contributors.
    fn score(g: &str, n: usize) -> usize {
        n * 4
            + usize::from(g.contains(' ')) * 2
            + usize::from(g.chars().any(|c| c.is_uppercase()))
    }

    /// Whether `cand` should replace `cur` as a cluster's display spelling.
    fn preferred(cand: (&str, usize), cur: (&str, usize)) -> bool {
        score(cand.0, cand.1)
            .cmp(&score(cur.0, cur.1))
            // Tie-breaks: the shorter spelling wins, then the lexicographically
            // smaller one. This makes the choice independent of visit order.
            .then_with(|| cur.0.len().cmp(&cand.0.len()))
            .then_with(|| cur.0.cmp(cand.0))
            .is_gt()
    }

    // How many contributors use each exact spelling.
    let mut variants: HashMap<String, usize> = HashMap::new();
    for g in contributors.iter().filter_map(|c| c.group.as_ref()) {
        *variants.entry(g.clone()).or_default() += 1;
    }

    // Normalized keys of all automatically detected (non-manual) spellings.
    let mut keys: Vec<String> = variants
        .keys()
        .filter(|g| !manual.contains(*g))
        .map(|g| alnum_key(g))
        .collect();
    keys.sort();
    keys.dedup();

    // Fold a key into the shortest sufficiently long key that prefixes it.
    let resolve = |key: &str| -> String {
        keys.iter()
            .filter(|k| k.len() >= 6 && key.starts_with(k.as_str()))
            .min_by_key(|k| k.len())
            .cloned()
            .unwrap_or_else(|| key.to_string())
    };
    // Manual names get a NUL-prefixed id, which cannot collide with a
    // normalized key because those contain only alphanumerics.
    let cluster_of = |g: &str| -> String {
        if manual.contains(g) {
            format!("\u{0}{g}")
        } else {
            resolve(&alnum_key(g))
        }
    };

    // Resolve every spelling once instead of re-scanning `keys` per contributor.
    let cluster_by_variant: HashMap<&str, String> = variants
        .keys()
        .map(|g| (g.as_str(), cluster_of(g)))
        .collect();

    // Cluster id -> (display spelling, contributor count of that spelling).
    let mut best: HashMap<&str, (&str, usize)> = HashMap::new();
    for (g, &n) in &variants {
        let cluster = cluster_by_variant[g.as_str()].as_str();
        best.entry(cluster)
            .and_modify(|cur| {
                if preferred((g.as_str(), n), *cur) {
                    *cur = (g.as_str(), n);
                }
            })
            .or_insert((g.as_str(), n));
    }

    for c in contributors.iter_mut() {
        let shown = c
            .group
            .as_deref()
            .and_then(|g| cluster_by_variant.get(g))
            .and_then(|cluster| best.get(cluster.as_str()))
            .map(|(name, _)| (*name).to_string());
        // A failed lookup leaves the group as it was.
        if let Some(name) = shown {
            c.group = Some(name);
        }
    }
    best.len()
}