pub mod github;
pub mod html;
pub mod identity;
pub mod model;
pub mod repo;
pub mod svg;
use anyhow::{bail, Result};
pub use model::{Contributor, RepoMeta};
/// Options controlling a single [`analyze`] run.
///
/// Start from [`Config::default`] and override individual fields with
/// struct-update syntax (`Config { verbose: true, ..Default::default() }`).
#[derive(Clone, Debug)]
pub struct Config {
    /// Branch handed to `repo::prepare` and `repo::read_commits`; `None` leaves
    /// the choice to those functions.
    pub branch: Option<String>,
    /// Lower bound of the commit range, forwarded unchanged to
    /// `repo::read_commits` (the accepted format is defined there).
    pub since: Option<String>,
    /// Upper bound of the commit range, forwarded unchanged to
    /// `repo::read_commits`.
    pub until: Option<String>,
    /// Forwarded to `repo::read_commits`; presumably skips merge commits.
    pub no_merges: bool,
    /// Replaces the repository's display name in the resulting `RepoMeta::name`.
    pub title: Option<String>,
    /// Case-insensitive substring patterns; a contributor whose name or login
    /// contains any of them is dropped from the result.
    pub exclude: Vec<String>,
    /// Pairs forwarded to `identity::build_contributors`.
    // NOTE(review): the meaning of each pair element is defined in `identity` — confirm there.
    pub groups: Vec<(String, String)>,
    /// Identity override sets forwarded to `identity::apply_identity_file`.
    pub identities: Vec<Vec<String>>,
    /// Master switch for everything that talks to GitHub: token lookup,
    /// cluster enrichment, profile fetching and avatar downloads.
    pub use_github: bool,
    /// When `false`, affiliations attached to clusters during GitHub
    /// enrichment are cleared again before contributors are built.
    pub detect_affiliation: bool,
    /// Forwarded to `identity::cluster_commits`.
    pub merge_names: bool,
    /// Fetch avatars for contributors and the repository owner. Only takes
    /// effect when `use_github` is also set.
    pub embed_avatars: bool,
    /// Size forwarded to `github::embed_avatars` for contributor avatars.
    pub avatar_size: u32,
    /// Print progress lines to stderr while analysing.
    pub verbose: bool,
}
impl Default for Config {
fn default() -> Self {
Config {
branch: None,
since: None,
until: None,
no_merges: false,
title: None,
exclude: Vec::new(),
groups: Vec::new(),
identities: Vec::new(),
use_github: true,
detect_affiliation: true,
merge_names: true,
embed_avatars: true,
avatar_size: 64,
verbose: false,
}
}
}
/// Everything [`analyze`] produces for one repository.
pub struct Analysis {
/// Contributors built from the merged identity clusters, after the
/// `Config::exclude` patterns have been applied.
pub contributors: Vec<Contributor>,
/// Repository-level summary (name, branch, date span, totals, ...) assembled
/// at the end of [`analyze`].
pub meta: RepoMeta,
}
/// Ordering applied to contributor rows by [`sort`].
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Sort {
    /// Ascending by `first`; ties broken by commit count, highest first.
    First,
    /// Descending by `last`; ties broken by commit count, highest first.
    Last,
    /// Descending by commit count.
    Commits,
    /// Descending by `last - first`.
    Duration,
    /// Ascending by lowercased name.
    Name,
}
/// Reorders `rows` in place according to `key`.
///
/// Every ordering uses a stable sort, so rows that compare equal keep their
/// relative order from the input.
pub fn sort(rows: &mut [Contributor], key: Sort) {
    match key {
        Sort::First => {
            rows.sort_by(|a, b| a.first.cmp(&b.first).then(b.commits.cmp(&a.commits)))
        }
        Sort::Last => rows.sort_by(|a, b| b.last.cmp(&a.last).then(b.commits.cmp(&a.commits))),
        Sort::Commits => rows.sort_by_key(|c| std::cmp::Reverse(c.commits)),
        Sort::Duration => rows.sort_by_key(|c| std::cmp::Reverse(c.last - c.first)),
        // The key allocates a lowercased copy of the name, so compute it once
        // per row instead of once per comparison.
        Sort::Name => rows.sort_by_cached_key(|c| c.name.to_lowercase()),
    }
}
/// Runs the full analysis for one repository and returns its contributors
/// together with repository-level metadata.
///
/// Steps, in order:
/// 1. `repo::prepare` resolves `input` (whatever locator that function
///    accepts) and `repo::read_commits` loads the commits selected by
///    `cfg.branch`, `cfg.since`, `cfg.until` and `cfg.no_merges`.
/// 2. Commits are clustered into identities (`identity::cluster_commits`).
/// 3. If `cfg.use_github` is set and the repository has a slug, clusters are
///    enriched from GitHub, merged by login and completed with profile data;
///    affiliations are then cleared again when `cfg.detect_affiliation` is off.
/// 4. `cfg.identities` overrides are applied, contributors are built and their
///    group names are canonicalised.
/// 5. Contributors matching any `cfg.exclude` pattern (case-insensitive
///    substring of name or login) are removed. Empty patterns are ignored,
///    because an empty string is a substring of every name.
/// 6. Avatars are fetched when both `cfg.use_github` and `cfg.embed_avatars`
///    are set, and the `RepoMeta` summary is assembled.
///
/// Progress lines are written to stderr when `cfg.verbose` is set.
///
/// # Errors
///
/// Propagates failures from `repo::prepare` and `repo::read_commits`, and
/// fails with "no commits found" when the selected range contains no commits.
pub fn analyze(input: &str, cfg: &Config) -> Result<Analysis> {
    macro_rules! log {
        ($($arg:tt)*) => { if cfg.verbose { eprintln!($($arg)*); } };
    }

    let prepared = repo::prepare(input, cfg.branch.as_deref())?;
    log!(
        "→ repository: {} (branch {})",
        prepared.display_name,
        prepared.branch
    );

    let commits = repo::read_commits(
        &prepared,
        cfg.branch.as_deref(),
        cfg.since.as_deref(),
        cfg.until.as_deref(),
        cfg.no_merges,
    )?;
    if commits.is_empty() {
        bail!("no commits found");
    }
    log!(
        "→ {} commits from {} distinct author emails",
        model::thousands(commits.len() as u64),
        distinct_emails(&commits)
    );

    let mut clusters = identity::cluster_commits(&commits, cfg.merge_names);

    // The client is always constructed because later steps borrow it; a token
    // is only looked up when GitHub access is enabled.
    let client = github::GhClient::new(if cfg.use_github {
        github::find_token()
    } else {
        None
    });
    if cfg.use_github {
        if let Some(slug) = &prepared.slug {
            log!("→ enriching from GitHub ({slug})");
            github::enrich_clusters(&mut clusters, &commits, slug, &client, cfg.verbose);
            clusters = identity::merge_by_login(clusters);
            github::fetch_profiles(&mut clusters, &client, cfg.verbose);
            if !cfg.detect_affiliation {
                for cl in clusters.iter_mut() {
                    cl.affiliation = None;
                }
            }
        } else {
            log!("→ not a GitHub repo, skipping enrichment");
        }
    }

    if !cfg.identities.is_empty() {
        clusters = identity::apply_identity_file(clusters, &cfg.identities);
        log!("→ applied {} identity overrides", cfg.identities.len());
    }

    let mut contributors = identity::build_contributors(&clusters, &commits, &cfg.groups);
    let n_groups = canonicalize_groups(&mut contributors);
    if n_groups > 0 {
        log!("→ {n_groups} distinct affiliations/groups");
    }

    // Lowercase the patterns once instead of once per contributor. Empty
    // patterns are skipped: `str::contains("")` is always true, so a stray
    // empty entry would otherwise silently exclude every contributor.
    let exclude: Vec<String> = cfg
        .exclude
        .iter()
        .filter(|pat| !pat.is_empty())
        .map(|pat| pat.to_lowercase())
        .collect();
    if !exclude.is_empty() {
        contributors.retain(|c| {
            let name = c.name.to_lowercase();
            let login = c.login.as_deref().map(|l| l.to_lowercase());
            !exclude.iter().any(|pat| {
                name.contains(pat.as_str())
                    || login.as_deref().is_some_and(|l| l.contains(pat.as_str()))
            })
        });
    }
    log!(
        "→ merged to {} contributors ({} bots)",
        contributors.len(),
        contributors.iter().filter(|c| c.bot).count()
    );

    let want_avatars = cfg.use_github && cfg.embed_avatars;
    if want_avatars {
        github::embed_avatars(&mut contributors, &client, cfg.avatar_size, cfg.verbose);
    }
    // The owner is the part of the slug before the first '/'.
    let owner_avatar = if want_avatars {
        prepared
            .slug
            .as_deref()
            .and_then(|s| s.split('/').next())
            .and_then(|owner| github::fetch_avatar(&client, owner, 48))
    } else {
        None
    };

    // Both fall back to 0 when every contributor was excluded.
    let first = contributors.iter().map(|c| c.first).min().unwrap_or(0);
    let last = contributors.iter().map(|c| c.last).max().unwrap_or(0);

    let meta = RepoMeta {
        name: cfg
            .title
            .clone()
            .unwrap_or_else(|| prepared.display_name.clone()),
        url: prepared.url.clone(),
        slug: prepared.slug.clone(),
        branch: prepared.branch.clone(),
        first,
        last,
        // Counts every commit read, including those of excluded contributors.
        total_commits: commits.len() as u64,
        // Bots stay in `contributors` but are not counted here.
        total_contributors: contributors.iter().filter(|c| !c.bot).count(),
        generated: chrono::Utc::now().format("%Y-%m-%d").to_string(),
        owner_avatar,
    };
    Ok(Analysis { contributors, meta })
}
/// Counts how many different author e-mail addresses occur in `commits`.
///
/// Addresses are compared exactly as stored (no case folding or trimming).
fn distinct_emails(commits: &[model::Commit]) -> usize {
    commits
        .iter()
        .map(|commit| commit.email.as_str())
        .collect::<std::collections::HashSet<&str>>()
        .len()
}
/// Collapses spelling variants of the same group name onto one canonical
/// spelling and returns the number of distinct groups that remain.
///
/// Each group name is reduced to a key: lowercased, a leading `"the "` removed,
/// and everything except alphanumeric characters dropped. Names with the same
/// key form one cluster. In addition, a key of at least [`MIN_PREFIX_LEN`]
/// bytes absorbs every longer key it is a prefix of (the shortest qualifying
/// prefix wins), so "Red Hat" and "Red Hat, Inc." end up together.
///
/// Within a cluster the displayed spelling is the one with the highest score:
/// four points per contributor using it, two if it contains a space, one if it
/// contains an uppercase letter. Equal scores are resolved in favour of the
/// lexicographically smallest spelling, so the result does not depend on hash
/// iteration order and is identical from run to run.
///
/// Contributors without a group are left untouched.
fn canonicalize_groups(contributors: &mut [Contributor]) -> usize {
    use std::collections::{BTreeMap, HashMap};

    /// Minimum length, in bytes of the normalised key, before a key may absorb
    /// longer keys that start with it. Keeps short names from over-merging.
    const MIN_PREFIX_LEN: usize = 6;

    // Normalised comparison key for a group spelling.
    fn alnum_key(group: &str) -> String {
        let lower = group.to_lowercase();
        lower
            .strip_prefix("the ")
            .unwrap_or(&lower)
            .chars()
            .filter(|ch| ch.is_alphanumeric())
            .collect()
    }

    // Preference of a spelling as the displayed name of its cluster.
    fn score(spelling: &str, count: usize) -> usize {
        count * 4
            + usize::from(spelling.contains(' ')) * 2
            + usize::from(spelling.chars().any(char::is_uppercase))
    }

    // Spelling -> number of contributors using it. A BTreeMap is used so the
    // spellings are visited in sorted order below, which makes tie-breaking
    // deterministic.
    let mut variants: BTreeMap<String, usize> = BTreeMap::new();
    for c in contributors.iter() {
        if let Some(g) = &c.group {
            *variants.entry(g.clone()).or_default() += 1;
        }
    }

    let mut keys: Vec<String> = variants.keys().map(|g| alnum_key(g)).collect();
    keys.sort_unstable();
    keys.dedup();

    // Maps a key to its cluster: the shortest sufficiently long key that is a
    // prefix of it, or the key itself when none qualifies.
    let resolve = |key: &str| -> String {
        keys.iter()
            .filter(|k| k.len() >= MIN_PREFIX_LEN && key.starts_with(k.as_str()))
            .min_by_key(|k| k.len())
            .cloned()
            .unwrap_or_else(|| key.to_string())
    };

    // Cluster -> best spelling seen so far (with its count).
    let mut best: HashMap<String, (&String, usize)> = HashMap::new();
    for (spelling, &count) in &variants {
        let cluster = resolve(&alnum_key(spelling));
        let entry = best.entry(cluster).or_insert((spelling, count));
        // Strict `>` together with the sorted iteration order means an equal
        // score never displaces the earlier (smaller) spelling.
        if score(spelling, count) > score(entry.0, entry.1) {
            *entry = (spelling, count);
        }
    }

    let display: HashMap<String, String> = best
        .into_iter()
        .map(|(cluster, (spelling, _))| (cluster, spelling.clone()))
        .collect();

    for c in contributors.iter_mut() {
        let Some(current) = c.group.as_deref() else {
            continue;
        };
        if let Some(canonical) = display.get(&resolve(&alnum_key(current))) {
            c.group = Some(canonical.clone());
        }
    }
    display.len()
}