Skip to main content

contributor_graphs/
lib.rs

1//! Build contributor timelines from a git or GitHub repository.
2//!
3//! `contributor-graphs` is primarily a command-line tool, but the same engine
4//! is available as a library. The usual flow is [`analyze`] to turn a
5//! repository into [`Contributor`] rows plus [`RepoMeta`], then one of the
6//! renderers in [`svg`] or [`html`].
7//!
8//! ```no_run
9//! use contributor_graphs::{analyze, svg, Config};
10//!
11//! let analysis = analyze("nf-core/rnaseq", &Config::default())?;
12//! let rows: Vec<_> = analysis.contributors.iter().filter(|c| !c.bot).cloned().collect();
13//! let opts = svg::SvgOptions {
14//!     title: analysis.meta.name.clone(),
15//!     ..Default::default()
16//! };
17//! std::fs::write("rnaseq.svg", svg::render_svg(&rows, &opts))?;
18//! # Ok::<(), anyhow::Error>(())
19//! ```
20//!
21//! The lower-level modules ([`repo`], [`identity`], [`github`]) are public too,
22//! for callers who want to assemble a custom pipeline.
23
24pub mod github;
25pub mod html;
26pub mod identity;
27pub mod model;
28pub mod repo;
29pub mod svg;
30
31use anyhow::{bail, Result};
32
33pub use model::{Contributor, RepoMeta};
34
/// How to read history and resolve identities. Construct with `Config::default()`
/// and override fields as needed.
///
/// Every field is public, so struct-update syntax works:
/// `Config { verbose: true, ..Default::default() }`.
#[derive(Clone, Debug)]
pub struct Config {
    /// Branch or ref to read (default: `HEAD`).
    pub branch: Option<String>,
    /// Only include commits after this date (passed to `git log --since`).
    pub since: Option<String>,
    /// Only include commits before this date (`git log --until`).
    pub until: Option<String>,
    /// Skip merge commits.
    pub no_merges: bool,
    /// Override the chart/repository title.
    pub title: Option<String>,
    /// Exclude contributors whose name or login contains any of these strings.
    pub exclude: Vec<String>,
    /// Manual `matcher → group` mappings (matcher = name, email, or login).
    pub groups: Vec<(String, String)>,
    /// Manual identity merges: each row is `[canonical, alias, …]`.
    pub identities: Vec<Vec<String>>,
    /// Query the GitHub API for logins, avatars, and profiles.
    pub use_github: bool,
    /// Auto-detect affiliations from GitHub profile companies.
    pub detect_affiliation: bool,
    /// Merge identities that share a normalised author name.
    pub merge_names: bool,
    /// Download avatars and embed them as data URIs.
    pub embed_avatars: bool,
    /// Avatar pixel size to request when embedding.
    pub avatar_size: u32,
    /// Print progress to stderr.
    pub verbose: bool,
}
68
69impl Default for Config {
70    fn default() -> Self {
71        Config {
72            branch: None,
73            since: None,
74            until: None,
75            no_merges: false,
76            title: None,
77            exclude: Vec::new(),
78            groups: Vec::new(),
79            identities: Vec::new(),
80            use_github: true,
81            detect_affiliation: true,
82            merge_names: true,
83            embed_avatars: true,
84            avatar_size: 64,
85            verbose: false,
86        }
87    }
88}
89
/// The result of [`analyze`]: every contributor (bots included — filter on
/// [`Contributor::bot`] if you don't want them) and repository metadata.
pub struct Analysis {
    /// One row per resolved contributor, after the `Config::exclude` filter
    /// has been applied. Bot accounts are still present.
    pub contributors: Vec<Contributor>,
    /// Repository-level metadata: display name, URL, slug, branch, first and
    /// last commit times, totals, generation date, and the owner avatar.
    pub meta: RepoMeta,
}
96
/// Row ordering for [`sort`].
///
/// Implements `Debug` so the chosen ordering can be logged or shown in
/// assertion messages.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Sort {
    /// Oldest first commit at the top.
    First,
    /// Most recent commit first.
    Last,
    /// Most commits first.
    Commits,
    /// Longest active period first.
    Duration,
    /// Alphabetical by name.
    Name,
}
111
112/// Sort contributor rows in place.
113pub fn sort(rows: &mut [Contributor], key: Sort) {
114    match key {
115        Sort::First => rows.sort_by(|a, b| a.first.cmp(&b.first).then(b.commits.cmp(&a.commits))),
116        Sort::Last => rows.sort_by(|a, b| b.last.cmp(&a.last).then(b.commits.cmp(&a.commits))),
117        Sort::Commits => rows.sort_by_key(|c| std::cmp::Reverse(c.commits)),
118        Sort::Duration => rows.sort_by_key(|c| std::cmp::Reverse(c.last - c.first)),
119        Sort::Name => rows.sort_by_key(|a| a.name.to_lowercase()),
120    }
121}
122
123/// Resolve a repository (local path, `owner/repo` slug, or git URL) into
124/// contributor data and metadata.
125pub fn analyze(input: &str, cfg: &Config) -> Result<Analysis> {
126    macro_rules! log {
127        ($($arg:tt)*) => { if cfg.verbose { eprintln!($($arg)*); } };
128    }
129
130    let prepared = repo::prepare(input, cfg.branch.as_deref())?;
131    log!(
132        "→ repository: {} (branch {})",
133        prepared.display_name,
134        prepared.branch
135    );
136
137    let commits = repo::read_commits(
138        &prepared,
139        cfg.branch.as_deref(),
140        cfg.since.as_deref(),
141        cfg.until.as_deref(),
142        cfg.no_merges,
143    )?;
144    if commits.is_empty() {
145        bail!("no commits found");
146    }
147    log!(
148        "→ {} commits from {} distinct author emails",
149        model::thousands(commits.len() as u64),
150        distinct_emails(&commits)
151    );
152
153    let mut clusters = identity::cluster_commits(&commits, cfg.merge_names);
154
155    let client = github::GhClient::new(if cfg.use_github {
156        github::find_token()
157    } else {
158        None
159    });
160    if cfg.use_github {
161        if let Some(slug) = &prepared.slug {
162            log!("→ enriching from GitHub ({slug})");
163            github::enrich_clusters(&mut clusters, &commits, slug, &client, cfg.verbose);
164            clusters = identity::merge_by_login(clusters);
165            github::fetch_profiles(&mut clusters, &client, cfg.verbose);
166            if !cfg.detect_affiliation {
167                for cl in clusters.iter_mut() {
168                    cl.affiliation = None;
169                }
170            }
171        } else {
172            log!("→ not a GitHub repo, skipping enrichment");
173        }
174    }
175
176    if !cfg.identities.is_empty() {
177        clusters = identity::apply_identity_file(clusters, &cfg.identities);
178        log!("→ applied {} identity overrides", cfg.identities.len());
179    }
180
181    let mut contributors = identity::build_contributors(&clusters, &commits, &cfg.groups);
182
183    let n_groups = canonicalize_groups(&mut contributors);
184    if n_groups > 0 {
185        log!("→ {n_groups} distinct affiliations/groups");
186    }
187
188    if !cfg.exclude.is_empty() {
189        contributors.retain(|c| {
190            !cfg.exclude.iter().any(|pat| {
191                let p = pat.to_lowercase();
192                c.name.to_lowercase().contains(&p)
193                    || c.login
194                        .as_deref()
195                        .is_some_and(|l| l.to_lowercase().contains(&p))
196            })
197        });
198    }
199
200    log!(
201        "→ merged to {} contributors ({} bots)",
202        contributors.len(),
203        contributors.iter().filter(|c| c.bot).count()
204    );
205
206    if cfg.embed_avatars && cfg.use_github {
207        github::embed_avatars(&mut contributors, &client, cfg.avatar_size, cfg.verbose);
208    }
209
210    // Owner/org avatar for the interactive page header.
211    let owner_avatar = if cfg.use_github && cfg.embed_avatars {
212        prepared
213            .slug
214            .as_deref()
215            .and_then(|s| s.split('/').next())
216            .and_then(|owner| github::fetch_avatar(&client, owner, 48))
217    } else {
218        None
219    };
220
221    let first = contributors.iter().map(|c| c.first).min().unwrap_or(0);
222    let last = contributors.iter().map(|c| c.last).max().unwrap_or(0);
223    let meta = RepoMeta {
224        name: cfg
225            .title
226            .clone()
227            .unwrap_or_else(|| prepared.display_name.clone()),
228        url: prepared.url.clone(),
229        slug: prepared.slug.clone(),
230        branch: prepared.branch.clone(),
231        first,
232        last,
233        total_commits: commits.len() as u64,
234        total_contributors: contributors.iter().filter(|c| !c.bot).count(),
235        generated: chrono::Utc::now().format("%Y-%m-%d").to_string(),
236        owner_avatar,
237    };
238
239    Ok(Analysis { contributors, meta })
240}
241
242fn distinct_emails(commits: &[model::Commit]) -> usize {
243    let mut e: Vec<&str> = commits.iter().map(|c| c.email.as_str()).collect();
244    e.sort_unstable();
245    e.dedup();
246    e.len()
247}
248
249/// Merge group-name variants that refer to the same organisation:
250/// case/punctuation differences ("Seqera Labs" vs "seqeralabs"), a leading
251/// "The", and prefix forms ("Seqera" vs "Seqera Labs"). Returns the final
252/// group count.
253fn canonicalize_groups(contributors: &mut [Contributor]) -> usize {
254    use std::collections::HashMap;
255    let alnum_key = |g: &str| -> String {
256        let lower = g.to_lowercase();
257        let trimmed = lower.strip_prefix("the ").unwrap_or(&lower);
258        trimmed.chars().filter(|c| c.is_alphanumeric()).collect()
259    };
260
261    let mut variants: HashMap<String, usize> = HashMap::new();
262    for c in contributors.iter() {
263        if let Some(g) = &c.group {
264            *variants.entry(g.clone()).or_default() += 1;
265        }
266    }
267
268    let mut keys: Vec<String> = variants.keys().map(|g| alnum_key(g)).collect();
269    keys.sort();
270    keys.dedup();
271    let resolve = |key: &str| -> String {
272        keys.iter()
273            .filter(|k| k.len() >= 6 && key.starts_with(*k))
274            .min_by_key(|k| k.len())
275            .map(|k| k.to_string())
276            .unwrap_or_else(|| key.to_string())
277    };
278
279    let mut best: HashMap<String, (&String, usize)> = HashMap::new();
280    for (g, n) in &variants {
281        let cluster = resolve(&alnum_key(g));
282        let score = |g: &str, n: usize| {
283            n * 4
284                + usize::from(g.contains(' ')) * 2
285                + usize::from(g.chars().any(|c| c.is_uppercase()))
286        };
287        let entry = best.entry(cluster).or_insert((g, *n));
288        if score(g, *n) > score(entry.0, entry.1) {
289            *entry = (g, *n);
290        }
291    }
292
293    let display: HashMap<String, String> = best
294        .iter()
295        .map(|(k, (g, _))| (k.clone(), (*g).clone()))
296        .collect();
297    for c in contributors.iter_mut() {
298        if let Some(g) = &c.group {
299            c.group = display
300                .get(&resolve(&alnum_key(g)))
301                .cloned()
302                .or(c.group.clone());
303        }
304    }
305    display.len()
306}