contributor_graphs/
github.rs1use crate::identity::Cluster;
2use crate::model::{Commit, Contributor};
3use base64::Engine;
4use std::collections::HashMap;
5use std::process::Command;
6use std::sync::atomic::{AtomicUsize, Ordering};
7use std::sync::Mutex;
8use std::time::Duration;
9
/// Upper bound on the number of worker threads spawned for one batch of
/// HTTP lookups; smaller batches spawn one thread per item instead.
const THREADS: usize = 8;
11
/// Data kept from a GitHub user profile: `(display name, company)`.
/// Either element is `None` when the profile does not provide a usable value.
type Profile = (Option<String>, Option<String>);
14
15pub struct GhClient {
16 agent: ureq::Agent,
17 token: Option<String>,
18}
19
/// Locates a GitHub API token.
///
/// The environment variables `GITHUB_TOKEN` and `GH_TOKEN` are consulted in
/// that order; the first one holding a non-blank value wins. If neither is
/// usable, the output of `gh auth token` is used instead.
///
/// Returns the trimmed token, or `None` when no source yields a non-empty
/// value (including when the `gh` executable cannot be run or exits with a
/// failure status).
pub fn find_token() -> Option<String> {
    let from_env = ["GITHUB_TOKEN", "GH_TOKEN"].iter().find_map(|name| {
        let value = std::env::var(name).ok()?;
        let value = value.trim();
        (!value.is_empty()).then(|| value.to_string())
    });
    if from_env.is_some() {
        return from_env;
    }

    // Fall back to the GitHub CLI's stored credentials.
    let output = Command::new("gh").args(["auth", "token"]).output().ok()?;
    if !output.status.success() {
        return None;
    }
    let stdout = String::from_utf8_lossy(&output.stdout);
    let token = stdout.trim();
    if token.is_empty() {
        None
    } else {
        Some(token.to_owned())
    }
}
39
/// Parses a GitHub "noreply" commit address.
///
/// Two shapes are recognised:
///
/// * `ID+LOGIN@users.noreply.github.com` yields `(Some(ID), LOGIN)`
/// * `LOGIN@users.noreply.github.com` yields `(None, LOGIN)`
///
/// If the part before `+` is not a valid `u64`, the id is reported as `None`
/// and the part after `+` is still used as the login.
///
/// The domain is compared ASCII case-insensitively, because mail domains are
/// not case-sensitive. Addresses with an empty login are rejected so callers
/// never build URLs from an empty user name.
///
/// Returns `None` for any address that is not a usable noreply address.
pub fn parse_noreply(email: &str) -> Option<(Option<u64>, String)> {
    const NOREPLY_DOMAIN: &str = "users.noreply.github.com";

    // Split at the last '@' so the domain part can never contain one.
    let (local, domain) = email.rsplit_once('@')?;
    if !domain.eq_ignore_ascii_case(NOREPLY_DOMAIN) {
        return None;
    }

    let (id, login) = match local.split_once('+') {
        Some((id, login)) => (id.parse::<u64>().ok(), login),
        None => (None, local),
    };
    if login.is_empty() {
        return None;
    }
    Some((id, login.to_string()))
}
52
53impl GhClient {
54 pub fn new(token: Option<String>) -> Self {
55 let agent = ureq::AgentBuilder::new()
56 .timeout(Duration::from_secs(30))
57 .user_agent("contributor-graphs (https://github.com/ewels/contributor-graphs)")
58 .build();
59 GhClient { agent, token }
60 }
61
62 pub fn has_token(&self) -> bool {
63 self.token.is_some()
64 }
65
66 fn get_json(&self, url: &str) -> Option<serde_json::Value> {
67 let mut req = self
68 .agent
69 .get(url)
70 .set("Accept", "application/vnd.github+json")
71 .set("X-GitHub-Api-Version", "2022-11-28");
72 if let Some(t) = &self.token {
73 req = req.set("Authorization", &format!("Bearer {t}"));
74 }
75 match req.call() {
76 Ok(resp) => resp.into_json().ok(),
77 Err(ureq::Error::Status(code, _)) => {
78 if code == 403 || code == 429 {
79 eprintln!(" warning: GitHub API rate limited (HTTP {code})");
80 }
81 None
82 }
83 Err(_) => None,
84 }
85 }
86
87 fn commit_author(&self, slug: &str, sha: &str) -> Option<(String, String)> {
89 let v = self.get_json(&format!(
90 "https://api.github.com/repos/{slug}/commits/{sha}"
91 ))?;
92 let author = v.get("author")?;
93 let login = author.get("login")?.as_str()?.to_string();
94 let avatar = author
95 .get("avatar_url")
96 .and_then(|a| a.as_str())
97 .map(String::from)
98 .unwrap_or_else(|| format!("https://avatars.githubusercontent.com/{login}"));
99 Some((login, avatar))
100 }
101
102 fn user_profile(&self, login: &str) -> Profile {
104 let Some(v) = self.get_json(&format!("https://api.github.com/users/{login}")) else {
105 return (None, None);
106 };
107 let name = v
108 .get("name")
109 .and_then(|n| n.as_str())
110 .map(str::trim)
111 .filter(|n| !n.is_empty())
112 .map(String::from);
113 let company = v
114 .get("company")
115 .and_then(|c| c.as_str())
116 .and_then(normalize_company);
117 (name, company)
118 }
119
120 pub fn fetch_bytes(&self, url: &str) -> Option<(Vec<u8>, String)> {
121 let resp = self.agent.get(url).call().ok()?;
122 let ct = resp.content_type().to_string();
123 let mut buf = Vec::new();
124 use std::io::Read;
125 resp.into_reader()
126 .take(4 * 1024 * 1024)
127 .read_to_end(&mut buf)
128 .ok()?;
129 Some((buf, ct))
130 }
131}
132
133pub fn enrich_clusters(
137 clusters: &mut [Cluster],
138 commits: &[Commit],
139 slug: &str,
140 client: &GhClient,
141 verbose: bool,
142) {
143 let mut need_api: Vec<(usize, String)> = Vec::new();
144 for (i, cl) in clusters.iter_mut().enumerate() {
145 for email in &cl.emails {
146 if let Some((id, login)) = parse_noreply(email) {
147 cl.avatar_url = Some(match id {
148 Some(id) => format!("https://avatars.githubusercontent.com/u/{id}?v=4"),
149 None => format!("https://avatars.githubusercontent.com/{login}"),
150 });
151 cl.login = Some(login);
152 break;
153 }
154 }
155 if cl.login.is_none() {
156 if let Some(&idx) = cl.commit_idxs.iter().max_by_key(|&&i| commits[i].ts) {
159 need_api.push((i, commits[idx].sha.clone()));
160 }
161 }
162 }
163
164 if need_api.is_empty() || !client.has_token() {
165 if !need_api.is_empty() && verbose {
166 eprintln!(
167 " no GitHub token found ({} identities left unresolved) — run `gh auth login` to enable lookups",
168 need_api.len()
169 );
170 }
171 return;
172 }
173
174 let cursor = AtomicUsize::new(0);
175 let results: Mutex<HashMap<usize, (String, String)>> = Mutex::new(HashMap::new());
176 std::thread::scope(|s| {
177 for _ in 0..THREADS.min(need_api.len()) {
178 s.spawn(|| loop {
179 let i = cursor.fetch_add(1, Ordering::Relaxed);
180 let Some((cluster_idx, sha)) = need_api.get(i) else {
181 break;
182 };
183 if let Some(found) = client.commit_author(slug, sha) {
184 results.lock().unwrap().insert(*cluster_idx, found);
185 }
186 });
187 }
188 });
189
190 let results = results.into_inner().unwrap();
191 let resolved = results.len();
192 for (idx, (login, avatar)) in results {
193 clusters[idx].login = Some(login);
194 clusters[idx].avatar_url = Some(avatar);
195 }
196 if verbose {
197 eprintln!(
198 " resolved {resolved}/{} identities via GitHub API",
199 need_api.len()
200 );
201 }
202}
203
204pub fn fetch_avatar(client: &GhClient, login: &str, size: u32) -> Option<String> {
206 let url = format!("https://avatars.githubusercontent.com/{login}?s={size}");
207 let (bytes, ct) = client.fetch_bytes(&url)?;
208 let ct = if ct.starts_with("image/") {
209 ct
210 } else {
211 "image/png".into()
212 };
213 let b64 = base64::engine::general_purpose::STANDARD.encode(&bytes);
214 Some(format!("data:{ct};base64,{b64}"))
215}
216
/// Reduces a free-form profile "company" string to a single short name.
///
/// Steps, in order:
///
/// 1. Trim, then cut at the first occurrence of each separator
///    (`" | "`, `" / "`, `";"`, `" · "`, `","`), applied one after another.
/// 2. Drop leading `@` characters.
/// 3. Cut at the first `" @"`, discarding any further mentions.
/// 4. Strip trailing `.`, `,`, `;`, `|` and `/`, plus surrounding whitespace.
///
/// Returns `None` when nothing is left or when the result is longer than
/// 60 characters.
pub fn normalize_company(raw: &str) -> Option<String> {
    const SEPARATORS: [&str; 5] = [" | ", " / ", ";", " · ", ","];
    const TRAILING_JUNK: [char; 5] = ['.', ',', ';', '|', '/'];
    const MAX_CHARS: usize = 60;

    // Each separator is applied to what the previous one left over.
    let head = SEPARATORS.iter().fold(raw.trim(), |rest, sep| {
        rest.split_once(*sep).map_or(rest, |(before, _)| before)
    });

    let head = head.trim().trim_start_matches('@').trim();
    let head = head.split_once(" @").map_or(head, |(before, _)| before);
    let cleaned = head.trim().trim_end_matches(TRAILING_JUNK).trim();

    let acceptable = !cleaned.is_empty() && cleaned.chars().count() <= MAX_CHARS;
    acceptable.then(|| cleaned.to_string())
}
239
240pub fn fetch_profiles(clusters: &mut [Cluster], client: &GhClient, verbose: bool) {
243 if !client.has_token() {
244 return;
245 }
246 let mut logins: Vec<String> = clusters.iter().filter_map(|c| c.login.clone()).collect();
247 logins.sort();
248 logins.dedup();
249 if logins.is_empty() {
250 return;
251 }
252
253 let cursor = AtomicUsize::new(0);
254 let results: Mutex<HashMap<String, Profile>> = Mutex::new(HashMap::new());
255 std::thread::scope(|s| {
256 for _ in 0..THREADS.min(logins.len()) {
257 s.spawn(|| loop {
258 let i = cursor.fetch_add(1, Ordering::Relaxed);
259 let Some(login) = logins.get(i) else { break };
260 let profile = client.user_profile(login);
261 results.lock().unwrap().insert(login.clone(), profile);
262 });
263 }
264 });
265
266 let results = results.into_inner().unwrap();
267 let with_company = results.values().filter(|(_, c)| c.is_some()).count();
268 for cl in clusters.iter_mut() {
269 if let Some(login) = &cl.login {
270 if let Some((name, company)) = results.get(login) {
271 cl.profile_name = name.clone();
272 cl.affiliation = company.clone();
273 }
274 }
275 }
276 if verbose {
277 eprintln!(
278 " fetched {} profiles ({} with an affiliation)",
279 results.len(),
280 with_company
281 );
282 }
283}
284
285pub fn embed_avatars(
288 contributors: &mut [Contributor],
289 client: &GhClient,
290 size: u32,
291 verbose: bool,
292) {
293 let mut urls: Vec<String> = Vec::new();
294 for c in contributors.iter() {
295 if let Some(u) = &c.avatar {
296 if !u.starts_with("data:") && !urls.contains(u) {
297 urls.push(u.clone());
298 }
299 }
300 }
301 if urls.is_empty() {
302 return;
303 }
304
305 let sized: Vec<String> = urls
306 .iter()
307 .map(|u| {
308 if u.contains('?') {
309 format!("{u}&s={size}")
310 } else {
311 format!("{u}?s={size}")
312 }
313 })
314 .collect();
315
316 let cursor = AtomicUsize::new(0);
317 let results: Mutex<HashMap<String, String>> = Mutex::new(HashMap::new());
318 std::thread::scope(|s| {
319 for _ in 0..THREADS.min(urls.len()) {
320 s.spawn(|| loop {
321 let i = cursor.fetch_add(1, Ordering::Relaxed);
322 let (Some(orig), Some(fetch_url)) = (urls.get(i), sized.get(i)) else {
323 break;
324 };
325 if let Some((bytes, ct)) = client.fetch_bytes(fetch_url) {
326 let ct = if ct.starts_with("image/") {
327 ct
328 } else {
329 "image/png".into()
330 };
331 let b64 = base64::engine::general_purpose::STANDARD.encode(&bytes);
332 results
333 .lock()
334 .unwrap()
335 .insert(orig.clone(), format!("data:{ct};base64,{b64}"));
336 }
337 });
338 }
339 });
340
341 let results = results.into_inner().unwrap();
342 let n = results.len();
343 for c in contributors.iter_mut() {
344 if let Some(u) = &c.avatar {
345 if let Some(data) = results.get(u) {
346 c.avatar = Some(data.clone());
347 }
348 }
349 }
350 if verbose {
351 eprintln!(" embedded {n} avatars as data URIs");
352 }
353}