use chrono::{DateTime, Utc};
use reqwest::header::{HeaderMap, HeaderValue, ACCEPT, AUTHORIZATION, USER_AGENT};
use rusqlite::params;
use serde::Deserialize;
use tracing::{debug, warn};
use async_trait::async_trait;
use crate::collect::env_expand::expand_env_var;
use crate::collect::errors::{CollectError, Result};
use crate::collect::github::retry::retry_get;
use crate::collect::pr_provider::PrProvider;
use crate::core::config::{GithubConfig, RepositoryConfig};
use crate::core::db::Database;
use crate::core::models::{PrState, PullRequest};
/// `User-Agent` header value installed as a default header by `build_http_client`.
const USER_AGENT_VALUE: &str = "trusty-git-analytics/0.1";
/// Base URL that every GitHub REST endpoint built in this file is appended to.
pub(crate) const GITHUB_API_BASE: &str = "https://api.github.com";
/// Value sent as the `per_page` query parameter; the paginated fetchers stop
/// as soon as a page comes back with fewer than this many items.
pub(crate) const PAGE_SIZE: u32 = 100;
/// GitHub REST API client used to fetch pull requests, reviews, commits and
/// issues, and to persist fetched pull requests.
pub struct GitHubClient {
/// HTTP client produced by `build_http_client` (default headers + timeout).
client: reqwest::Client,
/// Token exactly as it appears in the config (cloned before any env
/// expansion); in this file it is only inspected by `has_token`.
token: Option<String>,
/// Owner of the primary repository, used by the single-repo endpoints
/// (`fetch_issue`, `fetch_pr_commits`, `list_issues`). Empty when the client
/// was built with `new_for_reviews`.
owner: String,
/// Name of the primary repository; same caveats as `owner`.
repo: String,
/// Every `(owner, repo)` pair that `fetch_pull_requests` iterates over.
repos: Vec<(String, String)>,
}
/// The subset of a pull-request JSON object that `fetch_pull_requests_for_repo`
/// deserializes from the `/pulls` listing.
#[derive(Debug, Deserialize)]
struct ApiPull {
/// Pull request number; copied into `PullRequest::pr_number`.
number: u64,
/// Pull request title.
title: String,
/// Author object; when absent the stored author becomes an empty string.
user: Option<ApiUser>,
/// State string from the API; only the literal `"closed"` is special-cased
/// when mapping to `PrState`.
state: String,
/// Creation timestamp.
created_at: DateTime<Utc>,
/// Merge timestamp; a present value takes precedence over `state` and marks
/// the PR as merged.
merged_at: Option<DateTime<Utc>>,
/// Merge commit SHA; may be missing from the payload entirely (`serde(default)`).
/// It is only used when `merged_at` is also set (see `commit_shas_for_pull`).
#[serde(default)]
merge_commit_sha: Option<String>,
}
/// User object nested inside `ApiPull`; only the login is read.
#[derive(Debug, Deserialize)]
struct ApiUser {
/// Account login, stored as the pull request's author.
login: String,
}
/// Encodes the commit SHAs attributable to a pull request as a JSON array string.
///
/// The merge commit SHA is emitted only when the pull request carries both a
/// `merge_commit_sha` and a `merged_at` timestamp; every other combination
/// yields the empty array `"[]"`.
///
/// # Errors
/// Propagates a JSON serialization failure.
fn commit_shas_for_pull(p: &ApiPull) -> Result<String> {
    // A populated SHA on a PR that was never merged is deliberately ignored.
    let merged_sha = p
        .merge_commit_sha
        .as_deref()
        .filter(|_| p.merged_at.is_some());
    match merged_sha {
        Some(sha) => Ok(serde_json::to_string(&[sha])?),
        None => Ok("[]".to_string()),
    }
}
/// Issue payload deserialized by `fetch_issue` and `list_issues`.
#[derive(Debug, Clone, Deserialize, serde::Serialize)]
pub struct GitHubIssue {
/// Issue number.
pub number: u64,
/// Issue title.
pub title: String,
/// State string as returned by the API (the tests use `"open"` / `"closed"`).
pub state: String,
/// The payload's `html_url` value (a `github.com/.../issues/N` link in the tests).
pub html_url: String,
/// Labels attached to the issue; empty when the field is missing.
#[serde(default)]
pub labels: Vec<GhLabel>,
/// Issue body text; `None` when the field is missing or null.
#[serde(default)]
pub body: Option<String>,
}
/// Label object nested inside `GitHubIssue::labels`.
#[derive(Debug, Clone, Deserialize, serde::Serialize)]
pub struct GhLabel {
/// Label name (for example `"bug"`).
pub name: String,
}
/// User object nested inside `GitHubReview::user`.
#[derive(Debug, Clone, Deserialize, serde::Serialize)]
pub struct GhUser {
/// Account login.
pub login: String,
}
/// Author block nested inside `GitHubCommitDetail::author`.
#[derive(Debug, Clone, Deserialize, serde::Serialize)]
pub struct GhAuthor {
/// Author display name.
pub name: String,
/// Author e-mail address.
pub email: String,
/// Author date kept as the raw string from the payload (not parsed here);
/// `None` when missing.
#[serde(default)]
pub date: Option<String>,
}
/// The `commit` object nested inside `GitHubPrCommit`.
#[derive(Debug, Clone, Deserialize, serde::Serialize)]
pub struct GitHubCommitDetail {
/// Full commit message.
pub message: String,
/// Author information; `None` when the field is missing or null.
#[serde(default)]
pub author: Option<GhAuthor>,
}
/// One element of the list returned by `fetch_pr_commits`.
#[derive(Debug, Clone, Deserialize, serde::Serialize)]
pub struct GitHubPrCommit {
/// Commit SHA.
pub sha: String,
/// Commit message and author details.
pub commit: GitHubCommitDetail,
}
/// One element of the list returned by `fetch_pr_reviews_for_repo`.
#[derive(Debug, Clone, Deserialize, serde::Serialize)]
pub struct GitHubReview {
/// Review identifier.
pub id: u64,
/// Review state string as returned by the API (the tests use `"APPROVED"`
/// and `"PENDING"`).
pub state: String,
/// Reviewer; `None` when the field is missing or null.
#[serde(default)]
pub user: Option<GhUser>,
/// Submission timestamp kept as the raw string; the test fixture for a
/// pending review omits it.
#[serde(default)]
pub submitted_at: Option<String>,
}
/// Splits an `owner/name` slug at its first `/` into `(owner, name)`.
///
/// Anything after the first `/` (including further slashes) is kept as the
/// repository part.
///
/// # Errors
/// Returns `CollectError::Config` when there is no `/` or when either side of
/// it is empty.
fn parse_slug(slug: &str) -> Result<(String, String)> {
    match slug.split_once('/') {
        Some((owner, repo)) if !owner.is_empty() && !repo.is_empty() => {
            Ok((owner.to_owned(), repo.to_owned()))
        }
        // Missing separator and empty halves share the same diagnostic.
        _ => Err(CollectError::Config(format!(
            "github repo must be 'owner/name', got '{slug}'"
        ))),
    }
}
/// Builds the `reqwest::Client` shared by all GitHub requests.
///
/// Default headers:
/// * `User-Agent`: `USER_AGENT_VALUE`
/// * `Accept`: `application/vnd.github+json`
/// * `Authorization: Bearer <token>` when `config.token` is set; the raw
///   config value is passed through `expand_env_var` first.
///
/// Every request gets a 30 second timeout.
///
/// # Errors
/// * `CollectError::Config` when the expanded token cannot be used as a
///   header value.
/// * The converted `reqwest` error when the client itself fails to build.
pub(crate) fn build_http_client(config: &GithubConfig) -> Result<reqwest::Client> {
    let mut headers = HeaderMap::new();
    headers.insert(USER_AGENT, HeaderValue::from_static(USER_AGENT_VALUE));
    headers.insert(
        ACCEPT,
        HeaderValue::from_static("application/vnd.github+json"),
    );
    if let Some(raw) = &config.token {
        let mut val = HeaderValue::from_str(&format!("Bearer {}", expand_env_var(raw)))
            .map_err(|e| CollectError::Config(format!("invalid token header: {e}")))?;
        // Keep the secret out of `Debug` output of the header map / requests.
        val.set_sensitive(true);
        headers.insert(AUTHORIZATION, val);
    }
    Ok(reqwest::Client::builder()
        .default_headers(headers)
        .timeout(std::time::Duration::from_secs(30))
        .build()?)
}
fn owner_repo_from_remote(repo_path: &std::path::Path) -> Option<(String, String)> {
let repo = git2::Repository::open(repo_path).ok()?;
let remote = repo.find_remote("origin").ok()?;
let url = remote.url()?;
extract_owner_repo_from_url(url)
}
/// Extracts `(owner, repo)` from a GitHub remote URL.
///
/// Recognised forms (a trailing `/` and a trailing `.git` are both ignored):
/// * `git@github.com:owner/repo`
/// * `https://github.com/owner/repo`, `http://github.com/owner/repo`
/// * `ssh://git@github.com/owner/repo`
/// * `https://<userinfo>@github.com/owner/repo`
///
/// Returns `None` for any other host or shape.
fn extract_owner_repo_from_url(url: &str) -> Option<(String, String)> {
    // Drop trailing slashes first so that `.../repo.git/` still loses `.git`.
    let trimmed = url.trim_end_matches('/');
    let cleaned = trimmed.strip_suffix(".git").unwrap_or(trimmed);

    if let Some(rest) = cleaned.strip_prefix("git@github.com:") {
        return split_owner_repo(rest);
    }
    for prefix in [
        "https://github.com/",
        "http://github.com/",
        "ssh://git@github.com/",
    ] {
        if let Some(rest) = cleaned.strip_prefix(prefix) {
            return split_owner_repo(rest);
        }
    }
    if let Some(after_scheme) = cleaned.strip_prefix("https://") {
        // Credentials can only live in the authority (the part before the
        // first '/'); an '@' further along belongs to the path and must not
        // make a foreign host look like github.com.
        let (authority, path) = after_scheme.split_once('/')?;
        if let Some((_userinfo, host)) = authority.rsplit_once('@') {
            if host == "github.com" {
                return split_owner_repo(path);
            }
        }
    }
    None
}

/// Takes the first two `/`-separated segments of `rest` as `(owner, repo)`.
///
/// Extra path segments are ignored; `None` is returned when either of the
/// first two segments is missing or empty.
fn split_owner_repo(rest: &str) -> Option<(String, String)> {
    let mut parts = rest.splitn(3, '/');
    let owner = parts.next()?;
    let name = parts.next()?;
    if owner.is_empty() || name.is_empty() {
        return None;
    }
    Some((owner.to_string(), name.to_string()))
}
pub fn resolve_github_repos(
github: &GithubConfig,
repositories: &[RepositoryConfig],
) -> Vec<(String, String)> {
if let Some(slug) = &github.repo {
if let Ok(pair) = parse_slug(slug) {
return vec![pair];
} else {
tracing::warn!(slug = %slug, "github.repo is malformed; falling back to repositories[]");
}
}
let mut out: Vec<(String, String)> = Vec::new();
let mut seen: std::collections::HashSet<(String, String)> = std::collections::HashSet::new();
for repo_cfg in repositories {
let repo_name = repo_cfg
.name
.clone()
.or_else(|| {
repo_cfg
.path
.file_name()
.and_then(|n| n.to_str())
.map(str::to_string)
})
.unwrap_or_default();
let owner_from_cfg = repo_cfg.org.clone().or_else(|| github.org.clone());
let pair = if let Some(owner) = &owner_from_cfg {
if repo_name.is_empty() {
owner_repo_from_remote(&repo_cfg.path)
} else {
Some((owner.clone(), repo_name.clone()))
}
} else {
owner_repo_from_remote(&repo_cfg.path)
};
if let Some(p) = pair {
if seen.insert(p.clone()) {
out.push(p);
}
} else {
debug!(
path = %repo_cfg.path.display(),
"could not resolve owner/repo for repository; skipping for GitHub PR fetch"
);
}
}
out
}
impl GitHubClient {
/// Creates a single-repository client from `config.repo` (`owner/name`).
///
/// The parsed pair becomes both the primary repository and the only entry
/// in the PR fetch list.
///
/// # Errors
/// `CollectError::Config` when `config.repo` is unset or malformed; any
/// error from `build_http_client`.
pub fn new(config: &GithubConfig) -> Result<Self> {
    let slug = match config.repo.as_deref() {
        Some(slug) => slug,
        None => {
            return Err(CollectError::Config(
                "github.repo is required (owner/name)".into(),
            ))
        }
    };
    let (owner, repo) = parse_slug(slug)?;
    let client = build_http_client(config)?;
    let repos = vec![(owner.clone(), repo.clone())];
    Ok(Self {
        client,
        token: config.token.clone(),
        owner,
        repo,
        repos,
    })
}
/// Creates a client that fetches pull requests for every pair in `repos`.
///
/// The first pair doubles as the primary repository used by the
/// single-repo endpoints.
///
/// # Errors
/// `CollectError::Config` when `repos` is empty; any error from
/// `build_http_client`.
pub fn new_for_prs(config: &GithubConfig, repos: Vec<(String, String)>) -> Result<Self> {
    let (owner, repo) = match repos.first() {
        Some(primary) => primary.clone(),
        None => {
            return Err(CollectError::Config(
                "GitHubClient::new_for_prs requires at least one (owner, repo)".into(),
            ))
        }
    };
    let client = build_http_client(config)?;
    Ok(Self {
        client,
        token: config.token.clone(),
        owner,
        repo,
        repos,
    })
}
/// Creates a client with no repository bound to it.
///
/// `owner`, `repo` and `repos` are left empty, so only methods that take
/// the repository as arguments (such as `fetch_pr_reviews_for_repo`) build
/// meaningful URLs on such a client.
///
/// # Errors
/// Any error from `build_http_client`.
pub fn new_for_reviews(config: &GithubConfig) -> Result<Self> {
    let client = build_http_client(config)?;
    let token = config.token.clone();
    Ok(Self {
        client,
        token,
        owner: String::new(),
        repo: String::new(),
        repos: Vec::new(),
    })
}
/// Fetches pull requests for every configured repository.
///
/// A failure for one repository is logged as a warning and does not stop
/// the remaining repositories from being fetched, so this currently always
/// returns `Ok` with whatever could be collected.
pub async fn fetch_pull_requests(&self) -> Result<Vec<PullRequest>> {
    let mut collected = Vec::new();
    for (owner, repo) in self.repos.iter() {
        let fetched = self.fetch_pull_requests_for_repo(owner, repo).await;
        match fetched {
            Ok(prs) => collected.extend(prs),
            Err(e) => {
                warn!(
                    owner = %owner,
                    repo = %repo,
                    error = %e,
                    "GitHub PR fetch failed for repo; continuing with remaining repos"
                );
            }
        }
    }
    Ok(collected)
}
/// Pages through `/repos/{owner}/{repo}/pulls?state=all` and converts every
/// entry into a `PullRequest`.
///
/// Pagination stops at the first page holding fewer than `PAGE_SIZE` items
/// (which includes an empty page). A warning is logged whenever the
/// `x-ratelimit-remaining` response header drops below 5.
///
/// # Errors
/// Request failures, non-success HTTP statuses, JSON decoding failures and
/// `commit_shas` encoding failures.
async fn fetch_pull_requests_for_repo(
    &self,
    owner: &str,
    repo: &str,
) -> Result<Vec<PullRequest>> {
    let mut collected: Vec<PullRequest> = Vec::new();
    let mut page = 1u32;
    loop {
        let url = format!(
            "{GITHUB_API_BASE}/repos/{owner}/{repo}/pulls?state=all&per_page={PAGE_SIZE}&page={page}"
        );
        debug!(url = %url, "GET");
        let resp = self.retry_request(&url).await?;

        // Inspect the quota before the status check so it is seen even on
        // error responses.
        let quota_left = resp
            .headers()
            .get("x-ratelimit-remaining")
            .and_then(|v| v.to_str().ok())
            .and_then(|s| s.parse::<u32>().ok());
        match quota_left {
            Some(rem) if rem < 5 => {
                warn!(remaining = rem, "GitHub rate limit nearly exhausted");
            }
            _ => {}
        }

        let pulls: Vec<ApiPull> = resp.error_for_status()?.json().await?;
        let fetched = pulls.len();
        for pull in pulls {
            let commit_shas = commit_shas_for_pull(&pull)?;
            // A merge timestamp wins over whatever `state` says.
            let state = match (pull.merged_at.is_some(), pull.state.as_str()) {
                (true, _) => PrState::Merged,
                (false, "closed") => PrState::Closed,
                (false, _) => PrState::Open,
            };
            collected.push(PullRequest {
                id: 0,
                pr_number: pull.number,
                repository: format!("{owner}/{repo}"),
                title: pull.title,
                author: pull.user.map(|u| u.login).unwrap_or_default(),
                state,
                created_at: pull.created_at,
                merged_at: pull.merged_at,
                commit_shas,
            });
        }

        // A short (or empty) page is the last one.
        if (fetched as u32) < PAGE_SIZE {
            break;
        }
        page += 1;
    }
    Ok(collected)
}
/// Upserts `prs` into the `pull_requests` table under provider `"github"`.
///
/// Rows are keyed on `(provider, repository, pr_number)`; on conflict the
/// title, author, state, merge timestamp and commit SHAs are refreshed
/// while `created_at` keeps its stored value. Timestamps are written as
/// RFC 3339 strings.
///
/// Returns the number of pull requests written. Statements run one by one,
/// so a failure part-way through returns the error after the earlier rows
/// were already executed.
pub fn store_pull_requests(
    &self,
    db: &Database,
    prs: &[PullRequest],
) -> crate::core::Result<usize> {
    const UPSERT_SQL: &str = "INSERT INTO pull_requests \
        (provider,repository,pr_number,title,author,state,created_at,merged_at,commit_shas) \
        VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9) \
        ON CONFLICT(provider,repository,pr_number) DO UPDATE SET \
        title=excluded.title,author=excluded.author,state=excluded.state,\
        merged_at=excluded.merged_at,commit_shas=excluded.commit_shas";

    let conn = db.connection();
    for pr in prs {
        let created_at = pr.created_at.to_rfc3339();
        let merged_at = pr.merged_at.map(|ts| ts.to_rfc3339());
        conn.execute(
            UPSERT_SQL,
            params![
                "github",
                pr.repository,
                pr.pr_number as i64,
                pr.title,
                pr.author,
                pr.state.as_str(),
                created_at,
                merged_at,
                pr.commit_shas,
            ],
        )?;
    }
    // Every element was written if the loop ran to completion.
    Ok(prs.len())
}
/// Reports whether the configuration supplied a token entry.
///
/// This looks at the raw config value captured at construction time; it does
/// not check that the value expands to something non-empty or valid.
pub fn has_token(&self) -> bool {
self.token.is_some()
}
pub async fn fetch_issue(&self, number: u64) -> Result<Option<GitHubIssue>> {
let url = format!(
"{GITHUB_API_BASE}/repos/{}/{}/issues/{number}",
self.owner, self.repo
);
debug!(url = %url, "GET");
let resp = self.client.get(&url).send().await?;
if resp.status() == reqwest::StatusCode::NOT_FOUND {
return Ok(None);
}
let resp = resp.error_for_status()?;
let issue: GitHubIssue = resp.json().await?;
Ok(Some(issue))
}
/// Issues a GET for `url` through `retry_get` using this client's HTTP client.
///
/// Callers in this file still apply `error_for_status` to the returned
/// response themselves. The retry policy itself lives in `retry_get` and is
/// not visible here.
async fn retry_request(&self, url: &str) -> Result<reqwest::Response> {
retry_get(&self.client, url).await
}
/// Fetches every review of pull request `pr_number` in `owner/repo`.
///
/// Pages are requested until one comes back with fewer than `PAGE_SIZE`
/// entries.
///
/// # Errors
/// Request failures, non-success HTTP statuses and JSON decoding failures.
pub async fn fetch_pr_reviews_for_repo(
    &self,
    owner: &str,
    repo: &str,
    pr_number: u64,
) -> Result<Vec<GitHubReview>> {
    let mut reviews = Vec::new();
    for page in 1u32.. {
        let url = format!(
            "{GITHUB_API_BASE}/repos/{owner}/{repo}/pulls/{pr_number}/reviews?per_page={PAGE_SIZE}&page={page}"
        );
        let batch: Vec<GitHubReview> = self
            .retry_request(&url)
            .await?
            .error_for_status()?
            .json()
            .await?;
        let last_page = (batch.len() as u32) < PAGE_SIZE;
        reviews.extend(batch);
        if last_page {
            break;
        }
    }
    Ok(reviews)
}
/// Borrows the underlying `reqwest::Client` (with its default headers) so
/// callers can issue their own requests.
pub fn http_client(&self) -> &reqwest::Client {
&self.client
}
pub async fn fetch_pr_commits(&self, pr_number: u64) -> Result<Vec<GitHubPrCommit>> {
let mut out = Vec::new();
let mut page = 1u32;
loop {
let url = format!(
"{GITHUB_API_BASE}/repos/{}/{}/pulls/{pr_number}/commits?per_page={PAGE_SIZE}&page={page}",
self.owner, self.repo
);
let resp = self.retry_request(&url).await?.error_for_status()?;
let batch: Vec<GitHubPrCommit> = resp.json().await?;
let n = batch.len();
out.extend(batch);
if (n as u32) < PAGE_SIZE {
break;
}
page += 1;
}
Ok(out)
}
pub async fn list_issues(&self, state: &str, since: Option<&str>) -> Result<Vec<GitHubIssue>> {
let mut out = Vec::new();
let mut page = 1u32;
loop {
let mut url = format!(
"{GITHUB_API_BASE}/repos/{}/{}/issues?state={state}&per_page={PAGE_SIZE}&page={page}",
self.owner, self.repo
);
if let Some(s) = since {
url.push_str("&since=");
url.push_str(s);
}
let resp = self.retry_request(&url).await?.error_for_status()?;
let batch: Vec<GitHubIssue> = resp.json().await?;
let n = batch.len();
out.extend(batch);
if (n as u32) < PAGE_SIZE {
break;
}
page += 1;
}
Ok(out)
}
}
/// `PrProvider` implementation that forwards to the inherent `GitHubClient`
/// methods of the same names.
#[async_trait]
impl PrProvider for GitHubClient {
/// Provider identifier; the same literal is written to the `provider` column
/// by the inherent `store_pull_requests`.
fn name(&self) -> &str {
"github"
}
/// Delegates to the inherent `GitHubClient::fetch_pull_requests`.
async fn fetch_pull_requests(&self) -> Result<Vec<PullRequest>> {
// The type-qualified path is meant to select the inherent method rather
// than re-entering this trait method.
GitHubClient::fetch_pull_requests(self).await
}
/// Delegates to the inherent `GitHubClient::store_pull_requests`.
fn store_pull_requests(
&self,
db: &Database,
prs: &[PullRequest],
) -> crate::core::Result<usize> {
GitHubClient::store_pull_requests(self, db, prs)
}
}
// Unit tests: payload deserialization, repository resolution, constructor
// invariants, slug / remote-URL parsing and `commit_shas` gating. None of
// them performs network I/O.
#[cfg(test)]
mod tests {
use super::*;
// A fully populated issue payload maps onto every `GitHubIssue` field.
#[test]
fn github_issue_deserializes_full_payload() {
let json = r#"{
"number": 42,
"title": "Crash on startup",
"state": "open",
"html_url": "https://github.com/o/r/issues/42",
"labels": [
{"name": "bug"},
{"name": "high-priority"}
],
"body": "Stack trace: ..."
}"#;
let issue: GitHubIssue = serde_json::from_str(json).expect("parses");
assert_eq!(issue.number, 42);
assert_eq!(issue.title, "Crash on startup");
assert_eq!(issue.state, "open");
assert_eq!(issue.html_url, "https://github.com/o/r/issues/42");
assert_eq!(issue.labels.len(), 2);
assert_eq!(issue.labels[0].name, "bug");
assert_eq!(issue.labels[1].name, "high-priority");
assert_eq!(issue.body.as_deref(), Some("Stack trace: ..."));
}
// Reviews parse both with and without the optional `user` / `submitted_at`.
#[test]
fn github_review_deserializes() {
let json = r#"{
"id": 12345,
"state": "APPROVED",
"user": {"login": "octocat"},
"submitted_at": "2024-01-01T00:00:00Z"
}"#;
let r: GitHubReview = serde_json::from_str(json).expect("parses");
assert_eq!(r.id, 12345);
assert_eq!(r.state, "APPROVED");
assert_eq!(r.user.as_ref().map(|u| u.login.as_str()), Some("octocat"));
assert_eq!(r.submitted_at.as_deref(), Some("2024-01-01T00:00:00Z"));
// Minimal payload: the optional fields fall back to `None`.
let pending = r#"{"id": 1, "state": "PENDING"}"#;
let r2: GitHubReview = serde_json::from_str(pending).expect("parses pending");
assert!(r2.user.is_none());
assert!(r2.submitted_at.is_none());
}
// A PR commit payload exposes the SHA, message and nested author block.
#[test]
fn github_pr_commit_deserializes() {
let json = r#"{
"sha": "deadbeefcafebabe",
"commit": {
"message": "feat: do the thing",
"author": {
"name": "Ada Lovelace",
"email": "ada@example.com",
"date": "2024-01-01T00:00:00Z"
}
}
}"#;
let c: GitHubPrCommit = serde_json::from_str(json).expect("parses");
assert_eq!(c.sha, "deadbeefcafebabe");
assert_eq!(c.commit.message, "feat: do the thing");
let author = c.commit.author.expect("author present");
assert_eq!(author.name, "Ada Lovelace");
assert_eq!(author.email, "ada@example.com");
assert_eq!(author.date.as_deref(), Some("2024-01-01T00:00:00Z"));
}
// Missing `labels` / `body` default to empty / `None`.
#[test]
fn github_issue_tolerates_missing_optional_fields() {
let json = r#"{
"number": 7,
"title": "Q",
"state": "closed",
"html_url": "https://github.com/o/r/issues/7"
}"#;
let issue: GitHubIssue = serde_json::from_str(json).expect("parses");
assert_eq!(issue.number, 7);
assert!(issue.labels.is_empty());
assert!(issue.body.is_none());
}
use std::path::PathBuf;
use crate::core::config::RepositoryConfig;
// Builds a `GithubConfig` with only `repo` / `org` varying between tests.
fn gh(repo: Option<&str>, org: Option<&str>) -> GithubConfig {
GithubConfig {
token: None,
org: org.map(str::to_string),
orgs: vec![],
repo: repo.map(str::to_string),
fetch_prs: true,
fetch_pr_reviews: true,
review_fetch_concurrency: 1,
ticket_regex: None,
}
}
// Builds a `RepositoryConfig`; all other fields take their defaults.
fn repo_cfg(path: &str, name: Option<&str>, org: Option<&str>) -> RepositoryConfig {
RepositoryConfig {
path: PathBuf::from(path),
name: name.map(str::to_string),
org: org.map(str::to_string),
..Default::default()
}
}
// A valid `github.repo` slug is returned on its own.
#[test]
fn resolve_github_repos_single_repo_mode() {
let cfg = gh(Some("acme/widget"), None);
let repos = resolve_github_repos(&cfg, &[]);
assert_eq!(repos, vec![("acme".to_string(), "widget".to_string())]);
}
// With only a global org, the repo name comes from the path's last component.
#[test]
fn resolve_github_repos_org_mode_uses_path_basename() {
let cfg = gh(None, Some("acme"));
let repos = vec![
repo_cfg("/tmp/widget", None, None),
repo_cfg("/tmp/gadget", None, None),
];
let resolved = resolve_github_repos(&cfg, &repos);
assert_eq!(
resolved,
vec![
("acme".to_string(), "widget".to_string()),
("acme".to_string(), "gadget".to_string()),
]
);
}
// A per-repository org takes precedence over the global org.
#[test]
fn resolve_github_repos_per_repo_org_overrides() {
let cfg = gh(None, Some("default-org"));
let repos = vec![
repo_cfg("/tmp/alpha", None, Some("specific-org")),
repo_cfg("/tmp/beta", None, None),
];
let resolved = resolve_github_repos(&cfg, &repos);
assert_eq!(
resolved,
vec![
("specific-org".to_string(), "alpha".to_string()),
("default-org".to_string(), "beta".to_string()),
]
);
}
// An explicit `name` wins over the directory name.
#[test]
fn resolve_github_repos_uses_explicit_name() {
let cfg = gh(None, Some("acme"));
let repos = vec![repo_cfg(
"/tmp/some-random-clone-dir",
Some("real-repo-name"),
None,
)];
let resolved = resolve_github_repos(&cfg, &repos);
assert_eq!(
resolved,
vec![("acme".to_string(), "real-repo-name".to_string())]
);
}
// No org anywhere and no readable clone at the path: the entry is skipped.
#[test]
fn resolve_github_repos_returns_empty_when_unresolvable() {
let cfg = gh(None, None);
let repos = vec![repo_cfg("/tmp/no-such-clone", None, None)];
let resolved = resolve_github_repos(&cfg, &repos);
assert!(resolved.is_empty(), "got: {resolved:?}");
}
// Nothing configured yields nothing.
#[test]
fn resolve_github_repos_empty_inputs() {
let cfg = gh(None, None);
let resolved = resolve_github_repos(&cfg, &[]);
assert!(resolved.is_empty());
}
// Two clones resolving to the same pair produce a single entry.
#[test]
fn resolve_github_repos_deduplicates() {
let cfg = gh(None, Some("acme"));
let repos = vec![
repo_cfg("/clone-a/widget", None, None),
repo_cfg("/clone-b/widget", None, None),
];
let resolved = resolve_github_repos(&cfg, &repos);
assert_eq!(resolved, vec![("acme".to_string(), "widget".to_string())]);
}
// `new_for_prs` refuses an empty repository list with a config error.
#[test]
fn new_for_prs_rejects_empty_repos() {
let cfg = gh(None, None);
match GitHubClient::new_for_prs(&cfg, vec![]) {
Ok(_) => panic!("expected error for empty repos"),
Err(CollectError::Config(msg)) => {
assert!(msg.contains("at least one"), "unexpected msg: {msg}")
}
Err(other) => panic!("unexpected error variant: {other:?}"),
}
}
// A reviews-only client carries no placeholder owner/repo values.
#[test]
fn new_for_reviews_builds_without_dummy_slugs() {
let cfg = gh(None, None);
let client = GitHubClient::new_for_reviews(&cfg).expect("client builds");
assert!(
client.owner.is_empty(),
"owner should be empty for reviews-only client"
);
assert!(
client.repo.is_empty(),
"repo should be empty for reviews-only client"
);
assert!(
client.repos.is_empty(),
"repos should be empty for reviews-only client"
);
}
// All pairs are kept and the first one becomes the primary repository.
#[test]
fn new_for_prs_stores_all_repos() {
let cfg = gh(None, Some("acme"));
let client = GitHubClient::new_for_prs(
&cfg,
vec![
("acme".into(), "alpha".into()),
("acme".into(), "beta".into()),
],
)
.expect("client builds");
assert_eq!(client.repos.len(), 2);
assert_eq!(client.owner, "acme");
assert_eq!(client.repo, "alpha");
}
// Slugs need a `/` with non-empty text on both sides.
#[test]
fn parse_slug_validates_input() {
assert_eq!(
parse_slug("owner/repo").unwrap(),
("owner".to_string(), "repo".to_string())
);
assert!(parse_slug("no-slash").is_err());
assert!(parse_slug("/repo").is_err());
assert!(parse_slug("owner/").is_err());
}
// HTTPS, SCP-style SSH, `ssh://` and credentialed HTTPS URLs all resolve;
// non-GitHub hosts and garbage do not.
#[test]
fn extract_owner_repo_from_url_handles_common_forms() {
assert_eq!(
extract_owner_repo_from_url("https://github.com/acme/widget.git"),
Some(("acme".to_string(), "widget".to_string()))
);
assert_eq!(
extract_owner_repo_from_url("https://github.com/acme/widget"),
Some(("acme".to_string(), "widget".to_string()))
);
assert_eq!(
extract_owner_repo_from_url("git@github.com:acme/widget.git"),
Some(("acme".to_string(), "widget".to_string()))
);
assert_eq!(
extract_owner_repo_from_url("ssh://git@github.com/acme/widget.git"),
Some(("acme".to_string(), "widget".to_string()))
);
assert_eq!(
extract_owner_repo_from_url("https://user@github.com/acme/widget"),
Some(("acme".to_string(), "widget".to_string()))
);
assert!(extract_owner_repo_from_url("https://gitlab.com/acme/widget").is_none());
assert!(extract_owner_repo_from_url("nonsense").is_none());
}
// `commit_shas` is only populated for PRs that have both `merged_at` and
// `merge_commit_sha`; four payloads cover the combinations.
#[test]
fn commit_shas_gated_on_merged_at() {
// Case 1: open PR that nevertheless carries a `merge_commit_sha`.
let json = r#"{
"number": 101,
"title": "Open PR",
"user": {"login": "octocat"},
"state": "open",
"created_at": "2024-01-15T10:30:00Z",
"merged_at": null,
"merge_commit_sha": "some-sha"
}"#;
let p: ApiPull = serde_json::from_str(json).expect("parses");
assert!(p.merge_commit_sha.is_some());
assert!(p.merged_at.is_none());
assert_eq!(
commit_shas_for_pull(&p).expect("encodes"),
"[]",
"non-merged PR with a populated SHA must not emit commit_shas",
);
// Case 2: closed without being merged.
let json = r#"{
"number": 102,
"title": "Closed-no-merge PR",
"user": {"login": "octocat"},
"state": "closed",
"created_at": "2024-01-15T10:30:00Z",
"merged_at": null,
"merge_commit_sha": "some-sha"
}"#;
let p: ApiPull = serde_json::from_str(json).expect("parses");
assert_eq!(
commit_shas_for_pull(&p).expect("encodes"),
"[]",
"closed-without-merge PR must not emit commit_shas",
);
// Case 3: merged with a SHA — the only case that emits it.
let json = r#"{
"number": 103,
"title": "Merged PR",
"user": {"login": "octocat"},
"state": "closed",
"created_at": "2024-01-15T10:30:00Z",
"merged_at": "2024-01-16T12:00:00Z",
"merge_commit_sha": "some-sha"
}"#;
let p: ApiPull = serde_json::from_str(json).expect("parses");
assert!(p.merged_at.is_some());
assert_eq!(
commit_shas_for_pull(&p).expect("encodes"),
r#"["some-sha"]"#,
"merged PR with a SHA should emit a joinable commit_shas array",
);
// Case 4: merged but the SHA is null.
let json = r#"{
"number": 104,
"title": "Merged PR missing SHA",
"user": {"login": "octocat"},
"state": "closed",
"created_at": "2024-01-15T10:30:00Z",
"merged_at": "2024-01-16T12:00:00Z",
"merge_commit_sha": null
}"#;
let p: ApiPull = serde_json::from_str(json).expect("parses");
assert_eq!(
commit_shas_for_pull(&p).expect("encodes"),
"[]",
"merged PR without a SHA yields the empty array",
);
}
}