use std::time::Duration;
use chrono::{DateTime, Utc};
use reqwest::header::{HeaderMap, HeaderValue, ACCEPT, AUTHORIZATION, USER_AGENT};
use rusqlite::params;
use serde::Deserialize;
use tracing::{debug, warn};
use crate::collect::errors::{CollectError, Result};
use crate::core::config::GithubConfig;
use crate::core::db::Database;
use crate::core::models::{PrState, PullRequest};
/// Value sent in the `User-Agent` header of every request.
const USER_AGENT_VALUE: &str = "trusty-git-analytics/0.1";
/// Root URL that every REST endpoint path is appended to.
const GITHUB_API_BASE: &str = "https://api.github.com";
/// Items requested per page (`per_page`); a shorter page ends pagination.
const PAGE_SIZE: u32 = 100;
/// Retries allowed after the first attempt, i.e. up to `MAX_RETRIES + 1` requests.
const MAX_RETRIES: u32 = 3;
/// Base retry delay in milliseconds; the delay doubles with each attempt.
const RETRY_BASE_MS: u64 = 1000;
/// HTTP client bound to a single GitHub repository (`owner`/`repo`).
pub struct GitHubClient {
/// Underlying HTTP client; `new` installs the default headers and timeout on it.
client: reqwest::Client,
/// Copy of the configured token; only consulted by `has_token`.
token: Option<String>,
/// Part of `github.repo` before the first `/`.
owner: String,
/// Part of `github.repo` after the first `/`.
repo: String,
}
/// Fields read from each element of the pull-request list response.
#[derive(Debug, Deserialize)]
struct ApiPull {
/// Pull request number; copied into `PullRequest::pr_number`.
number: u64,
title: String,
/// Author account; a missing user becomes an empty author string.
user: Option<ApiUser>,
/// Raw state string; only compared against `"closed"`.
state: String,
created_at: DateTime<Utc>,
/// Merge timestamp; when present the pull request counts as merged regardless of `state`.
merged_at: Option<DateTime<Utc>>,
/// SHA of the merge commit, if any; becomes the sole entry of `commit_shas`.
#[serde(default)]
merge_commit_sha: Option<String>,
}
/// Author object nested in a pull-request payload; only the login is kept.
#[derive(Debug, Deserialize)]
struct ApiUser {
login: String,
}
/// Issue as returned by `fetch_issue` and `list_issues`.
#[derive(Debug, Clone, Deserialize, serde::Serialize)]
pub struct GitHubIssue {
pub number: u64,
pub title: String,
/// Raw state string as sent by the API (e.g. `"open"`, `"closed"`).
pub state: String,
pub html_url: String,
/// Labels attached to the issue; empty when the key is absent.
#[serde(default)]
pub labels: Vec<GhLabel>,
/// Issue description; `None` when the key is absent or null.
#[serde(default)]
pub body: Option<String>,
}
/// Label attached to a [`GitHubIssue`]; only the name is kept.
#[derive(Debug, Clone, Deserialize, serde::Serialize)]
pub struct GhLabel {
pub name: String,
}
/// Account reference carrying only the login name (used by [`GitHubReview`]).
#[derive(Debug, Clone, Deserialize, serde::Serialize)]
pub struct GhUser {
pub login: String,
}
/// Author identity recorded on a commit (see [`GitHubCommitDetail`]).
#[derive(Debug, Clone, Deserialize, serde::Serialize)]
pub struct GhAuthor {
pub name: String,
pub email: String,
/// Author date kept as the raw string from the payload; it is not parsed here.
#[serde(default)]
pub date: Option<String>,
}
/// The nested `commit` object of a [`GitHubPrCommit`].
#[derive(Debug, Clone, Deserialize, serde::Serialize)]
pub struct GitHubCommitDetail {
pub message: String,
/// Commit author; `None` when the key is absent or null.
#[serde(default)]
pub author: Option<GhAuthor>,
}
/// One commit of a pull request, as returned by `fetch_pr_commits`.
#[derive(Debug, Clone, Deserialize, serde::Serialize)]
pub struct GitHubPrCommit {
pub sha: String,
pub commit: GitHubCommitDetail,
}
/// One review of a pull request, as returned by `fetch_pr_reviews`.
#[derive(Debug, Clone, Deserialize, serde::Serialize)]
pub struct GitHubReview {
pub id: u64,
/// Raw review state string (e.g. `"APPROVED"`, `"PENDING"`).
pub state: String,
/// Reviewer account; `None` when the key is absent or null.
#[serde(default)]
pub user: Option<GhUser>,
/// Submission timestamp kept as the raw string; `None` when absent or null.
#[serde(default)]
pub submitted_at: Option<String>,
}
impl GitHubClient {
pub fn new(config: &GithubConfig) -> Result<Self> {
let repo_slug = config
.repo
.as_ref()
.ok_or_else(|| CollectError::Config("github.repo is required (owner/name)".into()))?;
let (owner, repo) = repo_slug.split_once('/').ok_or_else(|| {
CollectError::Config(format!(
"github.repo must be 'owner/name', got '{repo_slug}'"
))
})?;
let mut headers = HeaderMap::new();
headers.insert(USER_AGENT, HeaderValue::from_static(USER_AGENT_VALUE));
headers.insert(
ACCEPT,
HeaderValue::from_static("application/vnd.github+json"),
);
if let Some(token) = &config.token {
let val = HeaderValue::from_str(&format!("Bearer {token}"))
.map_err(|e| CollectError::Config(format!("invalid token header: {e}")))?;
headers.insert(AUTHORIZATION, val);
}
let client = reqwest::Client::builder()
.default_headers(headers)
.timeout(std::time::Duration::from_secs(30))
.build()?;
Ok(Self {
client,
token: config.token.clone(),
owner: owner.to_string(),
repo: repo.to_string(),
})
}
pub async fn fetch_pull_requests(&self) -> Result<Vec<PullRequest>> {
let mut out: Vec<PullRequest> = Vec::new();
let mut page = 1u32;
loop {
let url = format!(
"{GITHUB_API_BASE}/repos/{}/{}/pulls?state=all&per_page={PAGE_SIZE}&page={page}",
self.owner, self.repo
);
debug!(url = %url, "GET");
let resp = self.retry_request(&url).await?;
if let Some(rem) = resp
.headers()
.get("x-ratelimit-remaining")
.and_then(|v| v.to_str().ok())
.and_then(|s| s.parse::<u32>().ok())
{
if rem < 5 {
warn!(remaining = rem, "GitHub rate limit nearly exhausted");
}
}
let resp = resp.error_for_status()?;
let pulls: Vec<ApiPull> = resp.json().await?;
if pulls.is_empty() {
break;
}
let n = pulls.len();
for p in pulls {
let state = if p.merged_at.is_some() {
PrState::Merged
} else if p.state == "closed" {
PrState::Closed
} else {
PrState::Open
};
let commit_shas = match &p.merge_commit_sha {
Some(s) => serde_json::to_string(&vec![s.clone()])?,
None => "[]".to_string(),
};
out.push(PullRequest {
id: 0,
pr_number: p.number,
title: p.title,
author: p.user.map(|u| u.login).unwrap_or_default(),
state,
created_at: p.created_at,
merged_at: p.merged_at,
commit_shas,
});
}
if (n as u32) < PAGE_SIZE {
break;
}
page += 1;
}
Ok(out)
}
pub fn store_pull_requests(
&self,
db: &Database,
prs: &[PullRequest],
) -> crate::core::Result<usize> {
let conn = db.connection();
let mut count = 0usize;
for pr in prs {
conn.execute(
"INSERT OR REPLACE INTO pull_requests \
(provider, pr_number, title, author, state, created_at, merged_at, commit_shas) \
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
params![
"github",
pr.pr_number as i64,
pr.title,
pr.author,
pr.state.as_str(),
pr.created_at.to_rfc3339(),
pr.merged_at.map(|t| t.to_rfc3339()),
pr.commit_shas,
],
)?;
count += 1;
}
Ok(count)
}
pub fn has_token(&self) -> bool {
self.token.is_some()
}
pub async fn fetch_issue(&self, number: u64) -> Result<Option<GitHubIssue>> {
let url = format!(
"{GITHUB_API_BASE}/repos/{}/{}/issues/{number}",
self.owner, self.repo
);
debug!(url = %url, "GET");
let resp = self.client.get(&url).send().await?;
if resp.status() == reqwest::StatusCode::NOT_FOUND {
return Ok(None);
}
let resp = resp.error_for_status()?;
let issue: GitHubIssue = resp.json().await?;
Ok(Some(issue))
}
async fn retry_request(&self, url: &str) -> Result<reqwest::Response> {
let mut last_err: Option<reqwest::Error> = None;
for attempt in 0..=MAX_RETRIES {
debug!(url = %url, attempt, "GET (with retry)");
match self.client.get(url).send().await {
Ok(resp) => {
let status = resp.status();
let transient =
status.as_u16() == 429 || (500..=599).contains(&status.as_u16());
if !transient || attempt == MAX_RETRIES {
return Ok(resp);
}
let delay = RETRY_BASE_MS * (1u64 << attempt);
warn!(
status = %status,
attempt,
delay_ms = delay,
"GitHub returned transient status; retrying"
);
tokio::time::sleep(Duration::from_millis(delay)).await;
}
Err(e) => {
if attempt == MAX_RETRIES {
return Err(CollectError::Http(e));
}
let delay = RETRY_BASE_MS * (1u64 << attempt);
warn!(error = %e, attempt, delay_ms = delay, "transport error; retrying");
last_err = Some(e);
tokio::time::sleep(Duration::from_millis(delay)).await;
}
}
}
Err(CollectError::Http(
last_err.expect("retry loop preserved error"),
))
}
pub async fn fetch_pr_reviews(&self, pr_number: u64) -> Result<Vec<GitHubReview>> {
let mut out = Vec::new();
let mut page = 1u32;
loop {
let url = format!(
"{GITHUB_API_BASE}/repos/{}/{}/pulls/{pr_number}/reviews?per_page={PAGE_SIZE}&page={page}",
self.owner, self.repo
);
let resp = self.retry_request(&url).await?.error_for_status()?;
let batch: Vec<GitHubReview> = resp.json().await?;
let n = batch.len();
out.extend(batch);
if (n as u32) < PAGE_SIZE {
break;
}
page += 1;
}
Ok(out)
}
pub async fn fetch_pr_commits(&self, pr_number: u64) -> Result<Vec<GitHubPrCommit>> {
let mut out = Vec::new();
let mut page = 1u32;
loop {
let url = format!(
"{GITHUB_API_BASE}/repos/{}/{}/pulls/{pr_number}/commits?per_page={PAGE_SIZE}&page={page}",
self.owner, self.repo
);
let resp = self.retry_request(&url).await?.error_for_status()?;
let batch: Vec<GitHubPrCommit> = resp.json().await?;
let n = batch.len();
out.extend(batch);
if (n as u32) < PAGE_SIZE {
break;
}
page += 1;
}
Ok(out)
}
pub async fn list_issues(&self, state: &str, since: Option<&str>) -> Result<Vec<GitHubIssue>> {
let mut out = Vec::new();
let mut page = 1u32;
loop {
let mut url = format!(
"{GITHUB_API_BASE}/repos/{}/{}/issues?state={state}&per_page={PAGE_SIZE}&page={page}",
self.owner, self.repo
);
if let Some(s) = since {
url.push_str("&since=");
url.push_str(s);
}
let resp = self.retry_request(&url).await?.error_for_status()?;
let batch: Vec<GitHubIssue> = resp.json().await?;
let n = batch.len();
out.extend(batch);
if (n as u32) < PAGE_SIZE {
break;
}
page += 1;
}
Ok(out)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A fully populated issue payload fills every field of `GitHubIssue`.
    #[test]
    fn github_issue_deserializes_full_payload() {
        let payload = r#"{
"number": 42,
"title": "Crash on startup",
"state": "open",
"html_url": "https://github.com/o/r/issues/42",
"labels": [
{"name": "bug"},
{"name": "high-priority"}
],
"body": "Stack trace: ..."
}"#;
        let parsed: GitHubIssue = serde_json::from_str(payload).expect("parses");

        assert_eq!(parsed.number, 42);
        assert_eq!(parsed.title, "Crash on startup");
        assert_eq!(parsed.state, "open");
        assert_eq!(parsed.html_url, "https://github.com/o/r/issues/42");

        // Comparing the collected names checks both the count and the order.
        let label_names: Vec<&str> = parsed.labels.iter().map(|l| l.name.as_str()).collect();
        assert_eq!(label_names, ["bug", "high-priority"]);

        assert_eq!(parsed.body.as_deref(), Some("Stack trace: ..."));
    }

    /// Reviews parse both with and without the optional reviewer/timestamp.
    #[test]
    fn github_review_deserializes() {
        let approved_json = r#"{
"id": 12345,
"state": "APPROVED",
"user": {"login": "octocat"},
"submitted_at": "2024-01-01T00:00:00Z"
}"#;
        let approved: GitHubReview = serde_json::from_str(approved_json).expect("parses");
        assert_eq!(approved.id, 12345);
        assert_eq!(approved.state, "APPROVED");
        let reviewer = approved.user.as_ref().map(|u| u.login.as_str());
        assert_eq!(reviewer, Some("octocat"));
        assert_eq!(
            approved.submitted_at.as_deref(),
            Some("2024-01-01T00:00:00Z")
        );

        // A pending review carries neither a user nor a submission time.
        let pending_json = r#"{"id": 1, "state": "PENDING"}"#;
        let pending: GitHubReview = serde_json::from_str(pending_json).expect("parses pending");
        assert!(pending.user.is_none());
        assert!(pending.submitted_at.is_none());
    }

    /// A pull-request commit exposes its SHA, message and nested author.
    #[test]
    fn github_pr_commit_deserializes() {
        let payload = r#"{
"sha": "deadbeefcafebabe",
"commit": {
"message": "feat: do the thing",
"author": {
"name": "Ada Lovelace",
"email": "ada@example.com",
"date": "2024-01-01T00:00:00Z"
}
}
}"#;
        let parsed: GitHubPrCommit = serde_json::from_str(payload).expect("parses");
        assert_eq!(parsed.sha, "deadbeefcafebabe");
        assert_eq!(parsed.commit.message, "feat: do the thing");

        let GhAuthor { name, email, date } = parsed.commit.author.expect("author present");
        assert_eq!(name, "Ada Lovelace");
        assert_eq!(email, "ada@example.com");
        assert_eq!(date.as_deref(), Some("2024-01-01T00:00:00Z"));
    }

    /// Labels and body may be omitted from an issue payload.
    #[test]
    fn github_issue_tolerates_missing_optional_fields() {
        let payload = r#"{
"number": 7,
"title": "Q",
"state": "closed",
"html_url": "https://github.com/o/r/issues/7"
}"#;
        let parsed: GitHubIssue = serde_json::from_str(payload).expect("parses");
        assert_eq!(parsed.number, 7);
        assert!(parsed.labels.is_empty());
        assert!(parsed.body.is_none());
    }
}