use std::collections::HashMap;
use regex::Regex;
use serde::Deserialize;
use tracing::warn;
use super::{ExternalSignal, GithubIssuesSourceConfig, EXTERNAL_SOURCE_CONFIDENCE};
/// A reference to a GitHub issue extracted from free text such as a commit message.
///
/// Two references are equal when both `repo` and `number` match, which makes
/// the type usable directly as a `HashSet`/`HashMap` key for de-duplication.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct GitHubRef {
    /// `Some("owner/name")` for a qualified reference (`owner/name#12`);
    /// `None` for a bare `#12` reference that carries no repository of its own.
    pub repo: Option<String>,
    /// The issue number following the `#`.
    pub number: u64,
}
/// Returns the regex that matches bare issue references such as `#123`.
///
/// The `#` must appear at the start of the text or directly after whitespace
/// or an opening parenthesis; capture group 1 holds the digits. The trailing
/// `\b` rejects digit runs that continue into word characters (`#123abc`).
///
/// The pattern is compiled once per process and cached; each call hands back
/// a clone of the cached value, which is far cheaper than recompiling.
///
/// # Panics
///
/// Panics only if the hard-coded pattern fails to compile, which would be a
/// programming error.
fn bare_ref_regex() -> Regex {
    static BARE_REF: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
    BARE_REF
        .get_or_init(|| Regex::new(r"(?:^|[\s(])#(\d+)\b").expect("static regex is valid"))
        .clone()
}
/// Returns the regex that matches repository-qualified issue references such
/// as `owner/repo#123`.
///
/// Capture group 1 holds the `owner/repo` slug and group 2 the digits. The
/// reference must appear at the start of the text or directly after
/// whitespace or an opening parenthesis, so a parenthesised reference like
/// `(owner/repo#123)` is recognised the same way a parenthesised bare
/// `(#123)` is.
///
/// The pattern is compiled once per process and cached; each call hands back
/// a clone of the cached value, which is far cheaper than recompiling.
///
/// # Panics
///
/// Panics only if the hard-coded pattern fails to compile, which would be a
/// programming error.
fn qualified_ref_regex() -> Regex {
    static QUALIFIED_REF: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
    QUALIFIED_REF
        .get_or_init(|| {
            Regex::new(r"(?:^|[\s(])([A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+)#(\d+)\b")
                .expect("static regex is valid")
        })
        .clone()
}
/// Extracts the distinct GitHub issue references mentioned in `message`.
///
/// Qualified references (`owner/repo#N`) are collected first, followed by
/// bare references (`#N`); within each group the order of first appearance
/// is kept. A reference is reported once per distinct `(repo, number)` pair.
/// Numbers that are zero or do not fit in a `u64` are ignored.
pub fn extract_github_refs(message: &str) -> Vec<GitHubRef> {
    use std::collections::HashSet;

    // Turns a captured digit run into an issue number, rejecting zero and
    // anything that overflows `u64`.
    let issue_number = |digits: &str| digits.parse::<u64>().ok().filter(|&n| n > 0);

    let mut dedup_keys: HashSet<(Option<String>, u64)> = HashSet::new();
    let mut refs = Vec::new();

    // Pass 1: repository-qualified references.
    let qualified = qualified_ref_regex();
    for caps in qualified.captures_iter(message) {
        let repo = caps.get(1).map(|m| m.as_str().to_string());
        let number = caps.get(2).and_then(|m| issue_number(m.as_str()));
        if let (Some(repo), Some(number)) = (repo, number) {
            if dedup_keys.insert((Some(repo.clone()), number)) {
                refs.push(GitHubRef {
                    repo: Some(repo),
                    number,
                });
            }
        }
    }

    // Pass 2: bare references, which carry no repository of their own.
    let bare = bare_ref_regex();
    let bare_numbers = bare
        .captures_iter(message)
        .filter_map(|caps| caps.get(1).and_then(|m| issue_number(m.as_str())));
    for number in bare_numbers {
        if dedup_keys.insert((None, number)) {
            refs.push(GitHubRef { repo: None, number });
        }
    }

    refs
}
/// The subset of a GitHub issue payload that this module reads.
///
/// `fetch_issue` deserializes the API response body into this type.
#[derive(Debug, Deserialize)]
pub struct GitHubIssue {
/// The issue number.
pub number: u64,
/// Labels attached to the issue; an absent `labels` field deserializes to
/// an empty list because of `#[serde(default)]`.
#[serde(default)]
pub labels: Vec<GitHubLabel>,
}
/// A single label on a GitHub issue, as deserialized from the issue payload.
#[derive(Debug, Deserialize)]
pub struct GitHubLabel {
/// The label's name; `classify_github_issue` looks this up verbatim in the
/// configured label mappings.
pub name: String,
}
/// Maps an issue to an [`ExternalSignal`] based on its labels.
///
/// Labels are checked in the order they appear on the issue; the first label
/// whose name is a key in `config.label_mappings` decides the category. The
/// signal's `source` records which label matched, and its confidence is
/// always `EXTERNAL_SOURCE_CONFIDENCE`.
///
/// Returns `None` when no label has a configured mapping.
pub fn classify_github_issue(
    issue: &GitHubIssue,
    config: &GithubIssuesSourceConfig,
) -> Option<ExternalSignal> {
    issue.labels.iter().find_map(|label| {
        config
            .label_mappings
            .get(label.name.as_str())
            .map(|category| ExternalSignal {
                category: category.clone(),
                confidence: EXTERNAL_SOURCE_CONFIDENCE,
                source: format!("github_issues:label:{}", label.name),
            })
    })
}
/// Fetches a single issue from the GitHub REST API.
///
/// The request goes to `{base}/repos/{owner_repo}/issues/{number}`, where
/// `base` is `api_base_override` when given and `https://api.github.com`
/// otherwise. If the environment variable named by `config.token_env` is set
/// to a non-empty value, it is sent as a bearer token; otherwise the request
/// is unauthenticated.
///
/// This is best-effort: a transport error, a non-success HTTP status, or a
/// body that does not deserialize into [`GitHubIssue`] is logged at `warn`
/// level and reported as `None`.
pub async fn fetch_issue(
    client: &reqwest::Client,
    config: &GithubIssuesSourceConfig,
    owner_repo: &str,
    number: u64,
    api_base_override: Option<&str>,
) -> Option<GitHubIssue> {
    let base = api_base_override.unwrap_or("https://api.github.com");
    let url = format!("{base}/repos/{owner_repo}/issues/{number}");

    let request = client.get(&url).header("User-Agent", "tga/1.0");
    // An unset or empty token variable means "send without credentials".
    let request = match std::env::var(&config.token_env) {
        Ok(token) if !token.is_empty() => request.bearer_auth(token),
        _ => request,
    };

    let response = match request.send().await {
        Ok(response) => response,
        Err(e) => {
            warn!(owner_repo, number, error = %e, "GitHub Issues API request failed; skipping");
            return None;
        }
    };

    let status = response.status();
    if !status.is_success() {
        warn!(
            owner_repo,
            number,
            status = %status,
            "GitHub Issues API returned non-success; skipping"
        );
        return None;
    }

    match response.json::<GitHubIssue>().await {
        Ok(issue) => Some(issue),
        Err(e) => {
            warn!(owner_repo, number, error = %e, "failed to parse GitHub issue response");
            None
        }
    }
}
/// Fetches and classifies every reference in `refs`, one request at a time.
///
/// Bare references (no `repo`) are resolved against `config.repo`. The
/// result is keyed by `"{repo}#{number}"`; a reference whose key is already
/// present is skipped, so each distinct issue is fetched at most once per
/// call.
///
/// A value of `None` means either the issue could not be fetched or none of
/// its labels has a configured mapping.
pub async fn fetch_issues_batch(
    client: &reqwest::Client,
    config: &GithubIssuesSourceConfig,
    refs: &[GitHubRef],
    api_base_override: Option<&str>,
) -> HashMap<String, Option<ExternalSignal>> {
    use std::collections::hash_map::Entry;

    let mut signals: HashMap<String, Option<ExternalSignal>> = HashMap::new();

    for gh_ref in refs {
        let repo = match &gh_ref.repo {
            Some(explicit) => explicit.as_str(),
            None => config.repo.as_str(),
        };

        // Only issues we have not resolved yet trigger a network request.
        let slot = match signals.entry(format!("{repo}#{}", gh_ref.number)) {
            Entry::Occupied(_) => continue,
            Entry::Vacant(slot) => slot,
        };

        let signal = fetch_issue(client, config, repo, gh_ref.number, api_base_override)
            .await
            .and_then(|issue| classify_github_issue(&issue, config));
        slot.insert(signal);
    }

    signals
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a label with the given name.
    fn label(name: &str) -> GitHubLabel {
        GitHubLabel {
            name: name.to_string(),
        }
    }

    /// Builds a source config for `acme/widgets` with the given
    /// label-to-category mappings.
    fn config_with_mappings(mappings: &[(&str, &str)]) -> GithubIssuesSourceConfig {
        GithubIssuesSourceConfig {
            repo: "acme/widgets".to_string(),
            token_env: "GITHUB_TOKEN".to_string(),
            label_mappings: mappings
                .iter()
                .map(|&(name, category)| (name.to_string(), category.to_string()))
                .collect(),
        }
    }

    #[test]
    fn extract_github_refs_bare() {
        let refs = extract_github_refs("fix: closes #123");
        assert_eq!(refs.len(), 1);
        let only = &refs[0];
        assert_eq!(only.number, 123);
        assert!(only.repo.is_none());
    }

    #[test]
    fn extract_github_refs_qualified() {
        let refs = extract_github_refs("see acme/widgets#456 for context");
        assert_eq!(refs.len(), 1);
        let only = &refs[0];
        assert_eq!(only.number, 456);
        assert_eq!(only.repo.as_deref(), Some("acme/widgets"));
    }

    #[test]
    fn extract_github_refs_multiple_and_dedup() {
        let numbers: Vec<u64> = extract_github_refs("fixes #10 and closes #20 (see #10 again)")
            .iter()
            .map(|r| r.number)
            .collect();
        assert_eq!(numbers, vec![10, 20]);
    }

    #[test]
    fn extract_github_refs_ignores_hex_colors() {
        let refs = extract_github_refs("color: #ff0000 or #FFF");
        assert!(refs.is_empty(), "should not match hex colors, got {refs:?}");
    }

    #[test]
    fn classify_github_issue_matches_label() {
        let issue = GitHubIssue {
            number: 1,
            labels: vec![label("bug"), label("enhancement")],
        };
        let config = config_with_mappings(&[("bug", "bug_fix"), ("enhancement", "new_feature")]);

        // The first label on the issue with a mapping wins.
        let signal = classify_github_issue(&issue, &config).expect("should match");
        assert_eq!(signal.category, "bug_fix");
        assert!(signal.source.contains("bug"));
    }

    #[test]
    fn classify_github_issue_returns_none_on_no_match() {
        let issue = GitHubIssue {
            number: 2,
            labels: vec![label("wontfix")],
        };
        let config = config_with_mappings(&[]);

        assert!(classify_github_issue(&issue, &config).is_none());
    }
}