use reqwest::header::{HeaderMap, HeaderValue};
use reqwest::Client;
use serde::{Deserialize, Serialize};
use tracing::{debug, info, warn};
use crate::classify::tiers::bedrock::BedrockClassifier;
use crate::classify::tiers::ClassificationResult;
use crate::core::config::{LlmConfig, LlmSource};
use crate::core::models::ClassificationMethod;
/// Chat-completions URL used by `LlmClassifier::new` (the OpenAI endpoint).
const DEFAULT_ENDPOINT: &str = "https://api.openai.com/v1/chat/completions";
/// Chat-completions URL used by `LlmClassifier::build_openrouter`.
const OPENROUTER_ENDPOINT: &str = "https://openrouter.ai/api/v1/chat/completions";
/// Value sent in the `HTTP-Referer` header on OpenRouter requests.
const OPENROUTER_REFERER: &str = "https://github.com/bobmatnyc/trusty-git-analytics";
/// Value sent in the `X-Title` header on OpenRouter requests.
const OPENROUTER_TITLE: &str = "trusty-git-analytics";
/// Messages URL used by `LlmClassifier::build_anthropic`.
const ANTHROPIC_ENDPOINT: &str = "https://api.anthropic.com/v1/messages";
/// Value sent in the `anthropic-version` header on Anthropic requests.
const ANTHROPIC_API_VERSION: &str = "2023-06-01";
/// Model substituted by `LlmClassifier::from_llm_config` when the Anthropic
/// source is selected and the caller passed the model name `"gpt-4o-mini"`.
pub const ANTHROPIC_DEFAULT_MODEL: &str = "claude-3-5-haiku-latest";
/// System prompt sent by both the Anthropic and the OpenAI-compatible request
/// paths in this file.
///
/// It asks the model to answer with a single JSON object carrying `category`,
/// `subcategory`, `confidence` and `complexity`; that object is what
/// `LlmVerdict` deserializes.
pub const SYSTEM_PROMPT: &str = "You are a git commit classifier. Respond with ONLY a JSON \
object: {\"category\": \"feature|bugfix|chore|documentation|refactor|test|ci|performance|style|build|revert|merge|breaking|uncategorized\", \
\"subcategory\": \"optional string or null\", \"confidence\": 0.0-1.0, \
\"complexity\": <integer 1-5>}. \
Complexity 1-5: \
1=trivial (config/version bump/typo), 2=simple (single-file bugfix), \
3=moderate (multi-file feature), 4=complex (cross-module/arch change), \
5=highly complex (system design/major refactor). \
No prose, no markdown. \
Example: {\"category\": \"bugfix\", \"subcategory\": \"null-check\", \
\"confidence\": 0.9, \"complexity\": 2}";
/// Commit-message classifier backed by a remote LLM.
///
/// One value talks to exactly one backend: an OpenAI-compatible
/// chat-completions endpoint, the Anthropic Messages endpoint, or a
/// `BedrockClassifier`. Which one is decided by the constructor used.
pub struct LlmClassifier {
/// HTTP client used for the OpenAI-compatible and Anthropic request paths.
client: Client,
/// Model identifier placed in the request body.
model: String,
/// API key for the HTTP paths; when `None` those paths return `None`
/// without sending a request.
api_key: Option<String>,
/// URL the HTTP paths POST to (left empty by the Bedrock constructors).
endpoint: String,
/// Additional headers attached to every HTTP request.
extra_headers: HeaderMap,
/// When `Some`, `classify` delegates to this backend instead of using HTTP.
bedrock: Option<BedrockClassifier>,
/// Selects the Anthropic Messages request/response shape instead of the
/// OpenAI chat-completions shape.
use_anthropic_format: bool,
}
impl LlmClassifier {
/// Creates a classifier that posts OpenAI-style chat-completions requests to
/// the default endpoint.
///
/// `api_key` may be `None`; in that case `classify` returns `None` without
/// contacting the endpoint. No extra headers are attached and no Bedrock
/// backend is configured.
pub fn new(model: &str, api_key: Option<String>) -> Self {
    let endpoint = String::from(DEFAULT_ENDPOINT);
    let model = model.to_owned();
    Self {
        client: Client::new(),
        model,
        api_key,
        endpoint,
        extra_headers: HeaderMap::new(),
        bedrock: None,
        use_anthropic_format: false,
    }
}
/// Creates a classifier that talks to the Anthropic Messages endpoint.
///
/// The returned value has `use_anthropic_format` set and always carries the
/// `anthropic-version` header. `api_key` may be `None`; in that case
/// `classify` returns `None` without sending a request.
pub fn build_anthropic(model: &str, api_key: Option<String>) -> Self {
    let mut headers = HeaderMap::new();
    // The version is a compile-time ASCII constant, so use the infallible
    // `from_static` (as `build_openrouter` does) instead of a fallible parse
    // whose failure would silently drop this header.
    headers.insert(
        "anthropic-version",
        HeaderValue::from_static(ANTHROPIC_API_VERSION),
    );
    Self {
        client: Client::new(),
        model: model.to_string(),
        api_key,
        endpoint: ANTHROPIC_ENDPOINT.to_string(),
        extra_headers: headers,
        bedrock: None,
        use_anthropic_format: true,
    }
}
/// Builds a classifier for the named provider without any async setup.
///
/// `provider` is trimmed and matched case-insensitively:
///
/// * `"openrouter"` – OpenRouter endpoint, using `openrouter_api_key`.
/// * `"openai"` – default endpoint, key read from `OPENAI_API_KEY`.
/// * `"auto"` or empty – OpenRouter when a key is available (argument first,
///   then `OPENROUTER_API_KEY`), otherwise OpenAI.
/// * anything else – logs a warning and falls back to OpenAI.
///
/// An empty key (argument or environment variable) is treated as absent, the
/// same way `from_llm_config` treats it, so an empty `OPENROUTER_API_KEY`
/// does not make `auto` pick OpenRouter with a key that cannot work.
///
/// # Errors
///
/// Always returns `Err` for `"bedrock"`: that provider needs
/// `from_provider_async`, or the `bedrock` feature is not compiled in.
pub fn from_provider(
    provider: &str,
    model: &str,
    openrouter_api_key: Option<String>,
) -> Result<Self, String> {
    // Reads an environment variable, mapping both "unset" and "empty" to `None`.
    fn env_key(name: &str) -> Option<String> {
        std::env::var(name).ok().filter(|k| !k.is_empty())
    }

    let openrouter_api_key = openrouter_api_key.filter(|k| !k.is_empty());
    let normalized = provider.trim().to_ascii_lowercase();
    match normalized.as_str() {
        "openrouter" => Ok(Self::build_openrouter(model, openrouter_api_key)),
        "openai" => Ok(Self::new(model, env_key("OPENAI_API_KEY"))),
        "bedrock" => {
            info!(model, "LLM provider: bedrock (requested via sync path)");
            #[cfg(feature = "bedrock")]
            {
                Err("bedrock provider requires the async constructor; use \
                     LlmClassifier::from_provider_async"
                    .to_string())
            }
            #[cfg(not(feature = "bedrock"))]
            {
                Err(
                    "bedrock feature not compiled in — rebuild with --features bedrock"
                        .to_string(),
                )
            }
        }
        "auto" | "" => {
            let or_key = openrouter_api_key.or_else(|| env_key("OPENROUTER_API_KEY"));
            if or_key.is_some() {
                info!("LLM provider auto-selected: openrouter");
                Ok(Self::build_openrouter(model, or_key))
            } else {
                info!("LLM provider auto-selected: openai");
                Ok(Self::new(model, env_key("OPENAI_API_KEY")))
            }
        }
        other => {
            // Deliberate best-effort: an unrecognised name is not an error.
            warn!(
                provider = %other,
                "unknown LLM provider; falling back to OpenAI endpoint"
            );
            Ok(Self::new(model, env_key("OPENAI_API_KEY")))
        }
    }
}
/// Async counterpart of `from_provider` that can also initialise Bedrock.
///
/// When `provider` (trimmed, case-insensitive) is `"bedrock"`, a
/// `BedrockClassifier` is created for `model` and wrapped; every other
/// provider name is forwarded unchanged to `from_provider`.
///
/// # Errors
///
/// Propagates the error from `BedrockClassifier::new` or `from_provider`.
pub async fn from_provider_async(
    provider: &str,
    model: &str,
    openrouter_api_key: Option<String>,
) -> Result<Self, String> {
    let wants_bedrock = provider.trim().eq_ignore_ascii_case("bedrock");
    if !wants_bedrock {
        return Self::from_provider(provider, model, openrouter_api_key);
    }

    info!(model, "LLM provider: bedrock (async init)");
    let backend = BedrockClassifier::new(model).await?;
    Ok(Self {
        client: Client::new(),
        model: model.to_string(),
        api_key: None,
        endpoint: String::new(),
        extra_headers: HeaderMap::new(),
        bedrock: Some(backend),
        use_anthropic_format: false,
    })
}
/// Builds a classifier from the `llm:` configuration section.
///
/// The backend is chosen by `cfg.source`. For the OpenRouter and Anthropic
/// sources the API key is read from the environment variable named by
/// `cfg.api_key_env`; an unset or empty variable is an error. For the
/// Anthropic source, a `model` of `"gpt-4o-mini"` is replaced by
/// `ANTHROPIC_DEFAULT_MODEL`.
///
/// # Errors
///
/// Returns a descriptive message when a required API key is missing, or
/// propagates the error from `BedrockClassifier::with_region`.
pub async fn from_llm_config(cfg: &LlmConfig, model: &str) -> Result<Self, String> {
    // Unset and empty variables are both treated as "no key".
    let read_key = || {
        std::env::var(&cfg.api_key_env)
            .ok()
            .filter(|k| !k.is_empty())
    };

    match &cfg.source {
        LlmSource::Openrouter => {
            let api_key = match read_key() {
                Some(found) => found,
                None => {
                    return Err(format!(
                        "LLM source 'openrouter' requires an API key but the environment \
                         variable '{}' (set via llm.api_key_env) is not set or empty. \
                         Export the variable with your OpenRouter API key before running tga.",
                        cfg.api_key_env
                    ));
                }
            };
            info!(
                model,
                api_key_env = %cfg.api_key_env,
                "LLM provider: openrouter (from llm: config section)"
            );
            Ok(Self::build_openrouter(model, Some(api_key)))
        }
        LlmSource::Bedrock => {
            info!(
                model,
                region = ?cfg.region,
                "LLM provider: bedrock (from llm: config section)"
            );
            let region = cfg.region.as_deref();
            let backend = BedrockClassifier::with_region(model, region).await?;
            Ok(Self {
                client: Client::new(),
                model: model.to_string(),
                api_key: None,
                endpoint: String::new(),
                extra_headers: HeaderMap::new(),
                bedrock: Some(backend),
                use_anthropic_format: false,
            })
        }
        LlmSource::AnthropicApi => {
            let api_key = match read_key() {
                Some(found) => found,
                None => {
                    return Err(format!(
                        "LLM source 'anthropic-api' requires an API key but the environment \
                         variable '{}' (set via llm.api_key_env) is not set or empty. \
                         Export the variable with your Anthropic API key before running tga. \
                         Example: export {}=sk-ant-...",
                        cfg.api_key_env, cfg.api_key_env
                    ));
                }
            };
            // "gpt-4o-mini" is swapped for the Anthropic default model.
            let effective_model = match model {
                "gpt-4o-mini" => ANTHROPIC_DEFAULT_MODEL,
                requested => requested,
            };
            info!(
                model = effective_model,
                api_key_env = %cfg.api_key_env,
                "LLM provider: anthropic-api (direct Anthropic Messages API)"
            );
            Ok(Self::build_anthropic(effective_model, Some(api_key)))
        }
    }
}
/// Creates a classifier that targets the OpenRouter chat-completions endpoint.
///
/// When `api_key` is `None` the key is taken from `OPENROUTER_API_KEY`
/// instead. The `HTTP-Referer` and `X-Title` headers are attached to every
/// request.
fn build_openrouter(model: &str, api_key: Option<String>) -> Self {
    let resolved_key = match api_key {
        Some(explicit) => Some(explicit),
        None => std::env::var("OPENROUTER_API_KEY").ok(),
    };

    let mut attribution = HeaderMap::new();
    for (name, value) in [
        ("HTTP-Referer", OPENROUTER_REFERER),
        ("X-Title", OPENROUTER_TITLE),
    ] {
        attribution.insert(name, HeaderValue::from_static(value));
    }

    Self {
        client: Client::new(),
        model: model.to_owned(),
        api_key: resolved_key,
        endpoint: String::from(OPENROUTER_ENDPOINT),
        extra_headers: attribution,
        bedrock: None,
        use_anthropic_format: false,
    }
}
/// Returns the classifier with its request URL replaced by `endpoint`.
///
/// Builder-style: consumes `self` and hands it back with only the endpoint
/// changed.
pub fn with_endpoint(self, endpoint: impl Into<String>) -> Self {
    let mut reconfigured = self;
    reconfigured.endpoint = endpoint.into();
    reconfigured
}
/// Reports whether this classifier has a backend it can use: either an API
/// key for the HTTP paths or a configured Bedrock backend.
pub fn has_api_key(&self) -> bool {
    let keyed = self.api_key.is_some();
    let via_bedrock = self.bedrock.is_some();
    keyed || via_bedrock
}
/// Classifies a single commit message with the configured backend.
///
/// Dispatch order: the Bedrock backend when one is configured, otherwise the
/// Anthropic request path when `use_anthropic_format` is set, otherwise the
/// OpenAI-compatible path. Returns `None` when the chosen path yields no
/// result.
pub async fn classify(&self, message: &str) -> Option<ClassificationResult> {
    match &self.bedrock {
        Some(backend) => {
            // Submit a one-element batch and take its only entry.
            let mut verdicts = backend
                .classify_batch_bedrock(&[message])
                .await
                .into_iter();
            verdicts.next().flatten()
        }
        None if self.use_anthropic_format => self.classify_anthropic(message).await,
        None => self.classify_openai_compat(message).await,
    }
}
async fn classify_anthropic(&self, message: &str) -> Option<ClassificationResult> {
let api_key = self.api_key.as_deref()?;
let body = AnthropicRequest {
model: &self.model,
max_tokens: 512,
system: SYSTEM_PROMPT,
messages: vec![AnthropicMessage {
role: "user",
content: format!("Classify this commit message:\n\n{message}"),
}],
};
let response = match self
.client
.post(&self.endpoint)
.header("x-api-key", api_key)
.headers(self.extra_headers.clone())
.json(&body)
.send()
.await
{
Ok(r) => r,
Err(e) => {
warn!(error = %e, "Anthropic API request failed");
return None;
}
};
if !response.status().is_success() {
warn!(status = %response.status(), "Anthropic API returned non-success status");
return None;
}
let parsed: AnthropicResponse = match response.json().await {
Ok(j) => j,
Err(e) => {
warn!(error = %e, "Anthropic API response JSON decode failed");
return None;
}
};
let content = parsed
.content
.into_iter()
.find(|c| c.kind == "text")
.and_then(|c| c.text)?;
debug!(content = %content, "Anthropic API raw response");
let verdict: LlmVerdict = serde_json::from_str(content.trim())
.map_err(|e| warn!(error = %e, "Anthropic API JSON parse failed"))
.ok()?;
Some(ClassificationResult {
category: verdict.category,
subcategory: verdict.subcategory,
top_level: None, confidence: verdict.confidence.clamp(0.0, 1.0),
method: ClassificationMethod::LlmFallback,
ticket_id: None,
complexity: verdict.complexity.map(|v| v.clamp(1, 5)),
})
}
async fn classify_openai_compat(&self, message: &str) -> Option<ClassificationResult> {
let api_key = self.api_key.as_deref()?;
let body = ChatRequest {
model: &self.model,
messages: vec![
ChatMessage {
role: "system",
content: SYSTEM_PROMPT.to_string(),
},
ChatMessage {
role: "user",
content: format!("Classify this commit message:\n\n{message}"),
},
],
temperature: 0.0,
response_format: Some(ResponseFormat {
kind: "json_object".to_string(),
}),
};
let response = match self
.client
.post(&self.endpoint)
.bearer_auth(api_key)
.headers(self.extra_headers.clone())
.json(&body)
.send()
.await
{
Ok(r) => r,
Err(e) => {
warn!(error = %e, "LLM request failed");
return None;
}
};
if !response.status().is_success() {
warn!(status = %response.status(), "LLM returned non-success status");
return None;
}
let parsed: ChatResponse = match response.json().await {
Ok(j) => j,
Err(e) => {
warn!(error = %e, "LLM response JSON decode failed");
return None;
}
};
let content = parsed.choices.first()?.message.content.clone();
debug!(content = %content, "LLM raw response");
let verdict: LlmVerdict = serde_json::from_str(&content)
.map_err(|e| warn!(error = %e, "LLM JSON parse failed"))
.ok()?;
Some(ClassificationResult {
category: verdict.category,
subcategory: verdict.subcategory,
top_level: None, confidence: verdict.confidence.clamp(0.0, 1.0),
method: ClassificationMethod::LlmFallback,
ticket_id: None,
complexity: verdict.complexity.map(|v| v.clamp(1, 5)),
})
}
}
/// Request body serialized for the OpenAI-compatible chat-completions path.
#[derive(Serialize)]
struct ChatRequest<'a> {
/// Model identifier, borrowed from the classifier.
model: &'a str,
/// Conversation turns, in order.
messages: Vec<ChatMessage>,
/// Sampling temperature; the classifier sends `0.0`.
temperature: f64,
/// Requested response format; the field is left out of the JSON when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
response_format: Option<ResponseFormat>,
}
/// One conversation turn inside a `ChatRequest`.
#[derive(Serialize)]
struct ChatMessage {
/// Speaker role; this file sends `"system"` and `"user"`.
role: &'static str,
/// Text of the turn.
content: String,
}
/// `response_format` object of a `ChatRequest`.
#[derive(Serialize)]
struct ResponseFormat {
/// Serialized under the JSON key `type`; this file sends `"json_object"`.
#[serde(rename = "type")]
kind: String,
}
/// Subset of the chat-completions response body that the classifier reads.
#[derive(Deserialize)]
struct ChatResponse {
/// Returned choices; only the first one is used.
choices: Vec<ChatChoice>,
}
/// One entry of `ChatResponse::choices`.
#[derive(Deserialize)]
struct ChatChoice {
/// The message carried by this choice.
message: ChatChoiceMessage,
}
/// Message inside a `ChatChoice`.
#[derive(Deserialize)]
struct ChatChoiceMessage {
/// Message text; the classifier parses it as the JSON for an `LlmVerdict`.
content: String,
}
/// Request body serialized for the Anthropic Messages path.
#[derive(Serialize)]
struct AnthropicRequest<'a> {
/// Model identifier, borrowed from the classifier.
model: &'a str,
/// Value of the request's `max_tokens` field; the classifier sends `512`.
max_tokens: u32,
/// System prompt, sent as a top-level field rather than as a message.
system: &'a str,
/// Conversation turns; the classifier sends a single user turn.
messages: Vec<AnthropicMessage>,
}
/// One conversation turn inside an `AnthropicRequest`.
#[derive(Serialize)]
struct AnthropicMessage {
/// Speaker role; this file sends `"user"`.
role: &'static str,
/// Text of the turn.
content: String,
}
/// Subset of the Anthropic Messages response body that the classifier reads.
#[derive(Deserialize)]
struct AnthropicResponse {
/// Content blocks of the reply; the first block of type `text` is used.
content: Vec<AnthropicContent>,
}
/// One content block of an `AnthropicResponse`.
#[derive(Deserialize)]
struct AnthropicContent {
/// Block type, read from the JSON key `type`; the classifier looks for `"text"`.
#[serde(rename = "type")]
kind: String,
/// Text payload of the block, if it has one.
text: Option<String>,
}
/// Classification verdict parsed from the JSON object the model returns.
///
/// Only `category` is required; the other fields fall back to defaults when
/// the model omits them.
#[derive(Debug, Deserialize)]
pub struct LlmVerdict {
/// Category label chosen by the model.
pub category: String,
/// Optional finer-grained label; `None` when absent or `null`.
#[serde(default)]
pub subcategory: Option<String>,
/// Model-reported confidence; falls back to `default_confidence()` when absent.
#[serde(default = "default_confidence")]
pub confidence: f64,
/// Model-reported complexity score; `None` when absent.
#[serde(default)]
pub complexity: Option<u8>,
}
/// Confidence assigned to an `LlmVerdict` whose JSON has no `confidence` field.
pub fn default_confidence() -> f64 {
    const NEUTRAL_CONFIDENCE: f64 = 0.5;
    NEUTRAL_CONFIDENCE
}
#[cfg(test)]
mod tests {
use super::*;
use wiremock::matchers::{header, method, path};
use wiremock::{Mock, MockServer, ResponseTemplate};
/// `has_api_key` is true with a key and false without one (no Bedrock backend).
#[test]
fn has_api_key_reflects_key_state() {
    let keyed = LlmClassifier::new("gpt-4o-mini", Some(String::from("sk-test")));
    let keyless = LlmClassifier::new("gpt-4o-mini", None);

    assert!(keyed.has_api_key());
    assert!(!keyless.has_api_key());
}
/// `build_anthropic` must select the Anthropic wire format, keep the key,
/// target the Anthropic endpoint and attach the version header.
#[test]
fn build_anthropic_sets_format_flag_and_version_header() {
    let key = Some(String::from("sk-ant-test"));
    let classifier = LlmClassifier::build_anthropic("claude-3-5-haiku-latest", key);

    assert!(
        classifier.use_anthropic_format,
        "must set use_anthropic_format"
    );
    assert!(classifier.api_key.is_some(), "api_key must be set");
    assert_eq!(classifier.endpoint, ANTHROPIC_ENDPOINT);

    let headers = &classifier.extra_headers;
    assert!(
        headers.contains_key("anthropic-version"),
        "anthropic-version header must be set"
    );
    let ver = headers.get("anthropic-version").unwrap();
    assert_eq!(ver, ANTHROPIC_API_VERSION);
}
/// Building the Anthropic classifier without a key leaves it unkeyed.
#[test]
fn build_anthropic_without_key_has_no_api_key() {
    let keyless = LlmClassifier::build_anthropic("claude-3-5-haiku-latest", None);
    let reports_key = keyless.has_api_key();
    assert!(!reports_key);
}
/// A well-formed Anthropic reply is decoded into a `ClassificationResult`.
#[tokio::test]
async fn anthropic_response_parsing() {
    let server = MockServer::start().await;
    let endpoint = format!("{}/v1/messages", server.uri());

    // Canned reply: a single text block carrying the verdict JSON.
    let canned = serde_json::json!({
        "content": [
            {
                "type": "text",
                "text": "{\"category\":\"bugfix\",\"subcategory\":\"null-check\",\"confidence\":0.92,\"complexity\":2}"
            }
        ]
    });
    let stub = Mock::given(method("POST"))
        .and(path("/v1/messages"))
        .respond_with(ResponseTemplate::new(200).set_body_json(canned));
    stub.mount(&server).await;

    let classifier = LlmClassifier::build_anthropic(
        "claude-3-5-haiku-latest",
        Some(String::from("sk-ant-test")),
    )
    .with_endpoint(endpoint);

    let outcome = classifier
        .classify("fix: handle null in user endpoint")
        .await
        .expect("verdict");

    assert_eq!(outcome.category, "bugfix");
    assert_eq!(outcome.subcategory.as_deref(), Some("null-check"));
    assert!((outcome.confidence - 0.92).abs() < 1e-6);
    assert_eq!(outcome.complexity, Some(2));
    assert_eq!(outcome.method, ClassificationMethod::LlmFallback);
}
/// The Anthropic request carries `x-api-key` and `anthropic-version`.
///
/// The mock only matches when both headers are present, so getting a verdict
/// back proves they were sent.
#[tokio::test]
async fn anthropic_api_request_sets_correct_headers() {
    let server = MockServer::start().await;
    let endpoint = format!("{}/v1/messages", server.uri());

    let canned = serde_json::json!({
        "content": [
            {
                "type": "text",
                "text": "{\"category\":\"chore\",\"subcategory\":null,\"confidence\":0.8,\"complexity\":1}"
            }
        ]
    });
    let stub = Mock::given(method("POST"))
        .and(path("/v1/messages"))
        .and(header("x-api-key", "sk-ant-test"))
        .and(header("anthropic-version", ANTHROPIC_API_VERSION))
        .respond_with(ResponseTemplate::new(200).set_body_json(canned));
    stub.mount(&server).await;

    let classifier = LlmClassifier::build_anthropic(
        "claude-3-5-haiku-latest",
        Some(String::from("sk-ant-test")),
    )
    .with_endpoint(endpoint);

    let outcome = classifier
        .classify("chore: bump version")
        .await
        .expect("verdict with correct headers");
    assert_eq!(outcome.category, "chore");
}
/// The Anthropic source reads its key from the variable named by `api_key_env`.
#[tokio::test]
async fn from_llm_config_anthropic_api_reads_api_key_env() {
    // A name unique to this test, so other tests do not touch the same variable.
    let var_name = "TGA_TEST_ANTHROPIC_KEY_9f2e";
    std::env::set_var(var_name, "sk-ant-from-env");

    let cfg = crate::core::config::LlmConfig {
        source: LlmSource::AnthropicApi,
        api_key_env: String::from(var_name),
        region: None,
        model: Some(String::from("claude-3-5-haiku-latest")),
    };
    let built = LlmClassifier::from_llm_config(&cfg, "claude-3-5-haiku-latest").await;

    // Clean up before asserting so a failure does not leave the variable set.
    std::env::remove_var(var_name);

    let classifier = built.expect("should build from env var");
    assert!(classifier.has_api_key());
    assert!(classifier.use_anthropic_format);
}
/// A missing key variable yields an error that names the variable.
#[tokio::test]
async fn from_llm_config_anthropic_api_missing_key_errors() {
    let var_name = "TGA_TEST_ANTHROPIC_MISSING_KEY_7c4b";
    // Make sure the variable really is absent.
    std::env::remove_var(var_name);

    let cfg = crate::core::config::LlmConfig {
        source: LlmSource::AnthropicApi,
        api_key_env: String::from(var_name),
        region: None,
        model: None,
    };
    let outcome = LlmClassifier::from_llm_config(&cfg, "gpt-4o-mini").await;

    assert!(outcome.is_err(), "missing env var must produce Err");
    let err = outcome.err().expect("just asserted is_err");
    let names_variable = err.contains(var_name);
    assert!(names_variable, "error must name the missing var: {err}");
}
/// Passing `"gpt-4o-mini"` to the Anthropic source swaps in the Anthropic default model.
#[tokio::test]
async fn anthropic_default_model_used_when_none_configured() {
    let var_name = "TGA_TEST_ANTHROPIC_DEFAULT_MODEL_3a8d";
    std::env::set_var(var_name, "sk-ant-test-model");

    let cfg = crate::core::config::LlmConfig {
        source: LlmSource::AnthropicApi,
        api_key_env: String::from(var_name),
        region: None,
        model: None,
    };
    let built = LlmClassifier::from_llm_config(&cfg, "gpt-4o-mini").await;

    // Clean up before asserting so a failure does not leave the variable set.
    std::env::remove_var(var_name);

    let classifier = built.expect("build from env");
    assert_eq!(
        classifier.model, ANTHROPIC_DEFAULT_MODEL,
        "must substitute ANTHROPIC_DEFAULT_MODEL, not gpt-4o-mini"
    );
}
/// An `llm:` section alone is enough to enable the LLM tier, even with no
/// `classification` section.
#[test]
fn llm_section_presence_self_enables_tier() {
    let llm_section = crate::core::config::LlmConfig {
        source: LlmSource::AnthropicApi,
        api_key_env: String::from("ANTHROPIC_ANALYTICS_API_KEY"),
        region: None,
        model: Some(String::from("claude-3-5-haiku-latest")),
    };
    let cfg = crate::core::config::Config {
        llm: Some(llm_section),
        classification: None,
        ..crate::core::config::Config::default()
    };

    // Enabled by the section's presence, or by the explicit flag.
    let flag_enabled = cfg.classification.as_ref().map_or(false, |c| c.use_llm);
    let use_llm = cfg.llm.is_some() || flag_enabled;

    assert!(
        use_llm,
        "llm: section presence must self-enable the LLM tier"
    );
}
/// Without an API key the HTTP path yields no result.
#[tokio::test]
async fn classify_returns_none_without_api_key() {
    let keyless = LlmClassifier::new("gpt-4o-mini", None);
    let outcome = keyless.classify("feat: anything").await;
    assert!(outcome.is_none());
}
/// The LLM tier never fills in `ticket_id`, even when the commit message
/// contains a ticket-like token.
#[tokio::test]
async fn classify_does_not_set_ticket_id() {
    let server = MockServer::start().await;
    let endpoint = format!("{}/v1/chat/completions", server.uri());

    let canned = serde_json::json!({
        "choices": [{
            "message": {
                "content": "{\"category\": \"bugfix\", \
                            \"subcategory\": null, \
                            \"confidence\": 0.8}"
            }
        }]
    });
    let stub = Mock::given(method("POST"))
        .and(path("/v1/chat/completions"))
        .respond_with(ResponseTemplate::new(200).set_body_json(canned));
    stub.mount(&server).await;

    let classifier = LlmClassifier::new("gpt-4o-mini", Some(String::from("sk-test")))
        .with_endpoint(endpoint);

    let outcome = classifier
        .classify("fix: handle null in PROJ-1234 endpoint")
        .await
        .expect("LLM verdict");
    assert_eq!(outcome.ticket_id, None);
}
/// `complexity` is optional in the verdict JSON: present maps to `Some`,
/// absent maps to `None`.
#[test]
fn llm_verdict_deserializes_complexity() {
    let scored_json = r#"{"category":"feature","confidence":0.9,"complexity":3}"#;
    let scored: LlmVerdict =
        serde_json::from_str(scored_json).expect("deserialize verdict with complexity");
    assert_eq!(scored.complexity, Some(3));

    let unscored_json = r#"{"category":"feature","confidence":0.9}"#;
    let unscored: LlmVerdict =
        serde_json::from_str(unscored_json).expect("deserialize verdict without complexity");
    assert_eq!(unscored.complexity, None);
}
/// Guards against the prompt losing its request for a complexity score.
#[test]
fn system_prompt_requests_complexity() {
    let mentions_complexity = SYSTEM_PROMPT.contains("complexity");
    assert!(
        mentions_complexity,
        "system prompt must instruct the model to return a complexity score"
    );
}
/// With a key, `classify` calls the configured endpoint and returns exactly
/// what the model answered, even when that disagrees with the commit prefix.
#[tokio::test]
async fn classify_dispatches_to_endpoint_when_keyed() {
    let server = MockServer::start().await;
    let endpoint = format!("{}/v1/chat/completions", server.uri());

    let canned = serde_json::json!({
        "choices": [{
            "message": {
                "content": "{\"category\": \"feature\", \
                            \"subcategory\": \"new-auth\", \
                            \"confidence\": 0.91}"
            }
        }]
    });
    let stub = Mock::given(method("POST"))
        .and(path("/v1/chat/completions"))
        .respond_with(ResponseTemplate::new(200).set_body_json(canned));
    stub.mount(&server).await;

    let classifier = LlmClassifier::new("gpt-4o-mini", Some(String::from("sk-test")))
        .with_endpoint(endpoint);

    let outcome = classifier
        .classify("chore: bump deps")
        .await
        .expect("LLM verdict");

    assert_eq!(outcome.category, "feature");
    assert_eq!(outcome.subcategory.as_deref(), Some("new-auth"));
    assert!((outcome.confidence - 0.91).abs() < 1e-6);
    assert_eq!(outcome.method, ClassificationMethod::LlmFallback);
}
}