use std::collections::{HashMap, HashSet};
use std::time::{SystemTime, UNIX_EPOCH};
use serde::{Deserialize, Serialize};
/// Category of work an inference call performs. Serialized in snake_case,
/// matching the `Display` form used as the per-task statistics key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum InferenceTask {
    /// Free-form text generation.
    Generate,
    /// Embedding computation.
    Embed,
    /// Classification.
    Classify,
    /// Code generation (eligible for git-diff ground-truth outcomes).
    Code,
    /// Reasoning-heavy generation.
    Reasoning,
}
impl std::fmt::Display for InferenceTask {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
InferenceTask::Generate => write!(f, "generate"),
InferenceTask::Embed => write!(f, "embed"),
InferenceTask::Classify => write!(f, "classify"),
InferenceTask::Code => write!(f, "code"),
InferenceTask::Reasoning => write!(f, "reasoning"),
}
}
}
/// Snapshot of one tracked inference call, held in the pending set from
/// dispatch until its outcome is resolved or it fails.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InferenceOutcome {
    /// Id issued by `OutcomeTracker::record_start` (`t-<unix-secs>-<counter>`).
    pub trace_id: String,
    /// Model that served the call.
    pub model_id: String,
    /// Task category; keys the per-task statistics.
    pub task: InferenceTask,
    /// Human-readable explanation of why the router chose this model.
    pub routing_reason: String,
    /// End-to-end latency; 0 until `record_complete` fills it in.
    pub latency_ms: u64,
    /// Prompt token count; 0 until `record_complete` fills it in.
    pub input_tokens: usize,
    /// Completion token count; 0 until `record_complete` fills it in.
    pub output_tokens: usize,
    /// Heuristic outcome, when one was attached to the record.
    /// NOTE(review): not populated by the tracker code visible in this file.
    pub inferred_outcome: Option<InferredOutcome>,
    /// Ground-truth code outcome, when one was attached to the record.
    /// NOTE(review): not populated by the tracker code visible in this file.
    pub code_outcome: Option<CodeOutcome>,
    /// Error message set by `record_failure` when the call failed.
    pub error: Option<String>,
    /// Unix seconds when the trace was opened.
    pub timestamp: u64,
}
/// Heuristic assessment of whether a model's output was actually used.
/// Serialized internally tagged, e.g. `{"type": "accepted", "confidence": 0.9}`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum InferredOutcome {
    /// Output appears to have been used as-is.
    Accepted { confidence: f64 },
    /// Output appears to have been used after edits.
    AcceptedWithEdits { confidence: f64 },
    /// Output appears to have been discarded.
    Rejected { confidence: f64 },
    /// No usable signal either way.
    Inconclusive,
}
impl InferredOutcome {
pub fn quality_score(&self) -> Option<f64> {
match self {
InferredOutcome::Accepted { confidence } => Some(*confidence),
InferredOutcome::AcceptedWithEdits { confidence } => Some(confidence * 0.7),
InferredOutcome::Rejected { confidence } => Some((1.0 - confidence) * 0.3),
InferredOutcome::Inconclusive => None,
}
}
pub fn is_success(&self) -> Option<bool> {
match self {
InferredOutcome::Accepted { .. } => Some(true),
InferredOutcome::AcceptedWithEdits { .. } => Some(true),
InferredOutcome::Rejected { .. } => Some(false),
InferredOutcome::Inconclusive => None,
}
}
}
/// Ground-truth outcome for code-generation calls, observed from the working
/// tree (git diffs, optionally refined by AST diffing under the `ast` feature).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum CodeOutcome {
    /// Generated code appears in the diff verbatim.
    Applied,
    /// Generated code appears only in altered form.
    Modified,
    /// Generated code never reached the working tree.
    Ignored,
    /// AST diff: a symbol's signature changed (also used for removed symbols).
    SignatureChanged,
    /// AST diff: only a symbol's body changed.
    BodyModified,
    /// AST diff: a new symbol was added.
    SymbolAdded,
}
impl CodeOutcome {
pub fn quality_score(&self) -> f64 {
match self {
CodeOutcome::Applied => 1.0,
CodeOutcome::SignatureChanged => 0.8,
CodeOutcome::BodyModified => 0.7,
CodeOutcome::SymbolAdded => 0.7,
CodeOutcome::Modified => 0.6,
CodeOutcome::Ignored => 0.1,
}
}
pub fn is_success(&self) -> bool {
!matches!(self, CodeOutcome::Ignored)
}
}
/// Per-(model, task) counters, updated incrementally as traces complete
/// and resolve.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TaskStats {
    /// Completed calls (incremented on completion, before any outcome).
    pub calls: u64,
    /// Traces resolved as success.
    pub successes: u64,
    /// Traces resolved as failure.
    pub failures: u64,
    /// Incrementally maintained mean latency of completed calls.
    pub avg_latency_ms: f64,
    /// EMA of quality samples for this task (starts at `Default`'s 0.0).
    pub ema_quality: f64,
}
impl TaskStats {
    /// Fraction of resolved traces that succeeded; returns a neutral 0.5
    /// when nothing has resolved yet.
    pub fn success_rate(&self) -> f64 {
        let resolved = self.successes + self.failures;
        if resolved > 0 {
            self.successes as f64 / resolved as f64
        } else {
            0.5
        }
    }
}
/// Aggregated performance profile for a single model; persisted as JSON by
/// `OutcomeTracker::save_to_file` / `load_from_file`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelProfile {
    pub model_id: String,
    /// Calls that reached `record_complete` (hard failures before completion
    /// are not counted here).
    pub total_calls: u64,
    /// Traces resolved as success.
    pub success_count: u64,
    /// Traces resolved as failure (hard errors and negative outcomes).
    pub fail_count: u64,
    /// Sum of latencies over completed calls.
    pub total_latency_ms: u64,
    /// Lifetime prompt tokens (`serde(default)` keeps older saved profiles loadable).
    #[serde(default)]
    pub total_input_tokens: u64,
    /// Lifetime completion tokens.
    #[serde(default)]
    pub total_output_tokens: u64,
    /// Stats keyed by task name (the `Display` form of `InferenceTask`).
    pub task_stats: HashMap<String, TaskStats>,
    /// EMA of quality samples; initialized to a neutral 0.5 prior.
    pub ema_quality: f64,
    /// Derived metric, refreshed on export by `compute_quality_per_1k_tokens`.
    #[serde(default)]
    pub quality_per_1k_tokens: f64,
    /// Unix seconds of the last update.
    pub updated_at: u64,
}
impl ModelProfile {
    /// Fresh profile with zeroed counters and a neutral 0.5 quality prior.
    pub fn new(model_id: String) -> Self {
        Self {
            model_id,
            total_calls: 0,
            success_count: 0,
            fail_count: 0,
            total_latency_ms: 0,
            total_input_tokens: 0,
            total_output_tokens: 0,
            task_stats: HashMap::new(),
            ema_quality: 0.5,
            quality_per_1k_tokens: 0.0,
            updated_at: now_unix(),
        }
    }

    /// Fraction of resolved traces that succeeded; a neutral 0.5 when no
    /// trace has resolved yet.
    pub fn success_rate(&self) -> f64 {
        match self.success_count + self.fail_count {
            0 => 0.5,
            resolved => self.success_count as f64 / resolved as f64,
        }
    }

    /// Mean latency over completed calls, or 0.0 with no completed calls.
    pub fn avg_latency_ms(&self) -> f64 {
        match self.total_calls {
            0 => 0.0,
            calls => self.total_latency_ms as f64 / calls as f64,
        }
    }

    /// True once failures outnumber successes by more than `threshold`.
    pub fn should_degrade(&self, threshold: u64) -> bool {
        self.success_count + threshold < self.fail_count
    }

    /// Per-task stats, if any call of this task has been recorded.
    pub fn task_stats(&self, task: InferenceTask) -> Option<&TaskStats> {
        self.task_stats.get(&task.to_string())
    }

    /// Lifetime prompt + completion tokens.
    pub fn total_tokens(&self) -> u64 {
        self.total_input_tokens + self.total_output_tokens
    }

    /// Quality EMA normalized per thousand lifetime tokens; 0.0 when no
    /// tokens have been recorded yet.
    pub fn compute_quality_per_1k_tokens(&self) -> f64 {
        match self.total_tokens() {
            0 => 0.0,
            tokens => self.ema_quality * 1000.0 / tokens as f64,
        }
    }
}
/// Smoothing factor for the exponential moving averages of quality: each new
/// sample contributes 20%, accumulated history retains 80%.
const EMA_ALPHA: f64 = 0.2;
/// Tracks inference calls end-to-end: opens pending traces, resolves them
/// into per-model profiles, and excludes rate-limited models from routing.
pub struct OutcomeTracker {
    /// Per-model aggregates keyed by model id.
    profiles: HashMap<String, ModelProfile>,
    /// Traces awaiting resolution, keyed by trace id.
    pending: HashMap<String, InferenceOutcome>,
    /// Monotonic counter making trace ids unique within the same second.
    trace_counter: u64,
    /// Model ids excluded after rate-limit failures.
    excluded: HashSet<String>,
}
impl OutcomeTracker {
pub fn new() -> Self {
Self {
profiles: HashMap::new(),
pending: HashMap::new(),
trace_counter: 0,
excluded: HashSet::new(),
}
}
pub fn is_excluded(&self, model_id: &str) -> bool {
self.excluded.contains(model_id)
}
pub fn record_start(
&mut self,
model_id: &str,
task: InferenceTask,
routing_reason: &str,
) -> String {
self.trace_counter += 1;
let trace_id = format!("t-{}-{}", now_unix(), self.trace_counter);
let outcome = InferenceOutcome {
trace_id: trace_id.clone(),
model_id: model_id.to_string(),
task,
routing_reason: routing_reason.to_string(),
latency_ms: 0,
input_tokens: 0,
output_tokens: 0,
inferred_outcome: None,
code_outcome: None,
error: None,
timestamp: now_unix(),
};
self.pending.insert(trace_id.clone(), outcome);
trace_id
}
pub fn record_complete(
&mut self,
trace_id: &str,
latency_ms: u64,
input_tokens: usize,
output_tokens: usize,
) {
if let Some(outcome) = self.pending.get_mut(trace_id) {
outcome.latency_ms = latency_ms;
outcome.input_tokens = input_tokens;
outcome.output_tokens = output_tokens;
let profile = self
.profiles
.entry(outcome.model_id.clone())
.or_insert_with(|| ModelProfile::new(outcome.model_id.clone()));
profile.total_calls += 1;
profile.total_latency_ms += latency_ms;
profile.total_input_tokens += input_tokens as u64;
profile.total_output_tokens += output_tokens as u64;
let task_key = outcome.task.to_string();
let ts = profile.task_stats.entry(task_key).or_default();
ts.calls += 1;
ts.avg_latency_ms =
ts.avg_latency_ms + (latency_ms as f64 - ts.avg_latency_ms) / ts.calls as f64;
profile.updated_at = now_unix();
}
}
pub fn record_failure(&mut self, trace_id: &str, error: &str) {
if let Some(outcome) = self.pending.get_mut(trace_id) {
outcome.error = Some(error.to_string());
let profile = self
.profiles
.entry(outcome.model_id.clone())
.or_insert_with(|| ModelProfile::new(outcome.model_id.clone()));
profile.fail_count += 1;
let is_rate_limited = error.contains("429") || error.contains("RESOURCE_EXHAUSTED");
if is_rate_limited {
self.excluded.insert(outcome.model_id.clone());
profile.ema_quality *= 0.1;
} else {
profile.ema_quality = profile.ema_quality * (1.0 - EMA_ALPHA) + 0.0 * EMA_ALPHA;
}
let task_key = outcome.task.to_string();
let ts = profile.task_stats.entry(task_key).or_default();
ts.failures += 1;
if is_rate_limited {
ts.ema_quality *= 0.1;
} else {
ts.ema_quality = ts.ema_quality * (1.0 - EMA_ALPHA);
}
profile.updated_at = now_unix();
}
self.pending.remove(trace_id);
}
pub fn record_inferred_outcome(&mut self, trace_id: &str, outcome: InferredOutcome) {
if let Some(pending) = self.pending.remove(trace_id) {
self.apply_outcome(&pending, outcome.quality_score(), outcome.is_success());
}
}
pub fn record_code_outcome(&mut self, trace_id: &str, outcome: CodeOutcome) {
if let Some(pending) = self.pending.remove(trace_id) {
self.apply_outcome(
&pending,
Some(outcome.quality_score()),
Some(outcome.is_success()),
);
}
}
pub fn resolve_pending_from_signals(&mut self, outcomes: Vec<(String, InferredOutcome)>) {
for (trace_id, inferred) in outcomes {
self.record_inferred_outcome(&trace_id, inferred);
}
}
pub fn infer_outcomes_from_action_sequence(
&self,
action_results: &[(String, bool, f64, String)], ) -> Vec<(String, InferredOutcome)> {
let mut outcomes = Vec::new();
for (i, (trace_id, success, confidence, output)) in action_results.iter().enumerate() {
if trace_id.is_empty() {
continue; }
if !success {
outcomes.push((
trace_id.clone(),
InferredOutcome::Rejected {
confidence: *confidence,
},
));
continue;
}
let next_succeeded = action_results
.get(i + 1)
.map(|(_, s, _, _)| *s)
.unwrap_or(true);
let has_output = !output.trim().is_empty();
if has_output && next_succeeded {
outcomes.push((
trace_id.clone(),
InferredOutcome::Accepted {
confidence: *confidence,
},
));
} else if has_output && !next_succeeded {
outcomes.push((
trace_id.clone(),
InferredOutcome::AcceptedWithEdits {
confidence: confidence * 0.7,
},
));
} else {
outcomes.push((trace_id.clone(), InferredOutcome::Inconclusive));
}
}
outcomes
}
pub fn profile(&self, model_id: &str) -> Option<&ModelProfile> {
self.profiles.get(model_id)
}
pub fn all_profiles(&self) -> &HashMap<String, ModelProfile> {
&self.profiles
}
pub fn pending_trace_ids(&self) -> Vec<String> {
self.pending.keys().cloned().collect()
}
pub fn get_pending(&self, trace_id: &str) -> Option<&InferenceOutcome> {
self.pending.get(trace_id)
}
pub fn export_profiles(&self) -> Vec<ModelProfile> {
self.profiles
.values()
.cloned()
.map(|mut p| {
p.quality_per_1k_tokens = p.compute_quality_per_1k_tokens();
p
})
.collect()
}
pub fn import_profiles(&mut self, profiles: Vec<ModelProfile>) {
for p in profiles {
self.profiles.insert(p.model_id.clone(), p);
}
}
pub fn save_to_file(&self, path: &std::path::Path) -> Result<(), std::io::Error> {
let profiles = self.export_profiles();
let json = serde_json::to_string_pretty(&profiles)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
if let Some(parent) = path.parent() {
std::fs::create_dir_all(parent)?;
}
std::fs::write(path, json)
}
pub fn load_from_file(&mut self, path: &std::path::Path) -> Result<usize, std::io::Error> {
if !path.exists() {
return Ok(0);
}
let json = std::fs::read_to_string(path)?;
let profiles: Vec<ModelProfile> = serde_json::from_str(&json)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
let count = profiles.len();
self.import_profiles(profiles);
Ok(count)
}
fn apply_outcome(
&mut self,
pending: &InferenceOutcome,
quality: Option<f64>,
success: Option<bool>,
) {
let profile = self
.profiles
.entry(pending.model_id.clone())
.or_insert_with(|| ModelProfile::new(pending.model_id.clone()));
if let Some(q) = quality {
profile.ema_quality = profile.ema_quality * (1.0 - EMA_ALPHA) + q * EMA_ALPHA;
let task_key = pending.task.to_string();
let ts = profile.task_stats.entry(task_key).or_default();
ts.ema_quality = ts.ema_quality * (1.0 - EMA_ALPHA) + q * EMA_ALPHA;
}
if let Some(ok) = success {
if ok {
profile.success_count += 1;
let task_key = pending.task.to_string();
let ts = profile.task_stats.entry(task_key).or_default();
ts.successes += 1;
} else {
profile.fail_count += 1;
let task_key = pending.task.to_string();
let ts = profile.task_stats.entry(task_key).or_default();
ts.failures += 1;
}
}
profile.updated_at = now_unix();
}
pub fn check_git_outcomes(&mut self, repo_dir: &std::path::Path) {
let diff = match std::process::Command::new("git")
.args(["diff", "--no-color"])
.current_dir(repo_dir)
.output()
{
Ok(output) => String::from_utf8_lossy(&output.stdout).to_string(),
Err(_) => return,
};
let staged_diff = match std::process::Command::new("git")
.args(["diff", "--cached", "--no-color"])
.current_dir(repo_dir)
.output()
{
Ok(output) => String::from_utf8_lossy(&output.stdout).to_string(),
Err(_) => String::new(),
};
let combined_diff = format!("{}\n{}", diff, staged_diff);
if combined_diff.trim().is_empty() {
return; }
#[cfg(feature = "ast")]
let ast_outcome = Self::check_git_outcomes_ast(repo_dir);
let code_traces: Vec<(String, String)> = self
.pending
.iter()
.filter(|(_, o)| matches!(o.task, InferenceTask::Code))
.map(|(id, o)| (id.clone(), o.model_id.clone()))
.collect();
for (trace_id, _model_id) in code_traces {
if let Some(pending) = self.pending.get(&trace_id) {
#[cfg(feature = "ast")]
if let Some(ref ast_out) = ast_outcome {
let pending_clone = pending.clone();
self.apply_outcome(
&pending_clone,
Some(ast_out.quality_score()),
Some(ast_out.is_success()),
);
continue;
}
let output_tokens: Vec<&str> = pending
.routing_reason
.split_whitespace()
.filter(|t| t.len() > 5)
.collect();
let outcome = if output_tokens.iter().any(|t| combined_diff.contains(t)) {
CodeOutcome::Applied
} else {
CodeOutcome::Modified
};
let pending_clone = pending.clone();
self.apply_outcome(
&pending_clone,
Some(outcome.quality_score()),
Some(outcome.is_success()),
);
}
}
}
#[cfg(feature = "ast")]
fn check_git_outcomes_ast(repo_dir: &std::path::Path) -> Option<CodeOutcome> {
let name_only = std::process::Command::new("git")
.args(["diff", "--name-only"])
.current_dir(repo_dir)
.output()
.ok()?;
let changed_files: Vec<&str> = std::str::from_utf8(&name_only.stdout)
.ok()?
.lines()
.filter(|f| !f.is_empty())
.collect();
if changed_files.is_empty() {
return None;
}
let mut has_sig_change = false;
let mut has_body_change = false;
let mut has_addition = false;
for file in &changed_files {
if car_ast::Language::from_filename(file).is_none() {
continue;
}
let old_content = std::process::Command::new("git")
.args(["show", &format!("HEAD:{}", file)])
.current_dir(repo_dir)
.output()
.ok()
.and_then(|o| {
if o.status.success() {
String::from_utf8(o.stdout).ok()
} else {
None
}
});
let new_path = repo_dir.join(file);
let new_content = std::fs::read_to_string(&new_path).ok();
match (old_content, new_content) {
(Some(old), Some(new)) => {
let old_parsed = car_ast::parse_file(&old, file);
let new_parsed = car_ast::parse_file(&new, file);
if let (Some(old_p), Some(new_p)) = (old_parsed, new_parsed) {
let changes = car_ast::diff_symbols(&old_p, &new_p);
for change in &changes {
match change {
car_ast::SymbolChange::Added(_) => has_addition = true,
car_ast::SymbolChange::Modified {
signature_changed, ..
} => {
if *signature_changed {
has_sig_change = true;
} else {
has_body_change = true;
}
}
car_ast::SymbolChange::Removed(_) => has_sig_change = true,
}
}
}
}
(None, Some(_)) => has_addition = true, _ => {}
}
}
if has_sig_change {
Some(CodeOutcome::SignatureChanged)
} else if has_body_change {
Some(CodeOutcome::BodyModified)
} else if has_addition {
Some(CodeOutcome::SymbolAdded)
} else {
None }
}
}
/// `Default` mirrors `new()`: an empty tracker.
impl Default for OutcomeTracker {
    fn default() -> Self {
        Self::new()
    }
}
/// Current wall-clock time as whole seconds since the Unix epoch; yields 0
/// if the system clock reads earlier than the epoch.
fn now_unix() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Start -> complete -> inferred outcome updates call count, latency,
    /// success count, and pulls the quality EMA above its 0.5 prior.
    #[test]
    fn lifecycle() {
        let mut tracker = OutcomeTracker::new();
        let trace = tracker.record_start(
            "qwen/qwen3-4b:q4_k_m",
            InferenceTask::Code,
            "Code task -> Qwen3-4B",
        );
        tracker.record_complete(&trace, 1200, 100, 50);
        let profile = tracker.profile("qwen/qwen3-4b:q4_k_m").unwrap();
        assert_eq!(profile.total_calls, 1);
        assert_eq!(profile.avg_latency_ms(), 1200.0);
        tracker.record_inferred_outcome(&trace, InferredOutcome::Accepted { confidence: 0.9 });
        let profile = tracker.profile("qwen/qwen3-4b:q4_k_m").unwrap();
        assert_eq!(profile.success_count, 1);
        assert!(profile.ema_quality > 0.5);
    }

    /// Five non-rate-limit failures decay the EMA (0.5 * 0.8^5 ~= 0.16) and
    /// trip `should_degrade`. A dead first loop that fed fabricated trace ids
    /// (`t-fail-{i}`) to `record_failure` — a no-op — has been removed.
    #[test]
    fn failure_degrades() {
        let mut tracker = OutcomeTracker::new();
        for _ in 0..5 {
            let trace = tracker.record_start("bad-model", InferenceTask::Generate, "test");
            tracker.record_complete(&trace, 100, 10, 5);
            tracker.record_failure(&trace, "timeout");
        }
        let profile = tracker.profile("bad-model").unwrap();
        assert_eq!(profile.fail_count, 5);
        assert!(profile.should_degrade(2));
        assert!(profile.ema_quality < 0.3);
    }

    /// A ground-truth `Applied` (quality 1.0) moves the EMA from 0.5 to
    /// 0.5 * 0.8 + 1.0 * 0.2 = 0.6.
    #[test]
    fn code_outcome_ground_truth() {
        let mut tracker = OutcomeTracker::new();
        let trace = tracker.record_start("qwen/qwen3-4b:q4_k_m", InferenceTask::Code, "code");
        tracker.record_complete(&trace, 500, 200, 100);
        tracker.record_code_outcome(&trace, CodeOutcome::Applied);
        let profile = tracker.profile("qwen/qwen3-4b:q4_k_m").unwrap();
        assert_eq!(profile.success_count, 1);
        assert!((profile.ema_quality - 0.6).abs() < 0.01);
    }

    /// Code and Generate traces land in separate per-task buckets.
    #[test]
    fn per_task_stats() {
        let mut tracker = OutcomeTracker::new();
        for _ in 0..2 {
            let trace = tracker.record_start("m1", InferenceTask::Code, "code");
            tracker.record_complete(&trace, 1000, 100, 50);
            tracker.record_inferred_outcome(&trace, InferredOutcome::Accepted { confidence: 0.8 });
        }
        let trace = tracker.record_start("m1", InferenceTask::Generate, "gen");
        tracker.record_complete(&trace, 500, 50, 25);
        tracker.record_inferred_outcome(&trace, InferredOutcome::Rejected { confidence: 0.9 });
        let profile = tracker.profile("m1").unwrap();
        assert_eq!(profile.total_calls, 3);
        let code_stats = profile.task_stats(InferenceTask::Code).unwrap();
        assert_eq!(code_stats.calls, 2);
        assert_eq!(code_stats.successes, 2);
        let gen_stats = profile.task_stats(InferenceTask::Generate).unwrap();
        assert_eq!(gen_stats.calls, 1);
        assert_eq!(gen_stats.failures, 1);
    }

    /// Export derives quality_per_1k_tokens: ema 0.6 over 1000 tokens -> 0.6.
    #[test]
    fn export_populates_quality_per_1k_tokens() {
        let mut tracker = OutcomeTracker::new();
        let trace = tracker.record_start("m1", InferenceTask::Generate, "test");
        tracker.record_complete(&trace, 100, 800, 200);
        tracker.record_inferred_outcome(&trace, InferredOutcome::Accepted { confidence: 1.0 });
        let exported = tracker.export_profiles();
        assert_eq!(exported.len(), 1);
        let p = &exported[0];
        assert!(
            (p.quality_per_1k_tokens - 0.6).abs() < 1e-6,
            "got {}",
            p.quality_per_1k_tokens
        );
    }

    /// With zero lifetime tokens the derived metric is defined as 0.0.
    #[test]
    fn quality_per_1k_tokens_zero_without_tokens() {
        let profile = ModelProfile::new("x".into());
        assert_eq!(profile.compute_quality_per_1k_tokens(), 0.0);
    }

    /// Exported profiles can be imported into a fresh tracker.
    #[test]
    fn export_import() {
        let mut tracker = OutcomeTracker::new();
        let trace = tracker.record_start("m1", InferenceTask::Generate, "test");
        tracker.record_complete(&trace, 100, 10, 5);
        tracker.record_inferred_outcome(&trace, InferredOutcome::Accepted { confidence: 0.9 });
        let exported = tracker.export_profiles();
        assert_eq!(exported.len(), 1);
        let mut tracker2 = OutcomeTracker::new();
        tracker2.import_profiles(exported);
        assert!(tracker2.profile("m1").is_some());
    }
}