use std::collections::{HashMap, HashSet};
use std::time::{SystemTime, UNIX_EPOCH};
use serde::{Deserialize, Serialize};
/// Category of work an inference call is routed for.
///
/// Serde uses `snake_case` names; the `Display` impl produces the same lowercase
/// strings, which are also used as the keys of `ModelProfile::task_stats`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum InferenceTask {
Generate,
Embed,
Classify,
Code,
Reasoning,
}
impl std::fmt::Display for InferenceTask {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
InferenceTask::Generate => write!(f, "generate"),
InferenceTask::Embed => write!(f, "embed"),
InferenceTask::Classify => write!(f, "classify"),
InferenceTask::Code => write!(f, "code"),
InferenceTask::Reasoning => write!(f, "reasoning"),
}
}
}
/// In-flight record of a single inference call.
///
/// Created by `OutcomeTracker::record_start`, filled in by
/// `record_complete_cached`, and held in the tracker's `pending` map until the
/// call is resolved, fails, or is swept.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InferenceOutcome {
/// Identifier of the form `t-<unix secs>-<counter>` generated by `record_start`.
pub trace_id: String,
/// Model the call was routed to.
pub model_id: String,
/// Task category of the call.
pub task: InferenceTask,
/// Routing explanation, truncated via `redact_error` when recorded.
pub routing_reason: String,
/// Latency reported at completion; stays 0 until `record_complete_cached` runs.
pub latency_ms: u64,
/// Input tokens reported at completion (accumulated separately from the cache buckets).
pub input_tokens: usize,
/// Output tokens reported at completion.
pub output_tokens: usize,
/// Input tokens reported as cache reads; defaults to 0 when absent on deserialize.
#[serde(default)]
pub cache_read_input_tokens: usize,
/// Input tokens reported as cache creation; defaults to 0 when absent on deserialize.
#[serde(default)]
pub cache_creation_input_tokens: usize,
/// Initialized to `None` by `record_start`; not written elsewhere in the visible code.
pub inferred_outcome: Option<InferredOutcome>,
/// Initialized to `None` by `record_start`; not written elsewhere in the visible code.
pub code_outcome: Option<CodeOutcome>,
/// Error text, set by `record_failure`.
pub error: Option<String>,
/// Unix seconds at `record_start`; compared against the TTL cutoff when sweeping.
pub timestamp: u64,
/// True once `record_complete_cached` has already counted this call as a
/// success, so later resolution does not count it twice. Never serialized.
#[serde(skip)]
pub success_credited: bool,
}
/// Outcome inferred from indirect signals rather than reported directly.
///
/// Serialized as an internally tagged enum (`"type"` field, `snake_case` names).
/// `confidence` feeds `quality_score`; its expected range is not enforced here
/// (presumably 0.0..=1.0 — confirm against callers).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum InferredOutcome {
Accepted { confidence: f64 },
AcceptedWithEdits { confidence: f64 },
Rejected { confidence: f64 },
/// Carries no signal: yields neither a quality score nor a success flag.
Inconclusive,
}
impl InferredOutcome {
pub fn quality_score(&self) -> Option<f64> {
match self {
InferredOutcome::Accepted { confidence } => Some(*confidence),
InferredOutcome::AcceptedWithEdits { confidence } => Some(confidence * 0.7),
InferredOutcome::Rejected { confidence } => Some((1.0 - confidence) * 0.3),
InferredOutcome::Inconclusive => None,
}
}
pub fn is_success(&self) -> Option<bool> {
match self {
InferredOutcome::Accepted { .. } => Some(true),
InferredOutcome::AcceptedWithEdits { .. } => Some(true),
InferredOutcome::Rejected { .. } => Some(false),
InferredOutcome::Inconclusive => None,
}
}
}
/// Observed fate of a code suggestion.
///
/// Serialized as an internally tagged enum (`"type"` field, `snake_case` names).
/// In the visible code `SignatureChanged`, `BodyModified` and `SymbolAdded` are
/// produced only by the `ast`-feature git check; `Applied` and `Modified` by the
/// plain-text diff heuristic.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum CodeOutcome {
Applied,
Modified,
/// The only variant that `is_success` treats as a failure.
Ignored,
SignatureChanged,
BodyModified,
SymbolAdded,
}
impl CodeOutcome {
pub fn quality_score(&self) -> f64 {
match self {
CodeOutcome::Applied => 1.0,
CodeOutcome::SignatureChanged => 0.8,
CodeOutcome::BodyModified => 0.7,
CodeOutcome::SymbolAdded => 0.7,
CodeOutcome::Modified => 0.6,
CodeOutcome::Ignored => 0.1,
}
}
pub fn is_success(&self) -> bool {
!matches!(self, CodeOutcome::Ignored)
}
}
/// Seeds missing `quality_observations` counters from failure counts.
///
/// Applied to profiles loaded from disk: when a counter is still zero but
/// failures were recorded, the failure count is used as the observation count.
/// The same rule is applied to the profile and to each of its per-task stats.
fn backfill_quality_observations(p: &mut ModelProfile) {
    if p.fail_count > 0 && p.quality_observations == 0 {
        p.quality_observations = p.fail_count;
    }
    p.task_stats
        .values_mut()
        .filter(|stats| stats.quality_observations == 0 && stats.failures > 0)
        .for_each(|stats| stats.quality_observations = stats.failures);
}
/// Per-task counters kept inside a `ModelProfile`.
///
/// Entries are created with `Default`, so `ema_quality` starts at 0.0 here
/// (whereas `ModelProfile::new` starts the profile-level EMA at 0.5).
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TaskStats {
/// Number of completed calls recorded for this task.
pub calls: u64,
/// Calls currently counted as successes.
pub successes: u64,
/// Calls counted as failures.
pub failures: u64,
/// Running mean of completion latency in milliseconds.
pub avg_latency_ms: f64,
/// Exponential moving average of quality scores for this task.
pub ema_quality: f64,
/// Not written anywhere in the visible code; defaults to 0 on deserialize.
#[serde(default)]
pub prior_sample_size: usize,
/// Number of updates that have been folded into `ema_quality`.
#[serde(default)]
pub quality_observations: u64,
}
impl TaskStats {
    /// Fraction of resolved calls (successes + failures) that succeeded.
    ///
    /// Returns the neutral value 0.5 when nothing has been resolved yet.
    pub fn success_rate(&self) -> f64 {
        match self.successes + self.failures {
            0 => 0.5,
            resolved => self.successes as f64 / resolved as f64,
        }
    }
}
/// Aggregated statistics for one model, persisted as JSON by `OutcomeTracker`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelProfile {
/// Model identifier; also the key in the tracker's profile map.
pub model_id: String,
/// Incremented by both `record_complete_cached` and `record_failure`.
pub total_calls: u64,
/// Calls currently counted as successes.
pub success_count: u64,
/// Calls counted as failures.
pub fail_count: u64,
/// Sum of completion latencies; failures add nothing here.
pub total_latency_ms: u64,
/// Sum of reported input tokens (kept separate from the two cache buckets).
#[serde(default)]
pub total_input_tokens: u64,
/// Sum of reported output tokens.
#[serde(default)]
pub total_output_tokens: u64,
/// Sum of reported cache-read input tokens.
#[serde(default)]
pub total_cache_read_input_tokens: u64,
/// Sum of reported cache-creation input tokens.
#[serde(default)]
pub total_cache_creation_input_tokens: u64,
/// Per-task statistics keyed by the `Display` string of `InferenceTask`.
pub task_stats: HashMap<String, TaskStats>,
/// Exponential moving average of quality scores; starts at 0.5 in `new`.
pub ema_quality: f64,
/// Not written anywhere in the visible code; defaults to 0 on deserialize.
#[serde(default)]
pub prior_sample_size: usize,
/// Number of updates that have been folded into `ema_quality`.
#[serde(default)]
pub quality_observations: u64,
/// Derived value; recomputed by `OutcomeTracker::export_profiles` on export.
#[serde(default)]
pub quality_per_1k_tokens: f64,
/// Unix seconds of the last mutation.
pub updated_at: u64,
}
impl ModelProfile {
    /// Creates an empty profile for `model_id` with a neutral quality EMA of
    /// 0.5 and `updated_at` set to the current time.
    pub fn new(model_id: String) -> Self {
        Self {
            model_id,
            // Call counters.
            total_calls: 0,
            success_count: 0,
            fail_count: 0,
            total_latency_ms: 0,
            // Token totals.
            total_input_tokens: 0,
            total_output_tokens: 0,
            total_cache_read_input_tokens: 0,
            total_cache_creation_input_tokens: 0,
            task_stats: HashMap::new(),
            // Quality tracking.
            ema_quality: 0.5,
            prior_sample_size: 0,
            quality_observations: 0,
            quality_per_1k_tokens: 0.0,
            updated_at: now_unix(),
        }
    }

    /// Success fraction over resolved calls (successes + failures), or `None`
    /// when nothing has been resolved yet.
    pub fn success_rate_resolved(&self) -> Option<f64> {
        let resolved = self.success_count + self.fail_count;
        (resolved != 0).then(|| self.success_count as f64 / resolved as f64)
    }

    /// Mean latency over `total_calls`; 0.0 when no calls were recorded.
    pub fn avg_latency_ms(&self) -> f64 {
        match self.total_calls {
            0 => 0.0,
            calls => self.total_latency_ms as f64 / calls as f64,
        }
    }

    /// True when failures exceed successes by more than `threshold`.
    pub fn should_degrade(&self, threshold: u64) -> bool {
        let tolerated = self.success_count + threshold;
        self.fail_count > tolerated
    }

    /// Looks up the per-task statistics for `task`, if any were recorded.
    pub fn task_stats(&self, task: InferenceTask) -> Option<&TaskStats> {
        let key = task.to_string();
        self.task_stats.get(&key)
    }

    /// Sum of all four token buckets (input, cache read, cache creation, output).
    pub fn total_tokens(&self) -> u64 {
        [
            self.total_input_tokens,
            self.total_cache_read_input_tokens,
            self.total_cache_creation_input_tokens,
            self.total_output_tokens,
        ]
        .iter()
        .sum::<u64>()
    }

    /// `ema_quality * 1000 / total_tokens`, or 0.0 when no tokens were recorded.
    pub fn compute_quality_per_1k_tokens(&self) -> f64 {
        match self.total_tokens() {
            0 => 0.0,
            tokens => self.ema_quality * 1000.0 / tokens as f64,
        }
    }

    /// Total tokens spent per counted success, or `None` without successes.
    pub fn tokens_per_success(&self) -> Option<f64> {
        match self.success_count {
            0 => None,
            wins => Some(self.total_tokens() as f64 / wins as f64),
        }
    }

    /// Estimated USD spent per counted success, or `None` without successes.
    ///
    /// Prices are per million tokens. Input is priced through
    /// `priced_input_usd`, so the cache buckets use the multipliers in `cache`;
    /// output tokens are priced at `output_per_mtok`.
    pub fn usd_per_success(
        &self,
        input_per_mtok: f64,
        output_per_mtok: f64,
        cache: CacheRates,
    ) -> Option<f64> {
        if self.success_count == 0 {
            return None;
        }
        let input_usd = priced_input_usd(
            self.total_input_tokens,
            self.total_cache_read_input_tokens,
            self.total_cache_creation_input_tokens,
            input_per_mtok,
            cache,
        );
        let output_usd = self.total_output_tokens as f64 * output_per_mtok / 1_000_000.0;
        let total_usd = input_usd + output_usd;
        Some(total_usd / self.success_count as f64)
    }
}
/// Multipliers applied to the base input price for cached input tokens.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct CacheRates {
/// Factor applied to the input price for cache-read tokens.
pub read_mult: f64,
/// Factor applied to the input price for cache-creation tokens.
pub write_mult: f64,
}
impl CacheRates {
/// Preset: cache reads at 0.1x and cache writes at 1.25x the input price.
/// This is also the `Default`.
pub const ANTHROPIC: Self = Self {
read_mult: 0.1,
write_mult: 1.25,
};
/// Preset: cache reads at 0.5x the input price; cache-creation tokens are priced at zero.
pub const OPENAI: Self = Self {
read_mult: 0.5,
write_mult: 0.0,
};
/// Preset: both cache buckets are priced at zero.
pub const NONE: Self = Self {
read_mult: 0.0,
write_mult: 0.0,
};
}
impl Default for CacheRates {
fn default() -> Self {
Self::ANTHROPIC
}
}
/// Prices the three input-token buckets in USD.
///
/// `input_per_mtok` is the price per million uncached input tokens. Cache-read
/// and cache-creation tokens are charged at that price scaled by
/// `cache.read_mult` and `cache.write_mult` respectively.
pub fn priced_input_usd(
    uncached_input_tokens: u64,
    cache_read_input_tokens: u64,
    cache_creation_input_tokens: u64,
    input_per_mtok: f64,
    cache: CacheRates,
) -> f64 {
    let uncached_cost = uncached_input_tokens as f64 * input_per_mtok;
    let read_cost = cache_read_input_tokens as f64 * input_per_mtok * cache.read_mult;
    let write_cost = cache_creation_input_tokens as f64 * input_per_mtok * cache.write_mult;
    // Summed left to right, then scaled from per-million to per-token pricing.
    (uncached_cost + read_cost + write_cost) / 1_000_000.0
}
/// Smoothing factor for the quality EMAs: each new observation gets weight
/// `EMA_ALPHA`, the previous average keeps `1.0 - EMA_ALPHA`.
const EMA_ALPHA: f64 = 0.2;
/// One resolved call as written to the JSONL outcome ledger (one JSON object per line).
///
/// Optional fields are omitted from the output when `None`, and the two cache
/// counters are omitted when zero; all of them default on deserialize.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutcomeLedgerEntry {
pub trace_id: String,
pub model_id: String,
pub task: InferenceTask,
pub routing_reason: String,
pub latency_ms: u64,
pub input_tokens: usize,
pub output_tokens: usize,
#[serde(default, skip_serializing_if = "is_zero_usize")]
pub cache_read_input_tokens: usize,
#[serde(default, skip_serializing_if = "is_zero_usize")]
pub cache_creation_input_tokens: usize,
/// `Some(true)`/`Some(false)` when the call was judged; `None` when unknown.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub success: Option<bool>,
/// Quality score folded into the EMA for this call, if any.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub quality: Option<f64>,
/// Error text, truncated via `redact_error` before being stored.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
/// Always `None` in the entries built by the visible code.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub project_id: Option<String>,
/// Always `None` in the entries built by the visible code.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub intent: Option<String>,
/// Unix seconds at which the entry was created.
pub timestamp: u64,
}
/// Serde `skip_serializing_if` predicate: true when the counter is zero.
fn is_zero_usize(n: &usize) -> bool {
    matches!(*n, 0)
}
/// Maximum number of ledger entries buffered in memory; when full, the oldest
/// entry is dropped to make room for a new one.
const MAX_LEDGER_BUFFER: usize = 5000;
/// Maximum number of characters of free text kept in a ledger entry.
const MAX_LEDGER_ERROR_CHARS: usize = 256;

/// Truncates `error` to at most `MAX_LEDGER_ERROR_CHARS` characters.
///
/// Text within the limit is returned unchanged. Longer text is cut on a
/// character boundary and suffixed with `…` to mark the truncation.
fn redact_error(error: &str) -> String {
    // The character at index MAX (if any) is the first one to drop; its byte
    // offset is exactly where the kept prefix ends.
    match error.char_indices().nth(MAX_LEDGER_ERROR_CHARS) {
        None => error.to_string(),
        Some((cut, _)) => {
            let kept = &error[..cut];
            format!("{kept}…")
        }
    }
}
/// Returns true when `error` looks like a failure where the model never
/// produced an answer (timeouts, transport errors, 502/503/504-style server
/// errors, auth problems, bad requests).
///
/// Matching is a case-insensitive (ASCII) substring search against a fixed
/// list of markers.
fn is_no_answer_failure(error: &str) -> bool {
    const NEEDLES: &[&str] = &[
        // Timeouts.
        "timeout",
        "timed out",
        "deadline",
        // Transport.
        "connection",
        "connect error",
        "reset by peer",
        "broken pipe",
        "network",
        "unreachable",
        "eof",
        "stream closed",
        "socket",
        // Server-side availability.
        "503",
        "502",
        "504",
        "internal server error",
        "service unavailable",
        "temporarily unavailable",
        "overloaded",
        // Authentication / authorization.
        "unauthorized",
        "unauthenticated",
        "forbidden",
        "permission denied",
        "authentication",
        "invalid_client",
        "invalid api key",
        // Malformed request.
        "bad request",
    ];
    let lowered = error.to_ascii_lowercase();
    for needle in NEEDLES {
        if lowered.contains(*needle) {
            return true;
        }
    }
    false
}
/// Rewrites the ledger at `path` so it keeps only the newest `max_entries` entries.
///
/// Does nothing when `max_entries` is 0, when the file does not exist, or when
/// the ledger already holds at most `max_entries` parseable entries. Lines that
/// `read_ledger` cannot parse are not carried over into the rewritten file.
///
/// The replacement is written to a sibling `*.jsonl.tmp` file and renamed over
/// the original. On Unix the temporary file is created with mode `0o600`,
/// matching `append_ledger_entries`, so pruning does not widen the ledger's
/// permissions.
///
/// # Errors
/// Returns any I/O error from serializing, writing, or renaming.
pub fn prune_ledger(path: &std::path::Path, max_entries: usize) -> std::io::Result<()> {
    use std::io::Write;

    if max_entries == 0 || !path.exists() {
        return Ok(());
    }
    let entries = read_ledger(path, 0);
    if entries.len() <= max_entries {
        return Ok(());
    }
    let keep = &entries[entries.len() - max_entries..];
    let mut body = String::new();
    for e in keep {
        let line = serde_json::to_string(e)
            .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))?;
        body.push_str(&line);
        body.push('\n');
    }
    let tmp = path.with_extension("jsonl.tmp");
    // A stale temp file from an interrupted prune would keep its old mode,
    // because `mode` only applies when the file is created. Best-effort removal.
    let _ = std::fs::remove_file(&tmp);
    let mut opts = std::fs::OpenOptions::new();
    opts.write(true).create(true).truncate(true);
    #[cfg(unix)]
    {
        use std::os::unix::fs::OpenOptionsExt;
        opts.mode(0o600);
    }
    let mut f = opts.open(&tmp)?;
    f.write_all(body.as_bytes())?;
    // Close the handle before the rename.
    drop(f);
    std::fs::rename(&tmp, path)
}
/// Appends `entries` to the JSONL ledger at `path`, one JSON object per line.
///
/// Creates the parent directory and the file when missing; on Unix a newly
/// created file gets mode `0o600`. An empty slice is a no-op.
///
/// The whole batch is serialized before the file is touched and then written
/// with a single `write_all`. A serialization error therefore leaves the ledger
/// unchanged, and the batch costs one write instead of two per entry.
///
/// # Errors
/// Returns any serialization or I/O error.
pub fn append_ledger_entries(
    path: &std::path::Path,
    entries: &[OutcomeLedgerEntry],
) -> std::io::Result<()> {
    use std::io::Write;

    if entries.is_empty() {
        return Ok(());
    }
    let mut batch = String::new();
    for e in entries {
        let line = serde_json::to_string(e)
            .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))?;
        batch.push_str(&line);
        batch.push('\n');
    }
    if let Some(parent) = path.parent() {
        std::fs::create_dir_all(parent)?;
    }
    let mut opts = std::fs::OpenOptions::new();
    opts.create(true).append(true);
    #[cfg(unix)]
    {
        use std::os::unix::fs::OpenOptionsExt;
        opts.mode(0o600);
    }
    let mut f = opts.open(path)?;
    f.write_all(batch.as_bytes())
}
/// Reads the JSONL ledger at `path`.
///
/// Blank lines and lines that fail to parse are skipped. When `limit` is
/// non-zero only the last `limit` entries are returned; `limit == 0` returns
/// everything. An unreadable file yields an empty vector.
pub fn read_ledger(path: &std::path::Path, limit: usize) -> Vec<OutcomeLedgerEntry> {
    let content = match std::fs::read_to_string(path) {
        Ok(text) => text,
        Err(_) => return Vec::new(),
    };
    let mut parsed: Vec<OutcomeLedgerEntry> = Vec::new();
    for line in content.lines() {
        if line.trim().is_empty() {
            continue;
        }
        if let Ok(entry) = serde_json::from_str::<OutcomeLedgerEntry>(line) {
            parsed.push(entry);
        }
    }
    if limit > 0 && parsed.len() > limit {
        // Drop the oldest entries so that only the tail remains.
        let excess = parsed.len() - limit;
        parsed.drain(..excess);
    }
    parsed
}
/// Tracks inference calls from start to resolution and aggregates per-model profiles.
pub struct OutcomeTracker {
/// Aggregated statistics keyed by model id.
profiles: HashMap<String, ModelProfile>,
/// Calls that were started but not yet resolved, failed, or swept, keyed by trace id.
pending: HashMap<String, InferenceOutcome>,
/// Monotonic counter used to build trace ids.
trace_counter: u64,
/// Models that hit a rate-limit error; nothing in the visible code removes entries.
excluded: HashSet<String>,
/// Set when profiles change; cleared by `save_if_dirty` after a successful save.
dirty: bool,
/// Bounded in-memory queue of ledger entries awaiting `drain_ledger`.
ledger_buffer: std::collections::VecDeque<OutcomeLedgerEntry>,
}
impl OutcomeTracker {
/// Creates an empty tracker: no profiles, no pending calls, clean dirty flag.
pub fn new() -> Self {
    Self {
        profiles: HashMap::default(),
        pending: HashMap::default(),
        trace_counter: 0,
        excluded: HashSet::default(),
        dirty: false,
        ledger_buffer: std::collections::VecDeque::default(),
    }
}
/// Queues a ledger entry, evicting the oldest one first when the buffer
/// already holds `MAX_LEDGER_BUFFER` entries.
fn push_ledger(&mut self, entry: OutcomeLedgerEntry) {
    let at_capacity = self.ledger_buffer.len() >= MAX_LEDGER_BUFFER;
    if at_capacity {
        self.ledger_buffer.pop_front();
    }
    self.ledger_buffer.push_back(entry);
}
/// Removes and returns all buffered ledger entries, oldest first.
pub fn drain_ledger(&mut self) -> Vec<OutcomeLedgerEntry> {
    let mut drained = Vec::with_capacity(self.ledger_buffer.len());
    drained.extend(self.ledger_buffer.drain(..));
    drained
}
/// Returns whether `model_id` is in the excluded set, which `record_failure`
/// populates when it sees a rate-limit error.
pub fn is_excluded(&self, model_id: &str) -> bool {
self.excluded.contains(model_id)
}
/// Registers the start of an inference call and returns its trace id.
///
/// The id has the form `t-<unix secs>-<counter>`. `routing_reason` is
/// truncated via `redact_error` before being stored. The new record is kept in
/// the pending map until it is completed, failed, resolved, or swept.
pub fn record_start(
    &mut self,
    model_id: &str,
    task: InferenceTask,
    routing_reason: &str,
) -> String {
    self.trace_counter += 1;
    let sequence = self.trace_counter;
    let trace_id = format!("t-{}-{}", now_unix(), sequence);
    let key = trace_id.clone();
    let record = InferenceOutcome {
        trace_id: key.clone(),
        model_id: model_id.to_string(),
        task,
        routing_reason: redact_error(routing_reason),
        // Usage is unknown until completion.
        latency_ms: 0,
        input_tokens: 0,
        output_tokens: 0,
        cache_read_input_tokens: 0,
        cache_creation_input_tokens: 0,
        inferred_outcome: None,
        code_outcome: None,
        error: None,
        timestamp: now_unix(),
        success_credited: false,
    };
    self.pending.insert(key, record);
    trace_id
}
/// Records completion of a call that reported no cache usage.
///
/// Equivalent to `record_complete_cached` with both cache counters at zero.
pub fn record_complete(
    &mut self,
    trace_id: &str,
    latency_ms: u64,
    input_tokens: usize,
    output_tokens: usize,
) {
    let (cache_read, cache_creation) = (0, 0);
    self.record_complete_cached(
        trace_id,
        latency_ms,
        input_tokens,
        output_tokens,
        cache_read,
        cache_creation,
    );
}
pub fn record_complete_cached(
&mut self,
trace_id: &str,
latency_ms: u64,
input_tokens: usize,
output_tokens: usize,
cache_read_input_tokens: usize,
cache_creation_input_tokens: usize,
) {
if let Some(outcome) = self.pending.get_mut(trace_id) {
outcome.latency_ms = latency_ms;
outcome.input_tokens = input_tokens;
outcome.output_tokens = output_tokens;
outcome.cache_read_input_tokens = cache_read_input_tokens;
outcome.cache_creation_input_tokens = cache_creation_input_tokens;
let mechanical_success = output_tokens > 0 && outcome.error.is_none();
if mechanical_success {
outcome.success_credited = true;
}
let model_id = outcome.model_id.clone();
let task_key = outcome.task.to_string();
let profile = self
.profiles
.entry(model_id.clone())
.or_insert_with(|| ModelProfile::new(model_id));
profile.total_calls += 1;
profile.total_latency_ms += latency_ms;
profile.total_input_tokens += input_tokens as u64;
profile.total_output_tokens += output_tokens as u64;
profile.total_cache_read_input_tokens += cache_read_input_tokens as u64;
profile.total_cache_creation_input_tokens += cache_creation_input_tokens as u64;
if mechanical_success {
profile.success_count += 1;
}
let ts = profile.task_stats.entry(task_key).or_default();
ts.calls += 1;
if mechanical_success {
ts.successes += 1;
}
ts.avg_latency_ms =
ts.avg_latency_ms + (latency_ms as f64 - ts.avg_latency_ms) / ts.calls as f64;
profile.updated_at = now_unix();
self.dirty = true;
}
}
/// Records a failed call, updates the model profile, and emits a ledger entry.
///
/// Unknown trace ids are ignored; a known trace is removed from the pending map.
///
/// The effect on quality depends on the error text:
/// * rate-limit errors (text contains `429` or `RESOURCE_EXHAUSTED`) add the
///   model to the excluded set and scale both quality EMAs by 0.1;
/// * "no answer" errors (see `is_no_answer_failure`) leave quality untouched;
/// * any other error is folded into the EMAs as a quality of 0.0.
///
/// If the call had already been credited as a mechanical success by
/// `record_complete_cached`, that credit is revoked here. This mirrors what
/// `apply_outcome` does, so one trace is never counted as both a success and a
/// failure.
///
/// NOTE(review): `total_calls` is incremented here even when
/// `record_complete_cached` already counted the same trace. No field records
/// whether completion ran, so that double count is left as is — confirm the
/// intended call sequence.
pub fn record_failure(&mut self, trace_id: &str, error: &str) {
    let Some(outcome) = self.pending.remove(trace_id) else {
        return;
    };

    let is_rate_limited = error.contains("429") || error.contains("RESOURCE_EXHAUSTED");
    let is_no_answer = !is_rate_limited && is_no_answer_failure(error);
    // Factor applied to the quality EMAs; `None` leaves them untouched.
    let decay = if is_rate_limited {
        Some(0.1)
    } else if is_no_answer {
        None
    } else {
        // EMA step towards an observed quality of 0.0.
        Some(1.0 - EMA_ALPHA)
    };

    let profile = self
        .profiles
        .entry(outcome.model_id.clone())
        .or_insert_with(|| ModelProfile::new(outcome.model_id.clone()));
    profile.total_calls += 1;
    profile.fail_count += 1;
    let ts = profile.task_stats.entry(outcome.task.to_string()).or_default();
    ts.failures += 1;
    if outcome.success_credited {
        // Completion optimistically counted this call as a success; undo that.
        profile.success_count = profile.success_count.saturating_sub(1);
        ts.successes = ts.successes.saturating_sub(1);
    }
    if let Some(factor) = decay {
        profile.ema_quality *= factor;
        profile.quality_observations += 1;
        ts.ema_quality *= factor;
        ts.quality_observations += 1;
    }
    profile.updated_at = now_unix();

    if is_rate_limited {
        self.excluded.insert(outcome.model_id.clone());
    }
    self.dirty = true;

    self.push_ledger(OutcomeLedgerEntry {
        trace_id: outcome.trace_id,
        model_id: outcome.model_id,
        task: outcome.task,
        routing_reason: outcome.routing_reason,
        latency_ms: outcome.latency_ms,
        input_tokens: outcome.input_tokens,
        output_tokens: outcome.output_tokens,
        cache_read_input_tokens: outcome.cache_read_input_tokens,
        cache_creation_input_tokens: outcome.cache_creation_input_tokens,
        success: Some(false),
        // Only genuine model failures carry a quality observation.
        quality: if is_rate_limited || is_no_answer {
            None
        } else {
            Some(0.0)
        },
        error: Some(redact_error(error)),
        project_id: None,
        intent: None,
        timestamp: now_unix(),
    });
}
/// Resolves a pending trace with an inferred outcome.
///
/// The trace is removed from the pending map and its quality score and success
/// flag are applied to the model profile. Unknown trace ids are ignored.
pub fn record_inferred_outcome(&mut self, trace_id: &str, outcome: InferredOutcome) {
    let Some(pending) = self.pending.remove(trace_id) else {
        return;
    };
    let quality = outcome.quality_score();
    let success = outcome.is_success();
    self.apply_outcome(&pending, quality, success);
}
/// Resolves a pending trace with an observed code outcome.
///
/// The trace is removed from the pending map and the outcome's quality score
/// and success flag are applied to the model profile. Unknown trace ids are
/// ignored.
pub fn record_code_outcome(&mut self, trace_id: &str, outcome: CodeOutcome) {
    let Some(pending) = self.pending.remove(trace_id) else {
        return;
    };
    let quality = Some(outcome.quality_score());
    let success = Some(outcome.is_success());
    self.apply_outcome(&pending, quality, success);
}
/// Applies a batch of `(trace_id, outcome)` pairs in order via `record_inferred_outcome`.
pub fn resolve_pending_from_signals(&mut self, outcomes: Vec<(String, InferredOutcome)>) {
    outcomes
        .into_iter()
        .for_each(|(trace_id, inferred)| self.record_inferred_outcome(&trace_id, inferred));
}
/// Infers an outcome for each traced action in a sequence.
///
/// Each element of `action_results` is `(trace_id, success, confidence, output)`.
/// Elements with an empty trace id produce no outcome, but they still count as
/// the "next action" for the element before them.
///
/// * A failed action → `Rejected` with the given confidence.
/// * A successful action with non-blank output whose next action succeeded
///   (or which is last) → `Accepted`.
/// * A successful action with non-blank output whose next action failed →
///   `AcceptedWithEdits` with `confidence * 0.7`.
/// * A successful action with blank output → `Inconclusive`.
///
/// The tracker state is not read or modified.
pub fn infer_outcomes_from_action_sequence(
    &self,
    action_results: &[(String, bool, f64, String)],
) -> Vec<(String, InferredOutcome)> {
    action_results
        .iter()
        .enumerate()
        .filter(|(_, (trace_id, ..))| !trace_id.is_empty())
        .map(|(idx, (trace_id, success, confidence, output))| {
            let inferred = if !*success {
                InferredOutcome::Rejected {
                    confidence: *confidence,
                }
            } else {
                // With no following action, the next step counts as successful.
                let next_succeeded = action_results
                    .get(idx + 1)
                    .map_or(true, |(_, ok, _, _)| *ok);
                let has_output = !output.trim().is_empty();
                match (has_output, next_succeeded) {
                    (true, true) => InferredOutcome::Accepted {
                        confidence: *confidence,
                    },
                    (true, false) => InferredOutcome::AcceptedWithEdits {
                        confidence: confidence * 0.7,
                    },
                    (false, _) => InferredOutcome::Inconclusive,
                }
            };
            (trace_id.clone(), inferred)
        })
        .collect()
}
/// Returns the aggregated profile for `model_id`, if one exists.
pub fn profile(&self, model_id: &str) -> Option<&ModelProfile> {
self.profiles.get(model_id)
}
/// Returns whether `trace_id` is still in the pending map.
pub fn has_pending(&self, trace_id: &str) -> bool {
    self.pending.get(trace_id).is_some()
}
/// Borrows the full map of model profiles, keyed by model id.
pub fn all_profiles(&self) -> &HashMap<String, ModelProfile> {
&self.profiles
}
/// Returns the ids of all pending traces, in the map's iteration order.
pub fn pending_trace_ids(&self) -> Vec<String> {
    let mut ids = Vec::with_capacity(self.pending.len());
    ids.extend(self.pending.keys().cloned());
    ids
}
/// Borrows the pending record for `trace_id`, if it is still pending.
pub fn get_pending(&self, trace_id: &str) -> Option<&InferenceOutcome> {
self.pending.get(trace_id)
}
/// Returns a snapshot of all profiles with `quality_per_1k_tokens` recomputed.
///
/// The stored profiles are not modified; only the returned copies carry the
/// refreshed value.
pub fn export_profiles(&self) -> Vec<ModelProfile> {
    let mut snapshot = Vec::with_capacity(self.profiles.len());
    for profile in self.profiles.values() {
        let mut copy = profile.clone();
        copy.quality_per_1k_tokens = copy.compute_quality_per_1k_tokens();
        snapshot.push(copy);
    }
    snapshot
}
/// Inserts the given profiles, replacing any existing profile with the same
/// model id, and marks the tracker dirty (even for an empty list).
pub fn import_profiles(&mut self, profiles: Vec<ModelProfile>) {
    self.profiles.extend(
        profiles
            .into_iter()
            .map(|profile| (profile.model_id.clone(), profile)),
    );
    self.dirty = true;
}
/// Writes all profiles to `path` as pretty-printed JSON.
///
/// The parent directory is created when missing. The data is first written to
/// a sibling `*.json.tmp` file and then renamed over `path`. The dirty flag is
/// not touched.
///
/// # Errors
/// Returns serialization failures as `ErrorKind::Other`, and any I/O error.
pub fn save_to_file(&self, path: &std::path::Path) -> Result<(), std::io::Error> {
    let json = match serde_json::to_string_pretty(&self.export_profiles()) {
        Ok(text) => text,
        Err(e) => return Err(std::io::Error::new(std::io::ErrorKind::Other, e)),
    };
    if let Some(dir) = path.parent() {
        std::fs::create_dir_all(dir)?;
    }
    let staging = path.with_extension("json.tmp");
    std::fs::write(&staging, json)?;
    std::fs::rename(&staging, path)
}
/// Returns whether profiles have changed since the last successful `save_if_dirty`.
pub fn is_dirty(&self) -> bool {
self.dirty
}
/// Saves profiles to `path` only when something changed.
///
/// Returns `Ok(true)` after a save, which also clears the dirty flag, and
/// `Ok(false)` when there was nothing to save. On error the flag stays set.
pub fn save_if_dirty(&mut self, path: &std::path::Path) -> Result<bool, std::io::Error> {
    if self.dirty {
        self.save_to_file(path)?;
        self.dirty = false;
        Ok(true)
    } else {
        Ok(false)
    }
}
/// Loads profiles from the JSON file at `path` and returns how many were read.
///
/// A missing file yields `Ok(0)`. Each loaded profile has its observation
/// counters backfilled and replaces any existing profile with the same model
/// id. The dirty flag is not changed.
///
/// # Errors
/// Returns I/O errors from reading, and `ErrorKind::InvalidData` when the
/// JSON cannot be parsed.
pub fn load_from_file(&mut self, path: &std::path::Path) -> Result<usize, std::io::Error> {
    if !path.exists() {
        return Ok(0);
    }
    let raw = std::fs::read_to_string(path)?;
    let mut loaded: Vec<ModelProfile> = serde_json::from_str(&raw)
        .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
    let count = loaded.len();
    for profile in loaded.iter_mut() {
        backfill_quality_observations(profile);
    }
    self.profiles.extend(
        loaded
            .into_iter()
            .map(|profile| (profile.model_id.clone(), profile)),
    );
    Ok(count)
}
/// Folds a resolved outcome into the model profile and emits a ledger entry.
///
/// * `quality`, when present, is blended into the profile and per-task EMAs
///   with weight `EMA_ALPHA` and counted as one observation on each.
/// * `success == Some(true)` credits a success unless completion already
///   credited one for this trace.
/// * `success == Some(false)` records a failure and revokes a previously
///   credited success.
/// * `success == None` leaves the counters untouched.
///
/// The profile is created when missing and the tracker is always marked dirty.
fn apply_outcome(
    &mut self,
    pending: &InferenceOutcome,
    quality: Option<f64>,
    success: Option<bool>,
) {
    let task_key = pending.task.to_string();
    let already_credited = pending.success_credited;
    let profile = self
        .profiles
        .entry(pending.model_id.clone())
        .or_insert_with(|| ModelProfile::new(pending.model_id.clone()));

    if let Some(q) = quality {
        profile.ema_quality = profile.ema_quality * (1.0 - EMA_ALPHA) + q * EMA_ALPHA;
        profile.quality_observations += 1;
        let stats = profile.task_stats.entry(task_key.clone()).or_default();
        stats.ema_quality = stats.ema_quality * (1.0 - EMA_ALPHA) + q * EMA_ALPHA;
        stats.quality_observations += 1;
    }

    match success {
        Some(true) => {
            if !already_credited {
                profile.success_count += 1;
                profile.task_stats.entry(task_key).or_default().successes += 1;
            }
        }
        Some(false) => {
            let stats = profile.task_stats.entry(task_key).or_default();
            if already_credited {
                // Take back the success that completion counted optimistically.
                profile.success_count = profile.success_count.saturating_sub(1);
                stats.successes = stats.successes.saturating_sub(1);
            }
            profile.fail_count += 1;
            stats.failures += 1;
        }
        None => {}
    }

    profile.updated_at = now_unix();
    self.dirty = true;

    let entry = OutcomeLedgerEntry {
        trace_id: pending.trace_id.clone(),
        model_id: pending.model_id.clone(),
        task: pending.task,
        routing_reason: pending.routing_reason.clone(),
        latency_ms: pending.latency_ms,
        input_tokens: pending.input_tokens,
        output_tokens: pending.output_tokens,
        cache_read_input_tokens: pending.cache_read_input_tokens,
        cache_creation_input_tokens: pending.cache_creation_input_tokens,
        success,
        quality,
        error: None,
        project_id: None,
        intent: None,
        timestamp: now_unix(),
    };
    self.push_ledger(entry);
}
/// Expires pending traces older than `ttl_secs`, measured against the current
/// time, and returns how many were removed.
pub fn sweep_pending(&mut self, ttl_secs: u64) -> usize {
    let now = now_unix();
    self.sweep_pending_at(ttl_secs, now)
}
/// Expires pending traces whose start timestamp is older than `now - ttl_secs`.
///
/// Returns the number of traces removed. A removed trace with a non-zero
/// recorded latency gets a ledger entry: `success` is `Some(true)` when it
/// counts as a mechanical success (output tokens present, no error) and `None`
/// otherwise, and it never carries a quality score. If that success was not
/// credited at completion time, it is credited now. Traces with zero latency
/// are dropped silently.
fn sweep_pending_at(&mut self, ttl_secs: u64, now: u64) -> usize {
    let cutoff = now.saturating_sub(ttl_secs);
    let stale: Vec<String> = self
        .pending
        .iter()
        .filter_map(|(id, o)| {
            if o.timestamp < cutoff {
                Some(id.clone())
            } else {
                None
            }
        })
        .collect();
    let swept = stale.len();

    for id in stale {
        let Some(o) = self.pending.remove(&id) else {
            continue;
        };
        if o.latency_ms == 0 {
            // No latency was recorded for this trace: nothing to log.
            continue;
        }
        let credit_now = !o.success_credited && o.output_tokens > 0 && o.error.is_none();
        let was_success = o.success_credited || credit_now;
        if credit_now {
            let profile = self
                .profiles
                .entry(o.model_id.clone())
                .or_insert_with(|| ModelProfile::new(o.model_id.clone()));
            profile.success_count += 1;
            profile
                .task_stats
                .entry(o.task.to_string())
                .or_default()
                .successes += 1;
            profile.updated_at = now_unix();
            self.dirty = true;
        }
        self.push_ledger(OutcomeLedgerEntry {
            trace_id: o.trace_id,
            model_id: o.model_id,
            task: o.task,
            routing_reason: o.routing_reason,
            latency_ms: o.latency_ms,
            input_tokens: o.input_tokens,
            output_tokens: o.output_tokens,
            cache_read_input_tokens: o.cache_read_input_tokens,
            cache_creation_input_tokens: o.cache_creation_input_tokens,
            success: if was_success { Some(true) } else { None },
            quality: None,
            error: None,
            project_id: None,
            intent: None,
            timestamp: now_unix(),
        });
    }
    swept
}
/// Resolves all pending `Code` traces from the git working-tree state of `repo_dir`.
///
/// Runs `git diff` and `git diff --cached`. If `git` cannot be started for the
/// unstaged diff, or both diffs are empty, nothing happens. Otherwise every
/// pending `Code` trace is resolved:
///
/// * with the `ast` feature enabled and an AST-derived outcome available, that
///   outcome is applied to every trace;
/// * otherwise a trace is `Applied` when any whitespace-separated token longer
///   than 5 bytes from its routing reason occurs in the combined diff, and
///   `Modified` when none does.
///
/// Each resolved trace is removed from the pending map, so a later call (or a
/// later sweep) cannot apply or log the same trace a second time.
pub fn check_git_outcomes(&mut self, repo_dir: &std::path::Path) {
    let diff = match std::process::Command::new("git")
        .args(["diff", "--no-color"])
        .current_dir(repo_dir)
        .output()
    {
        Ok(output) => String::from_utf8_lossy(&output.stdout).to_string(),
        Err(_) => return,
    };
    // A failure to read the staged diff is tolerated and treated as empty.
    let staged_diff = match std::process::Command::new("git")
        .args(["diff", "--cached", "--no-color"])
        .current_dir(repo_dir)
        .output()
    {
        Ok(output) => String::from_utf8_lossy(&output.stdout).to_string(),
        Err(_) => String::new(),
    };
    let combined_diff = format!("{}\n{}", diff, staged_diff);
    if combined_diff.trim().is_empty() {
        return;
    }
    #[cfg(feature = "ast")]
    let ast_outcome = Self::check_git_outcomes_ast(repo_dir);

    let code_traces: Vec<String> = self
        .pending
        .iter()
        .filter(|(_, o)| matches!(o.task, InferenceTask::Code))
        .map(|(id, _)| id.clone())
        .collect();

    for trace_id in code_traces {
        // Take ownership so the trace is resolved exactly once.
        let Some(pending) = self.pending.remove(&trace_id) else {
            continue;
        };
        #[cfg(feature = "ast")]
        if let Some(ref ast_out) = ast_outcome {
            self.apply_outcome(
                &pending,
                Some(ast_out.quality_score()),
                Some(ast_out.is_success()),
            );
            continue;
        }
        let reason_echoed_in_diff = pending
            .routing_reason
            .split_whitespace()
            .filter(|t| t.len() > 5)
            .any(|t| combined_diff.contains(t));
        let outcome = if reason_echoed_in_diff {
            CodeOutcome::Applied
        } else {
            CodeOutcome::Modified
        };
        self.apply_outcome(
            &pending,
            Some(outcome.quality_score()),
            Some(outcome.is_success()),
        );
    }
}
/// Classifies the unstaged working-tree changes in `repo_dir` by symbol-level diff.
///
/// Only compiled with the `ast` feature. For each file listed by
/// `git diff --name-only` that `car_ast` recognizes, the `HEAD` version is
/// compared with the on-disk version. Returns, in priority order,
/// `SignatureChanged`, `BodyModified`, or `SymbolAdded`, and `None` when no
/// file changed or no symbol-level change was detected.
#[cfg(feature = "ast")]
fn check_git_outcomes_ast(repo_dir: &std::path::Path) -> Option<CodeOutcome> {
let name_only = std::process::Command::new("git")
.args(["diff", "--name-only"])
.current_dir(repo_dir)
.output()
.ok()?;
let changed_files: Vec<&str> = std::str::from_utf8(&name_only.stdout)
.ok()?
.lines()
.filter(|f| !f.is_empty())
.collect();
if changed_files.is_empty() {
return None;
}
let mut has_sig_change = false;
let mut has_body_change = false;
let mut has_addition = false;
for file in &changed_files {
// Skip files whose language `car_ast` does not recognize from the name.
if car_ast::Language::from_filename(file).is_none() {
continue;
}
// Content at HEAD; `None` when `git show` fails (e.g. the file is not in HEAD).
let old_content = std::process::Command::new("git")
.args(["show", &format!("HEAD:{}", file)])
.current_dir(repo_dir)
.output()
.ok()
.and_then(|o| {
if o.status.success() {
String::from_utf8(o.stdout).ok()
} else {
None
}
});
let new_path = repo_dir.join(file);
let new_content = std::fs::read_to_string(&new_path).ok();
match (old_content, new_content) {
(Some(old), Some(new)) => {
let old_parsed = car_ast::parse_file(&old, file);
let new_parsed = car_ast::parse_file(&new, file);
// Files that fail to parse on either side contribute nothing.
if let (Some(old_p), Some(new_p)) = (old_parsed, new_parsed) {
let changes = car_ast::diff_symbols(&old_p, &new_p);
for change in &changes {
match change {
car_ast::SymbolChange::Added(_) => has_addition = true,
car_ast::SymbolChange::Modified {
signature_changed, ..
} => {
if *signature_changed {
has_sig_change = true;
} else {
has_body_change = true;
}
}
// A removed symbol is counted as a signature change.
car_ast::SymbolChange::Removed(_) => has_sig_change = true,
}
}
}
}
// No HEAD version but readable on disk: counted as an addition.
(None, Some(_)) => has_addition = true, _ => {}
}
}
// Priority: signature change > body change > addition.
if has_sig_change {
Some(CodeOutcome::SignatureChanged)
} else if has_body_change {
Some(CodeOutcome::BodyModified)
} else if has_addition {
Some(CodeOutcome::SymbolAdded)
} else {
None }
}
}
impl Default for OutcomeTracker {
fn default() -> Self {
Self::new()
}
}
/// Current wall-clock time as whole seconds since the Unix epoch.
///
/// Returns 0 if the system clock reports a time before the epoch.
fn now_unix() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        // Matches the zero-length default duration.
        Err(_) => 0,
    }
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn lifecycle() {
let mut tracker = OutcomeTracker::new();
let trace = tracker.record_start(
"qwen/qwen3-4b:q4_k_m",
InferenceTask::Code,
"Code task -> Qwen3-4B",
);
tracker.record_complete(&trace, 1200, 100, 50);
let profile = tracker.profile("qwen/qwen3-4b:q4_k_m").unwrap();
assert_eq!(profile.total_calls, 1);
assert_eq!(profile.avg_latency_ms(), 1200.0);
tracker.record_inferred_outcome(&trace, InferredOutcome::Accepted { confidence: 0.9 });
let profile = tracker.profile("qwen/qwen3-4b:q4_k_m").unwrap();
assert_eq!(profile.success_count, 1);
assert!(profile.ema_quality > 0.5); }
#[test]
fn failure_degrades() {
let mut tracker = OutcomeTracker::new();
for _ in 0..5 {
let trace = tracker.record_start("bad-model", InferenceTask::Generate, "test");
tracker.record_failure(&trace, "model produced malformed output");
}
let profile = tracker.profile("bad-model").unwrap();
assert_eq!(profile.fail_count, 5);
assert_eq!(profile.success_count, 0);
assert!(profile.should_degrade(2)); assert!(profile.ema_quality < 0.3); }
#[test]
fn code_outcome_ground_truth() {
let mut tracker = OutcomeTracker::new();
let trace = tracker.record_start("qwen/qwen3-4b:q4_k_m", InferenceTask::Code, "code");
tracker.record_complete(&trace, 500, 200, 100);
tracker.record_code_outcome(&trace, CodeOutcome::Applied);
let profile = tracker.profile("qwen/qwen3-4b:q4_k_m").unwrap();
assert_eq!(profile.success_count, 1);
assert!((profile.ema_quality - 0.6).abs() < 0.01);
}
#[test]
fn per_task_stats() {
let mut tracker = OutcomeTracker::new();
for _ in 0..2 {
let trace = tracker.record_start("m1", InferenceTask::Code, "code");
tracker.record_complete(&trace, 1000, 100, 50);
tracker.record_inferred_outcome(&trace, InferredOutcome::Accepted { confidence: 0.8 });
}
let trace = tracker.record_start("m1", InferenceTask::Generate, "gen");
tracker.record_complete(&trace, 500, 50, 25);
tracker.record_inferred_outcome(&trace, InferredOutcome::Rejected { confidence: 0.9 });
let profile = tracker.profile("m1").unwrap();
assert_eq!(profile.total_calls, 3);
let code_stats = profile.task_stats(InferenceTask::Code).unwrap();
assert_eq!(code_stats.calls, 2);
assert_eq!(code_stats.successes, 2);
let gen_stats = profile.task_stats(InferenceTask::Generate).unwrap();
assert_eq!(gen_stats.calls, 1);
assert_eq!(gen_stats.failures, 1);
}
#[test]
fn export_populates_quality_per_1k_tokens() {
let mut tracker = OutcomeTracker::new();
let trace = tracker.record_start("m1", InferenceTask::Generate, "test");
tracker.record_complete(&trace, 100, 800, 200); tracker.record_inferred_outcome(&trace, InferredOutcome::Accepted { confidence: 1.0 });
let exported = tracker.export_profiles();
assert_eq!(exported.len(), 1);
let p = &exported[0];
assert!(
(p.quality_per_1k_tokens - 0.6).abs() < 1e-6,
"got {}",
p.quality_per_1k_tokens
);
}
#[test]
fn quality_per_1k_tokens_zero_without_tokens() {
let profile = ModelProfile::new("x".into());
assert_eq!(profile.compute_quality_per_1k_tokens(), 0.0);
}
#[test]
fn tokens_per_success_is_outcome_denominated() {
let mut p = ModelProfile::new("x".into());
assert_eq!(p.tokens_per_success(), None);
p.total_input_tokens = 600;
p.total_output_tokens = 300;
p.success_count = 3;
assert_eq!(p.tokens_per_success(), Some(300.0));
let usd = p.usd_per_success(1.0, 2.0, CacheRates::ANTHROPIC).unwrap();
assert!((usd - ((600.0 * 1.0 + 300.0 * 2.0) / 1_000_000.0 / 3.0)).abs() < 1e-12);
assert_eq!(
ModelProfile::new("y".into()).usd_per_success(1.0, 2.0, CacheRates::ANTHROPIC),
None
);
}
#[test]
fn usd_per_success_prices_cache_buckets_separately() {
let mut p = ModelProfile::new("cached".into());
p.success_count = 1;
p.total_input_tokens = 100; p.total_cache_read_input_tokens = 1000; p.total_cache_creation_input_tokens = 200; p.total_output_tokens = 50; let expected_input = (100.0 * 1.0 + 1000.0 * 1.0 * 0.1 + 200.0 * 1.0 * 1.25) / 1_000_000.0;
let expected = expected_input + 50.0 * 2.0 / 1_000_000.0;
let usd = p.usd_per_success(1.0, 2.0, CacheRates::ANTHROPIC).unwrap();
assert!((usd - expected).abs() < 1e-12, "got {usd}, want {expected}");
assert_eq!(p.total_tokens(), 100 + 1000 + 200 + 50);
let mut naive = ModelProfile::new("naive".into());
naive.success_count = 1;
naive.total_input_tokens = 1300; naive.total_output_tokens = 50;
assert!(naive.usd_per_success(1.0, 2.0, CacheRates::ANTHROPIC).unwrap() > usd);
}
#[test]
fn openai_cache_rates_price_reads_at_half_no_write_premium() {
let mut p = ModelProfile::new("gpt".into());
p.success_count = 1;
p.total_input_tokens = 100; p.total_cache_read_input_tokens = 1000; p.total_cache_creation_input_tokens = 0; let usd = p.usd_per_success(1.0, 2.0, CacheRates::OPENAI).unwrap();
let expected = (100.0 * 1.0 + 1000.0 * 1.0 * 0.5) / 1_000_000.0;
assert!((usd - expected).abs() < 1e-12, "got {usd}, want {expected}");
let anthropic = p.usd_per_success(1.0, 2.0, CacheRates::ANTHROPIC).unwrap();
assert!(usd > anthropic, "OpenAI 0.5× read must exceed Anthropic 0.1×");
let mut naive = ModelProfile::new("naive".into());
naive.success_count = 1;
naive.total_input_tokens = 1100;
assert!(naive.usd_per_success(1.0, 2.0, CacheRates::OPENAI).unwrap() > usd);
}
#[test]
fn record_complete_cached_accumulates_cache_totals() {
let mut tracker = OutcomeTracker::new();
let trace = tracker.record_start("m", InferenceTask::Generate, "gen");
tracker.record_complete_cached(&trace, 100, 40, 20, 800, 120);
let p = tracker.profile("m").unwrap();
assert_eq!(p.total_input_tokens, 40, "uncached prefix only");
assert_eq!(p.total_cache_read_input_tokens, 800);
assert_eq!(p.total_cache_creation_input_tokens, 120);
}
#[test]
fn dirty_flag_and_save_if_dirty() {
let dir = std::env::temp_dir().join("car-outcome-dirty-test");
let _ = std::fs::remove_dir_all(&dir);
let path = dir.join("outcome_profiles.json");
let mut tracker = OutcomeTracker::new();
assert!(!tracker.is_dirty());
assert!(!tracker.save_if_dirty(&path).unwrap());
assert!(!path.exists());
let trace = tracker.record_start("m1", InferenceTask::Generate, "router");
tracker.record_complete(&trace, 100, 10, 20);
assert!(tracker.is_dirty());
assert!(tracker.save_if_dirty(&path).unwrap());
assert!(path.exists());
assert!(!tracker.is_dirty());
assert!(!tracker.save_if_dirty(&path).unwrap());
let mut fresh = OutcomeTracker::new();
fresh.load_from_file(&path).unwrap();
assert!(!fresh.is_dirty());
fresh.import_profiles(vec![ModelProfile::new("seeded".into())]);
assert!(fresh.is_dirty());
let _ = std::fs::remove_dir_all(&dir);
}
/// Records one accepted completion and one failure, then checks that both
/// show up in the drained ledger and survive a round trip through the
/// on-disk ledger file.
///
/// Also pins that `drain_ledger` empties the buffer (a second drain returns
/// nothing) and that `read_ledger(&path, 1)` yields a single entry while
/// `read_ledger(&path, 0)` yields both. The second argument is presumably a
/// limit where 0 means "no limit"; confirm against `read_ledger`.
#[test]
fn ledger_captures_resolved_outcomes() {
    // The scratch directory is suffixed with the process id so that two test
    // processes sharing one system temp directory cannot delete or overwrite
    // each other's ledger file.
    let dir = std::env::temp_dir()
        .join(format!("car-outcome-ledger-test-{}", std::process::id()));
    // Best-effort cleanup of a previous run; a missing directory is fine.
    let _ = std::fs::remove_dir_all(&dir);
    let path = dir.join("outcome_ledger.jsonl");

    let mut tracker = OutcomeTracker::new();

    // Resolved success: completion followed by an explicit accept signal.
    let t1 = tracker.record_start("good-model", InferenceTask::Generate, "router:test");
    tracker.record_complete(&t1, 1200, 50, 100);
    tracker.record_inferred_outcome(&t1, InferredOutcome::Accepted { confidence: 0.9 });

    // Resolved failure.
    let t2 = tracker.record_start("bad-model", InferenceTask::Code, "router:test");
    tracker.record_failure(&t2, "boom: 500");

    let drained = tracker.drain_ledger();
    assert_eq!(drained.len(), 2);
    assert!(tracker.drain_ledger().is_empty(), "drain clears the buffer");

    append_ledger_entries(&path, &drained).unwrap();
    let read = read_ledger(&path, 0);
    assert_eq!(read.len(), 2);

    let good = read.iter().find(|e| e.model_id == "good-model").unwrap();
    assert_eq!(good.success, Some(true));
    assert!(good.quality.is_some());
    assert_eq!(good.routing_reason, "router:test");
    assert_eq!(good.latency_ms, 1200);

    let bad = read.iter().find(|e| e.model_id == "bad-model").unwrap();
    assert_eq!(bad.success, Some(false));
    assert_eq!(bad.error.as_deref(), Some("boom: 500"));

    assert_eq!(read_ledger(&path, 1).len(), 1);

    let _ = std::fs::remove_dir_all(&dir);
}
/// Covers two ledger hygiene behaviours:
///
/// 1. an oversized error message (5000 chars) is shortened before it reaches
///    the ledger, to at most `MAX_LEDGER_ERROR_CHARS + 1` characters (the
///    extra character is presumably a truncation marker; confirm against
///    the truncation helper);
/// 2. `prune_ledger(&path, 3)` on a ten-entry file keeps the last three
///    entries (`t7`..`t9`) in their original order.
#[test]
fn ledger_redacts_long_errors_and_prunes() {
    // The scratch directory is suffixed with the process id so that two test
    // processes sharing one system temp directory cannot delete or overwrite
    // each other's ledger file.
    let dir = std::env::temp_dir()
        .join(format!("car-outcome-privacy-test-{}", std::process::id()));
    // Best-effort cleanup of a previous run; a missing directory is fine.
    let _ = std::fs::remove_dir_all(&dir);
    let path = dir.join("outcome_ledger.jsonl");

    // Part 1: long error text is truncated in the in-memory ledger entry.
    let mut tracker = OutcomeTracker::new();
    let t = tracker.record_start("m", InferenceTask::Generate, "r");
    let huge = "x".repeat(5000);
    tracker.record_failure(&t, &huge);
    let drained = tracker.drain_ledger();
    let err = drained[0].error.as_ref().unwrap();
    assert!(
        err.chars().count() <= MAX_LEDGER_ERROR_CHARS + 1,
        "error truncated"
    );

    // Part 2: pruning keeps only the newest entries. The timestamp doubles as
    // the insertion index so the survivors are easy to identify.
    let entries: Vec<OutcomeLedgerEntry> = (0..10)
        .map(|i| OutcomeLedgerEntry {
            trace_id: format!("t{i}"),
            model_id: "m".into(),
            task: InferenceTask::Generate,
            routing_reason: "r".into(),
            latency_ms: 1,
            input_tokens: 1,
            output_tokens: 1,
            cache_read_input_tokens: 0,
            cache_creation_input_tokens: 0,
            success: Some(true),
            quality: Some(1.0),
            error: None,
            project_id: None,
            intent: None,
            timestamp: i,
        })
        .collect();
    append_ledger_entries(&path, &entries).unwrap();
    prune_ledger(&path, 3).unwrap();

    let kept = read_ledger(&path, 0);
    assert_eq!(kept.len(), 3);
    assert_eq!(kept[0].trace_id, "t7");
    assert_eq!(kept[2].trace_id, "t9");

    let _ = std::fs::remove_dir_all(&dir);
}
/// Sweeping pending traces must evict all of them, but only completed calls
/// produce a ledger receipt: a completion with output is booked as a success
/// without a quality grade, a completion with zero output tokens stays
/// unresolved (`success == None`), and a call that never completed leaves no
/// receipt at all.
#[test]
fn sweep_pending_credits_mechanical_success_or_inconclusive() {
    let mut outcomes = OutcomeTracker::new();

    // Completed with output tokens.
    let with_output = outcomes.record_start("m", InferenceTask::Generate, "r");
    outcomes.record_complete(&with_output, 500, 10, 20);
    // Completed, but with zero output tokens.
    let without_output = outcomes.record_start("m", InferenceTask::Generate, "r");
    outcomes.record_complete(&without_output, 300, 5, 0);
    // Started and never completed.
    let _never_completed = outcomes.record_start("m", InferenceTask::Generate, "r");

    let evicted = outcomes.sweep_pending_at(0, now_unix() + 10);
    assert_eq!(evicted, 3, "all pending entries evicted");

    // Order receipts by latency so the assertions below are deterministic.
    let mut receipts = outcomes.drain_ledger();
    receipts.sort_by_key(|receipt| receipt.latency_ms);
    assert_eq!(receipts.len(), 2);

    let (empty_reply, credited) = (&receipts[0], &receipts[1]);
    assert_eq!(empty_reply.latency_ms, 300);
    assert_eq!(empty_reply.success, None);
    assert_eq!(credited.latency_ms, 500);
    assert_eq!(credited.success, Some(true));
    assert_eq!(credited.quality, None);

    let profile = outcomes.profile("m").expect("profile exists");
    assert_eq!(profile.success_count, 1);
    assert_eq!(profile.ema_quality, 0.5);
}
/// A completion with output is credited as a success at `record_complete`
/// time, and a later sweep of pending traces must not credit it a second
/// time.
#[test]
fn record_complete_credits_success_immediately() {
    let mut outcomes = OutcomeTracker::new();
    let trace_id = outcomes.record_start("m", InferenceTask::Generate, "r");
    outcomes.record_complete(&trace_id, 500, 12, 20);

    let after_complete = outcomes.profile("m").expect("profile exists");
    assert_eq!(after_complete.success_count, 1, "success credited at completion");
    assert_eq!(after_complete.total_calls, 1);
    assert_eq!(after_complete.total_input_tokens, 12, "input tokens recorded, not 0");
    assert_eq!(after_complete.fail_count, 0);

    // Sweeping afterwards must leave the success counter untouched.
    outcomes.sweep_pending_at(0, now_unix() + 10);
    let after_sweep = outcomes.profile("m").unwrap();
    assert_eq!(after_sweep.success_count, 1, "sweep does not re-credit");
}
/// `success_rate_resolved` returns `None` for a brand-new profile and the
/// measured ratio (3 successes out of 4 resolved calls = 0.75) once counts
/// exist.
#[test]
fn success_rate_resolved_distinguishes_no_signal_from_measured() {
    let mut profile = ModelProfile::new(String::from("m"));

    // Fresh profile: nothing resolved yet.
    let unresolved = profile.success_rate_resolved();
    assert_eq!(unresolved, None, "no resolved signal, not a fabricated 0.5");

    // Three successes and one failure.
    profile.success_count = 3;
    profile.fail_count = 1;
    let measured = profile.success_rate_resolved();
    assert_eq!(measured, Some(0.75), "real rate once resolved");
}
/// `quality_observations` counts graded signals only: a bare completion
/// leaves it (and the quality EMA) untouched, an explicit accept raises it to
/// 1 and moves the EMA up, and a failure with a generic error message counts
/// as a further graded observation.
#[test]
fn quality_observations_count_only_graded_signals() {
    let mut outcomes = OutcomeTracker::new();

    // Step 1: mechanical success only.
    let first = outcomes.record_start("m", InferenceTask::Generate, "r");
    outcomes.record_complete(&first, 500, 12, 20);
    let mechanical = outcomes.profile("m").unwrap();
    assert_eq!(mechanical.success_count, 1);
    assert_eq!(
        mechanical.quality_observations, 0,
        "mechanical success is not a graded quality observation"
    );
    assert!(
        (mechanical.ema_quality - 0.5).abs() < 1e-9,
        "EMA untouched by mechanical success"
    );

    // Step 2: an explicit accept is a graded signal.
    outcomes.record_inferred_outcome(&first, InferredOutcome::Accepted { confidence: 0.9 });
    let graded = outcomes.profile("m").unwrap();
    assert_eq!(graded.quality_observations, 1, "graded accept signal counts");
    assert!(graded.ema_quality > 0.5, "graded accept moved the EMA up");
    let generate_stats = graded.task_stats(InferenceTask::Generate).unwrap();
    assert_eq!(
        generate_stats.quality_observations, 1,
        "per-task graded count tracked too"
    );

    // Step 3: a failure is graded as well.
    let second = outcomes.record_start("m", InferenceTask::Generate, "r");
    outcomes.record_failure(&second, "boom");
    let after_failure = outcomes.profile("m").unwrap();
    assert_eq!(
        after_failure.quality_observations, 2,
        "failure is a graded observation"
    );
}
/// A completion whose last argument (0 here) reports no output tokens still
/// counts as a call but must not be credited as a success.
#[test]
fn record_complete_no_output_is_not_a_success() {
    let mut outcomes = OutcomeTracker::new();
    let trace_id = outcomes.record_start("m", InferenceTask::Generate, "r");
    outcomes.record_complete(&trace_id, 300, 5, 0);

    let profile = outcomes.profile("m").unwrap();
    assert_eq!(profile.success_count, 0, "no output -> no mechanical success");
    assert_eq!(profile.total_calls, 1);
}
/// Every failed call must be included in `total_calls`, so `fail_count` can
/// never exceed it.
#[test]
fn record_failure_counts_total_calls() {
    let mut outcomes = OutcomeTracker::new();

    // Three calls, all of which fail.
    for _attempt in 0..3 {
        let trace_id = outcomes.record_start("m", InferenceTask::Generate, "r");
        outcomes.record_failure(&trace_id, "boom 500");
    }

    let profile = outcomes.profile("m").unwrap();
    assert_eq!(profile.fail_count, 3);
    assert_eq!(profile.total_calls, 3, "failures counted in total_calls");
    assert!(profile.fail_count <= profile.total_calls);
}
/// Failures that produced no answer (timeouts, auth rejections) are counted
/// in `fail_count` / `total_calls` but must leave the quality EMA and
/// `quality_observations` unchanged. A failure caused by the model's own
/// output must still pull the EMA down.
#[test]
fn transport_failures_are_quality_neutral() {
    let mut outcomes = OutcomeTracker::new();

    // One graded failure first, then capture the baseline it leaves behind.
    let graded = outcomes.record_start("m", InferenceTask::Code, "r");
    outcomes.record_failure(&graded, "model produced malformed output");
    let baseline_ema = outcomes.profile("m").unwrap().ema_quality;
    let baseline_qobs = outcomes.profile("m").unwrap().quality_observations;

    // Five timeouts followed by five auth rejections, none of which
    // returned an answer.
    let no_answer_errors = [
        "daemon read timeout on infer after 30s",
        "inference failed: Parslee org lookup failed: HTTP 401 Unauthorized: Authentication",
    ];
    for message in no_answer_errors {
        for _attempt in 0..5 {
            let trace_id = outcomes.record_start("m", InferenceTask::Code, "r");
            outcomes.record_failure(&trace_id, message);
        }
    }

    let p = outcomes.profile("m").unwrap();
    assert_eq!(p.fail_count, 11, "no-answer failures still counted for availability");
    assert_eq!(p.total_calls, 11, "every call counted (failures included)");
    assert!(
        (p.ema_quality - baseline_ema).abs() < 1e-9,
        "timeouts AND auth rejections must not move the quality EMA ({baseline_ema} -> {})",
        p.ema_quality
    );
    assert_eq!(
        p.quality_observations, baseline_qobs,
        "no-answer failures (transport + auth) are not graded quality evidence"
    );

    // A second graded failure must lower the EMA below the baseline.
    let regraded = outcomes.record_start("m", InferenceTask::Code, "r");
    outcomes.record_failure(&regraded, "model produced malformed output");
    let ema_after_graded_failure = outcomes.profile("m").unwrap().ema_quality;
    assert!(
        ema_after_graded_failure < baseline_ema,
        "a non-transport failure must still move the quality EMA down"
    );
}
/// Pins the classification boundary of `is_no_answer_failure`: the first
/// list holds error messages that must be classified as no-answer failures,
/// the second list holds messages that must not be.
#[test]
fn is_no_answer_failure_precision_boundary() {
    // Messages that must be treated as "no answer" (quality-neutral).
    let quality_neutral = [
        "daemon read timeout on infer after 30s",
        "connection reset by peer",
        "upstream returned 503 service unavailable",
        "HTTP 500 Internal Server Error",
        "stream closed unexpectedly (eof)",
        "HTTP 401 Unauthorized",
        "unauthenticated request",
        "HTTP 403 Forbidden",
        "permission denied by gateway",
        "Parslee org lookup failed: Authentication required",
        "oauth: invalid_client",
        "invalid api key",
        "HTTP 400 Bad Request: max context length exceeded",
    ];
    for s in quality_neutral {
        let classified_no_answer = is_no_answer_failure(s);
        assert!(
            classified_no_answer,
            "expected no-answer (quality-neutral): {s:?}"
        );
    }

    // Messages that must stay graded generation failures.
    let graded = [
        "model produced malformed output",
        "field 'user' not found in model response",
        "tokenizer: invalid token id 99999",
        "response failed JSON schema validation",
        "tool call arguments did not validate",
    ];
    for s in graded {
        let classified_no_answer = is_no_answer_failure(s);
        assert!(
            !classified_no_answer,
            "expected generation failure (graded): {s:?}"
        );
    }
}
/// Every failure is written to the ledger with `success == Some(false)`, but
/// only a generation failure gets a graded quality of `0.0`. Transport, rate
/// limit, server-side and auth failures carry `quality == None`.
#[test]
fn ledger_quality_distinguishes_generation_from_transport_failure() {
    let mut outcomes = OutcomeTracker::new();

    // Recorded in this order; the expectations table below is index-aligned.
    let failures = [
        "model produced malformed output",
        "daemon read timeout on infer after 30s",
        "429 RESOURCE_EXHAUSTED",
        "upstream: 503 service unavailable",
        "HTTP 500 Internal Server Error",
        "inference failed: Parslee org lookup failed: HTTP 401 Unauthorized: Authentication",
    ];
    for message in failures {
        let trace_id = outcomes.record_start("m", InferenceTask::Code, "r");
        outcomes.record_failure(&trace_id, message);
    }

    let entries = outcomes.drain_ledger();
    assert_eq!(entries.len(), 6);
    assert!(entries.iter().all(|e| e.success == Some(false)));

    // Expected quality per entry, paired with the assertion message.
    let expectations = [
        (
            Some(0.0),
            "generation failure -> graded 0.0 answer quality",
        ),
        (
            None,
            "transport failure produced no answer -> quality unknown (None)",
        ),
        (
            None,
            "429 produced no answer -> quality unknown (None), an availability event",
        ),
        (None, "5xx infra failure produced no answer -> None"),
        (
            None,
            "an 'internal server error' (500) is a server fault, not a bad answer -> None",
        ),
        (
            None,
            "a 401 auth rejection produced no answer -> None (not a bad-answer 0.0)",
        ),
    ];
    for (entry, (quality, why)) in entries.iter().zip(expectations) {
        assert_eq!(entry.quality, quality, "{why}");
    }
}
/// A `Rejected` signal arriving after a completion was already credited must
/// move that call from the success counter to the failure counter.
#[test]
fn real_failure_signal_reclassifies_mechanical_success() {
    let mut outcomes = OutcomeTracker::new();
    let trace_id = outcomes.record_start("m", InferenceTask::Generate, "r");
    outcomes.record_complete(&trace_id, 100, 8, 15);

    // The completion alone counts as one success.
    let credited = outcomes.profile("m").unwrap().success_count;
    assert_eq!(credited, 1);

    // The rejection undoes that credit and books a failure instead.
    let rejection = InferredOutcome::Rejected { confidence: 0.9 };
    outcomes.record_inferred_outcome(&trace_id, rejection);
    let profile = outcomes.profile("m").unwrap();
    assert_eq!(profile.success_count, 0, "mechanical success undone");
    assert_eq!(profile.fail_count, 1, "failure booked");
}
/// An `Accepted` signal for a call that was already credited at completion
/// must not increment `success_count` a second time.
#[test]
fn real_success_signal_does_not_double_count() {
    let mut outcomes = OutcomeTracker::new();
    let trace_id = outcomes.record_start("m", InferenceTask::Generate, "r");
    outcomes.record_complete(&trace_id, 100, 8, 15);

    let acceptance = InferredOutcome::Accepted { confidence: 0.9 };
    outcomes.record_inferred_outcome(&trace_id, acceptance);

    let profile = outcomes.profile("m").unwrap();
    assert_eq!(
        profile.success_count, 1,
        "Accepted on an already-credited call is not +2"
    );
}
/// Profiles exported from one tracker can be imported into a fresh tracker,
/// after which the model's profile can be looked up there.
#[test]
fn export_import() {
    // Source tracker with a single resolved call for model "m1".
    let mut source = OutcomeTracker::new();
    let trace_id = source.record_start("m1", InferenceTask::Generate, "test");
    source.record_complete(&trace_id, 100, 10, 5);
    source.record_inferred_outcome(&trace_id, InferredOutcome::Accepted { confidence: 0.9 });

    let snapshot = source.export_profiles();
    assert_eq!(snapshot.len(), 1);

    // A fresh tracker learns about "m1" purely from the imported snapshot.
    let mut restored = OutcomeTracker::new();
    restored.import_profiles(snapshot);
    assert!(restored.profile("m1").is_some());
}
}