use serde::{Deserialize, Serialize};
use std::collections::VecDeque;
use std::time::Duration;
/// Per-phase latency sample sets for a request pipeline.
///
/// Every field is an independent [`LatencyStats`], which stores its samples
/// in seconds. The phase descriptions below are taken from the field names.
/// NOTE(review): confirm against the code that actually records each phase.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LatencyDistribution {
/// Prefill-phase latencies; `summary` prints the p50 as "Prefill".
pub prefill: LatencyStats,
/// Decode latencies; `summary` prints the p50 labelled "ms/tok".
pub decode: LatencyStats,
/// Routing latencies; `summary` prints the p50 as "Route".
pub routing: LatencyStats,
/// Adapter-swap latencies (not part of `summary`).
pub adapter_swap: LatencyStats,
/// Micro-LoRA adaptation latencies (not part of `summary`).
pub micro_lora_adapt: LatencyStats,
/// Consolidation latencies (not part of `summary`).
pub consolidation: LatencyStats,
/// Whole-request latencies; `summary` prints p50, p95 and p99 as "E2E".
pub end_to_end: LatencyStats,
}
impl LatencyDistribution {
    /// Builds a distribution in which every phase starts with no samples.
    pub fn new() -> Self {
        let empty = LatencyStats::new;
        Self {
            prefill: empty(),
            decode: empty(),
            routing: empty(),
            adapter_swap: empty(),
            micro_lora_adapt: empty(),
            consolidation: empty(),
            end_to_end: empty(),
        }
    }

    /// Renders a one-line digest of the headline latencies.
    ///
    /// Percentiles come back from [`LatencyStats`] in seconds and are
    /// multiplied by 1000 so the text reads in milliseconds. Only the
    /// end-to-end, prefill, decode and routing phases are included.
    pub fn summary(&self) -> String {
        let to_ms = |secs: f64| secs * 1000.0;

        let e2e_p50 = to_ms(self.end_to_end.p50());
        let e2e_p95 = to_ms(self.end_to_end.p95());
        let e2e_p99 = to_ms(self.end_to_end.p99());
        let prefill_p50 = to_ms(self.prefill.p50());
        let decode_p50 = to_ms(self.decode.p50());
        let routing_p50 = to_ms(self.routing.p50());

        format!(
            "E2E: p50={:.1}ms p95={:.1}ms p99={:.1}ms | Prefill: {:.1}ms | Decode: {:.3}ms/tok | Route: {:.2}ms",
            e2e_p50, e2e_p95, e2e_p99, prefill_p50, decode_p50, routing_p50,
        )
    }
}
impl Default for LatencyDistribution {
fn default() -> Self {
Self::new()
}
}
/// Growable set of latency observations, kept as raw `f64` seconds.
///
/// Nothing is pre-aggregated: every statistic is computed on demand from
/// the stored samples, and the code in this file never removes a sample
/// once added.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LatencyStats {
/// Observations in insertion order, in seconds (`add` converts with
/// `Duration::as_secs_f64`; `add_secs` stores its argument as given).
samples: Vec<f64>,
}
impl LatencyStats {
    /// Creates an empty sample set.
    pub fn new() -> Self {
        Self {
            samples: Vec::new(),
        }
    }

    /// Records one observation, stored as fractional seconds.
    pub fn add(&mut self, duration: Duration) {
        self.samples.push(duration.as_secs_f64());
    }

    /// Records one observation that is already expressed in seconds.
    ///
    /// The value is stored as given, with no validation, so NaN, infinite
    /// or negative inputs are accepted and take part in later statistics.
    pub fn add_secs(&mut self, secs: f64) {
        self.samples.push(secs);
    }

    /// Returns the nearest-rank sample for percentile `p` (expected 0–100),
    /// or `0.0` when no samples have been recorded.
    ///
    /// Samples are ordered with [`f64::total_cmp`], so a NaN sample can
    /// never cause a panic; NaNs are placed at the extremes of the order.
    fn percentile(&self, p: f64) -> f64 {
        if self.samples.is_empty() {
            return 0.0;
        }
        let last = self.samples.len() - 1;
        // Float-to-usize casts saturate: a negative or NaN `p` selects rank 0,
        // and `min` clamps anything above 100 to the largest sample.
        let rank = (((p / 100.0) * last as f64).round() as usize).min(last);

        // Work on a copy so `&self` stays immutable; selection is O(n) and
        // yields the same element a full sort would place at `rank`.
        let mut scratch = self.samples.clone();
        let (_, nth, _) = scratch.select_nth_unstable_by(rank, f64::total_cmp);
        *nth
    }

    /// Median latency in seconds (`0.0` when empty).
    pub fn p50(&self) -> f64 {
        self.percentile(50.0)
    }

    /// 95th-percentile latency in seconds (`0.0` when empty).
    pub fn p95(&self) -> f64 {
        self.percentile(95.0)
    }

    /// 99th-percentile latency in seconds (`0.0` when empty).
    pub fn p99(&self) -> f64 {
        self.percentile(99.0)
    }

    /// Arithmetic mean in seconds, or `0.0` when empty.
    pub fn mean(&self) -> f64 {
        if self.samples.is_empty() {
            return 0.0;
        }
        self.samples.iter().sum::<f64>() / self.samples.len() as f64
    }

    /// Number of recorded samples.
    pub fn count(&self) -> usize {
        self.samples.len()
    }

    /// Smallest sample in seconds.
    ///
    /// Returns `f64::INFINITY` when empty (the identity of `min`), unlike
    /// `mean` and the percentiles, which return `0.0`.
    pub fn min(&self) -> f64 {
        self.samples.iter().copied().fold(f64::INFINITY, f64::min)
    }

    /// Largest sample in seconds.
    ///
    /// Returns `f64::NEG_INFINITY` when empty (the identity of `max`).
    pub fn max(&self) -> f64 {
        self.samples
            .iter()
            .copied()
            .fold(f64::NEG_INFINITY, f64::max)
    }
}
impl Default for LatencyStats {
fn default() -> Self {
Self::new()
}
}
/// Usage counters plus the unit prices needed to turn them into a cost.
///
/// Prices are plain `f64` amounts; `EconomicsMetrics::summary` prints the
/// resulting totals with a `$` prefix.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CostTracker {
/// Input tokens consumed so far.
pub input_tokens: u64,
/// Output tokens produced so far.
pub output_tokens: u64,
/// Number of tool calls; each is charged `tool_call_cost`.
pub tool_calls: u64,
/// Retry count. Merged by `add`, but not priced by `total_cost`.
pub retries: u64,
/// Price per 1,000,000 input tokens.
pub input_cost_per_million: f64,
/// Price per 1,000,000 output tokens.
pub output_cost_per_million: f64,
/// Flat price per tool call (every preset in this file sets it to 0.0).
pub tool_call_cost: f64,
}
impl CostTracker {
pub fn with_claude_pricing() -> Self {
Self {
input_tokens: 0,
output_tokens: 0,
tool_calls: 0,
retries: 0,
input_cost_per_million: 3.0,
output_cost_per_million: 15.0,
tool_call_cost: 0.0,
}
}
pub fn with_haiku_pricing() -> Self {
Self {
input_tokens: 0,
output_tokens: 0,
tool_calls: 0,
retries: 0,
input_cost_per_million: 0.25,
output_cost_per_million: 1.25,
tool_call_cost: 0.0,
}
}
pub fn with_opus_pricing() -> Self {
Self {
input_tokens: 0,
output_tokens: 0,
tool_calls: 0,
retries: 0,
input_cost_per_million: 15.0,
output_cost_per_million: 75.0,
tool_call_cost: 0.0,
}
}
pub fn total_cost(&self) -> f64 {
let input_cost = (self.input_tokens as f64 / 1_000_000.0) * self.input_cost_per_million;
let output_cost = (self.output_tokens as f64 / 1_000_000.0) * self.output_cost_per_million;
let tool_cost = self.tool_calls as f64 * self.tool_call_cost;
input_cost + output_cost + tool_cost
}
pub fn cost_per_token(&self) -> f64 {
let total_tokens = self.input_tokens + self.output_tokens;
if total_tokens == 0 {
return 0.0;
}
self.total_cost() / total_tokens as f64
}
pub fn add(&mut self, other: &CostTracker) {
self.input_tokens += other.input_tokens;
self.output_tokens += other.output_tokens;
self.tool_calls += other.tool_calls;
self.retries += other.retries;
}
}
impl Default for CostTracker {
fn default() -> Self {
Self::with_claude_pricing()
}
}
/// Request outcome counters and load samples used to judge service health.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StabilityMetrics {
/// All requests seen; denominator of `success_rate` and `oom_rate`.
pub total_requests: u64,
/// Requests that completed successfully; numerator of `success_rate`.
pub successful_requests: u64,
/// Requests that failed. Not read by any method in this file:
/// `failure_rate` is derived from `success_rate` instead.
pub failed_requests: u64,
/// Out-of-memory events; numerator of `oom_rate`.
pub oom_events: u64,
/// Timeout events. Not read by any method in this file.
pub timeout_events: u64,
/// Queue wait times in seconds; `is_stable` checks their p95.
pub queue_times: LatencyStats,
/// Recent throughput readings, oldest first; `record_throughput` evicts
/// the oldest entry once 100 are stored.
pub throughput_samples: VecDeque<f64>,
/// Presumably the peak number of simultaneous requests. Not read by any
/// method in this file — TODO confirm with the code that updates it.
pub max_concurrent: u64,
/// Presumably the number of requests in flight right now. Not read by
/// any method in this file — TODO confirm with the code that updates it.
pub current_concurrent: u64,
}
impl StabilityMetrics {
    /// Maximum number of throughput readings retained by `record_throughput`.
    const THROUGHPUT_WINDOW: usize = 100;

    /// Creates metrics with all counters at zero and no samples.
    pub fn new() -> Self {
        Self {
            total_requests: 0,
            successful_requests: 0,
            failed_requests: 0,
            oom_events: 0,
            timeout_events: 0,
            queue_times: LatencyStats::new(),
            throughput_samples: VecDeque::with_capacity(Self::THROUGHPUT_WINDOW),
            max_concurrent: 0,
            current_concurrent: 0,
        }
    }

    /// Fraction of requests that succeeded, or `1.0` before any request has
    /// been counted.
    pub fn success_rate(&self) -> f64 {
        if self.total_requests == 0 {
            return 1.0;
        }
        self.successful_requests as f64 / self.total_requests as f64
    }

    /// Complement of `success_rate`; `failed_requests` is not consulted.
    pub fn failure_rate(&self) -> f64 {
        1.0 - self.success_rate()
    }

    /// Fraction of requests that hit an out-of-memory event, or `0.0`
    /// before any request has been counted.
    pub fn oom_rate(&self) -> f64 {
        if self.total_requests == 0 {
            return 0.0;
        }
        self.oom_events as f64 / self.total_requests as f64
    }

    /// Mean of the retained throughput readings, or `0.0` when there are none.
    pub fn avg_throughput(&self) -> f64 {
        if self.throughput_samples.is_empty() {
            return 0.0;
        }
        self.throughput_samples.iter().sum::<f64>() / self.throughput_samples.len() as f64
    }

    /// Appends a throughput reading, keeping only the most recent
    /// `THROUGHPUT_WINDOW` (100) readings.
    pub fn record_throughput(&mut self, requests_per_second: f64) {
        // `throughput_samples` is public and deserializable, so it may already
        // hold more than the window; evict until there is room for exactly
        // one more instead of popping a single entry.
        while self.throughput_samples.len() >= Self::THROUGHPUT_WINDOW {
            self.throughput_samples.pop_front();
        }
        self.throughput_samples.push_back(requests_per_second);
    }

    /// Health check: success rate strictly above 95%, OOM rate strictly
    /// below 1%, and queue-time p95 strictly below 5.0 seconds.
    pub fn is_stable(&self) -> bool {
        self.success_rate() > 0.95 && self.oom_rate() < 0.01 && self.queue_times.p95() < 5.0
    }
}
impl Default for StabilityMetrics {
fn default() -> Self {
Self::new()
}
}
/// Aggregate view combining latency, cost and stability for a workload.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EconomicsMetrics {
/// Per-phase latency samples; included in `summary`.
pub latency: LatencyDistribution,
/// Token and tool usage with its pricing; source of the total cost.
pub cost: CostTracker,
/// Request health counters; consulted by `is_economical`.
pub stability: StabilityMetrics,
/// Number of successful tasks; divisor used by `recalculate` and the
/// numerator of the "Success: a/b" part of `summary`.
pub successful_tasks: u64,
/// Derived value: total cost divided by `successful_tasks`. Only
/// refreshed when `recalculate` is called.
pub cost_per_accepted_patch: f64,
}
impl EconomicsMetrics {
    /// Creates empty metrics using `CostTracker::with_claude_pricing` for
    /// cost accounting.
    pub fn new() -> Self {
        Self {
            latency: LatencyDistribution::new(),
            cost: CostTracker::with_claude_pricing(),
            stability: StabilityMetrics::new(),
            successful_tasks: 0,
            cost_per_accepted_patch: 0.0,
        }
    }

    /// Refreshes `cost_per_accepted_patch` from the current cost and
    /// `successful_tasks`.
    ///
    /// With zero successful tasks the value is reset to `0.0` (the same
    /// state `new` produces) so that a figure computed from an earlier
    /// state is never left behind.
    pub fn recalculate(&mut self) {
        self.cost_per_accepted_patch = match self.successful_tasks {
            0 => 0.0,
            accepted => self.cost.total_cost() / accepted as f64,
        };
    }

    /// Returns `true` when the cost per accepted patch is at or below
    /// `target_cost` and the stability metrics report a stable system.
    ///
    /// Reads the cached `cost_per_accepted_patch`; call `recalculate` first
    /// if the counters have changed.
    /// NOTE(review): with zero successful tasks the cached cost is `0.0`,
    /// so this returns `true` for any non-negative target whenever the
    /// system is stable — confirm that is intended.
    pub fn is_economical(&self, target_cost: f64) -> bool {
        let within_budget = self.cost_per_accepted_patch <= target_cost;
        within_budget && self.stability.is_stable()
    }

    /// Renders a one-line report: cost per patch, total cost, successful
    /// tasks over total requests, then the latency summary.
    pub fn summary(&self) -> String {
        let total_cost = self.cost.total_cost();
        let latency_line = self.latency.summary();
        format!(
            "Cost/patch: ${:.4} | Total: ${:.2} | Success: {}/{} | {}",
            self.cost_per_accepted_patch,
            total_cost,
            self.successful_tasks,
            self.stability.total_requests,
            latency_line,
        )
    }
}
impl Default for EconomicsMetrics {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Samples of 1 ms..=100 ms must put p50/p95/p99 within 2 ms of the
    /// nominal 50/95/99 ms values.
    #[test]
    fn test_latency_percentiles() {
        let mut stats = LatencyStats::new();
        (1..=100).for_each(|ms| stats.add_secs(ms as f64 / 1000.0));

        let expectations = [
            (stats.p50(), 0.050),
            (stats.p95(), 0.095),
            (stats.p99(), 0.099),
        ];
        for (observed, nominal) in expectations {
            assert!((observed - nominal).abs() < 0.002);
        }
    }

    /// 1M input tokens at 3.0 plus 100K output tokens at 15.0 per million
    /// come to 4.50.
    #[test]
    fn test_cost_calculation() {
        let tracker = CostTracker {
            input_tokens: 1_000_000,
            output_tokens: 100_000,
            ..CostTracker::with_claude_pricing()
        };

        let total = tracker.total_cost();
        assert!((total - 4.50).abs() < 0.01);
    }

    /// A total cost of 45.0 spread over 100 successful tasks is 0.45 each.
    #[test]
    fn test_cost_per_accepted_patch() {
        let mut metrics = EconomicsMetrics::new();
        metrics.successful_tasks = 100;
        metrics.cost.input_tokens = 10_000_000;
        metrics.cost.output_tokens = 1_000_000;

        metrics.recalculate();

        let per_patch = metrics.cost_per_accepted_patch;
        assert!((per_patch - 0.45).abs() < 0.01);
    }

    /// 95 successes and 1 OOM out of 100 requests give rates of 0.95 and 0.01.
    #[test]
    fn test_stability_rates() {
        let stability = StabilityMetrics {
            total_requests: 100,
            successful_requests: 95,
            failed_requests: 5,
            oom_events: 1,
            ..StabilityMetrics::new()
        };

        assert!((stability.success_rate() - 0.95).abs() < 0.001);
        assert!((stability.oom_rate() - 0.01).abs() < 0.001);
    }

    /// The Opus input price must be more than 50x the Haiku input price.
    #[test]
    fn test_haiku_vs_opus_pricing() {
        let haiku = CostTracker::with_haiku_pricing();
        let opus = CostTracker::with_opus_pricing();

        let input_price_ratio = opus.input_cost_per_million / haiku.input_cost_per_million;
        assert!(input_price_ratio > 50.0);
    }
}