use parking_lot::Mutex;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
use std::time::{Duration, SystemTime};
/// One recorded model call: which model was used, what it cost and how long
/// it took.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CallRecord {
    /// Identifier of the model that served the call.
    pub model: String,
    /// Cost attributed to the call; per the field name, in US dollars.
    pub cost_usd: f64,
    /// Latency reported for the call.
    pub latency: Duration,
    /// Wall-clock time attached to the call. `RunHandle::record` fills this
    /// with `SystemTime::now()` at the moment the call is recorded.
    pub timestamp: SystemTime,
}
/// A finished run: a named group of calls together with its start and end
/// times.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RunRecord {
    /// Name given to the run when it was started.
    pub name: String,
    /// Calls recorded during the run, in the order they were pushed.
    pub calls: Vec<CallRecord>,
    /// Wall-clock time at which the run was started.
    pub started_at: SystemTime,
    /// Wall-clock time at which the run was finished.
    pub ended_at: SystemTime,
}
impl RunRecord {
    /// Returns the sum of `cost_usd` over every call in this run.
    pub fn total_cost_usd(&self) -> f64 {
        let costs = self.calls.iter().map(|call| call.cost_usd);
        costs.sum()
    }

    /// Returns the sum of the latencies of every call in this run.
    ///
    /// # Panics
    ///
    /// Panics if the summed latency overflows `Duration`.
    pub fn total_latency(&self) -> Duration {
        let latencies = self.calls.iter().map(|call| call.latency);
        latencies.sum()
    }

    /// Returns how many calls this run contains.
    pub fn call_count(&self) -> usize {
        let recorded = &self.calls;
        recorded.len()
    }
}
/// Handle to a run that is still in progress.
///
/// Calls are added with `record`; `finish` turns the handle into a
/// [`RunRecord`] and stores a copy in the tracer that created it.
pub struct RunHandle {
    /// Name of the run.
    name: String,
    /// Wall-clock time at which the handle was created.
    started_at: SystemTime,
    /// Calls recorded so far; behind a mutex so `record` can take `&self`.
    calls: Mutex<Vec<CallRecord>>,
    /// Shared tracer state that receives the finished run.
    parent: Arc<TracerInner>,
}
impl RunHandle {
    /// Appends one call to this run.
    ///
    /// `model` is converted into an owned `String`; the call's timestamp is
    /// the current wall-clock time.
    pub fn record(&self, model: impl Into<String>, cost_usd: f64, latency: Duration) {
        // Acquire the lock first so the record (and its timestamp) is built
        // while the lock is held.
        let mut pending = self.calls.lock();
        let entry = CallRecord {
            model: model.into(),
            cost_usd,
            latency,
            timestamp: SystemTime::now(),
        };
        pending.push(entry);
    }

    /// Ends the run and returns its record.
    ///
    /// The recorded calls are moved out of the handle, the end time is set to
    /// the current wall-clock time, and a clone of the resulting record is
    /// appended to the parent tracer's list of runs.
    pub fn finish(self) -> RunRecord {
        let recorded = {
            let mut guard = self.calls.lock();
            std::mem::take(&mut *guard)
        };
        let finished = RunRecord {
            name: self.name.clone(),
            calls: recorded,
            started_at: self.started_at,
            ended_at: SystemTime::now(),
        };
        {
            // Keep the tracer lock only for the duration of the push.
            let mut history = self.parent.runs.lock();
            history.push(finished.clone());
        }
        finished
    }
}
/// Totals for a single model, as accumulated by `Tracer::aggregate`.
#[derive(Debug, Clone, Default)]
pub struct ModelBreakdown {
    /// Number of calls made to the model.
    pub calls: usize,
    /// Summed cost of those calls; per the field name, in US dollars.
    pub cost_usd: f64,
    /// Summed latency of those calls.
    pub total_latency: Duration,
}
/// Summary statistics over the runs held by a tracer, as produced by
/// `Tracer::aggregate`.
#[derive(Debug, Clone, Default)]
pub struct Aggregate {
    /// Number of runs included in the summary.
    pub runs: usize,
    /// Number of calls across those runs.
    pub calls: usize,
    /// Summed cost of all calls; per the field name, in US dollars.
    pub total_cost_usd: f64,
    /// 50th-percentile call latency.
    pub p50_latency: Duration,
    /// 95th-percentile call latency.
    pub p95_latency: Duration,
    /// Per-model totals, keyed by model name.
    pub by_model: HashMap<String, ModelBreakdown>,
}
/// State shared between a [`Tracer`], its clones, and the run handles it
/// creates.
struct TracerInner {
    /// Finished runs, in the order they were pushed.
    runs: Mutex<Vec<RunRecord>>,
}
/// Collects finished runs and summarises them.
///
/// Cloning a `Tracer` clones the inner `Arc`, so every clone reads and writes
/// the same list of runs.
#[derive(Clone)]
pub struct Tracer {
    /// Reference-counted shared state.
    inner: Arc<TracerInner>,
}
impl Default for Tracer {
fn default() -> Self {
Self::new()
}
}
impl Tracer {
    /// Creates a tracer with an empty list of runs.
    pub fn new() -> Self {
        Self {
            inner: Arc::new(TracerInner {
                runs: Mutex::new(Vec::new()),
            }),
        }
    }

    /// Starts a run called `name` and returns the handle used to record its
    /// calls.
    ///
    /// The run is only added to this tracer once [`RunHandle::finish`] is
    /// called on the returned handle.
    pub fn run(&self, name: impl Into<String>) -> RunHandle {
        RunHandle {
            name: name.into(),
            started_at: SystemTime::now(),
            calls: Mutex::new(Vec::new()),
            parent: Arc::clone(&self.inner),
        }
    }

    /// Returns a snapshot (deep copy) of every finished run.
    pub fn runs(&self) -> Vec<RunRecord> {
        self.inner.runs.lock().clone()
    }

    /// Discards every finished run held by this tracer.
    pub fn reset(&self) {
        self.inner.runs.lock().clear();
    }

    /// Summarises every finished run.
    ///
    /// Returns `Aggregate::default()` when no run has been finished. Otherwise
    /// the result holds the run and call counts, the summed cost, per-model
    /// totals, and the 50th/95th latency percentiles. A percentile is the
    /// element at index `round((n - 1) * fraction)` of the ascending-sorted
    /// call latencies, or `Duration::ZERO` when there are no calls.
    pub fn aggregate(&self) -> Aggregate {
        let mut latencies: Vec<Duration> = Vec::new();
        let mut by_model: HashMap<String, ModelBreakdown> = HashMap::new();
        let mut total_cost_usd = 0.0_f64;

        // Walk the history in place under the lock instead of deep-cloning
        // every run (and every model string) just to read it.
        let runs = self.inner.runs.lock();
        if runs.is_empty() {
            return Aggregate::default();
        }
        let run_count = runs.len();
        for call in runs.iter().flat_map(|run| run.calls.iter()) {
            latencies.push(call.latency);
            total_cost_usd += call.cost_usd;

            let entry = by_model.entry(call.model.clone()).or_default();
            entry.calls += 1;
            entry.cost_usd += call.cost_usd;
            entry.total_latency += call.latency;
        }
        // Everything needed has been copied out; sort without holding the lock.
        drop(runs);

        // `Duration` is `Ord`, so sort it directly: no narrowing integer cast
        // and no loss of sub-microsecond precision.
        latencies.sort_unstable();
        let percentile = |fraction: f64| -> Duration {
            if latencies.is_empty() {
                return Duration::ZERO;
            }
            let last = latencies.len() - 1;
            let idx = (last as f64 * fraction).round() as usize;
            // Clamp defensively so a fraction above 1.0 cannot index out of range.
            latencies[idx.min(last)]
        };

        Aggregate {
            runs: run_count,
            calls: latencies.len(),
            total_cost_usd,
            p50_latency: percentile(0.50),
            p95_latency: percentile(0.95),
            by_model,
        }
    }
}