Skip to main content

entrenar/monitor/llm/
stats.rs

1//! Aggregate LLM statistics.
2
3use crate::monitor::llm::LLMMetrics;
4use serde::{Deserialize, Serialize};
5
6/// Aggregate LLM statistics
7#[derive(Debug, Clone, Default, Serialize, Deserialize)]
8pub struct LLMStats {
9    /// Number of calls
10    pub n_calls: usize,
11    /// Total tokens used
12    pub total_tokens: u64,
13    /// Total prompt tokens
14    pub total_prompt_tokens: u64,
15    /// Total completion tokens
16    pub total_completion_tokens: u64,
17    /// Total estimated cost
18    pub total_cost: f64,
19    /// Average latency
20    pub avg_latency_ms: f64,
21    /// Average tokens per second
22    pub avg_tokens_per_second: f64,
23    /// P50 latency
24    pub p50_latency_ms: f64,
25    /// P95 latency
26    pub p95_latency_ms: f64,
27    /// P99 latency
28    pub p99_latency_ms: f64,
29}
30
31impl LLMStats {
32    /// Compute stats from metrics
33    pub fn from_metrics(metrics: &[LLMMetrics]) -> Self {
34        if metrics.is_empty() {
35            return Self::default();
36        }
37
38        let n = metrics.len();
39        let total_tokens: u64 = metrics.iter().map(|m| u64::from(m.total_tokens)).sum();
40        let total_prompt: u64 = metrics.iter().map(|m| u64::from(m.prompt_tokens)).sum();
41        let total_completion: u64 = metrics.iter().map(|m| u64::from(m.completion_tokens)).sum();
42        let total_cost: f64 =
43            metrics.iter().map(|m| m.cost_usd.unwrap_or_else(|| m.estimate_cost())).sum();
44
45        let avg_latency: f64 = metrics.iter().map(|m| m.latency_ms).sum::<f64>() / n as f64;
46        let avg_tps: f64 = metrics.iter().map(|m| m.tokens_per_second).sum::<f64>() / n as f64;
47
48        // Compute percentiles
49        let mut latencies: Vec<f64> = metrics.iter().map(|m| m.latency_ms).collect();
50        latencies.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
51
52        let p50 = percentile(&latencies, 50.0);
53        let p95 = percentile(&latencies, 95.0);
54        let p99 = percentile(&latencies, 99.0);
55
56        Self {
57            n_calls: n,
58            total_tokens,
59            total_prompt_tokens: total_prompt,
60            total_completion_tokens: total_completion,
61            total_cost,
62            avg_latency_ms: avg_latency,
63            avg_tokens_per_second: avg_tps,
64            p50_latency_ms: p50,
65            p95_latency_ms: p95,
66            p99_latency_ms: p99,
67        }
68    }
69}
70
/// Looks up the `p`-th percentile in a slice that is already sorted ascending.
///
/// The fractional position `p / 100 * (len - 1)` is rounded to the nearest
/// index and the element at that index is returned; no interpolation between
/// neighbours is performed. Returns `0.0` for an empty slice.
///
/// Out-of-range `p` is tolerated: the float-to-`usize` cast saturates (so a
/// negative or NaN position becomes index 0) and positions past the end are
/// clamped to the last element.
pub fn percentile(sorted: &[f64], p: f64) -> f64 {
    match sorted.len() {
        0 => 0.0,
        len => {
            let last = len - 1;
            let fraction = p / 100.0;
            let rank = (fraction * last as f64).round() as usize;
            sorted[rank.min(last)]
        }
    }
}