entrenar/monitor/llm/
stats.rs1use crate::monitor::llm::LLMMetrics;
4use serde::{Deserialize, Serialize};
5
6#[derive(Debug, Clone, Default, Serialize, Deserialize)]
8pub struct LLMStats {
9 pub n_calls: usize,
11 pub total_tokens: u64,
13 pub total_prompt_tokens: u64,
15 pub total_completion_tokens: u64,
17 pub total_cost: f64,
19 pub avg_latency_ms: f64,
21 pub avg_tokens_per_second: f64,
23 pub p50_latency_ms: f64,
25 pub p95_latency_ms: f64,
27 pub p99_latency_ms: f64,
29}
30
31impl LLMStats {
32 pub fn from_metrics(metrics: &[LLMMetrics]) -> Self {
34 if metrics.is_empty() {
35 return Self::default();
36 }
37
38 let n = metrics.len();
39 let total_tokens: u64 = metrics.iter().map(|m| u64::from(m.total_tokens)).sum();
40 let total_prompt: u64 = metrics.iter().map(|m| u64::from(m.prompt_tokens)).sum();
41 let total_completion: u64 = metrics.iter().map(|m| u64::from(m.completion_tokens)).sum();
42 let total_cost: f64 =
43 metrics.iter().map(|m| m.cost_usd.unwrap_or_else(|| m.estimate_cost())).sum();
44
45 let avg_latency: f64 = metrics.iter().map(|m| m.latency_ms).sum::<f64>() / n as f64;
46 let avg_tps: f64 = metrics.iter().map(|m| m.tokens_per_second).sum::<f64>() / n as f64;
47
48 let mut latencies: Vec<f64> = metrics.iter().map(|m| m.latency_ms).collect();
50 latencies.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
51
52 let p50 = percentile(&latencies, 50.0);
53 let p95 = percentile(&latencies, 95.0);
54 let p99 = percentile(&latencies, 99.0);
55
56 Self {
57 n_calls: n,
58 total_tokens,
59 total_prompt_tokens: total_prompt,
60 total_completion_tokens: total_completion,
61 total_cost,
62 avg_latency_ms: avg_latency,
63 avg_tokens_per_second: avg_tps,
64 p50_latency_ms: p50,
65 p95_latency_ms: p95,
66 p99_latency_ms: p99,
67 }
68 }
69}
70
/// Picks the `p`-th percentile (with `p` on a 0–100 scale) from a slice that
/// the caller has already sorted in ascending order.
///
/// The rank is `round(p / 100 * (len - 1))`, i.e. the nearest element is
/// returned without interpolating between neighbours. The rank is clamped to
/// the last index, and the float-to-integer cast saturates, so a `p` outside
/// `0..=100` resolves to the first or last element rather than panicking.
///
/// Returns `0.0` for an empty slice.
pub fn percentile(sorted: &[f64], p: f64) -> f64 {
    match sorted.len().checked_sub(1) {
        // `len == 0`: there is nothing to select.
        None => 0.0,
        Some(last) => {
            let fraction = p / 100.0;
            let rank = (fraction * last as f64).round() as usize;
            sorted[rank.min(last)]
        }
    }
}