Skip to main content

vectorless/metrics/
llm.rs

1// Copyright (c) 2026 vectorless developers
2// SPDX-License-Identifier: Apache-2.0
3
4//! LLM metrics collection.
5
6use std::sync::atomic::{AtomicU64, Ordering};
7
8use crate::config::LlmMetricsConfig;
9
/// LLM metrics tracker.
///
/// Accumulates counters describing LLM usage. Every field is an
/// [`AtomicU64`], which is why the recording methods on this type only need
/// `&self`. The derived `Default` starts every counter at zero.
///
/// Use [`LlmMetrics::generate_report`] to read the counters as plain values.
#[derive(Debug, Default)]
pub struct LlmMetrics {
    /// Total number of LLM calls recorded (incremented on every
    /// `record_call`, regardless of outcome).
    pub total_calls: AtomicU64,
    /// Number of calls recorded with `success == true`.
    pub successful_calls: AtomicU64,
    /// Number of calls recorded with `success == false`.
    pub failed_calls: AtomicU64,
    /// Sum of input tokens. Only accumulated when the config passed to
    /// `record_call` has `track_tokens` set.
    pub total_input_tokens: AtomicU64,
    /// Sum of output tokens. Only accumulated when the config passed to
    /// `record_call` has `track_tokens` set.
    pub total_output_tokens: AtomicU64,
    /// Sum of call latencies in milliseconds. Only accumulated when the
    /// config passed to `record_call` has `track_latency` set.
    pub total_latency_ms: AtomicU64,
    /// Estimated cost in micro-dollars (the report divides this by
    /// 1_000_000 to obtain USD). Only accumulated when the config passed to
    /// `record_call` has `track_cost` set.
    pub estimated_cost_micros: AtomicU64,
    /// Number of rate limit errors (incremented by `record_rate_limit`).
    pub rate_limit_errors: AtomicU64,
    /// Number of timeout errors (incremented by `record_timeout`).
    pub timeout_errors: AtomicU64,
    /// Number of fallback triggers (incremented by `record_fallback`).
    pub fallback_triggers: AtomicU64,
}
34
35impl LlmMetrics {
36    /// Create new LLM metrics.
37    pub fn new() -> Self {
38        Self::default()
39    }
40
41    /// Record an LLM call.
42    pub fn record_call(
43        &self,
44        input_tokens: u64,
45        output_tokens: u64,
46        latency_ms: u64,
47        success: bool,
48        config: &LlmMetricsConfig,
49    ) {
50        self.total_calls.fetch_add(1, Ordering::Relaxed);
51
52        if success {
53            self.successful_calls.fetch_add(1, Ordering::Relaxed);
54        } else {
55            self.failed_calls.fetch_add(1, Ordering::Relaxed);
56        }
57
58        if config.track_tokens {
59            self.total_input_tokens
60                .fetch_add(input_tokens, Ordering::Relaxed);
61            self.total_output_tokens
62                .fetch_add(output_tokens, Ordering::Relaxed);
63        }
64
65        if config.track_latency {
66            self.total_latency_ms
67                .fetch_add(latency_ms, Ordering::Relaxed);
68        }
69
70        if config.track_cost {
71            let cost = config.calculate_cost(input_tokens, output_tokens);
72            // Store in micro-dollars for precision
73            let cost_micros = (cost * 1_000_000.0) as u64;
74            self.estimated_cost_micros
75                .fetch_add(cost_micros, Ordering::Relaxed);
76        }
77    }
78
79    /// Record a rate limit error.
80    pub fn record_rate_limit(&self) {
81        self.rate_limit_errors.fetch_add(1, Ordering::Relaxed);
82    }
83
84    /// Record a timeout error.
85    pub fn record_timeout(&self) {
86        self.timeout_errors.fetch_add(1, Ordering::Relaxed);
87    }
88
89    /// Record a fallback trigger.
90    pub fn record_fallback(&self) {
91        self.fallback_triggers.fetch_add(1, Ordering::Relaxed);
92    }
93
94    /// Reset all metrics.
95    pub fn reset(&self) {
96        self.total_calls.store(0, Ordering::Relaxed);
97        self.successful_calls.store(0, Ordering::Relaxed);
98        self.failed_calls.store(0, Ordering::Relaxed);
99        self.total_input_tokens.store(0, Ordering::Relaxed);
100        self.total_output_tokens.store(0, Ordering::Relaxed);
101        self.total_latency_ms.store(0, Ordering::Relaxed);
102        self.estimated_cost_micros.store(0, Ordering::Relaxed);
103        self.rate_limit_errors.store(0, Ordering::Relaxed);
104        self.timeout_errors.store(0, Ordering::Relaxed);
105        self.fallback_triggers.store(0, Ordering::Relaxed);
106    }
107
108    /// Generate a report snapshot.
109    pub fn generate_report(&self) -> LlmMetricsReport {
110        let total_calls = self.total_calls.load(Ordering::Relaxed);
111        let successful = self.successful_calls.load(Ordering::Relaxed);
112        let failed = self.failed_calls.load(Ordering::Relaxed);
113        let total_latency = self.total_latency_ms.load(Ordering::Relaxed);
114
115        LlmMetricsReport {
116            total_calls,
117            successful_calls: successful,
118            failed_calls: failed,
119            success_rate: if total_calls > 0 {
120                successful as f64 / total_calls as f64
121            } else {
122                0.0
123            },
124            total_input_tokens: self.total_input_tokens.load(Ordering::Relaxed),
125            total_output_tokens: self.total_output_tokens.load(Ordering::Relaxed),
126            total_tokens: self.total_input_tokens.load(Ordering::Relaxed)
127                + self.total_output_tokens.load(Ordering::Relaxed),
128            avg_latency_ms: if total_calls > 0 {
129                total_latency as f64 / total_calls as f64
130            } else {
131                0.0
132            },
133            total_latency_ms: total_latency,
134            estimated_cost_usd: self.estimated_cost_micros.load(Ordering::Relaxed) as f64
135                / 1_000_000.0,
136            rate_limit_errors: self.rate_limit_errors.load(Ordering::Relaxed),
137            timeout_errors: self.timeout_errors.load(Ordering::Relaxed),
138            fallback_triggers: self.fallback_triggers.load(Ordering::Relaxed),
139        }
140    }
141}
142
/// LLM metrics report.
///
/// Plain-value snapshot of the [`LlmMetrics`] counters, as produced by
/// [`LlmMetrics::generate_report`], plus a few derived figures
/// (`success_rate`, `total_tokens`, `avg_latency_ms`, `estimated_cost_usd`).
#[derive(Debug, Clone)]
pub struct LlmMetricsReport {
    /// Total number of LLM calls.
    pub total_calls: u64,
    /// Number of successful calls.
    pub successful_calls: u64,
    /// Number of failed calls.
    pub failed_calls: u64,
    /// Success rate (0.0 - 1.0): `successful_calls / total_calls`, or `0.0`
    /// when no calls have been recorded.
    pub success_rate: f64,
    /// Total input tokens.
    pub total_input_tokens: u64,
    /// Total output tokens.
    pub total_output_tokens: u64,
    /// Total tokens (input + output).
    pub total_tokens: u64,
    /// Average latency in milliseconds: `total_latency_ms / total_calls`
    /// (all calls, not only successful ones), or `0.0` when no calls have
    /// been recorded.
    pub avg_latency_ms: f64,
    /// Total latency in milliseconds.
    pub total_latency_ms: u64,
    /// Estimated cost in USD (the micro-dollar counter divided by
    /// 1_000_000).
    pub estimated_cost_usd: f64,
    /// Number of rate limit errors.
    pub rate_limit_errors: u64,
    /// Number of timeout errors.
    pub timeout_errors: u64,
    /// Number of fallback triggers.
    pub fallback_triggers: u64,
}
173
#[cfg(test)]
mod tests {
    use super::*;

    /// Call counts, success rate and token totals reflect every recorded call.
    ///
    /// NOTE(review): the token assertions rely on `LlmMetricsConfig::default()`
    /// enabling token tracking — confirm against the config module.
    #[test]
    fn test_llm_metrics_recording() {
        let config = LlmMetricsConfig::default();
        let metrics = LlmMetrics::new();

        metrics.record_call(100, 50, 150, true, &config);
        metrics.record_call(200, 100, 300, true, &config);
        metrics.record_call(100, 0, 0, false, &config);

        let report = metrics.generate_report();
        assert_eq!(report.total_calls, 3);
        assert_eq!(report.successful_calls, 2);
        assert_eq!(report.failed_calls, 1);
        assert_eq!(
            report.successful_calls + report.failed_calls,
            report.total_calls
        );
        assert!((report.success_rate - 0.666666).abs() < 0.01);
        assert_eq!(report.total_input_tokens, 400);
        assert_eq!(report.total_output_tokens, 150);
        assert_eq!(report.total_tokens, 550);
    }

    /// The dedicated error counters are independent of `record_call`.
    #[test]
    fn test_llm_metrics_error_counters() {
        let metrics = LlmMetrics::new();

        metrics.record_rate_limit();
        metrics.record_rate_limit();
        metrics.record_timeout();
        metrics.record_fallback();

        let report = metrics.generate_report();
        assert_eq!(report.rate_limit_errors, 2);
        assert_eq!(report.timeout_errors, 1);
        assert_eq!(report.fallback_triggers, 1);
        assert_eq!(report.total_calls, 0);
    }

    /// `reset` must clear every counter, not just the call count.
    #[test]
    fn test_llm_metrics_reset() {
        let config = LlmMetricsConfig::default();
        let metrics = LlmMetrics::new();

        // Touch every counter before resetting so a missed field is caught.
        metrics.record_call(100, 50, 150, true, &config);
        metrics.record_call(10, 0, 20, false, &config);
        metrics.record_rate_limit();
        metrics.record_timeout();
        metrics.record_fallback();
        metrics.reset();

        let report = metrics.generate_report();
        assert_eq!(report.total_calls, 0);
        assert_eq!(report.successful_calls, 0);
        assert_eq!(report.failed_calls, 0);
        assert_eq!(report.total_input_tokens, 0);
        assert_eq!(report.total_output_tokens, 0);
        assert_eq!(report.total_tokens, 0);
        assert_eq!(report.total_latency_ms, 0);
        assert_eq!(report.rate_limit_errors, 0);
        assert_eq!(report.timeout_errors, 0);
        assert_eq!(report.fallback_triggers, 0);
        // Derived figures fall back to exactly 0.0 when nothing is recorded.
        assert_eq!(report.success_rate, 0.0);
        assert_eq!(report.avg_latency_ms, 0.0);
        assert_eq!(report.estimated_cost_usd, 0.0);
    }
}