llm_edge_cache/
metrics.rs

//! Cache metrics tracking and reporting
//!
//! Tracks cache performance metrics including hit rates, latencies, and sizes.
//! Metrics are emitted through the `metrics` facade and can be exported to Prometheus.
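//!
//! # Example
//!
//! A minimal usage sketch. The `llm_edge_cache::metrics` import path is assumed
//! from this file's location and may differ in the actual crate layout:
//!
//! ```ignore
//! use llm_edge_cache::metrics::{CacheMetrics, CacheOperation, CacheTier};
//!
//! let metrics = CacheMetrics::new();
//! metrics.record_request();
//! metrics.record_operation(CacheTier::L1, CacheOperation::Hit);
//! metrics.record_request();
//! metrics.record_operation(CacheTier::L1, CacheOperation::Miss);
//! metrics.record_operation(CacheTier::L2, CacheOperation::Miss);
//! assert_eq!(metrics.l1_hit_rate(), 0.5);
//! ```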

use metrics::{counter, gauge, histogram};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use std::time::{Duration, Instant};

/// Cache tier identifier
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CacheTier {
    L1,
    L2,
    L3,
}

impl CacheTier {
    pub fn as_str(&self) -> &'static str {
        match self {
            CacheTier::L1 => "l1",
            CacheTier::L2 => "l2",
            CacheTier::L3 => "l3",
        }
    }
}

/// Cache operation type
#[derive(Debug, Clone, Copy)]
pub enum CacheOperation {
    Hit,
    Miss,
    Write,
    Delete,
}

/// Metrics collector for cache operations
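///
/// Cloning is cheap: every counter is wrapped in an `Arc`, so clones share the
/// same underlying atomic values and can be handed to multiple tasks or threads.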
#[derive(Debug, Clone)]
pub struct CacheMetrics {
    // L1 metrics
    l1_hits: Arc<AtomicU64>,
    l1_misses: Arc<AtomicU64>,
    l1_writes: Arc<AtomicU64>,

    // L2 metrics
    l2_hits: Arc<AtomicU64>,
    l2_misses: Arc<AtomicU64>,
    l2_writes: Arc<AtomicU64>,

    // Overall metrics
    total_requests: Arc<AtomicU64>,
}

impl CacheMetrics {
    /// Create a new metrics collector
    pub fn new() -> Self {
        Self {
            l1_hits: Arc::new(AtomicU64::new(0)),
            l1_misses: Arc::new(AtomicU64::new(0)),
            l1_writes: Arc::new(AtomicU64::new(0)),
            l2_hits: Arc::new(AtomicU64::new(0)),
            l2_misses: Arc::new(AtomicU64::new(0)),
            l2_writes: Arc::new(AtomicU64::new(0)),
            total_requests: Arc::new(AtomicU64::new(0)),
        }
    }

    /// Record a cache operation
    pub fn record_operation(&self, tier: CacheTier, operation: CacheOperation) {
        match (tier, operation) {
            (CacheTier::L1, CacheOperation::Hit) => {
                self.l1_hits.fetch_add(1, Ordering::Relaxed);
                counter!("llm_edge_cache_hits_total", "tier" => "l1").increment(1);
            }
            (CacheTier::L1, CacheOperation::Miss) => {
                self.l1_misses.fetch_add(1, Ordering::Relaxed);
                counter!("llm_edge_cache_misses_total", "tier" => "l1").increment(1);
            }
            (CacheTier::L1, CacheOperation::Write) => {
                self.l1_writes.fetch_add(1, Ordering::Relaxed);
                counter!("llm_edge_cache_writes_total", "tier" => "l1").increment(1);
            }
            (CacheTier::L2, CacheOperation::Hit) => {
                self.l2_hits.fetch_add(1, Ordering::Relaxed);
                counter!("llm_edge_cache_hits_total", "tier" => "l2").increment(1);
            }
            (CacheTier::L2, CacheOperation::Miss) => {
                self.l2_misses.fetch_add(1, Ordering::Relaxed);
                counter!("llm_edge_cache_misses_total", "tier" => "l2").increment(1);
            }
            (CacheTier::L2, CacheOperation::Write) => {
                self.l2_writes.fetch_add(1, Ordering::Relaxed);
                counter!("llm_edge_cache_writes_total", "tier" => "l2").increment(1);
            }
            (CacheTier::L3, CacheOperation::Hit) => {
                counter!("llm_edge_cache_hits_total", "tier" => "l3").increment(1);
            }
            (CacheTier::L3, CacheOperation::Miss) => {
                counter!("llm_edge_cache_misses_total", "tier" => "l3").increment(1);
            }
            (CacheTier::L3, CacheOperation::Write) => {
                counter!("llm_edge_cache_writes_total", "tier" => "l3").increment(1);
            }
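            // Delete operations (and any other unmatched combinations) are not tracked.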
            _ => {}
        }
    }

    /// Record cache lookup latency
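    ///
    /// A minimal usage sketch of the timing pattern (illustrative only):
    ///
    /// ```ignore
    /// let metrics = CacheMetrics::new();
    /// let start = std::time::Instant::now();
    /// // ... perform the cache lookup ...
    /// metrics.record_latency(CacheTier::L1, start.elapsed());
    /// ```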
    pub fn record_latency(&self, tier: CacheTier, duration: Duration) {
        let latency_ms = duration.as_secs_f64() * 1000.0;
        histogram!(
            "llm_edge_cache_latency_ms",
            "tier" => tier.as_str()
        )
        .record(latency_ms);
    }

    /// Record a request (for overall metrics)
    pub fn record_request(&self) {
        self.total_requests.fetch_add(1, Ordering::Relaxed);
        counter!("llm_edge_requests_total").increment(1);
    }

    /// Update cache size gauge
    pub fn update_cache_size(&self, tier: CacheTier, size: u64) {
        gauge!(
            "llm_edge_cache_size_entries",
            "tier" => tier.as_str()
        )
        .set(size as f64);
    }

    /// Update cache memory usage
    pub fn update_cache_memory(&self, tier: CacheTier, bytes: u64) {
        gauge!(
            "llm_edge_cache_memory_bytes",
            "tier" => tier.as_str()
        )
        .set(bytes as f64);
    }

    /// Calculate L1 hit rate
    pub fn l1_hit_rate(&self) -> f64 {
        let hits = self.l1_hits.load(Ordering::Relaxed);
        let misses = self.l1_misses.load(Ordering::Relaxed);
        let total = hits + misses;

        if total == 0 {
            0.0
        } else {
            (hits as f64) / (total as f64)
        }
    }

    /// Calculate L2 hit rate
    pub fn l2_hit_rate(&self) -> f64 {
        let hits = self.l2_hits.load(Ordering::Relaxed);
        let misses = self.l2_misses.load(Ordering::Relaxed);
        let total = hits + misses;

        if total == 0 {
            0.0
        } else {
            (hits as f64) / (total as f64)
        }
    }

    /// Calculate overall cache hit rate (L1 + L2)
    ///
    /// Assumes every lookup checks L1 first, so L1 hits + L1 misses equals the
    /// total number of cache lookups.
    pub fn overall_hit_rate(&self) -> f64 {
        let l1_hits = self.l1_hits.load(Ordering::Relaxed);
        let l2_hits = self.l2_hits.load(Ordering::Relaxed);
        let l1_misses = self.l1_misses.load(Ordering::Relaxed);

        let total_hits = l1_hits + l2_hits;
        let total_requests = l1_hits + l1_misses; // L1 sees all requests

        if total_requests == 0 {
            0.0
        } else {
            (total_hits as f64) / (total_requests as f64)
        }
    }

    /// Get total number of requests
    pub fn total_requests(&self) -> u64 {
        self.total_requests.load(Ordering::Relaxed)
    }

    /// Get snapshot of current metrics
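    ///
    /// The snapshot is a plain copy of the current counter values, so derived
    /// rates can be computed or logged without touching the atomics again.
    /// A brief sketch:
    ///
    /// ```ignore
    /// let metrics = CacheMetrics::new();
    /// metrics.record_operation(CacheTier::L1, CacheOperation::Hit);
    /// let snap = metrics.snapshot();
    /// println!("L1 hit rate: {:.1}%", snap.l1_hit_rate() * 100.0);
    /// ```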
    pub fn snapshot(&self) -> MetricsSnapshot {
        MetricsSnapshot {
            l1_hits: self.l1_hits.load(Ordering::Relaxed),
            l1_misses: self.l1_misses.load(Ordering::Relaxed),
            l1_writes: self.l1_writes.load(Ordering::Relaxed),
            l2_hits: self.l2_hits.load(Ordering::Relaxed),
            l2_misses: self.l2_misses.load(Ordering::Relaxed),
            l2_writes: self.l2_writes.load(Ordering::Relaxed),
            total_requests: self.total_requests.load(Ordering::Relaxed),
        }
    }
}

impl Default for CacheMetrics {
    fn default() -> Self {
        Self::new()
    }
}

/// Snapshot of cache metrics at a point in time
#[derive(Debug, Clone, Copy)]
pub struct MetricsSnapshot {
    pub l1_hits: u64,
    pub l1_misses: u64,
    pub l1_writes: u64,
    pub l2_hits: u64,
    pub l2_misses: u64,
    pub l2_writes: u64,
    pub total_requests: u64,
}

impl MetricsSnapshot {
    pub fn l1_hit_rate(&self) -> f64 {
        let total = self.l1_hits + self.l1_misses;
        if total == 0 {
            0.0
        } else {
            (self.l1_hits as f64) / (total as f64)
        }
    }

    pub fn l2_hit_rate(&self) -> f64 {
        let total = self.l2_hits + self.l2_misses;
        if total == 0 {
            0.0
        } else {
            (self.l2_hits as f64) / (total as f64)
        }
    }

    pub fn overall_hit_rate(&self) -> f64 {
        let total_hits = self.l1_hits + self.l2_hits;
        let total_requests = self.l1_hits + self.l1_misses;

        if total_requests == 0 {
            0.0
        } else {
            (total_hits as f64) / (total_requests as f64)
        }
    }
}

/// Helper to measure operation latency
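///
/// A minimal usage sketch; `finish` must be called explicitly, since nothing is
/// recorded on drop:
///
/// ```ignore
/// let metrics = CacheMetrics::new();
/// let timer = LatencyTimer::new(CacheTier::L1, metrics.clone());
/// // ... perform the L1 lookup ...
/// timer.finish(); // records the elapsed time in the latency histogram
/// ```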
pub struct LatencyTimer {
    start: Instant,
    tier: CacheTier,
    metrics: CacheMetrics,
}

impl LatencyTimer {
    pub fn new(tier: CacheTier, metrics: CacheMetrics) -> Self {
        Self {
            start: Instant::now(),
            tier,
            metrics,
        }
    }

    pub fn finish(self) {
        let duration = self.start.elapsed();
        self.metrics.record_latency(self.tier, duration);
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_metrics_recording() {
        let metrics = CacheMetrics::new();

        metrics.record_operation(CacheTier::L1, CacheOperation::Hit);
        metrics.record_operation(CacheTier::L1, CacheOperation::Miss);
        metrics.record_operation(CacheTier::L1, CacheOperation::Hit);

        assert_eq!(metrics.l1_hits.load(Ordering::Relaxed), 2);
        assert_eq!(metrics.l1_misses.load(Ordering::Relaxed), 1);
    }

    #[test]
    fn test_hit_rate_calculation() {
        let metrics = CacheMetrics::new();

        // Record 7 hits and 3 misses = 70% hit rate
        for _ in 0..7 {
            metrics.record_operation(CacheTier::L1, CacheOperation::Hit);
        }
        for _ in 0..3 {
            metrics.record_operation(CacheTier::L1, CacheOperation::Miss);
        }

        let hit_rate = metrics.l1_hit_rate();
        assert!(
            (hit_rate - 0.7).abs() < 0.01,
            "Expected 70% hit rate, got {}",
            hit_rate
        );
    }

    #[test]
    fn test_empty_metrics_hit_rate() {
        let metrics = CacheMetrics::new();
        assert_eq!(
            metrics.l1_hit_rate(),
            0.0,
            "Empty metrics should have 0% hit rate"
        );
    }

    #[test]
    fn test_metrics_snapshot() {
        let metrics = CacheMetrics::new();

        metrics.record_operation(CacheTier::L1, CacheOperation::Hit);
        metrics.record_operation(CacheTier::L2, CacheOperation::Miss);

        let snapshot = metrics.snapshot();
        assert_eq!(snapshot.l1_hits, 1);
        assert_eq!(snapshot.l2_misses, 1);
    }

    #[test]
    fn test_overall_hit_rate() {
        let metrics = CacheMetrics::new();

        // 10 L1 requests: 6 hits, 4 misses
        for _ in 0..6 {
            metrics.record_operation(CacheTier::L1, CacheOperation::Hit);
        }
        for _ in 0..4 {
            metrics.record_operation(CacheTier::L1, CacheOperation::Miss);
        }

        // Of the 4 L1 misses, 2 hit L2 and 2 miss L2 as well
        for _ in 0..2 {
            metrics.record_operation(CacheTier::L2, CacheOperation::Hit);
        }
        for _ in 0..2 {
            metrics.record_operation(CacheTier::L2, CacheOperation::Miss);
        }

        // Overall: 8 hits (6 L1 + 2 L2) out of 10 requests = 80%
        let overall = metrics.overall_hit_rate();
        assert!(
            (overall - 0.8).abs() < 0.01,
            "Expected 80% overall hit rate, got {}",
            overall
        );
    }
}