1use metrics::{counter, gauge, histogram};
7use std::sync::atomic::{AtomicU64, Ordering};
8use std::sync::Arc;
9use std::time::{Duration, Instant};
10
/// Cache tier that a recorded metric is attributed to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CacheTier {
    /// First tier; labelled `"l1"`.
    L1,
    /// Second tier; labelled `"l2"`.
    L2,
    /// Third tier; labelled `"l3"`.
    L3,
}

impl CacheTier {
    /// Returns the lowercase name of the tier.
    ///
    /// This is the value attached to the `"tier"` label when latency,
    /// size and memory metrics are emitted.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::L1 => "l1",
            Self::L2 => "l2",
            Self::L3 => "l3",
        }
    }
}
28
/// Kind of cache operation being recorded.
///
/// Derives `PartialEq`/`Eq` (matching [`CacheTier`]) so operations can be
/// compared directly and used in `assert_eq!`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CacheOperation {
    /// A cache hit.
    Hit,
    /// A cache miss.
    Miss,
    /// A write into the cache.
    Write,
    /// A removal from the cache.
    Delete,
}
37
/// In-process counters for cache activity.
///
/// Each counter sits behind its own [`Arc`], so cloning a `CacheMetrics`
/// yields a handle that reads and updates the same underlying values.
#[derive(Debug, Clone)]
pub struct CacheMetrics {
    // L1 counters.
    l1_hits: Arc<AtomicU64>,
    l1_misses: Arc<AtomicU64>,
    l1_writes: Arc<AtomicU64>,

    // L2 counters.
    l2_hits: Arc<AtomicU64>,
    l2_misses: Arc<AtomicU64>,
    l2_writes: Arc<AtomicU64>,

    // Requests counted via `record_request`, independent of any tier.
    total_requests: Arc<AtomicU64>,
}
54
55impl CacheMetrics {
56 pub fn new() -> Self {
58 Self {
59 l1_hits: Arc::new(AtomicU64::new(0)),
60 l1_misses: Arc::new(AtomicU64::new(0)),
61 l1_writes: Arc::new(AtomicU64::new(0)),
62 l2_hits: Arc::new(AtomicU64::new(0)),
63 l2_misses: Arc::new(AtomicU64::new(0)),
64 l2_writes: Arc::new(AtomicU64::new(0)),
65 total_requests: Arc::new(AtomicU64::new(0)),
66 }
67 }
68
69 pub fn record_operation(&self, tier: CacheTier, operation: CacheOperation) {
71 match (tier, operation) {
72 (CacheTier::L1, CacheOperation::Hit) => {
73 self.l1_hits.fetch_add(1, Ordering::Relaxed);
74 counter!("llm_edge_cache_hits_total", "tier" => "l1").increment(1);
75 }
76 (CacheTier::L1, CacheOperation::Miss) => {
77 self.l1_misses.fetch_add(1, Ordering::Relaxed);
78 counter!("llm_edge_cache_misses_total", "tier" => "l1").increment(1);
79 }
80 (CacheTier::L1, CacheOperation::Write) => {
81 self.l1_writes.fetch_add(1, Ordering::Relaxed);
82 counter!("llm_edge_cache_writes_total", "tier" => "l1").increment(1);
83 }
84 (CacheTier::L2, CacheOperation::Hit) => {
85 self.l2_hits.fetch_add(1, Ordering::Relaxed);
86 counter!("llm_edge_cache_hits_total", "tier" => "l2").increment(1);
87 }
88 (CacheTier::L2, CacheOperation::Miss) => {
89 self.l2_misses.fetch_add(1, Ordering::Relaxed);
90 counter!("llm_edge_cache_misses_total", "tier" => "l2").increment(1);
91 }
92 (CacheTier::L2, CacheOperation::Write) => {
93 self.l2_writes.fetch_add(1, Ordering::Relaxed);
94 counter!("llm_edge_cache_writes_total", "tier" => "l2").increment(1);
95 }
96 (CacheTier::L3, CacheOperation::Hit) => {
97 counter!("llm_edge_cache_hits_total", "tier" => "l3").increment(1);
98 }
99 (CacheTier::L3, CacheOperation::Miss) => {
100 counter!("llm_edge_cache_misses_total", "tier" => "l3").increment(1);
101 }
102 (CacheTier::L3, CacheOperation::Write) => {
103 counter!("llm_edge_cache_writes_total", "tier" => "l3").increment(1);
104 }
105 _ => {}
106 }
107 }
108
109 pub fn record_latency(&self, tier: CacheTier, duration: Duration) {
111 let latency_ms = duration.as_secs_f64() * 1000.0;
112 histogram!(
113 "llm_edge_cache_latency_ms",
114 "tier" => tier.as_str()
115 )
116 .record(latency_ms);
117 }
118
119 pub fn record_request(&self) {
121 self.total_requests.fetch_add(1, Ordering::Relaxed);
122 counter!("llm_edge_requests_total").increment(1);
123 }
124
125 pub fn update_cache_size(&self, tier: CacheTier, size: u64) {
127 gauge!(
128 "llm_edge_cache_size_entries",
129 "tier" => tier.as_str()
130 )
131 .set(size as f64);
132 }
133
134 pub fn update_cache_memory(&self, tier: CacheTier, bytes: u64) {
136 gauge!(
137 "llm_edge_cache_memory_bytes",
138 "tier" => tier.as_str()
139 )
140 .set(bytes as f64);
141 }
142
143 pub fn l1_hit_rate(&self) -> f64 {
145 let hits = self.l1_hits.load(Ordering::Relaxed);
146 let misses = self.l1_misses.load(Ordering::Relaxed);
147 let total = hits + misses;
148
149 if total == 0 {
150 0.0
151 } else {
152 (hits as f64) / (total as f64)
153 }
154 }
155
156 pub fn l2_hit_rate(&self) -> f64 {
158 let hits = self.l2_hits.load(Ordering::Relaxed);
159 let misses = self.l2_misses.load(Ordering::Relaxed);
160 let total = hits + misses;
161
162 if total == 0 {
163 0.0
164 } else {
165 (hits as f64) / (total as f64)
166 }
167 }
168
169 pub fn overall_hit_rate(&self) -> f64 {
171 let l1_hits = self.l1_hits.load(Ordering::Relaxed);
172 let l2_hits = self.l2_hits.load(Ordering::Relaxed);
173 let l1_misses = self.l1_misses.load(Ordering::Relaxed);
174
175 let total_hits = l1_hits + l2_hits;
176 let total_requests = l1_hits + l1_misses; if total_requests == 0 {
179 0.0
180 } else {
181 (total_hits as f64) / (total_requests as f64)
182 }
183 }
184
185 pub fn total_requests(&self) -> u64 {
187 self.total_requests.load(Ordering::Relaxed)
188 }
189
190 pub fn snapshot(&self) -> MetricsSnapshot {
192 MetricsSnapshot {
193 l1_hits: self.l1_hits.load(Ordering::Relaxed),
194 l1_misses: self.l1_misses.load(Ordering::Relaxed),
195 l1_writes: self.l1_writes.load(Ordering::Relaxed),
196 l2_hits: self.l2_hits.load(Ordering::Relaxed),
197 l2_misses: self.l2_misses.load(Ordering::Relaxed),
198 l2_writes: self.l2_writes.load(Ordering::Relaxed),
199 total_requests: self.total_requests.load(Ordering::Relaxed),
200 }
201 }
202}
203
204impl Default for CacheMetrics {
205 fn default() -> Self {
206 Self::new()
207 }
208}
209
/// Plain-data copy of the in-process cache counters.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct MetricsSnapshot {
    /// Number of L1 hits.
    pub l1_hits: u64,
    /// Number of L1 misses.
    pub l1_misses: u64,
    /// Number of L1 writes.
    pub l1_writes: u64,
    /// Number of L2 hits.
    pub l2_hits: u64,
    /// Number of L2 misses.
    pub l2_misses: u64,
    /// Number of L2 writes.
    pub l2_writes: u64,
    /// Number of requests counted.
    pub total_requests: u64,
}

impl MetricsSnapshot {
    /// Returns L1 hits divided by L1 lookups (hits + misses), or `0.0` when
    /// both are zero.
    pub fn l1_hit_rate(&self) -> f64 {
        let hits = u128::from(self.l1_hits);
        Self::ratio(hits, hits + u128::from(self.l1_misses))
    }

    /// Returns L2 hits divided by L2 lookups (hits + misses), or `0.0` when
    /// both are zero.
    pub fn l2_hit_rate(&self) -> f64 {
        let hits = u128::from(self.l2_hits);
        Self::ratio(hits, hits + u128::from(self.l2_misses))
    }

    /// Returns (L1 hits + L2 hits) divided by L1 lookups (L1 hits + L1
    /// misses), or `0.0` when there were no L1 lookups.
    // NOTE(review): using L1 lookups as the denominator presumably assumes
    // every request consults L1 first — confirm against the callers.
    pub fn overall_hit_rate(&self) -> f64 {
        let l1_hits = u128::from(self.l1_hits);
        let served = l1_hits + u128::from(self.l2_hits);
        let lookups = l1_hits + u128::from(self.l1_misses);
        Self::ratio(served, lookups)
    }

    /// Shared formula: `hits / lookups`, or `0.0` for zero lookups.
    ///
    /// The fields are public `u64`s, so adding two of them in `u64` could
    /// overflow (a panic in debug builds, a wrapped and wrong total in
    /// release builds). Callers therefore widen to `u128` before summing.
    fn ratio(hits: u128, lookups: u128) -> f64 {
        if lookups == 0 {
            0.0
        } else {
            (hits as f64) / (lookups as f64)
        }
    }
}
252
253pub struct LatencyTimer {
255 start: Instant,
256 tier: CacheTier,
257 metrics: CacheMetrics,
258}
259
260impl LatencyTimer {
261 pub fn new(tier: CacheTier, metrics: CacheMetrics) -> Self {
262 Self {
263 start: Instant::now(),
264 tier,
265 metrics,
266 }
267 }
268
269 pub fn finish(self) {
270 let duration = self.start.elapsed();
271 self.metrics.record_latency(self.tier, duration);
272 }
273}
274
#[cfg(test)]
mod tests {
    use super::*;

    /// Records `operation` against `tier` exactly `times` times.
    fn record_many(
        metrics: &CacheMetrics,
        tier: CacheTier,
        operation: CacheOperation,
        times: usize,
    ) {
        (0..times).for_each(|_| metrics.record_operation(tier, operation));
    }

    #[test]
    fn test_metrics_recording() {
        let metrics = CacheMetrics::new();

        // Two hits with a miss in between.
        let sequence = [
            CacheOperation::Hit,
            CacheOperation::Miss,
            CacheOperation::Hit,
        ];
        for operation in sequence {
            metrics.record_operation(CacheTier::L1, operation);
        }

        assert_eq!(metrics.l1_hits.load(Ordering::Relaxed), 2);
        assert_eq!(metrics.l1_misses.load(Ordering::Relaxed), 1);
    }

    #[test]
    fn test_hit_rate_calculation() {
        let metrics = CacheMetrics::new();

        // 7 hits and 3 misses on L1.
        record_many(&metrics, CacheTier::L1, CacheOperation::Hit, 7);
        record_many(&metrics, CacheTier::L1, CacheOperation::Miss, 3);

        let hit_rate = metrics.l1_hit_rate();
        assert!(
            (hit_rate - 0.7).abs() < 0.01,
            "Expected 70% hit rate, got {}",
            hit_rate
        );
    }

    #[test]
    fn test_empty_metrics_hit_rate() {
        let metrics = CacheMetrics::new();
        assert_eq!(
            metrics.l1_hit_rate(),
            0.0,
            "Empty metrics should have 0% hit rate"
        );
    }

    #[test]
    fn test_metrics_snapshot() {
        let metrics = CacheMetrics::new();

        metrics.record_operation(CacheTier::L1, CacheOperation::Hit);
        metrics.record_operation(CacheTier::L2, CacheOperation::Miss);

        let snapshot = metrics.snapshot();
        assert_eq!(snapshot.l1_hits, 1);
        assert_eq!(snapshot.l2_misses, 1);
    }

    #[test]
    fn test_overall_hit_rate() {
        let metrics = CacheMetrics::new();

        // L1: 6 hits, 4 misses.
        record_many(&metrics, CacheTier::L1, CacheOperation::Hit, 6);
        record_many(&metrics, CacheTier::L1, CacheOperation::Miss, 4);

        // L2: 2 hits.
        record_many(&metrics, CacheTier::L2, CacheOperation::Hit, 2);

        // (6 + 2) hits over 10 L1 lookups.
        let overall = metrics.overall_hit_rate();
        assert!(
            (overall - 0.8).abs() < 0.01,
            "Expected 80% overall hit rate, got {}",
            overall
        );
    }
}