llm_edge_cache/l1.rs

//! L1 In-Memory Cache using Moka
//!
//! High-performance in-process cache with TinyLFU eviction policy.
//! Target latency: <1ms for get/set operations.
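//!
//! # Example
//!
//! A minimal sketch of the intended read-through flow (`ignore`d because it
//! needs a live upstream; `CacheMetrics::new()` is assumed, matching the
//! tests in this module):
//!
//! ```ignore
//! let cache = L1Cache::new(CacheMetrics::new());
//! match cache.get("request_hash").await {
//!     Some(hit) => println!("cache hit: {}", hit.content),
//!     None => {
//!         // ... call the upstream model, then `cache.set(...)` the result ...
//!     }
//! }
//! ```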

use crate::metrics::{CacheMetrics, CacheOperation, CacheTier, LatencyTimer};
use moka::future::Cache;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use std::time::Duration;
use tracing::{debug, info};

/// Configuration for L1 cache
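///
/// # Example
///
/// A sketch of a custom configuration (the values are illustrative, not
/// tuned recommendations):
///
/// ```ignore
/// let config = L1Config {
///     max_capacity: 5_000,
///     ttl_seconds: 600, // 10 minutes
///     tti_seconds: 300, // 5 minutes
/// };
/// ```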
#[derive(Debug, Clone)]
pub struct L1Config {
    /// Maximum number of entries (default: 1000)
    pub max_capacity: u64,
    /// Time to live in seconds (default: 300 = 5 minutes)
    pub ttl_seconds: u64,
    /// Time to idle in seconds (default: 120 = 2 minutes)
    pub tti_seconds: u64,
}

impl Default for L1Config {
    fn default() -> Self {
        Self {
            max_capacity: 1000,
            ttl_seconds: 300,
            tti_seconds: 120,
        }
    }
}

/// Cached response data
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedResponse {
    /// The actual response content
    pub content: String,
    /// Token usage information
    pub tokens: Option<TokenUsage>,
    /// Model that generated the response
    pub model: String,
    /// When this entry was cached (Unix timestamp)
    pub cached_at: i64,
}

/// Token usage reported by the model for a cached response
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TokenUsage {
    pub prompt_tokens: u32,
    pub completion_tokens: u32,
    pub total_tokens: u32,
}

/// L1 cache implementation using Moka
#[derive(Clone)]
pub struct L1Cache {
    cache: Cache<String, Arc<CachedResponse>>,
    config: L1Config,
    metrics: CacheMetrics,
}

impl L1Cache {
    /// Create a new L1 cache with default configuration
    pub fn new(metrics: CacheMetrics) -> Self {
        Self::with_config(L1Config::default(), metrics)
    }

    /// Create a new L1 cache with custom configuration
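    ///
    /// # Example
    ///
    /// A minimal sketch; `CacheMetrics::new()` is assumed to be the
    /// constructor used by the tests below:
    ///
    /// ```ignore
    /// let config = L1Config { max_capacity: 10_000, ..L1Config::default() };
    /// let cache = L1Cache::with_config(config, CacheMetrics::new());
    /// ```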
    pub fn with_config(config: L1Config, metrics: CacheMetrics) -> Self {
        info!(
            "Initializing L1 cache: capacity={}, ttl={}s, tti={}s",
            config.max_capacity, config.ttl_seconds, config.tti_seconds
        );

        let cache = Cache::builder()
            .max_capacity(config.max_capacity)
            .time_to_live(Duration::from_secs(config.ttl_seconds))
            .time_to_idle(Duration::from_secs(config.tti_seconds))
            .build();

        Self {
            cache,
            config,
            metrics,
        }
    }

    /// Get a value from the cache
    ///
    /// # Performance
    /// Target: <1ms (typically <100μs)
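    ///
    /// # Example
    ///
    /// A sketch of a read (`ignore`d since it needs a populated cache):
    ///
    /// ```ignore
    /// if let Some(response) = cache.get("request_hash").await {
    ///     println!("served by {}: {}", response.model, response.content);
    /// }
    /// ```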
    pub async fn get(&self, key: &str) -> Option<Arc<CachedResponse>> {
        let _timer = LatencyTimer::new(CacheTier::L1, self.metrics.clone());

        let result = self.cache.get(key).await;

        if result.is_some() {
            // Log a truncated key; `str::get` avoids the panic a raw byte
            // slice could hit on a non-char boundary in non-ASCII keys.
            debug!("L1 cache HIT: key={}", key.get(..16).unwrap_or(key));
            self.metrics
                .record_operation(CacheTier::L1, CacheOperation::Hit);
        } else {
            debug!("L1 cache MISS: key={}", key.get(..16).unwrap_or(key));
            self.metrics
                .record_operation(CacheTier::L1, CacheOperation::Miss);
        }

        result
    }

    /// Set a value in the cache
    ///
    /// # Performance
    /// Target: <1ms (non-blocking, async write)
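    ///
    /// # Example
    ///
    /// A sketch of a write; `chrono` supplies the timestamp, as in the
    /// tests below:
    ///
    /// ```ignore
    /// let response = CachedResponse {
    ///     content: "Hello!".to_string(),
    ///     tokens: None,
    ///     model: "gpt-4".to_string(),
    ///     cached_at: chrono::Utc::now().timestamp(),
    /// };
    /// cache.set("request_hash".to_string(), response).await;
    /// ```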
    pub async fn set(&self, key: String, value: CachedResponse) {
        let _timer = LatencyTimer::new(CacheTier::L1, self.metrics.clone());

        debug!("L1 cache WRITE: key={}", key.get(..16).unwrap_or(key.as_str()));

        self.cache.insert(key, Arc::new(value)).await;
        self.metrics
            .record_operation(CacheTier::L1, CacheOperation::Write);

        // Update size metrics. Moka's `entry_count()` is eventually
        // consistent, so this value may briefly lag recent writes.
        let size = self.cache.entry_count();
        self.metrics.update_cache_size(CacheTier::L1, size);
    }

    /// Remove a value from the cache
    pub async fn remove(&self, key: &str) {
        self.cache.invalidate(key).await;
        self.metrics
            .record_operation(CacheTier::L1, CacheOperation::Delete);
    }

    /// Clear all entries from the cache
    pub async fn clear(&self) {
        info!("Clearing L1 cache");
        self.cache.invalidate_all();
        // `invalidate_all` is lazy; run pending maintenance so that
        // `entry_count()` reflects the cleared state right away.
        self.cache.run_pending_tasks().await;
        self.metrics.update_cache_size(CacheTier::L1, 0);
    }

    /// Get the current number of entries in the cache
    ///
    /// Note: Moka maintains this count eventually, so it may not yet
    /// include operations awaiting internal maintenance.
    pub fn entry_count(&self) -> u64 {
        self.cache.entry_count()
    }

    /// Get the cache configuration
    pub fn config(&self) -> &L1Config {
        &self.config
    }

    /// Get cache statistics
    pub fn stats(&self) -> L1Stats {
        L1Stats {
            entry_count: self.cache.entry_count(),
            max_capacity: self.config.max_capacity,
            ttl_seconds: self.config.ttl_seconds,
        }
    }
}

/// L1 cache statistics
#[derive(Debug, Clone)]
pub struct L1Stats {
    pub entry_count: u64,
    pub max_capacity: u64,
    pub ttl_seconds: u64,
}

impl L1Stats {
    /// Calculate the cache utilization percentage
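    ///
    /// # Example
    ///
    /// ```ignore
    /// let stats = L1Stats { entry_count: 250, max_capacity: 1_000, ttl_seconds: 300 };
    /// assert_eq!(stats.utilization(), 25.0);
    /// ```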
    pub fn utilization(&self) -> f64 {
        if self.max_capacity == 0 {
            0.0
        } else {
            (self.entry_count as f64 / self.max_capacity as f64) * 100.0
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use chrono::Utc;

    fn create_test_response(content: &str) -> CachedResponse {
        CachedResponse {
            content: content.to_string(),
            tokens: Some(TokenUsage {
                prompt_tokens: 10,
                completion_tokens: 20,
                total_tokens: 30,
            }),
            model: "gpt-4".to_string(),
            cached_at: Utc::now().timestamp(),
        }
    }

    #[tokio::test]
    async fn test_l1_basic_get_set() {
        let metrics = CacheMetrics::new();
        let cache = L1Cache::new(metrics);

        let key = "test_key".to_string();
        let response = create_test_response("Hello, world!");

        // Should miss initially
        assert!(cache.get(&key).await.is_none());

        // Set value
        cache.set(key.clone(), response.clone()).await;

        // Should hit now
        let cached = cache.get(&key).await;
        assert!(cached.is_some());
        assert_eq!(cached.unwrap().content, "Hello, world!");
    }

    #[tokio::test]
    async fn test_l1_eviction_by_capacity() {
        let metrics = CacheMetrics::new();
        let config = L1Config {
            max_capacity: 2,
            ttl_seconds: 300,
            tti_seconds: 120,
        };
        let cache = L1Cache::with_config(config, metrics);

        // Insert 3 items into a cache with capacity of 2
        cache
            .set("key1".to_string(), create_test_response("value1"))
            .await;
        cache
            .set("key2".to_string(), create_test_response("value2"))
            .await;
        cache
            .set("key3".to_string(), create_test_response("value3"))
            .await;

        // Run Moka's pending maintenance so evictions are applied
        // deterministically rather than sleeping and hoping they ran
        cache.cache.run_pending_tasks().await;

        // Should have at most 2 entries
        assert!(cache.entry_count() <= 2);
    }

    #[tokio::test]
    async fn test_l1_remove() {
        let metrics = CacheMetrics::new();
        let cache = L1Cache::new(metrics);

        let key = "test_key".to_string();
        cache.set(key.clone(), create_test_response("test")).await;

        assert!(cache.get(&key).await.is_some());

        cache.remove(&key).await;

        assert!(cache.get(&key).await.is_none());
    }

    #[tokio::test]
    async fn test_l1_clear() {
        let metrics = CacheMetrics::new();
        let cache = L1Cache::new(metrics);

        cache
            .set("key1".to_string(), create_test_response("value1"))
            .await;
        cache
            .set("key2".to_string(), create_test_response("value2"))
            .await;

        // Force Moka to process pending operations
        cache.cache.run_pending_tasks().await;

        assert!(cache.entry_count() > 0);

        cache.clear().await;

        assert_eq!(cache.entry_count(), 0);
    }

    #[tokio::test]
    async fn test_l1_stats() {
        let metrics = CacheMetrics::new();
        let config = L1Config {
            max_capacity: 100,
            ttl_seconds: 300,
            tti_seconds: 120,
        };
        let cache = L1Cache::with_config(config, metrics);

        cache
            .set("key1".to_string(), create_test_response("value1"))
            .await;

        // Force Moka to process pending operations
        cache.cache.run_pending_tasks().await;

        let stats = cache.stats();
        assert_eq!(stats.entry_count, 1);
        assert_eq!(stats.max_capacity, 100);
        // 1 entry out of 100 => 1.0% utilization
        assert_eq!(stats.utilization(), 1.0);
    }

    #[tokio::test]
    async fn test_l1_metrics_recording() {
        let metrics = CacheMetrics::new();
        let cache = L1Cache::new(metrics.clone());

        let key = "test_key".to_string();

        // Miss
        cache.get(&key).await;
        assert_eq!(metrics.snapshot().l1_misses, 1);

        // Write
        cache.set(key.clone(), create_test_response("test")).await;
        assert_eq!(metrics.snapshot().l1_writes, 1);

        // Hit
        cache.get(&key).await;
        assert_eq!(metrics.snapshot().l1_hits, 1);
    }
}