use crate::metrics::{CacheMetrics, CacheOperation, CacheTier, LatencyTimer};
use moka::future::Cache;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use std::time::Duration;
use tracing::{debug, info};

/// Configuration for the in-memory (L1) cache tier.
#[derive(Debug, Clone)]
pub struct L1Config {
    /// Maximum number of entries the cache may hold.
    pub max_capacity: u64,
    /// Time-to-live: entries expire this many seconds after insertion.
    pub ttl_seconds: u64,
    /// Time-to-idle: entries expire this many seconds after last access.
    pub tti_seconds: u64,
}

impl Default for L1Config {
    fn default() -> Self {
        Self {
            max_capacity: 1000,
            ttl_seconds: 300,
            tti_seconds: 120,
        }
    }
}

/// A cached LLM response along with the metadata needed to serve it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedResponse {
    /// The response body.
    pub content: String,
    /// Token usage reported by the upstream provider, if available.
    pub tokens: Option<TokenUsage>,
    /// The model that produced the response.
    pub model: String,
    /// Unix timestamp (seconds) at which the response was cached.
    pub cached_at: i64,
}

/// Token accounting for a single request/response pair.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TokenUsage {
    pub prompt_tokens: u32,
    pub completion_tokens: u32,
    pub total_tokens: u32,
}

/// In-process cache tier backed by `moka`, with hit/miss/latency metrics.
///
/// Values are stored as `Arc<CachedResponse>` so a hit hands out a cheap
/// reference-counted clone instead of copying the whole response.
#[derive(Clone)]
pub struct L1Cache {
    cache: Cache<String, Arc<CachedResponse>>,
    config: L1Config,
    metrics: CacheMetrics,
}

impl L1Cache {
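    /// Creates an L1 cache with the default [`L1Config`].
    ///
    /// A minimal usage sketch (marked `ignore`: it needs a Tokio runtime
    /// and a `CachedResponse` value, here called `response`):
    ///
    /// ```ignore
    /// let cache = L1Cache::new(CacheMetrics::new());
    /// cache.set("key".to_string(), response).await;
    /// assert!(cache.get("key").await.is_some());
    /// ```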
    pub fn new(metrics: CacheMetrics) -> Self {
        Self::with_config(L1Config::default(), metrics)
    }

    /// Creates an L1 cache with an explicit configuration.
    pub fn with_config(config: L1Config, metrics: CacheMetrics) -> Self {
        info!(
            "Initializing L1 cache: capacity={}, ttl={}s, tti={}s",
            config.max_capacity, config.ttl_seconds, config.tti_seconds
        );

        let cache = Cache::builder()
            .max_capacity(config.max_capacity)
            .time_to_live(Duration::from_secs(config.ttl_seconds))
            .time_to_idle(Duration::from_secs(config.tti_seconds))
            .build();

        Self {
            cache,
            config,
            metrics,
        }
    }

    /// Looks up a response by key, recording hit/miss and latency metrics.
    pub async fn get(&self, key: &str) -> Option<Arc<CachedResponse>> {
        let _timer = LatencyTimer::new(CacheTier::L1, self.metrics.clone());

        let result = self.cache.get(key).await;

        if result.is_some() {
            debug!("L1 cache HIT: key={}", &key[..16.min(key.len())]);
            self.metrics
                .record_operation(CacheTier::L1, CacheOperation::Hit);
        } else {
            debug!("L1 cache MISS: key={}", &key[..16.min(key.len())]);
            self.metrics
                .record_operation(CacheTier::L1, CacheOperation::Miss);
        }

        result
    }

    /// Inserts a response, recording the write and updating the size gauge.
    pub async fn set(&self, key: String, value: CachedResponse) {
        let _timer = LatencyTimer::new(CacheTier::L1, self.metrics.clone());

        debug!("L1 cache WRITE: key={}", &key[..16.min(key.len())]);

        self.cache.insert(key, Arc::new(value)).await;
        self.metrics
            .record_operation(CacheTier::L1, CacheOperation::Write);

        // Note: moka's entry_count() is eventually consistent, so the gauge
        // may briefly lag the true size until pending tasks are processed.
        let size = self.cache.entry_count();
        self.metrics.update_cache_size(CacheTier::L1, size);
    }

    /// Removes a single entry by key.
    pub async fn remove(&self, key: &str) {
        self.cache.invalidate(key).await;
        self.metrics
            .record_operation(CacheTier::L1, CacheOperation::Delete);
    }

    /// Removes all entries and resets the size gauge.
    pub async fn clear(&self) {
        info!("Clearing L1 cache");
        self.cache.invalidate_all();
        self.cache.run_pending_tasks().await;
        self.metrics.update_cache_size(CacheTier::L1, 0);
    }

    /// Returns the (eventually consistent) number of cached entries.
    pub fn entry_count(&self) -> u64 {
        self.cache.entry_count()
    }

    /// Returns the active configuration.
    pub fn config(&self) -> &L1Config {
        &self.config
    }

    /// Returns a point-in-time snapshot of cache statistics.
    pub fn stats(&self) -> L1Stats {
        L1Stats {
            entry_count: self.cache.entry_count(),
            max_capacity: self.config.max_capacity,
            ttl_seconds: self.config.ttl_seconds,
        }
    }
}

/// A point-in-time snapshot of L1 cache statistics.
#[derive(Debug, Clone)]
pub struct L1Stats {
    pub entry_count: u64,
    pub max_capacity: u64,
    pub ttl_seconds: u64,
}

impl L1Stats {
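    /// Returns cache utilization as a percentage of `max_capacity`
    /// (for example, 50 entries with a capacity of 100 gives 50.0).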
    pub fn utilization(&self) -> f64 {
        if self.max_capacity == 0 {
            0.0
        } else {
            (self.entry_count as f64 / self.max_capacity as f64) * 100.0
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use chrono::Utc;

    fn create_test_response(content: &str) -> CachedResponse {
        CachedResponse {
            content: content.to_string(),
            tokens: Some(TokenUsage {
                prompt_tokens: 10,
                completion_tokens: 20,
                total_tokens: 30,
            }),
            model: "gpt-4".to_string(),
            cached_at: Utc::now().timestamp(),
        }
    }

    #[tokio::test]
    async fn test_l1_basic_get_set() {
        let metrics = CacheMetrics::new();
        let cache = L1Cache::new(metrics);

        let key = "test_key".to_string();
        let response = create_test_response("Hello, world!");

        // Miss before anything is inserted.
        assert!(cache.get(&key).await.is_none());

        cache.set(key.clone(), response.clone()).await;

        // Hit after the insert.
        let cached = cache.get(&key).await;
        assert!(cached.is_some());
        assert_eq!(cached.unwrap().content, "Hello, world!");
    }

    #[tokio::test]
    async fn test_l1_eviction_by_capacity() {
        let metrics = CacheMetrics::new();
        let config = L1Config {
            max_capacity: 2,
            ttl_seconds: 300,
            tti_seconds: 120,
        };
        let cache = L1Cache::with_config(config, metrics);

        // Insert one more entry than the cache can hold.
        cache
            .set("key1".to_string(), create_test_response("value1"))
            .await;
        cache
            .set("key2".to_string(), create_test_response("value2"))
            .await;
        cache
            .set("key3".to_string(), create_test_response("value3"))
            .await;

        // Give moka's background eviction a moment to run.
        tokio::time::sleep(Duration::from_millis(100)).await;

        assert!(cache.entry_count() <= 2);
    }

    #[tokio::test]
    async fn test_l1_remove() {
        let metrics = CacheMetrics::new();
        let cache = L1Cache::new(metrics);

        let key = "test_key".to_string();
        cache.set(key.clone(), create_test_response("test")).await;

        assert!(cache.get(&key).await.is_some());

        cache.remove(&key).await;

        assert!(cache.get(&key).await.is_none());
    }

    #[tokio::test]
    async fn test_l1_clear() {
        let metrics = CacheMetrics::new();
        let cache = L1Cache::new(metrics);

        cache
            .set("key1".to_string(), create_test_response("value1"))
            .await;
        cache
            .set("key2".to_string(), create_test_response("value2"))
            .await;

        // Flush pending maintenance so entry_count() is accurate.
        cache.cache.run_pending_tasks().await;

        assert!(cache.entry_count() > 0);

        cache.clear().await;

        assert_eq!(cache.entry_count(), 0);
    }

    #[tokio::test]
    async fn test_l1_stats() {
        let metrics = CacheMetrics::new();
        let config = L1Config {
            max_capacity: 100,
            ttl_seconds: 300,
            tti_seconds: 120,
        };
        let cache = L1Cache::with_config(config, metrics);

        cache
            .set("key1".to_string(), create_test_response("value1"))
            .await;

        cache.cache.run_pending_tasks().await;

        let stats = cache.stats();
        assert_eq!(stats.entry_count, 1);
        assert_eq!(stats.max_capacity, 100);
        // 1 entry out of 100 is 1% utilization.
        assert_eq!(stats.utilization(), 1.0);
    }

    #[tokio::test]
    async fn test_l1_metrics_recording() {
        let metrics = CacheMetrics::new();
        let cache = L1Cache::new(metrics.clone());

        let key = "test_key".to_string();

        // A lookup on an empty cache records a miss.
        cache.get(&key).await;
        assert_eq!(metrics.snapshot().l1_misses, 1);

        // An insert records a write.
        cache.set(key.clone(), create_test_response("test")).await;
        assert_eq!(metrics.snapshot().l1_writes, 1);

        // A second lookup records a hit.
        cache.get(&key).await;
        assert_eq!(metrics.snapshot().l1_hits, 1);
    }
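
    // Additional sketches, not part of the original suite: they exercise the
    // zero-capacity guard in L1Stats::utilization() and TTL-based expiry.
    // The expiry test is timing-sensitive (1s TTL), so it is ignored by default.
    #[test]
    fn test_utilization_zero_capacity() {
        let stats = L1Stats {
            entry_count: 0,
            max_capacity: 0,
            ttl_seconds: 300,
        };
        // The guard avoids division by zero and reports 0% utilization.
        assert_eq!(stats.utilization(), 0.0);
    }

    #[tokio::test]
    #[ignore = "timing-based: sleeps past a 1s TTL"]
    async fn test_l1_ttl_expiry() {
        let metrics = CacheMetrics::new();
        let config = L1Config {
            max_capacity: 10,
            ttl_seconds: 1,
            tti_seconds: 1,
        };
        let cache = L1Cache::with_config(config, metrics);

        cache.set("key".to_string(), create_test_response("v")).await;
        assert!(cache.get("key").await.is_some());

        // After the TTL has elapsed, the entry should no longer be served.
        tokio::time::sleep(Duration::from_millis(1100)).await;
        assert!(cache.get("key").await.is_none());
    }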
}