1use crate::config::{CacheConfig, Config};
19use crate::models::{SearchQuery, SearchResponse};
20use serde::{Deserialize, Serialize};
21use std::fs;
22use std::path::{Path, PathBuf};
23use std::time::{Duration, SystemTime};
24
25#[derive(Debug, Clone, Serialize, Deserialize)]
27struct CacheMetadata {
28 cached_at: u64,
30
31 expires_at: u64,
33
34 source: String,
36
37 query: String,
39}
40
41#[derive(Debug, Clone, Serialize, Deserialize)]
43struct CachedSearchResponse {
44 metadata: CacheMetadata,
46
47 response: SearchResponse,
49}
50
/// Outcome of a cache lookup.
///
/// The derives let callers log a result with `{:?}` and compare results
/// directly (for example in assertions); they only apply when `T` itself
/// implements the corresponding trait.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CacheResult<T> {
    /// A valid, unexpired entry was found; carries the cached value.
    Hit(T),

    /// No usable entry exists (cache disabled, entry absent, or unreadable).
    Miss,

    /// An entry exists but its expiry timestamp has passed.
    Expired,
}
62
63#[derive(Debug, Clone)]
65pub struct CacheService {
66 base_dir: PathBuf,
68
69 search_dir: PathBuf,
71
72 citation_dir: PathBuf,
74
75 config: CacheConfig,
77}
78
79impl CacheService {
80 pub fn new() -> Self {
82 Self::from_config(Config::default().cache)
83 }
84
85 pub fn from_config(config: CacheConfig) -> Self {
87 let base_dir = config
88 .directory
89 .clone()
90 .unwrap_or_else(crate::config::default_cache_dir);
91
92 let search_dir = base_dir.join("searches");
93 let citation_dir = base_dir.join("citations");
94
95 Self {
96 base_dir,
97 search_dir,
98 citation_dir,
99 config,
100 }
101 }
102
103 pub fn initialize(&self) -> std::io::Result<()> {
105 if self.config.enabled {
106 fs::create_dir_all(&self.search_dir)?;
107 fs::create_dir_all(&self.citation_dir)?;
108 tracing::info!("Cache initialized at: {}", self.base_dir.display());
109 } else {
110 tracing::debug!("Cache is disabled");
111 }
112 Ok(())
113 }
114
115 pub fn is_enabled(&self) -> bool {
117 self.config.enabled
118 }
119
120 pub fn cache_dir(&self) -> &PathBuf {
122 &self.base_dir
123 }
124
125 fn search_cache_key(
127 &self,
128 query: &str,
129 source: &str,
130 max_results: usize,
131 year: Option<&str>,
132 author: Option<&str>,
133 category: Option<&str>,
134 ) -> String {
135 let input = format!(
136 "{}|{}|{}|{}|{}|{}",
137 query,
138 source,
139 max_results,
140 year.unwrap_or_default(),
141 author.unwrap_or_default(),
142 category.unwrap_or_default()
143 );
144
145 let digest = md5::compute(input.as_bytes());
146 format!("{:x}", digest)
147 }
148
149 fn citation_cache_key(&self, paper_id: &str, source: &str, max_results: usize) -> String {
151 let input = format!("{}|{}|{}", paper_id, source, max_results);
152 let digest = md5::compute(input.as_bytes());
153 format!("{:x}", digest)
154 }
155
156 fn is_expired(&self, expires_at: u64) -> bool {
158 let now = SystemTime::now()
159 .duration_since(SystemTime::UNIX_EPOCH)
160 .unwrap_or_default()
161 .as_secs();
162 now >= expires_at
163 }
164
165 pub fn get_search(&self, query: &SearchQuery, source: &str) -> CacheResult<SearchResponse> {
167 if !self.is_enabled() {
168 return CacheResult::Miss;
169 }
170
171 let key = self.search_cache_key(
172 &query.query,
173 source,
174 query.max_results,
175 query.year.as_deref(),
176 query.author.as_deref(),
177 query.category.as_deref(),
178 );
179
180 let cache_path = self.search_dir.join(&key);
181
182 match self.read_cache_file::<CachedSearchResponse>(&cache_path) {
183 Ok(cached) => {
184 if self.is_expired(cached.metadata.expires_at) {
185 tracing::debug!("Cache expired for search: {}", key);
186 CacheResult::Expired
187 } else {
188 tracing::debug!("Cache HIT for search: {}", key);
189 CacheResult::Hit(cached.response)
190 }
191 }
192 Err(_) => {
193 tracing::debug!("Cache MISS for search: {}", key);
194 CacheResult::Miss
195 }
196 }
197 }
198
199 pub fn set_search(&self, source: &str, query: &SearchQuery, response: &SearchResponse) {
201 if !self.is_enabled() {
202 return;
203 }
204
205 let key = self.search_cache_key(
206 &query.query,
207 source,
208 query.max_results,
209 query.year.as_deref(),
210 query.author.as_deref(),
211 query.category.as_deref(),
212 );
213 let cache_path = self.search_dir.join(&key);
214
215 let cached = CachedSearchResponse {
216 metadata: CacheMetadata {
217 cached_at: SystemTime::now()
218 .duration_since(SystemTime::UNIX_EPOCH)
219 .unwrap_or_default()
220 .as_secs(),
221 expires_at: SystemTime::now()
222 .duration_since(SystemTime::UNIX_EPOCH)
223 .unwrap_or_default()
224 .as_secs()
225 + self.config.search_ttl_seconds,
226 source: source.to_string(),
227 query: query.query.clone(),
228 },
229 response: response.clone(),
230 };
231
232 if let Err(e) = self.write_cache_file(&cache_path, &cached) {
233 tracing::warn!("Failed to cache search result: {}", e);
234 } else {
235 tracing::debug!("Cached search result: {}", key);
236 }
237 }
238
239 pub fn get_citations(
241 &self,
242 paper_id: &str,
243 source: &str,
244 max_results: usize,
245 ) -> CacheResult<SearchResponse> {
246 if !self.is_enabled() {
247 return CacheResult::Miss;
248 }
249
250 let key = self.citation_cache_key(paper_id, source, max_results);
251 let cache_path = self.citation_dir.join(&key);
252
253 match self.read_cache_file::<CachedSearchResponse>(&cache_path) {
254 Ok(cached) => {
255 if self.is_expired(cached.metadata.expires_at) {
256 tracing::debug!("Cache expired for citations: {}", key);
257 CacheResult::Expired
258 } else {
259 tracing::debug!("Cache HIT for citations: {}", key);
260 CacheResult::Hit(cached.response)
261 }
262 }
263 Err(_) => {
264 tracing::debug!("Cache MISS for citations: {}", key);
265 CacheResult::Miss
266 }
267 }
268 }
269
270 pub fn set_citations(&self, source: &str, paper_id: &str, response: &SearchResponse) {
272 if !self.is_enabled() {
273 return;
274 }
275
276 let key = self.citation_cache_key(paper_id, source, response.papers.len());
277 let cache_path = self.citation_dir.join(&key);
278
279 let cached = CachedSearchResponse {
280 metadata: CacheMetadata {
281 cached_at: SystemTime::now()
282 .duration_since(SystemTime::UNIX_EPOCH)
283 .unwrap_or_default()
284 .as_secs(),
285 expires_at: SystemTime::now()
286 .duration_since(SystemTime::UNIX_EPOCH)
287 .unwrap_or_default()
288 .as_secs()
289 + self.config.citation_ttl_seconds,
290 source: source.to_string(),
291 query: format!("citations for {}", paper_id),
292 },
293 response: response.clone(),
294 };
295
296 if let Err(e) = self.write_cache_file(&cache_path, &cached) {
297 tracing::warn!("Failed to cache citations: {}", e);
298 } else {
299 tracing::debug!("Cached citations: {}", key);
300 }
301 }
302
303 fn read_cache_file<T: for<'de> Deserialize<'de>>(
305 &self,
306 path: &Path,
307 ) -> Result<T, std::io::Error> {
308 let content = fs::read_to_string(path)?;
309 serde_json::from_str(&content)
310 .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))
311 }
312
313 fn write_cache_file<T: Serialize>(&self, path: &Path, data: &T) -> Result<(), std::io::Error> {
315 let content = serde_json::to_string_pretty(data)?;
316 fs::write(path, content)
317 }
318
319 pub fn clear_all(&self) -> std::io::Result<()> {
321 if !self.is_enabled() {
322 return Ok(());
323 }
324
325 let _ = fs::remove_dir_all(&self.base_dir);
326 self.initialize()?;
327 tracing::info!("Cache cleared");
328 Ok(())
329 }
330
331 pub fn clear_searches(&self) -> std::io::Result<()> {
333 if !self.is_enabled() {
334 return Ok(());
335 }
336
337 let _ = fs::remove_dir_all(&self.search_dir);
338 fs::create_dir_all(&self.search_dir)?;
339 tracing::info!("Search cache cleared");
340 Ok(())
341 }
342
343 pub fn clear_citations(&self) -> std::io::Result<()> {
345 if !self.is_enabled() {
346 return Ok(());
347 }
348
349 let _ = fs::remove_dir_all(&self.citation_dir);
350 fs::create_dir_all(&self.citation_dir)?;
351 tracing::info!("Citation cache cleared");
352 Ok(())
353 }
354
355 pub fn stats(&self) -> CacheStats {
357 if !self.is_enabled() {
358 return CacheStats::disabled();
359 }
360
361 let search_count = self.search_dir.read_dir().map(|e| e.count()).unwrap_or(0);
362 let citation_count = self.citation_dir.read_dir().map(|e| e.count()).unwrap_or(0);
363
364 let search_size = self
365 .dir_size(&self.search_dir)
366 .map(|s| s / 1024)
367 .unwrap_or(0); let citation_size = self
369 .dir_size(&self.citation_dir)
370 .map(|s| s / 1024)
371 .unwrap_or(0); CacheStats {
374 enabled: true,
375 cache_dir: self.base_dir.clone(),
376 search_count,
377 citation_count,
378 search_size_kb: search_size,
379 citation_size_kb: citation_size,
380 total_size_kb: search_size + citation_size,
381 ttl_search: Duration::from_secs(self.config.search_ttl_seconds),
382 ttl_citations: Duration::from_secs(self.config.citation_ttl_seconds),
383 }
384 }
385
386 #[allow(clippy::only_used_in_recursion)]
388 fn dir_size(&self, path: &Path) -> Result<u64, std::io::Error> {
389 let mut size = 0;
390 if let Ok(entries) = path.read_dir() {
391 for entry in entries.flatten() {
392 size += if entry.path().is_dir() {
393 self.dir_size(&entry.path()).unwrap_or(0)
394 } else {
395 entry.metadata().map(|m| m.len()).unwrap_or(0)
396 };
397 }
398 }
399 Ok(size)
400 }
401}
402
403impl Default for CacheService {
404 fn default() -> Self {
405 Self::new()
406 }
407}
408
/// Snapshot of the cache's configuration and disk usage.
#[derive(Debug, Clone)]
pub struct CacheStats {
    /// Whether caching is enabled.
    pub enabled: bool,

    /// Root directory of the cache (empty when the cache is disabled).
    pub cache_dir: PathBuf,

    /// Number of entries in the search cache directory.
    pub search_count: usize,

    /// Number of entries in the citation cache directory.
    pub citation_count: usize,

    /// Size of the search cache in KiB.
    pub search_size_kb: u64,

    /// Size of the citation cache in KiB.
    pub citation_size_kb: u64,

    /// Sum of `search_size_kb` and `citation_size_kb`.
    pub total_size_kb: u64,

    /// Time-to-live applied to cached searches.
    pub ttl_search: Duration,

    /// Time-to-live applied to cached citation lookups.
    pub ttl_citations: Duration,
}

impl CacheStats {
    /// Statistics reported for a disabled cache: every counter is zero, the
    /// directory is empty and both TTLs are zero.
    fn disabled() -> Self {
        let no_dir = PathBuf::new();
        let no_ttl = Duration::ZERO;

        Self {
            enabled: false,
            cache_dir: no_dir,
            search_count: 0,
            citation_count: 0,
            search_size_kb: 0,
            citation_size_kb: 0,
            total_size_kb: 0,
            ttl_search: no_ttl,
            ttl_citations: no_ttl,
        }
    }
}
456
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Baseline enabled configuration; tests override individual fields.
    fn test_cache_config() -> CacheConfig {
        CacheConfig {
            enabled: true,
            directory: None,
            search_ttl_seconds: 60,
            citation_ttl_seconds: 30,
            max_size_mb: 10,
        }
    }

    /// The empty response every test stores in the cache.
    fn sample_response() -> SearchResponse {
        SearchResponse::new(vec![], "test_source".to_string(), "test query".to_string())
    }

    /// A stored search is returned for the same query and missed for another.
    #[tokio::test]
    async fn test_cache_search() {
        let temp_dir = TempDir::new().unwrap();
        let cache = CacheService::from_config(CacheConfig {
            directory: Some(temp_dir.path().to_path_buf()),
            ..test_cache_config()
        });
        cache.initialize().unwrap();

        let response = sample_response();
        let query = SearchQuery::new("test query");

        cache.set_search("test_source", &query, &response);

        if let CacheResult::Hit(hit) = cache.get_search(&query, "test_source") {
            assert_eq!(hit.source, "test_source");
            assert_eq!(hit.query, "test query");
        } else {
            panic!("Expected cache hit");
        }

        let other_query = SearchQuery::new("different query");
        assert!(
            matches!(
                cache.get_search(&other_query, "test_source"),
                CacheResult::Miss
            ),
            "Expected cache miss for different query"
        );

        cache.clear_all().unwrap();
    }

    /// A disabled cache ignores writes and always reports a miss.
    #[tokio::test]
    async fn test_cache_disabled() {
        let temp_dir = TempDir::new().unwrap();
        let cache = CacheService::from_config(CacheConfig {
            enabled: false,
            directory: Some(temp_dir.path().to_path_buf()),
            ..test_cache_config()
        });

        let response = sample_response();
        let query = SearchQuery::new("test query");

        cache.set_search("test_source", &query, &response);

        assert!(
            matches!(cache.get_search(&query, "test_source"), CacheResult::Miss),
            "Expected cache miss when disabled"
        );
    }

    /// With a zero TTL an entry is already expired when it is read back.
    #[tokio::test]
    async fn test_cache_expiration() {
        let temp_dir = TempDir::new().unwrap();
        let cache = CacheService::from_config(CacheConfig {
            directory: Some(temp_dir.path().to_path_buf()),
            search_ttl_seconds: 0,
            citation_ttl_seconds: 0,
            ..test_cache_config()
        });
        cache.initialize().unwrap();

        let response = sample_response();
        let query = SearchQuery::new("test query");

        cache.set_search("test_source", &query, &response);

        assert!(
            matches!(
                cache.get_search(&query, "test_source"),
                CacheResult::Expired
            ),
            "Expected cache expired"
        );
    }
}