Skip to main content

pulith_fetch/cache/
file_cache.rs

1//! HTTP caching functionality.
2//!
3//! This module provides HTTP caching with ETag and Last-Modified
4//! support following RFC 7234.
5
6use crate::error::{Error, Result};
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9use std::path::PathBuf;
10use std::time::{Duration, SystemTime, UNIX_EPOCH};
11use tokio::fs;
12use tokio::sync::RwLock;
13
/// Settings that decide where the cache lives and how much it may hold.
#[derive(Debug, Clone)]
pub struct CacheConfig {
    /// Directory that holds the cached content files and the metadata file.
    pub dir: PathBuf,
    /// Upper bound on the total size of cached content, in bytes.
    /// `None` disables the size limit.
    pub max_size: Option<u64>,
    /// Longest time an entry may stay in the cache before it counts as expired.
    /// `None` disables the configuration-level age limit.
    pub max_age: Option<Duration>,
    /// When `true`, entry metadata is written to disk and reloaded when a
    /// cache is created.
    pub persist_metadata: bool,
}
26
27impl Default for CacheConfig {
28    fn default() -> Self {
29        Self {
30            dir: PathBuf::from(".cache/pulith-fetch"),
31            max_size: Some(1024 * 1024 * 1024), // 1GB
32            max_age: Some(Duration::from_secs(7 * 24 * 60 * 60)), // 7 days
33            persist_metadata: true,
34        }
35    }
36}
37
38/// Cache entry metadata.
39#[derive(Debug, Clone, Serialize, Deserialize)]
40pub struct CacheEntry {
41    /// The URL that was cached
42    pub url: String,
43    /// ETag from the server
44    pub etag: Option<String>,
45    /// Last-Modified from the server
46    pub last_modified: Option<u64>, // Unix timestamp
47    /// When the entry was cached
48    pub cached_at: u64, // Unix timestamp
49    /// Size of the cached content
50    pub size: u64,
51    /// SHA256 checksum of the content
52    pub checksum: [u8; 32],
53    /// Number of times this entry was accessed
54    pub access_count: u64,
55    /// Last access time
56    pub last_accessed: u64, // Unix timestamp
57    /// Cache control max-age from server
58    pub max_age: Option<u64>, // seconds
59    /// Cache control no-cache directive
60    pub no_cache: bool,
61}
62
63impl CacheEntry {
64    /// Check if the entry is expired based on its age and max_age.
65    pub fn is_expired(&self, config_max_age: Option<Duration>) -> bool {
66        let now = SystemTime::now()
67            .duration_since(UNIX_EPOCH)
68            .unwrap()
69            .as_secs();
70
71        // Check server-provided max-age first
72        if let Some(server_max_age) = self.max_age
73            && self.cached_at + server_max_age < now
74        {
75            return true;
76        }
77
78        // Check configuration max-age
79        if let Some(config_max_age) = config_max_age
80            && self.cached_at + config_max_age.as_secs() < now
81        {
82            return true;
83        }
84
85        false
86    }
87
88    /// Check if the entry should be revalidated.
89    pub fn should_revalidate(&self) -> bool {
90        self.no_cache || self.etag.is_some() || self.last_modified.is_some()
91    }
92}
93
94/// HTTP cache implementation.
95pub struct Cache {
96    config: CacheConfig,
97    entries: RwLock<HashMap<String, CacheEntry>>,
98    current_size: RwLock<u64>,
99}
100
101impl Cache {
102    /// Create a new cache with the given configuration.
103    pub async fn new(config: CacheConfig) -> Result<Self> {
104        // Create cache directory if it doesn't exist
105        fs::create_dir_all(&config.dir)
106            .await
107            .map_err(|e| Error::Network(format!("Failed to create cache directory: {}", e)))?;
108
109        let cache = Self {
110            entries: RwLock::new(HashMap::new()),
111            current_size: RwLock::new(0),
112            config,
113        };
114
115        // Load metadata from disk if enabled
116        if cache.config.persist_metadata {
117            cache.load_metadata().await?;
118        }
119
120        Ok(cache)
121    }
122
123    /// Get a cached entry for the given URL.
124    pub async fn get(&self, url: &str) -> Result<Option<CacheEntry>> {
125        let entries = self.entries.read().await;
126
127        if let Some(entry) = entries.get(url) {
128            // Check if expired
129            if entry.is_expired(self.config.max_age) {
130                return Ok(None);
131            }
132
133            // Update access count and last accessed time
134            drop(entries);
135            self.update_access(url).await;
136
137            let entries = self.entries.read().await;
138            Ok(entries.get(url).cloned())
139        } else {
140            Ok(None)
141        }
142    }
143
144    /// Store content in the cache.
145    pub async fn put(
146        &self,
147        url: String,
148        content: &[u8],
149        etag: Option<String>,
150        last_modified: Option<u64>,
151        max_age: Option<u64>,
152        no_cache: bool,
153    ) -> Result<()> {
154        // Calculate checksum
155        use sha2::{Digest, Sha256};
156        let mut hasher = Sha256::new();
157        hasher.update(content);
158        let checksum = hasher.finalize().into();
159
160        let now = SystemTime::now()
161            .duration_since(UNIX_EPOCH)
162            .unwrap()
163            .as_secs();
164
165        let entry = CacheEntry {
166            url: url.clone(),
167            etag,
168            last_modified,
169            cached_at: now,
170            size: content.len() as u64,
171            checksum,
172            access_count: 1,
173            last_accessed: now,
174            max_age,
175            no_cache,
176        };
177
178        // Check if we need to evict entries
179        if let Some(max_size) = self.config.max_size {
180            let current_size = *self.current_size.read().await;
181            if current_size + entry.size > max_size {
182                self.evict_lru(entry.size).await?;
183            }
184        }
185
186        // Write content to file
187        let cache_file = self.cache_file_path(&url);
188        fs::write(&cache_file, content)
189            .await
190            .map_err(|e| Error::Network(format!("Failed to write cache file: {}", e)))?;
191
192        // Update metadata
193        {
194            let mut entries = self.entries.write().await;
195            let mut current_size = self.current_size.write().await;
196
197            // Remove old entry if exists
198            if let Some(old_entry) = entries.remove(&url) {
199                *current_size = current_size.saturating_sub(old_entry.size);
200            }
201
202            entries.insert(url.clone(), entry.clone());
203            *current_size += entry.size;
204        }
205
206        // Persist metadata if enabled
207        if self.config.persist_metadata {
208            self.save_metadata().await?;
209        }
210
211        Ok(())
212    }
213
214    /// Validate cached entry against server metadata.
215    pub async fn validate(
216        &self,
217        url: &str,
218        server_etag: Option<&str>,
219        server_last_modified: Option<u64>,
220    ) -> Result<bool> {
221        let entries = self.entries.read().await;
222
223        if let Some(entry) = entries.get(url) {
224            // Check ETag
225            if let (Some(cached_etag), Some(server_etag)) = (&entry.etag, server_etag)
226                && cached_etag == server_etag
227            {
228                return Ok(true);
229            }
230
231            // Check Last-Modified
232            if let (Some(cached_modified), Some(server_modified)) =
233                (entry.last_modified, server_last_modified)
234                && cached_modified >= server_modified
235            {
236                return Ok(true);
237            }
238        }
239
240        Ok(false)
241    }
242
243    /// Get the cache file path for a URL.
244    fn cache_file_path(&self, url: &str) -> PathBuf {
245        use sha2::{Digest, Sha256};
246        let mut hasher = Sha256::new();
247        hasher.update(url.as_bytes());
248        let hash = hex::encode(hasher.finalize());
249        self.config.dir.join(format!("{}.cache", hash))
250    }
251
252    /// Update access statistics for an entry.
253    async fn update_access(&self, url: &str) {
254        let mut entries = self.entries.write().await;
255        if let Some(entry) = entries.get_mut(url) {
256            entry.access_count += 1;
257            entry.last_accessed = SystemTime::now()
258                .duration_since(UNIX_EPOCH)
259                .unwrap()
260                .as_secs();
261        }
262    }
263
264    /// Evict least recently used entries to make space.
265    async fn evict_lru(&self, needed_space: u64) -> Result<()> {
266        let mut entries = self.entries.write().await;
267        let mut current_size = self.current_size.write().await;
268
269        // Collect entries sorted by last accessed time
270        let mut sorted_entries: Vec<_> = entries.iter().collect();
271        sorted_entries.sort_by_key(|(_, entry)| entry.last_accessed);
272
273        let mut freed_space = 0u64;
274        let mut to_remove = Vec::new();
275
276        for (url, entry) in sorted_entries {
277            if freed_space >= needed_space {
278                break;
279            }
280
281            to_remove.push(url.clone());
282            freed_space += entry.size;
283        }
284
285        // Remove entries and delete files
286        for url in to_remove {
287            if let Some(entry) = entries.remove(&url) {
288                *current_size = current_size.saturating_sub(entry.size);
289
290                // Delete cache file
291                let cache_file = self.cache_file_path(&url);
292                let _ = fs::remove_file(cache_file).await;
293            }
294        }
295
296        Ok(())
297    }
298
299    /// Load metadata from disk.
300    async fn load_metadata(&self) -> Result<()> {
301        let metadata_file = self.config.dir.join("metadata.json");
302
303        if !metadata_file.exists() {
304            return Ok(());
305        }
306
307        let content = fs::read_to_string(&metadata_file)
308            .await
309            .map_err(|e| Error::Network(format!("Failed to read metadata file: {}", e)))?;
310
311        let loaded_entries: HashMap<String, CacheEntry> = serde_json::from_str(&content)
312            .map_err(|e| Error::InvalidState(format!("Invalid metadata format: {}", e)))?;
313
314        // Calculate current size
315        let mut total_size = 0u64;
316        for entry in loaded_entries.values() {
317            total_size += entry.size;
318        }
319
320        *self.entries.write().await = loaded_entries;
321        *self.current_size.write().await = total_size;
322
323        Ok(())
324    }
325
326    /// Save metadata to disk.
327    async fn save_metadata(&self) -> Result<()> {
328        let metadata_file = self.config.dir.join("metadata.json");
329
330        let entries = self.entries.read().await;
331        let content = serde_json::to_string_pretty(&*entries)
332            .map_err(|e| Error::InvalidState(format!("Failed to serialize metadata: {}", e)))?;
333
334        fs::write(&metadata_file, content)
335            .await
336            .map_err(|e| Error::Network(format!("Failed to write metadata file: {}", e)))?;
337
338        Ok(())
339    }
340
341    /// Clear all cached entries.
342    pub async fn clear(&self) -> Result<()> {
343        let entries = self.entries.read().await;
344
345        // Delete all cache files
346        for url in entries.keys() {
347            let cache_file = self.cache_file_path(url);
348            let _ = fs::remove_file(cache_file).await;
349        }
350
351        // Clear in-memory state
352        drop(entries);
353        self.entries.write().await.clear();
354        *self.current_size.write().await = 0;
355
356        // Delete metadata file
357        let metadata_file = self.config.dir.join("metadata.json");
358        let _ = fs::remove_file(metadata_file).await;
359
360        Ok(())
361    }
362
363    /// Get cache statistics.
364    pub async fn stats(&self) -> CacheStats {
365        let entries = self.entries.read().await;
366        let current_size = *self.current_size.read().await;
367
368        CacheStats {
369            entry_count: entries.len(),
370            total_size: current_size,
371            max_size: self.config.max_size,
372            hit_count: 0,  // TODO: Implement hit tracking
373            miss_count: 0, // TODO: Implement miss tracking
374        }
375    }
376}
377
/// Point-in-time summary of a cache's contents and usage.
#[derive(Debug, Clone)]
pub struct CacheStats {
    /// How many entries the cache currently tracks.
    pub entry_count: usize,
    /// Combined size of all cached content, in bytes.
    pub total_size: u64,
    /// Configured size limit in bytes, or `None` when unlimited.
    pub max_size: Option<u64>,
    /// Count of lookups that were answered from the cache.
    pub hit_count: u64,
    /// Count of lookups that were not answered from the cache.
    pub miss_count: u64,
}
392
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    const URL: &str = "https://example.com/test.txt";
    const BODY: &[u8] = b"Hello, World!";
    const ETAG: &str = "\"etag123\"";

    /// Current time in whole seconds since the Unix epoch.
    fn unix_now() -> u64 {
        SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs()
    }

    /// Builds a bare entry with the given storage time and server max-age.
    fn sample_entry(cached_at: u64, max_age: Option<u64>) -> CacheEntry {
        CacheEntry {
            url: URL.to_string(),
            etag: None,
            last_modified: None,
            cached_at,
            size: BODY.len() as u64,
            checksum: [0; 32],
            access_count: 1,
            last_accessed: cached_at,
            max_age,
            no_cache: false,
        }
    }

    /// Creates a 1 KiB cache with a one-hour age limit in a fresh directory.
    /// The `TempDir` is returned so the directory outlives the cache.
    async fn create_test_cache() -> (Cache, TempDir) {
        let temp_dir = TempDir::new().unwrap();
        let config = CacheConfig {
            dir: temp_dir.path().to_path_buf(),
            max_size: Some(1024),
            max_age: Some(Duration::from_secs(3600)),
            persist_metadata: true,
        };
        (Cache::new(config).await.unwrap(), temp_dir)
    }

    #[test]
    fn test_entry_is_expired() {
        let now = unix_now();

        // Within the server-provided lifetime.
        assert!(!sample_entry(now, Some(3600)).is_expired(None));

        // Past the server-provided lifetime.
        assert!(sample_entry(now - 7200, Some(3600)).is_expired(None));

        // Past the configured lifetime only.
        let old = sample_entry(now - 7200, None);
        assert!(old.is_expired(Some(Duration::from_secs(3600))));

        // No limit at all: never expires.
        assert!(!old.is_expired(None));
    }

    #[test]
    fn test_entry_should_revalidate() {
        let plain = sample_entry(0, None);
        assert!(!plain.should_revalidate());

        let mut with_etag = sample_entry(0, None);
        with_etag.etag = Some(ETAG.to_string());
        assert!(with_etag.should_revalidate());

        let mut with_date = sample_entry(0, None);
        with_date.last_modified = Some(1234567890);
        assert!(with_date.should_revalidate());

        let mut no_cache = sample_entry(0, None);
        no_cache.no_cache = true;
        assert!(no_cache.should_revalidate());
    }

    #[tokio::test]
    async fn test_cache_put_and_get() {
        let (cache, _temp_dir) = create_test_cache().await;

        cache
            .put(
                URL.to_string(),
                BODY,
                Some(ETAG.to_string()),
                Some(1234567890),
                Some(3600),
                false,
            )
            .await
            .unwrap();

        let entry = cache.get(URL).await.unwrap().unwrap();
        assert_eq!(entry.url, URL);
        assert_eq!(entry.etag, Some(ETAG.to_string()));
        assert_eq!(entry.last_modified, Some(1234567890));
        assert_eq!(entry.size, BODY.len() as u64);
        // Stored with a count of 1, then bumped by the lookup above.
        assert_eq!(entry.access_count, 2);

        // Unknown URLs are a miss, not an error.
        assert!(cache
            .get("https://example.com/missing.txt")
            .await
            .unwrap()
            .is_none());
    }

    #[tokio::test]
    async fn test_cache_expiration() {
        let (cache, _temp_dir) = create_test_cache().await;

        cache
            .put(URL.to_string(), BODY, None, None, Some(60), false)
            .await
            .unwrap();

        // Fresh right after being stored.
        assert!(cache.get(URL).await.unwrap().is_some());

        // Backdate the entry past its 60 second lifetime instead of
        // sleeping or mocking the clock.
        {
            let mut entries = cache.entries.write().await;
            let entry = entries.get_mut(URL).unwrap();
            entry.cached_at -= 120;
        }

        assert!(cache.get(URL).await.unwrap().is_none());
    }

    #[tokio::test]
    async fn test_cache_validation() {
        let (cache, _temp_dir) = create_test_cache().await;

        cache
            .put(
                URL.to_string(),
                BODY,
                Some(ETAG.to_string()),
                Some(1234567890),
                None,
                false,
            )
            .await
            .unwrap();

        // Matching ETag
        assert!(cache.validate(URL, Some(ETAG), None).await.unwrap());

        // Non-matching ETag
        assert!(!cache
            .validate(URL, Some("\"etag456\""), None)
            .await
            .unwrap());

        // Matching Last-Modified
        assert!(cache.validate(URL, None, Some(1234567890)).await.unwrap());

        // Server copy is newer than the cached one
        assert!(!cache.validate(URL, None, Some(1234567891)).await.unwrap());

        // Nothing cached for this URL
        assert!(!cache
            .validate("https://example.com/missing.txt", Some(ETAG), None)
            .await
            .unwrap());
    }

    #[tokio::test]
    async fn test_cache_eviction() {
        let (cache, _temp_dir) = create_test_cache().await;

        // Store 5 x 300 bytes into a 1024 byte cache.
        for i in 0..5 {
            let url = format!("https://example.com/test{}.txt", i);
            let content = vec![b'x'; 300];
            cache
                .put(url, &content, None, None, None, false)
                .await
                .unwrap();
        }

        // Only 3 entries fit (1024 / 300 ≈ 3) and the size bound holds.
        let stats = cache.stats().await;
        assert!(stats.entry_count <= 3);
        assert!(stats.total_size <= 1024);
        assert_eq!(stats.total_size, stats.entry_count as u64 * 300);

        // The entry stored last is never the one that gets evicted.
        assert!(cache
            .get("https://example.com/test4.txt")
            .await
            .unwrap()
            .is_some());
    }

    #[tokio::test]
    async fn test_cache_clear() {
        let (cache, temp_dir) = create_test_cache().await;

        for (url, body) in [
            ("https://example.com/test1.txt", b"content1"),
            ("https://example.com/test2.txt", b"content2"),
        ] {
            cache
                .put(url.to_string(), body, None, None, None, false)
                .await
                .unwrap();
        }

        cache.clear().await.unwrap();

        // In-memory state is empty.
        let stats = cache.stats().await;
        assert_eq!(stats.entry_count, 0);
        assert_eq!(stats.total_size, 0);
        assert!(cache
            .get("https://example.com/test1.txt")
            .await
            .unwrap()
            .is_none());

        // Content files and the metadata file are gone from disk.
        let leftovers = std::fs::read_dir(temp_dir.path()).unwrap().count();
        assert_eq!(leftovers, 0);
    }

    #[tokio::test]
    async fn test_metadata_persistence() {
        let temp_dir = TempDir::new().unwrap();
        let config = CacheConfig {
            dir: temp_dir.path().to_path_buf(),
            max_size: Some(1024),
            max_age: None,
            persist_metadata: true,
        };

        // Store an entry with one cache instance.
        let cache1 = Cache::new(config.clone()).await.unwrap();
        cache1
            .put(
                URL.to_string(),
                BODY,
                Some(ETAG.to_string()),
                None,
                None,
                false,
            )
            .await
            .unwrap();
        drop(cache1);

        // A second instance on the same directory sees the entry.
        let cache2 = Cache::new(config).await.unwrap();
        let entry = cache2.get(URL).await.unwrap().unwrap();
        assert_eq!(entry.etag, Some(ETAG.to_string()));

        let stats = cache2.stats().await;
        assert_eq!(stats.entry_count, 1);
        assert_eq!(stats.total_size, BODY.len() as u64);
    }
}