deps_core/
cache.rs

1use crate::error::{DepsError, Result};
2use dashmap::DashMap;
3use reqwest::{Client, StatusCode, header};
4use std::sync::Arc;
5use std::time::Instant;
6
/// Maximum number of cached entries to prevent unbounded memory growth.
///
/// `HttpCache::get_cached` runs an eviction pass once the number of stored
/// entries reaches this value.
const MAX_CACHE_ENTRIES: usize = 1000;
9
10/// Validates that a URL uses HTTPS protocol.
11///
12/// Returns an error if the URL doesn't start with "https://".
13/// This ensures all network requests are encrypted.
14///
15/// In test mode, HTTP URLs are allowed for mockito compatibility.
16#[inline]
17fn ensure_https(url: &str) -> Result<()> {
18    #[cfg(not(test))]
19    if !url.starts_with("https://") {
20        return Err(DepsError::CacheError(format!(
21            "URL must use HTTPS: {}",
22            url
23        )));
24    }
25    #[cfg(test)]
26    let _ = url; // Silence unused warning in tests
27    Ok(())
28}
29
/// A single cached HTTP response together with its validators.
///
/// Besides the body, the entry keeps the `ETag` and `Last-Modified` values
/// that were captured when the response was stored, so a later request can
/// be made conditional. The body lives behind an `Arc`, which makes cloning
/// an entry a reference-count bump rather than a copy of the bytes.
///
/// # Examples
///
/// ```
/// use deps_core::cache::CachedResponse;
/// use std::sync::Arc;
/// use std::time::Instant;
///
/// let response = CachedResponse {
///     body: Arc::new(b"response data".to_vec()),
///     etag: Some("\"abc123\"".into()),
///     last_modified: None,
///     fetched_at: Instant::now(),
/// };
///
/// // Cloning shares the body instead of duplicating it
/// let cloned = response.clone();
/// assert!(Arc::ptr_eq(&response.body, &cloned.body));
/// ```
#[derive(Clone, Debug)]
pub struct CachedResponse {
    /// Response body bytes, shared between all clones of this entry.
    pub body: Arc<Vec<u8>>,
    /// Value of the `ETag` response header, if one was captured.
    pub etag: Option<String>,
    /// Value of the `Last-Modified` response header, if one was captured.
    pub last_modified: Option<String>,
    /// Moment at which this entry was created.
    pub fetched_at: Instant,
}
61
/// HTTP cache with ETag and Last-Modified validation.
///
/// Implements RFC 7232 conditional requests to minimize network traffic.
/// Responses are cached together with their validation headers, and
/// subsequent requests for the same URL send `If-None-Match` (ETag) and/or
/// `If-Modified-Since` headers to check for updates.
///
/// The cache uses `Arc<Vec<u8>>` for response bodies, enabling efficient
/// sharing of cached data across multiple consumers without copying.
///
/// # Examples
///
/// ```no_run
/// use deps_core::cache::HttpCache;
///
/// # async fn example() -> deps_core::error::Result<()> {
/// let cache = HttpCache::new();
///
/// // First request - fetches from network
/// let data1 = cache.get_cached("https://index.crates.io/se/rd/serde").await?;
///
/// // Second request - uses conditional GET (304 Not Modified if unchanged)
/// let data2 = cache.get_cached("https://index.crates.io/se/rd/serde").await?;
///
/// // When the server answered 304, both point at the same cached buffer
/// assert!(std::sync::Arc::ptr_eq(&data1, &data2));
/// # Ok(())
/// # }
/// ```
pub struct HttpCache {
    /// Cached responses, keyed by the request URL string.
    entries: DashMap<String, CachedResponse>,
    /// HTTP client used for every request issued by this cache.
    client: Client,
}
95
96impl HttpCache {
97    /// Creates a new HTTP cache with default configuration.
98    ///
99    /// The cache uses a 30-second timeout for all requests and identifies
100    /// itself with a `deps-lsp/0.1.0` user agent.
101    pub fn new() -> Self {
102        let client = Client::builder()
103            .user_agent("deps-lsp/0.1.0")
104            .timeout(std::time::Duration::from_secs(30))
105            .build()
106            .expect("failed to create HTTP client");
107
108        Self {
109            entries: DashMap::new(),
110            client,
111        }
112    }
113
114    /// Retrieves data from URL with intelligent caching.
115    ///
116    /// On first request, fetches data from the network and caches it.
117    /// On subsequent requests, performs a conditional GET request using
118    /// cached ETag or Last-Modified headers. If the server responds with
119    /// 304 Not Modified, returns the cached data. Otherwise, fetches and
120    /// caches the new data.
121    ///
122    /// If the conditional request fails due to network errors, falls back
123    /// to the cached data (stale-while-revalidate pattern).
124    ///
125    /// # Returns
126    ///
127    /// Returns `Arc<Vec<u8>>` containing the response body. Multiple calls
128    /// for the same URL return Arc clones pointing to the same buffer,
129    /// avoiding unnecessary memory allocations.
130    ///
131    /// # Errors
132    ///
133    /// Returns `DepsError::RegistryError` if the initial fetch fails or
134    /// if no cached data exists and the network is unavailable.
135    ///
136    /// # Examples
137    ///
138    /// ```no_run
139    /// # use deps_core::cache::HttpCache;
140    /// # async fn example() -> deps_core::error::Result<()> {
141    /// let cache = HttpCache::new();
142    /// let data = cache.get_cached("https://example.com/api/data").await?;
143    /// println!("Fetched {} bytes", data.len());
144    /// # Ok(())
145    /// # }
146    /// ```
147    pub async fn get_cached(&self, url: &str) -> Result<Arc<Vec<u8>>> {
148        // Evict old entries if cache is at capacity
149        if self.entries.len() >= MAX_CACHE_ENTRIES {
150            self.evict_entries();
151        }
152
153        if let Some(cached) = self.entries.get(url) {
154            // Attempt conditional request with cached headers
155            match self.conditional_request(url, &cached).await {
156                Ok(Some(new_body)) => {
157                    // 200 OK - content changed, cache updated internally
158                    return Ok(new_body);
159                }
160                Ok(None) => {
161                    // 304 Not Modified - use cached body (cheap Arc clone)
162                    return Ok(Arc::clone(&cached.body));
163                }
164                Err(e) => {
165                    // Network error - fall back to cached body if available
166                    tracing::warn!("conditional request failed, using cache: {}", e);
167                    return Ok(Arc::clone(&cached.body));
168                }
169            }
170        }
171
172        // No cache entry - fetch fresh
173        self.fetch_and_store(url).await
174    }
175
176    /// Performs conditional HTTP request using cached validation headers.
177    ///
178    /// Sends `If-None-Match` (ETag) and/or `If-Modified-Since` headers
179    /// to check if the cached content is still valid.
180    ///
181    /// # Returns
182    ///
183    /// - `Ok(Some(Arc<Vec<u8>>))` - Server returned 200 OK with new content
184    /// - `Ok(None)` - Server returned 304 Not Modified (cache is valid)
185    /// - `Err(_)` - Network or HTTP error occurred
186    async fn conditional_request(
187        &self,
188        url: &str,
189        cached: &CachedResponse,
190    ) -> Result<Option<Arc<Vec<u8>>>> {
191        ensure_https(url)?;
192        let mut request = self.client.get(url);
193
194        if let Some(etag) = &cached.etag {
195            request = request.header(header::IF_NONE_MATCH, etag);
196        }
197        if let Some(last_modified) = &cached.last_modified {
198            request = request.header(header::IF_MODIFIED_SINCE, last_modified);
199        }
200
201        let response = request.send().await.map_err(|e| DepsError::RegistryError {
202            package: url.to_string(),
203            source: e,
204        })?;
205
206        if response.status() == StatusCode::NOT_MODIFIED {
207            // 304 Not Modified - content unchanged
208            return Ok(None);
209        }
210
211        // 200 OK - content changed
212        let etag = response
213            .headers()
214            .get(header::ETAG)
215            .and_then(|v| v.to_str().ok())
216            .map(String::from);
217
218        let last_modified = response
219            .headers()
220            .get(header::LAST_MODIFIED)
221            .and_then(|v| v.to_str().ok())
222            .map(String::from);
223
224        let body = response
225            .bytes()
226            .await
227            .map_err(|e| DepsError::RegistryError {
228                package: url.to_string(),
229                source: e,
230            })?;
231
232        let body_arc = Arc::new(body.to_vec());
233
234        // Update cache with new response
235        self.entries.insert(
236            url.to_string(),
237            CachedResponse {
238                body: Arc::clone(&body_arc),
239                etag,
240                last_modified,
241                fetched_at: Instant::now(),
242            },
243        );
244
245        Ok(Some(body_arc))
246    }
247
248    /// Fetches a fresh response from the network and stores it in the cache.
249    ///
250    /// This method bypasses the cache and always makes a network request.
251    /// The response is stored with its ETag and Last-Modified headers for
252    /// future conditional requests.
253    ///
254    /// # Errors
255    ///
256    /// Returns `DepsError::CacheError` if the server returns a non-2xx status code,
257    /// or `DepsError::RegistryError` if the network request fails.
258    pub(crate) async fn fetch_and_store(&self, url: &str) -> Result<Arc<Vec<u8>>> {
259        ensure_https(url)?;
260        tracing::debug!("fetching fresh: {}", url);
261
262        let response = self
263            .client
264            .get(url)
265            .send()
266            .await
267            .map_err(|e| DepsError::RegistryError {
268                package: url.to_string(),
269                source: e,
270            })?;
271
272        if !response.status().is_success() {
273            return Err(DepsError::CacheError(format!(
274                "HTTP {} for {}",
275                response.status(),
276                url
277            )));
278        }
279
280        let etag = response
281            .headers()
282            .get(header::ETAG)
283            .and_then(|v| v.to_str().ok())
284            .map(String::from);
285
286        let last_modified = response
287            .headers()
288            .get(header::LAST_MODIFIED)
289            .and_then(|v| v.to_str().ok())
290            .map(String::from);
291
292        let body = response
293            .bytes()
294            .await
295            .map_err(|e| DepsError::RegistryError {
296                package: url.to_string(),
297                source: e,
298            })?;
299
300        let body_arc = Arc::new(body.to_vec());
301
302        self.entries.insert(
303            url.to_string(),
304            CachedResponse {
305                body: Arc::clone(&body_arc),
306                etag,
307                last_modified,
308                fetched_at: Instant::now(),
309            },
310        );
311
312        Ok(body_arc)
313    }
314
    /// Clears all cached entries.
    ///
    /// This removes all cached responses, forcing the next `get_cached` call
    /// for any URL to fetch fresh data from the network instead of sending a
    /// conditional request.
    pub fn clear(&self) {
        self.entries.clear();
    }
322
    /// Returns the number of cached entries (one entry per cached URL).
    pub fn len(&self) -> usize {
        self.entries.len()
    }
327
    /// Returns `true` if the cache contains no entries.
    ///
    /// Companion to [`HttpCache::len`].
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }
332
333    /// Evicts approximately 10% of cache entries when capacity is reached.
334    ///
335    /// Uses a simple random eviction strategy. In a production system,
336    /// this could be replaced with LRU or TTL-based eviction.
337    fn evict_entries(&self) {
338        let target_removals = MAX_CACHE_ENTRIES / 10;
339        let mut removed = 0;
340
341        // Simple eviction: remove oldest entries by fetched_at timestamp
342        let mut entries_to_remove = Vec::new();
343
344        for entry in self.entries.iter() {
345            entries_to_remove.push((entry.key().clone(), entry.value().fetched_at));
346            if entries_to_remove.len() >= MAX_CACHE_ENTRIES {
347                break;
348            }
349        }
350
351        // Sort by age (oldest first)
352        entries_to_remove.sort_by_key(|(_, time)| *time);
353
354        // Remove oldest entries
355        for (url, _) in entries_to_remove.iter().take(target_removals) {
356            self.entries.remove(url);
357            removed += 1;
358        }
359
360        tracing::debug!("evicted {} cache entries", removed);
361    }
362}
363
/// `Default` delegates to [`HttpCache::new`].
impl Default for HttpCache {
    /// Builds a cache exactly as [`HttpCache::new`] does.
    fn default() -> Self {
        Self::new()
    }
}
369
370#[cfg(test)]
371mod tests {
372    use super::*;
373
    /// A freshly constructed cache holds no entries.
    #[test]
    fn test_cache_creation() {
        let cache = HttpCache::new();
        assert_eq!(cache.len(), 0);
        assert!(cache.is_empty());
    }
380
    /// `clear` removes an entry that was inserted directly into the map.
    #[test]
    fn test_cache_clear() {
        let cache = HttpCache::new();
        cache.entries.insert(
            "test".into(),
            CachedResponse {
                body: Arc::new(vec![1, 2, 3]),
                etag: None,
                last_modified: None,
                fetched_at: Instant::now(),
            },
        );
        assert_eq!(cache.len(), 1);
        cache.clear();
        assert_eq!(cache.len(), 0);
    }
397
    /// Cloning a `CachedResponse` shares the body allocation and copies the
    /// ETag value.
    #[test]
    fn test_cached_response_clone() {
        let response = CachedResponse {
            body: Arc::new(vec![1, 2, 3]),
            etag: Some("test".into()),
            last_modified: Some("date".into()),
            fetched_at: Instant::now(),
        };
        let cloned = response.clone();
        // Arc clone points to same data
        assert!(Arc::ptr_eq(&response.body, &cloned.body));
        assert_eq!(response.etag, cloned.etag);
    }
411
    /// `len` reflects an entry inserted directly into the map.
    #[test]
    fn test_cache_len() {
        let cache = HttpCache::new();
        assert_eq!(cache.len(), 0);

        cache.entries.insert(
            "url1".into(),
            CachedResponse {
                body: Arc::new(vec![]),
                etag: None,
                last_modified: None,
                fetched_at: Instant::now(),
            },
        );

        assert_eq!(cache.len(), 1);
    }
429
    /// With an empty cache, `get_cached` downloads the body from the mock
    /// server and stores exactly one entry.
    #[tokio::test]
    async fn test_get_cached_fresh_fetch() {
        let mut server = mockito::Server::new_async().await;

        let _m = server
            .mock("GET", "/api/data")
            .with_status(200)
            .with_header("etag", "\"abc123\"")
            .with_body("test data")
            .create_async()
            .await;

        let cache = HttpCache::new();
        let url = format!("{}/api/data", server.url());
        let result = cache.get_cached(&url).await.unwrap();

        assert_eq!(&**result, b"test data");
        assert_eq!(cache.len(), 1);
    }
449
    /// After an initial 200 response is cached, a second call that receives
    /// 304 returns the originally cached body.
    ///
    /// NOTE(review): this overlaps almost entirely with
    /// `test_get_cached_304_not_modified`; the only extra check here is the
    /// `cache.len()` assertion after the first fetch.
    #[tokio::test]
    async fn test_get_cached_cache_hit() {
        let mut server = mockito::Server::new_async().await;
        let url = format!("{}/api/data", server.url());

        let cache = HttpCache::new();

        let _m1 = server
            .mock("GET", "/api/data")
            .with_status(200)
            .with_header("etag", "\"abc123\"")
            .with_body("original data")
            .create_async()
            .await;

        let result1 = cache.get_cached(&url).await.unwrap();
        assert_eq!(&**result1, b"original data");
        assert_eq!(cache.len(), 1);

        // Drop the 200 mock before registering the 304 mock for the same path.
        drop(_m1);

        let _m2 = server
            .mock("GET", "/api/data")
            .match_header("if-none-match", "\"abc123\"")
            .with_status(304)
            .create_async()
            .await;

        let result2 = cache.get_cached(&url).await.unwrap();
        assert_eq!(&**result2, b"original data");
    }
481
    /// A 304 reply to a request carrying the cached ETag in `If-None-Match`
    /// yields the previously cached body.
    #[tokio::test]
    async fn test_get_cached_304_not_modified() {
        let mut server = mockito::Server::new_async().await;
        let url = format!("{}/api/data", server.url());

        let cache = HttpCache::new();

        let _m1 = server
            .mock("GET", "/api/data")
            .with_status(200)
            .with_header("etag", "\"abc123\"")
            .with_body("original data")
            .create_async()
            .await;

        let result1 = cache.get_cached(&url).await.unwrap();
        assert_eq!(&**result1, b"original data");

        // Drop the 200 mock before registering the 304 mock for the same path.
        drop(_m1);

        let _m2 = server
            .mock("GET", "/api/data")
            .match_header("if-none-match", "\"abc123\"")
            .with_status(304)
            .create_async()
            .await;

        let result2 = cache.get_cached(&url).await.unwrap();
        assert_eq!(&**result2, b"original data");
    }
512
    /// A pre-seeded entry with only an ETag causes `If-None-Match` to be
    /// sent; on 304 the seeded body is returned.
    #[tokio::test]
    async fn test_get_cached_etag_validation() {
        let mut server = mockito::Server::new_async().await;
        let url = format!("{}/api/data", server.url());

        let cache = HttpCache::new();

        // Seed the cache directly so no initial fetch is needed.
        cache.entries.insert(
            url.clone(),
            CachedResponse {
                body: Arc::new(b"cached".to_vec()),
                etag: Some("\"tag123\"".into()),
                last_modified: None,
                fetched_at: Instant::now(),
            },
        );

        // The mock only matches when the seeded ETag is sent back.
        let _m = server
            .mock("GET", "/api/data")
            .match_header("if-none-match", "\"tag123\"")
            .with_status(304)
            .create_async()
            .await;

        let result = cache.get_cached(&url).await.unwrap();
        assert_eq!(&**result, b"cached");
    }
540
    /// A pre-seeded entry with only a Last-Modified value causes
    /// `If-Modified-Since` to be sent; on 304 the seeded body is returned.
    #[tokio::test]
    async fn test_get_cached_last_modified_validation() {
        let mut server = mockito::Server::new_async().await;
        let url = format!("{}/api/data", server.url());

        let cache = HttpCache::new();

        // Seed the cache directly so no initial fetch is needed.
        cache.entries.insert(
            url.clone(),
            CachedResponse {
                body: Arc::new(b"cached".to_vec()),
                etag: None,
                last_modified: Some("Wed, 21 Oct 2024 07:28:00 GMT".into()),
                fetched_at: Instant::now(),
            },
        );

        // The mock only matches when the seeded date is sent back.
        let _m = server
            .mock("GET", "/api/data")
            .match_header("if-modified-since", "Wed, 21 Oct 2024 07:28:00 GMT")
            .with_status(304)
            .create_async()
            .await;

        let result = cache.get_cached(&url).await.unwrap();
        assert_eq!(&**result, b"cached");
    }
568
    /// When the conditional request fails, `get_cached` returns the seeded
    /// stale body instead of an error.
    #[tokio::test]
    async fn test_get_cached_network_error_fallback() {
        let cache = HttpCache::new();
        // No mock server is set up; the request to this host is expected to fail.
        let url = "http://invalid.localhost.test/data";

        cache.entries.insert(
            url.to_string(),
            CachedResponse {
                body: Arc::new(b"stale data".to_vec()),
                etag: Some("\"old\"".into()),
                last_modified: None,
                fetched_at: Instant::now(),
            },
        );

        let result = cache.get_cached(url).await.unwrap();
        assert_eq!(&**result, b"stale data");
    }
587
    /// A 404 from the server makes `fetch_and_store` return
    /// `DepsError::CacheError` whose message contains the status code.
    #[tokio::test]
    async fn test_fetch_and_store_http_error() {
        let mut server = mockito::Server::new_async().await;

        let _m = server
            .mock("GET", "/api/missing")
            .with_status(404)
            .with_body("Not Found")
            .create_async()
            .await;

        let cache = HttpCache::new();
        let url = format!("{}/api/missing", server.url());
        let result = cache.fetch_and_store(&url).await;

        assert!(result.is_err());
        match result {
            Err(DepsError::CacheError(msg)) => {
                assert!(msg.contains("404"));
            }
            _ => panic!("Expected CacheError"),
        }
    }
611
    /// `fetch_and_store` records both the ETag and Last-Modified header
    /// values of the response in the cache entry.
    #[tokio::test]
    async fn test_fetch_and_store_stores_headers() {
        let mut server = mockito::Server::new_async().await;

        let _m = server
            .mock("GET", "/api/data")
            .with_status(200)
            .with_header("etag", "\"abc123\"")
            .with_header("last-modified", "Wed, 21 Oct 2024 07:28:00 GMT")
            .with_body("test")
            .create_async()
            .await;

        let cache = HttpCache::new();
        let url = format!("{}/api/data", server.url());
        cache.fetch_and_store(&url).await.unwrap();

        // Inspect the stored entry directly rather than going through `get_cached`.
        let cached = cache.entries.get(&url).unwrap();
        assert_eq!(cached.etag, Some("\"abc123\"".into()));
        assert_eq!(
            cached.last_modified,
            Some("Wed, 21 Oct 2024 07:28:00 GMT".into())
        );
    }
636}