deps_core/
cache.rs

1use crate::error::{DepsError, Result};
2use dashmap::DashMap;
3use reqwest::{Client, StatusCode, header};
4use std::sync::Arc;
5use std::time::Instant;
6
/// Upper bound on the number of cached responses.
///
/// Reaching this bound triggers eviction so the cache cannot grow without limit.
const MAX_CACHE_ENTRIES: usize = 1_000;

/// Timeout, in seconds, applied to HTTP requests.
const HTTP_TIMEOUT_SECS: u64 = 30;

/// Share of the cache, expressed as a percentage, that is evicted once the
/// cache has reached capacity.
const CACHE_EVICTION_PERCENTAGE: usize = 10;
15
16/// Validates that a URL uses HTTPS protocol.
17///
18/// Returns an error if the URL doesn't start with "https://".
19/// This ensures all network requests are encrypted.
20///
21/// In test mode, HTTP URLs are allowed for mockito compatibility.
22#[inline]
23fn ensure_https(url: &str) -> Result<()> {
24    #[cfg(not(test))]
25    if !url.starts_with("https://") {
26        return Err(DepsError::CacheError(format!(
27            "URL must use HTTPS: {}",
28            url
29        )));
30    }
31    #[cfg(test)]
32    let _ = url; // Silence unused warning in tests
33    Ok(())
34}
35
/// A response body held in the cache together with its HTTP validators.
///
/// The validators (`ETag`, `Last-Modified`) are kept so that a later request
/// can be sent conditionally. The body lives behind an `Arc`, so cloning a
/// `CachedResponse` shares the underlying buffer instead of copying it; the
/// validator strings are cloned normally.
///
/// # Examples
///
/// ```
/// use deps_core::cache::CachedResponse;
/// use std::sync::Arc;
/// use std::time::Instant;
///
/// let response = CachedResponse {
///     body: Arc::new(b"response data".to_vec()),
///     etag: Some("\"abc123\"".into()),
///     last_modified: None,
///     fetched_at: Instant::now(),
/// };
///
/// // The clone points at the same body allocation.
/// let cloned = response.clone();
/// assert!(Arc::ptr_eq(&response.body, &cloned.body));
/// ```
#[derive(Clone, Debug)]
pub struct CachedResponse {
    /// Raw response body, shared by reference count.
    pub body: Arc<Vec<u8>>,
    /// Value of the response's `ETag` header, if one was present.
    pub etag: Option<String>,
    /// Value of the response's `Last-Modified` header, if one was present.
    pub last_modified: Option<String>,
    /// Moment at which this entry was stored; used to pick eviction candidates.
    pub fetched_at: Instant,
}
67
/// HTTP cache with ETag and Last-Modified validation.
///
/// Implements RFC 7232 conditional requests to minimize network traffic.
/// Responses are stored together with their validation headers, and subsequent
/// requests for the same URL send `If-None-Match` (ETag) and/or
/// `If-Modified-Since` headers to check for updates.
///
/// The cache uses `Arc<Vec<u8>>` for response bodies, enabling efficient
/// sharing of cached data across multiple consumers without copying.
///
/// # Examples
///
/// ```no_run
/// use deps_core::cache::HttpCache;
///
/// # async fn example() -> deps_core::error::Result<()> {
/// let cache = HttpCache::new();
///
/// // First request - fetches from network
/// let data1 = cache.get_cached("https://index.crates.io/se/rd/serde").await?;
///
/// // Second request - uses conditional GET (304 Not Modified if unchanged)
/// let data2 = cache.get_cached("https://index.crates.io/se/rd/serde").await?;
///
/// // When the server answered 304, both share the same underlying buffer
/// assert!(std::sync::Arc::ptr_eq(&data1, &data2));
/// # Ok(())
/// # }
/// ```
pub struct HttpCache {
    /// Cached responses, keyed by the exact URL string that was requested.
    entries: DashMap<String, CachedResponse>,
    /// HTTP client shared by every request this cache issues.
    client: Client,
}
101
102impl HttpCache {
103    /// Creates a new HTTP cache with default configuration.
104    ///
105    /// The cache uses a configurable timeout for all requests and identifies
106    /// itself with an auto-versioned user agent.
107    pub fn new() -> Self {
108        let client = Client::builder()
109            .user_agent(format!("deps-lsp/{}", env!("CARGO_PKG_VERSION")))
110            .timeout(std::time::Duration::from_secs(HTTP_TIMEOUT_SECS))
111            .build()
112            .expect("failed to create HTTP client");
113
114        Self {
115            entries: DashMap::new(),
116            client,
117        }
118    }
119
120    /// Retrieves data from URL with intelligent caching.
121    ///
122    /// On first request, fetches data from the network and caches it.
123    /// On subsequent requests, performs a conditional GET request using
124    /// cached ETag or Last-Modified headers. If the server responds with
125    /// 304 Not Modified, returns the cached data. Otherwise, fetches and
126    /// caches the new data.
127    ///
128    /// If the conditional request fails due to network errors, falls back
129    /// to the cached data (stale-while-revalidate pattern).
130    ///
131    /// # Returns
132    ///
133    /// Returns `Arc<Vec<u8>>` containing the response body. Multiple calls
134    /// for the same URL return Arc clones pointing to the same buffer,
135    /// avoiding unnecessary memory allocations.
136    ///
137    /// # Errors
138    ///
139    /// Returns `DepsError::RegistryError` if the initial fetch fails or
140    /// if no cached data exists and the network is unavailable.
141    ///
142    /// # Examples
143    ///
144    /// ```no_run
145    /// # use deps_core::cache::HttpCache;
146    /// # async fn example() -> deps_core::error::Result<()> {
147    /// let cache = HttpCache::new();
148    /// let data = cache.get_cached("https://example.com/api/data").await?;
149    /// println!("Fetched {} bytes", data.len());
150    /// # Ok(())
151    /// # }
152    /// ```
153    pub async fn get_cached(&self, url: &str) -> Result<Arc<Vec<u8>>> {
154        // Evict old entries if cache is at capacity
155        if self.entries.len() >= MAX_CACHE_ENTRIES {
156            self.evict_entries();
157        }
158
159        if let Some(cached) = self.entries.get(url) {
160            // Attempt conditional request with cached headers
161            match self.conditional_request(url, &cached).await {
162                Ok(Some(new_body)) => {
163                    // 200 OK - content changed, cache updated internally
164                    return Ok(new_body);
165                }
166                Ok(None) => {
167                    // 304 Not Modified - use cached body (cheap Arc clone)
168                    return Ok(Arc::clone(&cached.body));
169                }
170                Err(e) => {
171                    // Network error - fall back to cached body if available
172                    tracing::warn!("conditional request failed, using cache: {}", e);
173                    return Ok(Arc::clone(&cached.body));
174                }
175            }
176        }
177
178        // No cache entry - fetch fresh
179        self.fetch_and_store(url).await
180    }
181
182    /// Performs conditional HTTP request using cached validation headers.
183    ///
184    /// Sends `If-None-Match` (ETag) and/or `If-Modified-Since` headers
185    /// to check if the cached content is still valid.
186    ///
187    /// # Returns
188    ///
189    /// - `Ok(Some(Arc<Vec<u8>>))` - Server returned 200 OK with new content
190    /// - `Ok(None)` - Server returned 304 Not Modified (cache is valid)
191    /// - `Err(_)` - Network or HTTP error occurred
192    async fn conditional_request(
193        &self,
194        url: &str,
195        cached: &CachedResponse,
196    ) -> Result<Option<Arc<Vec<u8>>>> {
197        ensure_https(url)?;
198        let mut request = self.client.get(url);
199
200        if let Some(etag) = &cached.etag {
201            request = request.header(header::IF_NONE_MATCH, etag);
202        }
203        if let Some(last_modified) = &cached.last_modified {
204            request = request.header(header::IF_MODIFIED_SINCE, last_modified);
205        }
206
207        let response = request.send().await.map_err(|e| DepsError::RegistryError {
208            package: url.to_string(),
209            source: e,
210        })?;
211
212        if response.status() == StatusCode::NOT_MODIFIED {
213            // 304 Not Modified - content unchanged
214            return Ok(None);
215        }
216
217        // 200 OK - content changed
218        let etag = response
219            .headers()
220            .get(header::ETAG)
221            .and_then(|v| v.to_str().ok())
222            .map(String::from);
223
224        let last_modified = response
225            .headers()
226            .get(header::LAST_MODIFIED)
227            .and_then(|v| v.to_str().ok())
228            .map(String::from);
229
230        let body = response
231            .bytes()
232            .await
233            .map_err(|e| DepsError::RegistryError {
234                package: url.to_string(),
235                source: e,
236            })?;
237
238        let body_arc = Arc::new(body.to_vec());
239
240        // Update cache with new response
241        self.entries.insert(
242            url.to_string(),
243            CachedResponse {
244                body: Arc::clone(&body_arc),
245                etag,
246                last_modified,
247                fetched_at: Instant::now(),
248            },
249        );
250
251        Ok(Some(body_arc))
252    }
253
254    /// Fetches a fresh response from the network and stores it in the cache.
255    ///
256    /// This method bypasses the cache and always makes a network request.
257    /// The response is stored with its ETag and Last-Modified headers for
258    /// future conditional requests.
259    ///
260    /// # Errors
261    ///
262    /// Returns `DepsError::CacheError` if the server returns a non-2xx status code,
263    /// or `DepsError::RegistryError` if the network request fails.
264    pub(crate) async fn fetch_and_store(&self, url: &str) -> Result<Arc<Vec<u8>>> {
265        ensure_https(url)?;
266        tracing::debug!("fetching fresh: {}", url);
267
268        let response = self
269            .client
270            .get(url)
271            .send()
272            .await
273            .map_err(|e| DepsError::RegistryError {
274                package: url.to_string(),
275                source: e,
276            })?;
277
278        if !response.status().is_success() {
279            return Err(DepsError::CacheError(format!(
280                "HTTP {} for {}",
281                response.status(),
282                url
283            )));
284        }
285
286        let etag = response
287            .headers()
288            .get(header::ETAG)
289            .and_then(|v| v.to_str().ok())
290            .map(String::from);
291
292        let last_modified = response
293            .headers()
294            .get(header::LAST_MODIFIED)
295            .and_then(|v| v.to_str().ok())
296            .map(String::from);
297
298        let body = response
299            .bytes()
300            .await
301            .map_err(|e| DepsError::RegistryError {
302                package: url.to_string(),
303                source: e,
304            })?;
305
306        let body_arc = Arc::new(body.to_vec());
307
308        self.entries.insert(
309            url.to_string(),
310            CachedResponse {
311                body: Arc::clone(&body_arc),
312                etag,
313                last_modified,
314                fetched_at: Instant::now(),
315            },
316        );
317
318        Ok(body_arc)
319    }
320
321    /// Clears all cached entries.
322    ///
323    /// This removes all cached responses, forcing the next request for
324    /// any URL to fetch fresh data from the network.
325    pub fn clear(&self) {
326        self.entries.clear();
327    }
328
329    /// Returns the number of cached entries.
330    pub fn len(&self) -> usize {
331        self.entries.len()
332    }
333
334    /// Returns `true` if the cache contains no entries.
335    pub fn is_empty(&self) -> bool {
336        self.entries.is_empty()
337    }
338
339    /// Evicts approximately `CACHE_EVICTION_PERCENTAGE`% of cache entries when capacity is reached.
340    ///
341    /// Uses a min-heap to efficiently find the oldest entries instead of full sorting.
342    /// For each entry, we potentially push/pop from the heap, which is O(log K).
343    ///
344    /// Time complexity: O(N log K) where N = number of cache entries, K = target_removals
345    /// Space complexity: O(K) for the min-heap
346    fn evict_entries(&self) {
347        use std::cmp::Reverse;
348        use std::collections::BinaryHeap;
349
350        let target_removals = MAX_CACHE_ENTRIES / CACHE_EVICTION_PERCENTAGE;
351
352        // Use min-heap to efficiently find N oldest entries
353        // The heap maintains the K oldest entries seen so far
354        let mut oldest = BinaryHeap::with_capacity(target_removals);
355
356        for entry in self.entries.iter() {
357            let item = (entry.value().fetched_at, entry.key().clone());
358
359            if oldest.len() < target_removals {
360                // Heap not full, insert directly
361                oldest.push(Reverse(item));
362            } else if let Some(Reverse(newest_of_oldest)) = oldest.peek() {
363                // If this entry is older than the newest entry in our "oldest" set,
364                // replace it
365                if item.0 < newest_of_oldest.0 {
366                    oldest.pop();
367                    oldest.push(Reverse(item));
368                }
369            }
370        }
371
372        // Remove selected oldest entries
373        let removed = oldest.len();
374        for Reverse((_, url)) in oldest {
375            self.entries.remove(&url);
376        }
377
378        tracing::debug!("evicted {} cache entries (O(N) algorithm)", removed);
379    }
380
381    /// Benchmark-only helper: Direct cache lookup without network requests.
382    #[doc(hidden)]
383    pub fn get_for_bench(&self, url: &str) -> Option<Arc<Vec<u8>>> {
384        self.entries.get(url).map(|entry| Arc::clone(&entry.body))
385    }
386
387    /// Benchmark-only helper: Direct cache insertion.
388    #[doc(hidden)]
389    pub fn insert_for_bench(&self, url: String, response: CachedResponse) {
390        self.entries.insert(url, response);
391    }
392}
393
394impl Default for HttpCache {
395    fn default() -> Self {
396        Self::new()
397    }
398}
399
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a cache entry stamped "now" with the given body and validators.
    fn entry(body: &[u8], etag: Option<&str>, last_modified: Option<&str>) -> CachedResponse {
        CachedResponse {
            body: Arc::new(body.to_vec()),
            etag: etag.map(String::from),
            last_modified: last_modified.map(String::from),
            fetched_at: Instant::now(),
        }
    }

    #[test]
    fn test_cache_creation() {
        let cache = HttpCache::new();
        assert_eq!(cache.len(), 0);
        assert!(cache.is_empty());
    }

    #[test]
    fn test_cache_clear() {
        let cache = HttpCache::new();
        cache.entries.insert("test".into(), entry(&[1, 2, 3], None, None));
        assert_eq!(cache.len(), 1);
        cache.clear();
        assert_eq!(cache.len(), 0);
    }

    #[test]
    fn test_cached_response_clone() {
        let response = entry(&[1, 2, 3], Some("test"), Some("date"));
        let cloned = response.clone();
        // Arc clone points to same data
        assert!(Arc::ptr_eq(&response.body, &cloned.body));
        assert_eq!(response.etag, cloned.etag);
    }

    #[test]
    fn test_cache_len() {
        let cache = HttpCache::new();
        assert_eq!(cache.len(), 0);

        cache.entries.insert("url1".into(), entry(&[], None, None));

        assert_eq!(cache.len(), 1);
    }

    #[tokio::test]
    async fn test_get_cached_fresh_fetch() {
        let mut server = mockito::Server::new_async().await;

        let mock = server
            .mock("GET", "/api/data")
            .with_status(200)
            .with_header("etag", "\"abc123\"")
            .with_body("test data")
            .create_async()
            .await;

        let cache = HttpCache::new();
        let url = format!("{}/api/data", server.url());
        let result = cache.get_cached(&url).await.unwrap();

        assert_eq!(&**result, b"test data");
        assert_eq!(cache.len(), 1);
        // The body must have come from exactly one request to the server.
        mock.assert_async().await;
    }

    #[tokio::test]
    async fn test_get_cached_cache_hit() {
        let mut server = mockito::Server::new_async().await;
        let url = format!("{}/api/data", server.url());

        let cache = HttpCache::new();

        let initial = server
            .mock("GET", "/api/data")
            .with_status(200)
            .with_header("etag", "\"abc123\"")
            .with_body("original data")
            .create_async()
            .await;

        let result1 = cache.get_cached(&url).await.unwrap();
        assert_eq!(&**result1, b"original data");
        assert_eq!(cache.len(), 1);
        initial.assert_async().await;

        // Remove the unconditional mock so only the conditional one can match.
        drop(initial);

        let revalidation = server
            .mock("GET", "/api/data")
            .match_header("if-none-match", "\"abc123\"")
            .with_status(304)
            .create_async()
            .await;

        let result2 = cache.get_cached(&url).await.unwrap();
        assert_eq!(&**result2, b"original data");
        assert_eq!(cache.len(), 1);
        // Without this the test would not notice a missing If-None-Match header.
        revalidation.assert_async().await;
    }

    #[tokio::test]
    async fn test_get_cached_304_not_modified() {
        let mut server = mockito::Server::new_async().await;
        let url = format!("{}/api/data", server.url());

        let cache = HttpCache::new();

        let initial = server
            .mock("GET", "/api/data")
            .with_status(200)
            .with_header("etag", "\"abc123\"")
            .with_body("original data")
            .create_async()
            .await;

        let result1 = cache.get_cached(&url).await.unwrap();
        assert_eq!(&**result1, b"original data");

        drop(initial);

        let revalidation = server
            .mock("GET", "/api/data")
            .match_header("if-none-match", "\"abc123\"")
            .with_status(304)
            .create_async()
            .await;

        let result2 = cache.get_cached(&url).await.unwrap();
        assert_eq!(&**result2, b"original data");
        // A 304 must hand back the cached buffer itself, not a copy.
        assert!(Arc::ptr_eq(&result1, &result2));
        revalidation.assert_async().await;
    }

    #[tokio::test]
    async fn test_get_cached_etag_validation() {
        let mut server = mockito::Server::new_async().await;
        let url = format!("{}/api/data", server.url());

        let cache = HttpCache::new();
        cache
            .entries
            .insert(url.clone(), entry(b"cached", Some("\"tag123\""), None));

        let mock = server
            .mock("GET", "/api/data")
            .match_header("if-none-match", "\"tag123\"")
            .with_status(304)
            .create_async()
            .await;

        let result = cache.get_cached(&url).await.unwrap();
        assert_eq!(&**result, b"cached");
        // Proves the stored ETag was actually sent as If-None-Match.
        mock.assert_async().await;
    }

    #[tokio::test]
    async fn test_get_cached_last_modified_validation() {
        let mut server = mockito::Server::new_async().await;
        let url = format!("{}/api/data", server.url());

        let cache = HttpCache::new();
        cache.entries.insert(
            url.clone(),
            entry(b"cached", None, Some("Wed, 21 Oct 2024 07:28:00 GMT")),
        );

        let mock = server
            .mock("GET", "/api/data")
            .match_header("if-modified-since", "Wed, 21 Oct 2024 07:28:00 GMT")
            .with_status(304)
            .create_async()
            .await;

        let result = cache.get_cached(&url).await.unwrap();
        assert_eq!(&**result, b"cached");
        // Proves the stored date was actually sent as If-Modified-Since.
        mock.assert_async().await;
    }

    #[tokio::test]
    async fn test_get_cached_network_error_fallback() {
        let cache = HttpCache::new();
        let url = "http://invalid.localhost.test/data";

        cache
            .entries
            .insert(url.to_string(), entry(b"stale data", Some("\"old\""), None));

        // The host cannot be reached, so the stale entry must be served.
        let result = cache.get_cached(url).await.unwrap();
        assert_eq!(&**result, b"stale data");
    }

    #[tokio::test]
    async fn test_fetch_and_store_http_error() {
        let mut server = mockito::Server::new_async().await;

        let mock = server
            .mock("GET", "/api/missing")
            .with_status(404)
            .with_body("Not Found")
            .create_async()
            .await;

        let cache = HttpCache::new();
        let url = format!("{}/api/missing", server.url());
        let result = cache.fetch_and_store(&url).await;

        assert!(result.is_err());
        match result {
            Err(DepsError::CacheError(msg)) => {
                assert!(msg.contains("404"));
            }
            _ => panic!("Expected CacheError"),
        }
        mock.assert_async().await;
    }

    #[tokio::test]
    async fn test_fetch_and_store_stores_headers() {
        let mut server = mockito::Server::new_async().await;

        let mock = server
            .mock("GET", "/api/data")
            .with_status(200)
            .with_header("etag", "\"abc123\"")
            .with_header("last-modified", "Wed, 21 Oct 2024 07:28:00 GMT")
            .with_body("test")
            .create_async()
            .await;

        let cache = HttpCache::new();
        let url = format!("{}/api/data", server.url());
        cache.fetch_and_store(&url).await.unwrap();

        let cached = cache.entries.get(&url).unwrap();
        assert_eq!(cached.etag, Some("\"abc123\"".into()));
        assert_eq!(
            cached.last_modified,
            Some("Wed, 21 Oct 2024 07:28:00 GMT".into())
        );
        mock.assert_async().await;
    }
}