deps_core/
cache.rs

1use crate::error::{DepsError, Result};
2use bytes::Bytes;
3use dashmap::DashMap;
4use reqwest::{Client, StatusCode, header};
5use std::time::Instant;
6
/// Maximum number of cached entries to prevent unbounded memory growth.
///
/// `HttpCache::get_cached` runs an eviction pass once the entry count
/// reaches this value.
const MAX_CACHE_ENTRIES: usize = 1000;

/// HTTP request timeout in seconds, applied to the shared HTTP client.
const HTTP_TIMEOUT_SECS: u64 = 30;

/// Percentage of cache entries to evict when capacity is reached.
const CACHE_EVICTION_PERCENTAGE: usize = 10;
15
16/// Validates that a URL uses HTTPS protocol.
17///
18/// Returns an error if the URL doesn't start with "https://".
19/// This ensures all network requests are encrypted.
20///
21/// In test mode, HTTP URLs are allowed for mockito compatibility.
22#[inline]
23fn ensure_https(url: &str) -> Result<()> {
24    #[cfg(not(test))]
25    if !url.starts_with("https://") {
26        return Err(DepsError::CacheError(format!(
27            "URL must use HTTPS: {}",
28            url
29        )));
30    }
31    #[cfg(test)]
32    let _ = url; // Silence unused warning in tests
33    Ok(())
34}
35
/// Cached HTTP response with validation headers.
///
/// Stores response body and cache validation headers (ETag, Last-Modified)
/// for efficient conditional requests. The body uses `Bytes` which is an
/// Arc-like type optimized for network data, enabling zero-cost cloning
/// across multiple consumers without copying.
///
/// # Examples
///
/// ```
/// use deps_core::cache::CachedResponse;
/// use bytes::Bytes;
/// use std::time::Instant;
///
/// let response = CachedResponse {
///     body: Bytes::from("response data"),
///     etag: Some("\"abc123\"".into()),
///     last_modified: None,
///     fetched_at: Instant::now(),
/// };
///
/// // Clone is cheap - only increments reference count
/// let cloned = response.clone();
/// ```
#[derive(Debug, Clone)]
pub struct CachedResponse {
    /// Raw response body as received from the server.
    pub body: Bytes,
    /// Value of the `ETag` response header, if one was present and readable
    /// as a string; sent back as `If-None-Match` on revalidation.
    pub etag: Option<String>,
    /// Value of the `Last-Modified` response header, if one was present and
    /// readable as a string; sent back as `If-Modified-Since` on revalidation.
    pub last_modified: Option<String>,
    /// Moment the entry was stored; eviction compares these timestamps to
    /// decide which entries to drop.
    pub fetched_at: Instant,
}
67
/// HTTP cache with ETag and Last-Modified validation.
///
/// Implements RFC 7232 conditional requests to minimize network traffic.
/// All responses are cached with their validation headers, and subsequent
/// requests use `If-None-Match` (ETag) or `If-Modified-Since` headers
/// to check for updates.
///
/// The cache uses `Bytes` for response bodies, enabling efficient sharing
/// of cached data across multiple consumers without copying. `Bytes` is
/// an Arc-like type optimized for network I/O.
///
/// # Examples
///
/// ```no_run
/// use deps_core::cache::HttpCache;
///
/// # async fn example() -> deps_core::error::Result<()> {
/// let cache = HttpCache::new();
///
/// // First request - fetches from network
/// let data1 = cache.get_cached("https://index.crates.io/se/rd/serde").await?;
///
/// // Second request - uses conditional GET (304 Not Modified if unchanged)
/// let data2 = cache.get_cached("https://index.crates.io/se/rd/serde").await?;
/// # Ok(())
/// # }
/// ```
pub struct HttpCache {
    /// Cached responses keyed by the exact URL string that was requested.
    entries: DashMap<String, CachedResponse>,
    /// HTTP client reused for every request made through this cache.
    client: Client,
}
99
100impl HttpCache {
101    /// Creates a new HTTP cache with default configuration.
102    ///
103    /// The cache uses a configurable timeout for all requests and identifies
104    /// itself with an auto-versioned user agent.
105    pub fn new() -> Self {
106        let client = Client::builder()
107            .user_agent(format!("deps-lsp/{}", env!("CARGO_PKG_VERSION")))
108            .timeout(std::time::Duration::from_secs(HTTP_TIMEOUT_SECS))
109            .build()
110            .expect("failed to create HTTP client");
111
112        Self {
113            entries: DashMap::new(),
114            client,
115        }
116    }
117
118    /// Retrieves data from URL with intelligent caching.
119    ///
120    /// On first request, fetches data from the network and caches it.
121    /// On subsequent requests, performs a conditional GET request using
122    /// cached ETag or Last-Modified headers. If the server responds with
123    /// 304 Not Modified, returns the cached data. Otherwise, fetches and
124    /// caches the new data.
125    ///
126    /// If the conditional request fails due to network errors, falls back
127    /// to the cached data (stale-while-revalidate pattern).
128    ///
129    /// # Returns
130    ///
131    /// Returns `Bytes` containing the response body. Multiple calls for the
132    /// same URL return cheap clones (reference counting) without copying data.
133    ///
134    /// # Errors
135    ///
136    /// Returns `DepsError::RegistryError` if the initial fetch fails or
137    /// if no cached data exists and the network is unavailable.
138    ///
139    /// # Examples
140    ///
141    /// ```no_run
142    /// # use deps_core::cache::HttpCache;
143    /// # async fn example() -> deps_core::error::Result<()> {
144    /// let cache = HttpCache::new();
145    /// let data = cache.get_cached("https://example.com/api/data").await?;
146    /// println!("Fetched {} bytes", data.len());
147    /// # Ok(())
148    /// # }
149    /// ```
150    pub async fn get_cached(&self, url: &str) -> Result<Bytes> {
151        // Evict old entries if cache is at capacity
152        if self.entries.len() >= MAX_CACHE_ENTRIES {
153            self.evict_entries();
154        }
155
156        if let Some(cached) = self.entries.get(url) {
157            // Attempt conditional request with cached headers
158            match self.conditional_request(url, &cached).await {
159                Ok(Some(new_body)) => {
160                    // 200 OK - content changed, cache updated internally
161                    return Ok(new_body);
162                }
163                Ok(None) => {
164                    // 304 Not Modified - use cached body (cheap clone)
165                    return Ok(cached.body.clone());
166                }
167                Err(e) => {
168                    // Network error - fall back to cached body if available
169                    tracing::warn!("conditional request failed, using cache: {}", e);
170                    return Ok(cached.body.clone());
171                }
172            }
173        }
174
175        // No cache entry - fetch fresh
176        self.fetch_and_store(url).await
177    }
178
179    /// Performs conditional HTTP request using cached validation headers.
180    ///
181    /// Sends `If-None-Match` (ETag) and/or `If-Modified-Since` headers
182    /// to check if the cached content is still valid.
183    ///
184    /// # Returns
185    ///
186    /// - `Ok(Some(Bytes))` - Server returned 200 OK with new content
187    /// - `Ok(None)` - Server returned 304 Not Modified (cache is valid)
188    /// - `Err(_)` - Network or HTTP error occurred
189    async fn conditional_request(
190        &self,
191        url: &str,
192        cached: &CachedResponse,
193    ) -> Result<Option<Bytes>> {
194        ensure_https(url)?;
195        let mut request = self.client.get(url);
196
197        if let Some(etag) = &cached.etag {
198            request = request.header(header::IF_NONE_MATCH, etag);
199        }
200        if let Some(last_modified) = &cached.last_modified {
201            request = request.header(header::IF_MODIFIED_SINCE, last_modified);
202        }
203
204        let response = request.send().await.map_err(|e| DepsError::RegistryError {
205            package: url.to_string(),
206            source: e,
207        })?;
208
209        if response.status() == StatusCode::NOT_MODIFIED {
210            // 304 Not Modified - content unchanged
211            return Ok(None);
212        }
213
214        // 200 OK - content changed
215        let etag = response
216            .headers()
217            .get(header::ETAG)
218            .and_then(|v| v.to_str().ok())
219            .map(String::from);
220
221        let last_modified = response
222            .headers()
223            .get(header::LAST_MODIFIED)
224            .and_then(|v| v.to_str().ok())
225            .map(String::from);
226
227        let body = response
228            .bytes()
229            .await
230            .map_err(|e| DepsError::RegistryError {
231                package: url.to_string(),
232                source: e,
233            })?;
234
235        // Update cache with new response
236        self.entries.insert(
237            url.to_string(),
238            CachedResponse {
239                body: body.clone(),
240                etag,
241                last_modified,
242                fetched_at: Instant::now(),
243            },
244        );
245
246        Ok(Some(body))
247    }
248
249    /// Fetches a fresh response from the network and stores it in the cache.
250    ///
251    /// This method bypasses the cache and always makes a network request.
252    /// The response is stored with its ETag and Last-Modified headers for
253    /// future conditional requests.
254    ///
255    /// # Errors
256    ///
257    /// Returns `DepsError::CacheError` if the server returns a non-2xx status code,
258    /// or `DepsError::RegistryError` if the network request fails.
259    pub(crate) async fn fetch_and_store(&self, url: &str) -> Result<Bytes> {
260        ensure_https(url)?;
261        tracing::debug!("fetching fresh: {}", url);
262
263        let response = self
264            .client
265            .get(url)
266            .send()
267            .await
268            .map_err(|e| DepsError::RegistryError {
269                package: url.to_string(),
270                source: e,
271            })?;
272
273        if !response.status().is_success() {
274            return Err(DepsError::CacheError(format!(
275                "HTTP {} for {}",
276                response.status(),
277                url
278            )));
279        }
280
281        let etag = response
282            .headers()
283            .get(header::ETAG)
284            .and_then(|v| v.to_str().ok())
285            .map(String::from);
286
287        let last_modified = response
288            .headers()
289            .get(header::LAST_MODIFIED)
290            .and_then(|v| v.to_str().ok())
291            .map(String::from);
292
293        let body = response
294            .bytes()
295            .await
296            .map_err(|e| DepsError::RegistryError {
297                package: url.to_string(),
298                source: e,
299            })?;
300
301        self.entries.insert(
302            url.to_string(),
303            CachedResponse {
304                body: body.clone(),
305                etag,
306                last_modified,
307                fetched_at: Instant::now(),
308            },
309        );
310
311        Ok(body)
312    }
313
    /// Clears all cached entries.
    ///
    /// This removes all cached responses, forcing the next request for
    /// any URL to fetch fresh data from the network. The HTTP client itself
    /// is not touched.
    pub fn clear(&self) {
        self.entries.clear();
    }
321
    /// Returns the number of cached entries (one entry per distinct URL key).
    pub fn len(&self) -> usize {
        self.entries.len()
    }
326
327    /// Returns `true` if the cache contains no entries.
328    pub fn is_empty(&self) -> bool {
329        self.entries.is_empty()
330    }
331
332    /// Evicts approximately `CACHE_EVICTION_PERCENTAGE`% of cache entries when capacity is reached.
333    ///
334    /// Uses a min-heap to efficiently find the oldest entries instead of full sorting.
335    /// For each entry, we potentially push/pop from the heap, which is O(log K).
336    ///
337    /// Time complexity: O(N log K) where N = number of cache entries, K = target_removals
338    /// Space complexity: O(K) for the min-heap
339    fn evict_entries(&self) {
340        use std::cmp::Reverse;
341        use std::collections::BinaryHeap;
342
343        let target_removals = MAX_CACHE_ENTRIES / CACHE_EVICTION_PERCENTAGE;
344
345        // Use min-heap to efficiently find N oldest entries
346        // The heap maintains the K oldest entries seen so far
347        let mut oldest = BinaryHeap::with_capacity(target_removals);
348
349        for entry in self.entries.iter() {
350            let item = (entry.value().fetched_at, entry.key().clone());
351
352            if oldest.len() < target_removals {
353                // Heap not full, insert directly
354                oldest.push(Reverse(item));
355            } else if let Some(Reverse(newest_of_oldest)) = oldest.peek() {
356                // If this entry is older than the newest entry in our "oldest" set,
357                // replace it
358                if item.0 < newest_of_oldest.0 {
359                    oldest.pop();
360                    oldest.push(Reverse(item));
361                }
362            }
363        }
364
365        // Remove selected oldest entries
366        let removed = oldest.len();
367        for Reverse((_, url)) in oldest {
368            self.entries.remove(&url);
369        }
370
371        tracing::debug!("evicted {} cache entries (O(N) algorithm)", removed);
372    }
373
374    /// Benchmark-only helper: Direct cache lookup without network requests.
375    #[doc(hidden)]
376    pub fn get_for_bench(&self, url: &str) -> Option<Bytes> {
377        self.entries.get(url).map(|entry| entry.body.clone())
378    }
379
    /// Benchmark-only helper: Direct cache insertion.
    ///
    /// Stores `response` under `url`, replacing any existing entry for that
    /// key. No network request is made and no capacity check is performed here.
    #[doc(hidden)]
    pub fn insert_for_bench(&self, url: String, response: CachedResponse) {
        self.entries.insert(url, response);
    }
385}
386
387impl Default for HttpCache {
388    fn default() -> Self {
389        Self::new()
390    }
391}
392
#[cfg(test)]
mod tests {
    use super::*;

    /// Path served by the mock server in most tests.
    const DATA_PATH: &str = "/api/data";

    /// Builds a cache entry stamped with the current instant.
    fn entry(
        body: &'static [u8],
        etag: Option<&str>,
        last_modified: Option<&str>,
    ) -> CachedResponse {
        CachedResponse {
            body: Bytes::from_static(body),
            etag: etag.map(String::from),
            last_modified: last_modified.map(String::from),
            fetched_at: Instant::now(),
        }
    }

    // A new cache starts out empty.
    #[test]
    fn test_cache_creation() {
        let cache = HttpCache::new();
        assert_eq!(cache.len(), 0);
        assert!(cache.is_empty());
    }

    // `clear` drops every stored entry.
    #[test]
    fn test_cache_clear() {
        let cache = HttpCache::new();
        cache
            .entries
            .insert("test".to_string(), entry(&[1, 2, 3], None, None));
        assert_eq!(cache.len(), 1);

        cache.clear();
        assert_eq!(cache.len(), 0);
    }

    // Cloning a response preserves body and validators.
    #[test]
    fn test_cached_response_clone() {
        let original = entry(&[1, 2, 3], Some("test"), Some("date"));
        let copy = original.clone();

        // Bytes clone is cheap (reference counting)
        assert_eq!(original.body, copy.body);
        assert_eq!(original.etag, copy.etag);
    }

    // `len` tracks direct insertions.
    #[test]
    fn test_cache_len() {
        let cache = HttpCache::new();
        assert_eq!(cache.len(), 0);

        cache
            .entries
            .insert("url1".to_string(), entry(b"", None, None));

        assert_eq!(cache.len(), 1);
    }

    // First request for a URL goes to the network and populates the cache.
    #[tokio::test]
    async fn test_get_cached_fresh_fetch() {
        let mut server = mockito::Server::new_async().await;

        let _ok = server
            .mock("GET", DATA_PATH)
            .with_status(200)
            .with_header("etag", "\"abc123\"")
            .with_body("test data")
            .create_async()
            .await;

        let cache = HttpCache::new();
        let url = format!("{}{}", server.url(), DATA_PATH);
        let fetched: Bytes = cache.get_cached(&url).await.unwrap();

        assert_eq!(fetched.as_ref(), b"test data");
        assert_eq!(cache.len(), 1);
    }

    // Second request revalidates with the stored ETag and reuses the body on 304.
    #[tokio::test]
    async fn test_get_cached_cache_hit() {
        let mut server = mockito::Server::new_async().await;
        let url = format!("{}{}", server.url(), DATA_PATH);
        let cache = HttpCache::new();

        let initial = server
            .mock("GET", DATA_PATH)
            .with_status(200)
            .with_header("etag", "\"abc123\"")
            .with_body("original data")
            .create_async()
            .await;

        let first: Bytes = cache.get_cached(&url).await.unwrap();
        assert_eq!(first.as_ref(), b"original data");
        assert_eq!(cache.len(), 1);

        // Retire the 200 mock so only the conditional mock can answer.
        drop(initial);

        let _not_modified = server
            .mock("GET", DATA_PATH)
            .match_header("if-none-match", "\"abc123\"")
            .with_status(304)
            .create_async()
            .await;

        let second: Bytes = cache.get_cached(&url).await.unwrap();
        assert_eq!(second.as_ref(), b"original data");
    }

    // A 304 answer yields the originally fetched body.
    #[tokio::test]
    async fn test_get_cached_304_not_modified() {
        let mut server = mockito::Server::new_async().await;
        let url = format!("{}{}", server.url(), DATA_PATH);
        let cache = HttpCache::new();

        let initial = server
            .mock("GET", DATA_PATH)
            .with_status(200)
            .with_header("etag", "\"abc123\"")
            .with_body("original data")
            .create_async()
            .await;

        let first: Bytes = cache.get_cached(&url).await.unwrap();
        assert_eq!(first.as_ref(), b"original data");

        drop(initial);

        let _not_modified = server
            .mock("GET", DATA_PATH)
            .match_header("if-none-match", "\"abc123\"")
            .with_status(304)
            .create_async()
            .await;

        let second: Bytes = cache.get_cached(&url).await.unwrap();
        assert_eq!(second.as_ref(), b"original data");
    }

    // A pre-seeded ETag is sent as `If-None-Match`.
    #[tokio::test]
    async fn test_get_cached_etag_validation() {
        let mut server = mockito::Server::new_async().await;
        let url = format!("{}{}", server.url(), DATA_PATH);
        let cache = HttpCache::new();

        cache
            .entries
            .insert(url.clone(), entry(b"cached", Some("\"tag123\""), None));

        let _not_modified = server
            .mock("GET", DATA_PATH)
            .match_header("if-none-match", "\"tag123\"")
            .with_status(304)
            .create_async()
            .await;

        let served: Bytes = cache.get_cached(&url).await.unwrap();
        assert_eq!(served.as_ref(), b"cached");
    }

    // A pre-seeded Last-Modified value is sent as `If-Modified-Since`.
    #[tokio::test]
    async fn test_get_cached_last_modified_validation() {
        let mut server = mockito::Server::new_async().await;
        let url = format!("{}{}", server.url(), DATA_PATH);
        let cache = HttpCache::new();

        cache.entries.insert(
            url.clone(),
            entry(b"cached", None, Some("Wed, 21 Oct 2024 07:28:00 GMT")),
        );

        let _not_modified = server
            .mock("GET", DATA_PATH)
            .match_header("if-modified-since", "Wed, 21 Oct 2024 07:28:00 GMT")
            .with_status(304)
            .create_async()
            .await;

        let served: Bytes = cache.get_cached(&url).await.unwrap();
        assert_eq!(served.as_ref(), b"cached");
    }

    // When revalidation cannot reach the server, the stale body is served.
    #[tokio::test]
    async fn test_get_cached_network_error_fallback() {
        let cache = HttpCache::new();
        let url = "http://invalid.localhost.test/data";

        cache
            .entries
            .insert(url.to_string(), entry(b"stale data", Some("\"old\""), None));

        let served: Bytes = cache.get_cached(url).await.unwrap();
        assert_eq!(served.as_ref(), b"stale data");
    }

    // Non-2xx answers on a fresh fetch surface as `CacheError` with the status.
    #[tokio::test]
    async fn test_fetch_and_store_http_error() {
        let mut server = mockito::Server::new_async().await;

        let _missing = server
            .mock("GET", "/api/missing")
            .with_status(404)
            .with_body("Not Found")
            .create_async()
            .await;

        let cache = HttpCache::new();
        let url = format!("{}/api/missing", server.url());
        let outcome: Result<Bytes> = cache.fetch_and_store(&url).await;

        assert!(outcome.is_err());
        match outcome {
            Err(DepsError::CacheError(msg)) => {
                assert!(msg.contains("404"));
            }
            _ => panic!("Expected CacheError"),
        }
    }

    // Validators from the response end up in the stored entry.
    #[tokio::test]
    async fn test_fetch_and_store_stores_headers() {
        let mut server = mockito::Server::new_async().await;

        let _ok = server
            .mock("GET", DATA_PATH)
            .with_status(200)
            .with_header("etag", "\"abc123\"")
            .with_header("last-modified", "Wed, 21 Oct 2024 07:28:00 GMT")
            .with_body("test")
            .create_async()
            .await;

        let cache = HttpCache::new();
        let url = format!("{}{}", server.url(), DATA_PATH);
        let _: Bytes = cache.fetch_and_store(&url).await.unwrap();

        let stored = cache.entries.get(&url).unwrap();
        assert_eq!(stored.etag.as_deref(), Some("\"abc123\""));
        assert_eq!(
            stored.last_modified.as_deref(),
            Some("Wed, 21 Oct 2024 07:28:00 GMT")
        );
    }
}
659}