Skip to main content

hedl_mcp/
cache.rs

1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! Request-level caching for immutable MCP operations.
19//!
20//! Provides true LRU caching for expensive operations like validation, linting,
21//! and schema analysis. Caches are keyed by operation + input hash for
22//! deterministic results.
23//!
24//! # True LRU Behavior
25//!
26//! Both `insert()` and `get()` operations update the access timestamp, ensuring
27//! that frequently accessed entries are retained while least-recently-used
28//! entries are evicted when the cache reaches capacity.
29//!
30//! # Performance
31//!
32//! Benchmarks show 2-5x speedup on repeated requests for the same content.
33//!
34//! # Thread Safety
35//!
36//! Uses `DashMap` for lock-free concurrent access with minimal contention.
37
38use dashmap::DashMap;
39use serde_json::Value as JsonValue;
40use std::hash::{Hash, Hasher};
41use std::sync::atomic::{AtomicU64, Ordering};
42
43/// Maximum number of cache entries (configurable).
44const DEFAULT_CACHE_SIZE: usize = 1000;
45
46/// Hash a string using the default hasher (fast, non-cryptographic).
47fn hash_string(s: &str) -> u64 {
48    let mut hasher = std::collections::hash_map::DefaultHasher::new();
49    s.hash(&mut hasher);
50    hasher.finish()
51}
52
53/// Cache key combining operation name and input hash.
54#[derive(Debug, Clone, PartialEq, Eq, Hash)]
55struct CacheKey {
56    operation: String,
57    input_hash: u64,
58}
59
60impl CacheKey {
61    fn new(operation: impl Into<String>, input: &str) -> Self {
62        Self {
63            operation: operation.into(),
64            input_hash: hash_string(input),
65        }
66    }
67}
68
69/// Cached result with LRU metadata.
70#[derive(Debug, Clone)]
71struct CacheEntry {
72    /// Cached JSON result.
73    result: JsonValue,
74    /// Access timestamp for LRU ordering (higher = more recent).
75    access_time: u64,
76}
77
78/// Cache statistics for monitoring.
79#[derive(Debug, Clone, Default)]
80pub struct CacheStats {
81    /// Total cache hits.
82    pub hits: u64,
83    /// Total cache misses.
84    pub misses: u64,
85    /// Total cache evictions (LRU).
86    pub evictions: u64,
87    /// Current cache size (entries).
88    pub size: usize,
89    /// Maximum cache size.
90    pub max_size: usize,
91}
92
93impl CacheStats {
94    /// Calculate cache hit rate (0.0 to 1.0).
95    #[must_use]
96    pub fn hit_rate(&self) -> f64 {
97        let total = self.hits + self.misses;
98        if total == 0 {
99            0.0
100        } else {
101            self.hits as f64 / total as f64
102        }
103    }
104
105    /// Calculate cache hit rate as percentage.
106    #[must_use]
107    pub fn hit_rate_percent(&self) -> f64 {
108        self.hit_rate() * 100.0
109    }
110}
111
112/// True LRU cache for immutable MCP operations.
113///
114/// Thread-safe cache using `DashMap` for concurrent access with true LRU eviction.
115/// Both `get()` and `insert()` update the access timestamp, ensuring frequently
116/// accessed entries are retained.
117///
118/// # Example
119///
120/// ```
121/// use hedl_mcp::cache::OperationCache;
122/// use serde_json::json;
123///
124/// let cache = OperationCache::new(1000);
125///
126/// // Cache a validation result
127/// let result = json!({"valid": true});
128/// cache.insert("validate", "my hedl content", result.clone());
129///
130/// // Retrieve from cache (updates LRU position)
131/// if let Some(cached) = cache.get("validate", "my hedl content") {
132///     assert_eq!(cached, result);
133/// }
134/// ```
135pub struct OperationCache {
136    /// Cache storage (operation+hash -> entry with result and access time).
137    cache: DashMap<CacheKey, CacheEntry>,
138    /// Maximum number of entries.
139    max_size: usize,
140    /// Monotonic timestamp counter for LRU ordering.
141    timestamp_counter: AtomicU64,
142    /// Cache hit counter.
143    hits: AtomicU64,
144    /// Cache miss counter.
145    misses: AtomicU64,
146    /// Cache eviction counter.
147    evictions: AtomicU64,
148}
149
150impl OperationCache {
151    /// Create a new cache with the specified maximum size.
152    ///
153    /// # Arguments
154    ///
155    /// * `max_size` - Maximum number of cache entries (0 = disabled)
156    ///
157    /// # Examples
158    ///
159    /// ```
160    /// use hedl_mcp::cache::OperationCache;
161    ///
162    /// let cache = OperationCache::new(1000);
163    /// ```
164    #[must_use]
165    pub fn new(max_size: usize) -> Self {
166        Self {
167            cache: DashMap::with_capacity(max_size),
168            max_size,
169            timestamp_counter: AtomicU64::new(0),
170            hits: AtomicU64::new(0),
171            misses: AtomicU64::new(0),
172            evictions: AtomicU64::new(0),
173        }
174    }
175
176    /// Get a cached result if available, updating its LRU position.
177    ///
178    /// # Arguments
179    ///
180    /// * `operation` - Operation name (e.g., "validate", "lint")
181    /// * `input` - Input content (used for cache key hash)
182    ///
183    /// # Returns
184    ///
185    /// Cached result if available, `None` otherwise.
186    ///
187    /// # Examples
188    ///
189    /// ```
190    /// use hedl_mcp::cache::OperationCache;
191    ///
192    /// let cache = OperationCache::new(1000);
193    /// let result = cache.get("validate", "%VERSION 1.0\n---");
194    /// assert!(result.is_none()); // Cache miss on first access
195    /// ```
196    pub fn get(&self, operation: &str, input: &str) -> Option<JsonValue> {
197        let key = CacheKey::new(operation, input);
198
199        if let Some(mut entry) = self.cache.get_mut(&key) {
200            // Update access time for true LRU behavior
201            entry.access_time = self.timestamp_counter.fetch_add(1, Ordering::Relaxed);
202            self.hits.fetch_add(1, Ordering::Relaxed);
203            Some(entry.result.clone())
204        } else {
205            self.misses.fetch_add(1, Ordering::Relaxed);
206            None
207        }
208    }
209
210    /// Insert a result into the cache.
211    ///
212    /// If the cache is full, evicts the least recently used entry.
213    /// If `max_size` is 0, this is a no-op (cache disabled).
214    ///
215    /// # Arguments
216    ///
217    /// * `operation` - Operation name (e.g., "validate", "lint")
218    /// * `input` - Input content (used for cache key hash)
219    /// * `result` - Result to cache
220    ///
221    /// # Examples
222    ///
223    /// ```
224    /// use hedl_mcp::cache::OperationCache;
225    /// use serde_json::json;
226    ///
227    /// let cache = OperationCache::new(1000);
228    /// cache.insert("validate", "%VERSION 1.0\n---", json!({"valid": true}));
229    /// ```
230    pub fn insert(&self, operation: &str, input: &str, result: JsonValue) {
231        // Cache disabled when max_size is 0
232        if self.max_size == 0 {
233            return;
234        }
235
236        let key = CacheKey::new(operation, input);
237        let access_time = self.timestamp_counter.fetch_add(1, Ordering::Relaxed);
238
239        // Evict LRU entry if at capacity (and not updating existing key)
240        if self.cache.len() >= self.max_size && !self.cache.contains_key(&key) {
241            self.evict_lru();
242        }
243
244        let entry = CacheEntry {
245            result,
246            access_time,
247        };
248        self.cache.insert(key, entry);
249    }
250
251    /// Evict the least recently used entry (lowest `access_time`).
252    fn evict_lru(&self) {
253        // Find the entry with the lowest access_time
254        let mut lru_key: Option<CacheKey> = None;
255        let mut lru_time = u64::MAX;
256
257        for entry in &self.cache {
258            if entry.value().access_time < lru_time {
259                lru_time = entry.value().access_time;
260                lru_key = Some(entry.key().clone());
261            }
262        }
263
264        // Remove the LRU entry
265        if let Some(key) = lru_key {
266            if self.cache.remove(&key).is_some() {
267                self.evictions.fetch_add(1, Ordering::Relaxed);
268            }
269        }
270    }
271
272    /// Get cache statistics.
273    ///
274    /// # Returns
275    ///
276    /// Current cache statistics including hit/miss counts and hit rate.
277    ///
278    /// # Examples
279    ///
280    /// ```
281    /// use hedl_mcp::cache::OperationCache;
282    /// use serde_json::json;
283    ///
284    /// let cache = OperationCache::new(1000);
285    /// cache.insert("validate", "input", json!({"valid": true}));
286    /// cache.get("validate", "input"); // Hit
287    /// cache.get("validate", "other"); // Miss
288    ///
289    /// let stats = cache.stats();
290    /// assert_eq!(stats.hits, 1);
291    /// assert_eq!(stats.misses, 1);
292    /// assert_eq!(stats.size, 1);
293    /// ```
294    pub fn stats(&self) -> CacheStats {
295        CacheStats {
296            hits: self.hits.load(Ordering::Relaxed),
297            misses: self.misses.load(Ordering::Relaxed),
298            evictions: self.evictions.load(Ordering::Relaxed),
299            size: self.cache.len(),
300            max_size: self.max_size,
301        }
302    }
303
304    /// Clear all cache entries.
305    ///
306    /// # Examples
307    ///
308    /// ```
309    /// use hedl_mcp::cache::OperationCache;
310    /// use serde_json::json;
311    ///
312    /// let cache = OperationCache::new(1000);
313    /// cache.insert("validate", "input", json!({"valid": true}));
314    /// assert_eq!(cache.stats().size, 1);
315    ///
316    /// cache.clear();
317    /// assert_eq!(cache.stats().size, 0);
318    /// ```
319    pub fn clear(&self) {
320        self.cache.clear();
321    }
322
323    /// Reset cache statistics (hit/miss/eviction counters).
324    ///
325    /// Does not clear the cache itself, only resets the counters.
326    pub fn reset_stats(&self) {
327        self.hits.store(0, Ordering::Relaxed);
328        self.misses.store(0, Ordering::Relaxed);
329        self.evictions.store(0, Ordering::Relaxed);
330    }
331}
332
333impl Default for OperationCache {
334    fn default() -> Self {
335        Self::new(DEFAULT_CACHE_SIZE)
336    }
337}
338
339#[cfg(test)]
340mod tests {
341    use super::*;
342    use serde_json::json;
343
344    #[test]
345    fn test_cache_hit_miss() {
346        let cache = OperationCache::new(10);
347
348        // Miss on first access
349        assert!(cache.get("validate", "input1").is_none());
350        assert_eq!(cache.stats().misses, 1);
351
352        // Insert
353        cache.insert("validate", "input1", json!({"valid": true}));
354
355        // Hit on second access
356        let result = cache.get("validate", "input1");
357        assert!(result.is_some());
358        assert_eq!(result.unwrap()["valid"], true);
359        assert_eq!(cache.stats().hits, 1);
360    }
361
362    #[test]
363    fn test_cache_different_operations() {
364        let cache = OperationCache::new(10);
365
366        cache.insert("validate", "input", json!({"valid": true}));
367        cache.insert("lint", "input", json!({"diagnostics": []}));
368
369        // Different operations with same input are cached separately
370        let validate_result = cache.get("validate", "input");
371        let lint_result = cache.get("lint", "input");
372
373        assert!(validate_result.is_some());
374        assert!(lint_result.is_some());
375        assert_ne!(validate_result, lint_result);
376    }
377
378    #[test]
379    fn test_cache_different_inputs() {
380        let cache = OperationCache::new(10);
381
382        cache.insert("validate", "input1", json!({"valid": true}));
383        cache.insert("validate", "input2", json!({"valid": false}));
384
385        // Different inputs are cached separately
386        let result1 = cache.get("validate", "input1");
387        let result2 = cache.get("validate", "input2");
388
389        assert_eq!(result1.unwrap()["valid"], true);
390        assert_eq!(result2.unwrap()["valid"], false);
391    }
392
393    #[test]
394    fn test_cache_true_lru_eviction() {
395        let cache = OperationCache::new(3);
396
397        // Fill cache to capacity
398        cache.insert("op", "input1", json!(1));
399        cache.insert("op", "input2", json!(2));
400        cache.insert("op", "input3", json!(3));
401
402        assert_eq!(cache.stats().size, 3);
403        assert_eq!(cache.stats().evictions, 0);
404
405        // Access input1 to make it recently used
406        cache.get("op", "input1");
407
408        // Insert one more (should evict input2, the true LRU)
409        cache.insert("op", "input4", json!(4));
410
411        assert_eq!(cache.stats().size, 3);
412        assert_eq!(cache.stats().evictions, 1);
413
414        // input1 should still be present (was accessed recently)
415        assert!(cache.get("op", "input1").is_some());
416
417        // input2 should be evicted (true LRU)
418        assert!(cache.get("op", "input2").is_none());
419
420        // input3 and input4 should still be present
421        assert!(cache.get("op", "input3").is_some());
422        assert!(cache.get("op", "input4").is_some());
423    }
424
425    #[test]
426    fn test_cache_fifo_eviction_without_access() {
427        let cache = OperationCache::new(3);
428
429        // Fill cache to capacity
430        cache.insert("op", "input1", json!(1));
431        cache.insert("op", "input2", json!(2));
432        cache.insert("op", "input3", json!(3));
433
434        // Insert one more without accessing any (should evict input1, oldest)
435        cache.insert("op", "input4", json!(4));
436
437        assert_eq!(cache.stats().evictions, 1);
438
439        // input1 should be evicted (oldest insert, no access)
440        assert!(cache.get("op", "input1").is_none());
441
442        // Others should still be present
443        assert!(cache.get("op", "input2").is_some());
444        assert!(cache.get("op", "input3").is_some());
445        assert!(cache.get("op", "input4").is_some());
446    }
447
448    #[test]
449    fn test_cache_zero_capacity() {
450        let cache = OperationCache::new(0);
451
452        cache.insert("op", "key", json!("value"));
453
454        // With zero capacity, nothing should be cached
455        assert!(cache.get("op", "key").is_none());
456        assert_eq!(cache.stats().size, 0);
457    }
458
459    #[test]
460    fn test_cache_stats() {
461        let cache = OperationCache::new(10);
462
463        cache.insert("op", "input1", json!(1));
464
465        cache.get("op", "input1"); // Hit
466        cache.get("op", "input2"); // Miss
467        cache.get("op", "input1"); // Hit
468        cache.get("op", "input3"); // Miss
469
470        let stats = cache.stats();
471        assert_eq!(stats.hits, 2);
472        assert_eq!(stats.misses, 2);
473        assert_eq!(stats.size, 1);
474        assert_eq!(stats.hit_rate(), 0.5);
475        assert_eq!(stats.hit_rate_percent(), 50.0);
476    }
477
478    #[test]
479    fn test_cache_clear() {
480        let cache = OperationCache::new(10);
481
482        cache.insert("op", "input1", json!(1));
483        cache.insert("op", "input2", json!(2));
484
485        assert_eq!(cache.stats().size, 2);
486
487        cache.clear();
488
489        assert_eq!(cache.stats().size, 0);
490        assert!(cache.get("op", "input1").is_none());
491        assert!(cache.get("op", "input2").is_none());
492    }
493
494    #[test]
495    fn test_cache_reset_stats() {
496        let cache = OperationCache::new(10);
497
498        cache.insert("op", "input1", json!(1));
499        cache.get("op", "input1"); // Hit
500        cache.get("op", "input2"); // Miss
501
502        assert_eq!(cache.stats().hits, 1);
503        assert_eq!(cache.stats().misses, 1);
504
505        cache.reset_stats();
506
507        assert_eq!(cache.stats().hits, 0);
508        assert_eq!(cache.stats().misses, 0);
509        assert_eq!(cache.stats().size, 1); // Cache not cleared
510    }
511
512    #[test]
513    fn test_cache_hash_collision_resistance() {
514        let cache = OperationCache::new(10);
515
516        // Very similar inputs (should have different hashes)
517        cache.insert("op", "input", json!(1));
518        cache.insert("op", "input ", json!(2)); // Trailing space
519
520        let result1 = cache.get("op", "input");
521        let result2 = cache.get("op", "input ");
522
523        assert_eq!(result1.unwrap(), 1);
524        assert_eq!(result2.unwrap(), 2);
525    }
526
527    #[test]
528    fn test_cache_concurrent_access() {
529        use std::sync::Arc;
530        use std::thread;
531
532        let cache = Arc::new(OperationCache::new(100));
533        let mut handles = vec![];
534
535        // Spawn multiple threads doing concurrent reads/writes
536        for i in 0..10 {
537            let cache_clone = cache.clone();
538            let handle = thread::spawn(move || {
539                for j in 0..100 {
540                    let key = format!("input{}", j % 10);
541                    cache_clone.insert("op", &key, json!(i));
542                    cache_clone.get("op", &key);
543                }
544            });
545            handles.push(handle);
546        }
547
548        // Wait for all threads
549        for handle in handles {
550            handle.join().unwrap();
551        }
552
553        // Cache should be consistent
554        let stats = cache.stats();
555        assert!(stats.size > 0);
556        assert!(stats.size <= 100);
557    }
558
559    #[test]
560    fn test_cache_update_existing_key() {
561        let cache = OperationCache::new(3);
562
563        cache.insert("op", "key1", json!(1));
564        cache.insert("op", "key2", json!(2));
565        cache.insert("op", "key3", json!(3));
566
567        // Update existing key (should not trigger eviction)
568        cache.insert("op", "key1", json!(10));
569
570        assert_eq!(cache.stats().size, 3);
571        assert_eq!(cache.stats().evictions, 0);
572        assert_eq!(cache.get("op", "key1").unwrap(), json!(10));
573    }
574}