Skip to main content

rig_mcp/
result_cache.rs

1//! Result-cache layer for large MCP tool outputs.
2//!
3//! Large tool results (long search hit lists, document corpora, scraped
4//! pages) don't belong in the model window. This module provides a
5//! deterministic, transport-neutral way to:
6//!
7//! 1. **Cache** an oversized JSON array under an opaque handle.
8//! 2. **Envelope** what the model actually sees: the handle, the total
9//!    item count, the page size, and a deterministic first page.
10//! 3. **Page** through the remaining items on demand via follow-up
11//!    tools that read from the cache.
12//! 4. **Release** the handle when the caller is done so memory bounds
13//!    stay deterministic.
14//!
15//! `rig-mcp` keeps raw transports lossless. Callers can invoke
16//! [`cache_if_large`] directly, or opt into
17//! [`CachedResultsTransport`](crate::cache_tools::CachedResultsTransport) and
18//! the page/release tools from the [`cache_tools`](mod@crate::cache_tools)
19//! module at the
20//! model boundary.
21//!
22//! # Example
23//!
24//! ```no_run
25//! use rig_mcp::result_cache::{
26//!     CachedResultEnvelope, MemoryResultCache, ResultCache, cache_if_large,
27//! };
28//! use serde_json::{Value, json};
29//! use std::sync::Arc;
30//!
31//! let cache: Arc<dyn ResultCache> = Arc::new(MemoryResultCache::new());
32//! let big = json!([{"id": 1}, {"id": 2}, {"id": 3}]);
33//! let envelope = cache_if_large(big, cache.as_ref(), 16, 1);
34//! // The first page is a tiny preview; the rest are paged through the cache.
35//! let env: CachedResultEnvelope = serde_json::from_value(envelope).unwrap();
36//! assert_eq!(env.total_items, 3);
37//! ```
38
39use std::collections::HashMap;
40use std::sync::Mutex;
41
42use serde::{Deserialize, Serialize};
43use serde_json::Value;
44
45// ── Public types ─────────────────────────────────────────────────────────────
46
/// Opaque, unique handle for a cached result. Stable for the lifetime of
/// the [`ResultCache`] entry; invalidated by [`ResultCache::release`].
///
/// This is a newtype over the handle's string form. The inner string is
/// public so hosts can embed it in follow-up tokens (see
/// [`CachedResultEnvelope::page_token`]), but its format is chosen by the
/// cache implementation and should be treated as an identifier only.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct CachedResultHandle(pub String);
51
/// JSON envelope returned to the model in place of an oversized array.
///
/// The model sees `first_page` directly and can request later pages by
/// calling a host-supplied page tool with `handle` and an offset. The
/// envelope is deliberately small and self-describing so the model can
/// reason about how much data is hidden behind the handle. The truncation
/// fields mirror `rig_compose::ToolResultEnvelope` semantics for MCP result
/// caches: the cache does not discard data, but the model-visible page is
/// bounded and carries enough metadata for a follow-up page request.
///
/// As built by [`cache_if_large`], the fields satisfy
/// `omitted_items == total_items - first_page.len()` and
/// `truncated == (omitted_items > 0)`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CachedResultEnvelope {
    /// Opaque handle for the full cached result.
    pub handle: CachedResultHandle,
    /// Total number of items the cache holds for this handle.
    pub total_items: usize,
    /// Page size the cache will use when serving subsequent pages.
    pub page_size: usize,
    /// First `page_size.min(total_items)` items, inlined so the model
    /// doesn't always have to do a follow-up call. Empty when `page_size`
    /// is zero.
    pub first_page: Vec<Value>,
    /// Whether the model-visible page omitted cached items.
    pub truncated: bool,
    /// Number of cached items omitted from `first_page`.
    pub omitted_items: usize,
    /// Stable follow-up token for the first omitted page. `None` when
    /// nothing was omitted, in which case the key is left out of the
    /// serialized envelope entirely.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub page_token: Option<String>,
}
80
81impl CachedResultEnvelope {
82    /// Build the follow-up page token for `handle` and the next offset.
83    pub fn page_token(handle: &CachedResultHandle, offset: usize) -> String {
84        format!("{}:offset:{offset}", handle.0)
85    }
86}
87
/// Transport-neutral cache for paged tool results.
///
/// Implementations MUST be safe to share across tasks (`Send + Sync`).
/// Pagination uses `(offset, limit)` semantics: `page(handle, 0, n)`
/// returns up to the first `n` items. Implementations may serve
/// fewer items than requested if the offset is near the end.
///
/// All methods take `&self`, so implementations that mutate state need
/// interior mutability.
pub trait ResultCache: Send + Sync {
    /// Store `items` and return the handle the caller should publish.
    /// Ownership of `items` moves into the cache.
    fn store(&self, items: Vec<Value>) -> CachedResultHandle;
    /// Return up to `limit` items starting at `offset`. Returns `None`
    /// if the handle has been released or never existed; returns
    /// `Some(empty)` if the offset is past the end.
    fn page(&self, handle: &CachedResultHandle, offset: usize, limit: usize) -> Option<Vec<Value>>;
    /// Total item count for `handle`, or `None` if missing.
    fn len(&self, handle: &CachedResultHandle) -> Option<usize>;
    /// Release the handle. Returns `true` if it existed, `false` if it was
    /// unknown or already released.
    fn release(&self, handle: &CachedResultHandle) -> bool;
}
106
/// Process-local, in-memory [`ResultCache`].
///
/// Backed by a `HashMap<String, Vec<Value>>` under a `std::sync::Mutex`.
/// Operations are short-lived and fully synchronous, so the mutex is
/// never held across an `.await` point.
///
/// Entries live until they are explicitly released; nothing in this type
/// evicts them.
#[derive(Debug, Default)]
pub struct MemoryResultCache {
    // Next numeric suffix to hand out; starts at 0 via `Default`.
    next_id: Mutex<u64>,
    // Handle string -> the full item list stored under that handle.
    inner: Mutex<HashMap<String, Vec<Value>>>,
}
117
118impl MemoryResultCache {
119    /// Construct an empty cache.
120    pub fn new() -> Self {
121        Self::default()
122    }
123
124    /// Number of live handles. Useful for assertions and release audits.
125    pub fn live_handles(&self) -> usize {
126        self.inner.lock().map(|g| g.len()).unwrap_or(0)
127    }
128
129    fn fresh_handle(&self) -> CachedResultHandle {
130        // Deterministic, monotonic IDs — easier to test than UUIDs and
131        // perfectly adequate for an in-process cache. The wrapping
132        // arithmetic is unreachable in practice but keeps clippy happy.
133        let id = {
134            let mut g = match self.next_id.lock() {
135                Ok(g) => g,
136                Err(p) => p.into_inner(),
137            };
138            let id = *g;
139            *g = g.wrapping_add(1);
140            id
141        };
142        CachedResultHandle(format!("mcp-cache-{id}"))
143    }
144}
145
146impl ResultCache for MemoryResultCache {
147    fn store(&self, items: Vec<Value>) -> CachedResultHandle {
148        let handle = self.fresh_handle();
149        if let Ok(mut g) = self.inner.lock() {
150            g.insert(handle.0.clone(), items);
151        }
152        handle
153    }
154
155    fn page(&self, handle: &CachedResultHandle, offset: usize, limit: usize) -> Option<Vec<Value>> {
156        let g = self.inner.lock().ok()?;
157        let items = g.get(&handle.0)?;
158        let end = offset.saturating_add(limit).min(items.len());
159        let start = offset.min(items.len());
160        Some(
161            items
162                .get(start..end)
163                .map(<[Value]>::to_vec)
164                .unwrap_or_default(),
165        )
166    }
167
168    fn len(&self, handle: &CachedResultHandle) -> Option<usize> {
169        let g = self.inner.lock().ok()?;
170        g.get(&handle.0).map(Vec::len)
171    }
172
173    fn release(&self, handle: &CachedResultHandle) -> bool {
174        match self.inner.lock() {
175            Ok(mut g) => g.remove(&handle.0).is_some(),
176            Err(_) => false,
177        }
178    }
179}
180
181// ── Sizing helper ────────────────────────────────────────────────────────────
182
183/// If `value` is a JSON array whose serialized form exceeds
184/// `threshold_bytes`, store it in `cache` and return a JSON
185/// [`CachedResultEnvelope`]. Otherwise return `value` unchanged.
186///
187/// `page_size` is recorded in the envelope and used to slice
188/// `first_page`. Non-array values are always returned unchanged because
189/// pagination only makes sense over a sequence.
190pub fn cache_if_large(
191    value: Value,
192    cache: &dyn ResultCache,
193    threshold_bytes: usize,
194    page_size: usize,
195) -> Value {
196    let arr = match value {
197        Value::Array(items) => items,
198        other => return other,
199    };
200    // Estimate size deterministically via the canonical JSON rendering.
201    let serialized_size = match serde_json::to_string(&Value::Array(arr.clone())) {
202        Ok(s) => s.len(),
203        Err(_) => return Value::Array(arr),
204    };
205    if serialized_size <= threshold_bytes {
206        return Value::Array(arr);
207    }
208    let total_items = arr.len();
209    let first_page_len = page_size.min(total_items);
210    let first_page: Vec<Value> = arr
211        .get(..first_page_len)
212        .map(<[Value]>::to_vec)
213        .unwrap_or_default();
214    let handle = cache.store(arr);
215    let omitted_items = total_items.saturating_sub(first_page_len);
216    let envelope = CachedResultEnvelope {
217        page_token: (omitted_items > 0)
218            .then(|| CachedResultEnvelope::page_token(&handle, first_page_len)),
219        handle,
220        total_items,
221        page_size,
222        first_page,
223        truncated: omitted_items > 0,
224        omitted_items,
225    };
226    serde_json::to_value(envelope).unwrap_or(Value::Null)
227}
228
229// ── Tests ────────────────────────────────────────────────────────────────────
230
#[cfg(test)]
// Test-only relaxations: unwrapping, panicking and direct indexing are
// acceptable ways for a test to fail.
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing
)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Build `[{"id": 0}, …, {"id": count - 1}]` as a list of JSON values.
    fn id_objects(count: i32) -> Vec<Value> {
        (0..count).map(|id| json!({"id": id})).collect()
    }

    #[test]
    fn store_then_page_returns_slice() {
        let cache = MemoryResultCache::new();
        let handle = cache.store(vec![json!(1), json!(2), json!(3), json!(4)]);

        assert_eq!(cache.len(&handle), Some(4));

        let front = cache.page(&handle, 0, 2);
        assert_eq!(front, Some(vec![json!(1), json!(2)]));
        let back = cache.page(&handle, 2, 2);
        assert_eq!(back, Some(vec![json!(3), json!(4)]));
    }

    #[test]
    fn page_past_end_returns_empty_not_none() {
        let cache = MemoryResultCache::new();
        let handle = cache.store(vec![json!(1)]);

        let beyond = cache.page(&handle, 5, 10);
        assert_eq!(beyond, Some(Vec::new()));
    }

    #[test]
    fn page_unknown_handle_returns_none() {
        let cache = MemoryResultCache::new();
        let unknown = CachedResultHandle(String::from("nope"));

        assert_eq!(cache.page(&unknown, 0, 1), None);
        assert_eq!(cache.len(&unknown), None);
        assert!(!cache.release(&unknown));
    }

    #[test]
    fn release_frees_handle_and_subsequent_calls_return_none() {
        let cache = MemoryResultCache::new();
        let handle = cache.store(vec![json!("a"), json!("b")]);
        assert_eq!(cache.live_handles(), 1);

        assert!(cache.release(&handle));
        assert_eq!(cache.live_handles(), 0);
        assert_eq!(cache.page(&handle, 0, 1), None);
        assert_eq!(cache.len(&handle), None);

        // Releasing again finds nothing to remove.
        assert!(!cache.release(&handle));
    }

    #[test]
    fn handles_are_unique_per_store_call() {
        let cache = MemoryResultCache::new();
        let first = cache.store(vec![json!(1)]);
        let second = cache.store(vec![json!(2)]);
        assert_ne!(first, second);
    }

    #[test]
    fn cache_if_large_passes_through_when_under_threshold() {
        let cache = MemoryResultCache::new();
        let small = json!([1, 2, 3]);

        let result = cache_if_large(small.clone(), &cache, 1024, 10);

        assert_eq!(result, small);
        assert_eq!(cache.live_handles(), 0);
    }

    #[test]
    fn cache_if_large_passes_through_for_non_arrays() {
        let cache = MemoryResultCache::new();
        let object = json!({"k": "v"});

        // Even a zero threshold must not envelope a non-array value.
        let result = cache_if_large(object.clone(), &cache, 0, 10);

        assert_eq!(result, object);
        assert_eq!(cache.live_handles(), 0);
    }

    #[test]
    fn cache_if_large_envelopes_oversized_array() {
        let cache = MemoryResultCache::new();

        let result = cache_if_large(Value::Array(id_objects(50)), &cache, 16, 5);
        let envelope: CachedResultEnvelope = serde_json::from_value(result).unwrap();

        assert_eq!(envelope.total_items, 50);
        assert_eq!(envelope.page_size, 5);
        assert_eq!(envelope.first_page.len(), 5);
        assert_eq!(envelope.first_page[0], json!({"id": 0}));
        assert_eq!(envelope.first_page[4], json!({"id": 4}));
        assert!(envelope.truncated);
        assert_eq!(envelope.omitted_items, 45);
        assert_eq!(
            envelope.page_token.as_deref(),
            Some("mcp-cache-0:offset:5")
        );

        // The handle stays live and the remainder is reachable via paging.
        assert_eq!(cache.len(&envelope.handle), Some(50));
        let second_page = cache.page(&envelope.handle, 5, 5).unwrap();
        assert_eq!(second_page.len(), 5);
        assert_eq!(second_page[0], json!({"id": 5}));
    }

    #[test]
    fn cache_if_large_marks_empty_preview_as_truncated() {
        let cache = MemoryResultCache::new();

        let result = cache_if_large(Value::Array(id_objects(3)), &cache, 1, 0);
        let envelope: CachedResultEnvelope = serde_json::from_value(result).unwrap();

        assert!(envelope.first_page.is_empty());
        assert!(envelope.truncated);
        assert_eq!(envelope.omitted_items, 3);
        assert_eq!(
            envelope.page_token.as_deref(),
            Some("mcp-cache-0:offset:0")
        );
    }
}
339}