llm_stack/tool/
cacher.rs

1//! Out-of-context caching for large tool results.
2//!
3//! The [`ToolResultCacher`] trait lets callers define how oversized tool
4//! results are stored outside the conversation context. After the
5//! [`ToolResultProcessor`](super::ToolResultProcessor) prunes a result,
6//! the loop core checks whether it still exceeds the inline threshold.
7//! If so, it hands the content to the cacher, which stores it however
8//! it likes (disk, memory, KV store, …) and returns a compact summary
9//! the LLM can use to retrieve slices on demand.
10//!
11//! llm-stack provides the hook and the threshold check. The caller
12//! (e.g. chimera) provides the storage implementation.
13//!
14//! # Example
15//!
16//! ```rust
17//! use llm_stack::tool::{ToolResultCacher, CachedResult};
18//! use llm_stack::context::estimate_tokens;
19//!
20//! struct MemoryCacher;
21//!
22//! impl ToolResultCacher for MemoryCacher {
23//!     fn cache(&self, tool_name: &str, content: &str) -> Option<CachedResult> {
24//!         let ref_id = "mem_0001".to_string();
25//!         let summary = format!(
26//!             "[Cached: {tool_name} result → ref={ref_id}. Use result_cache to inspect.]"
27//!         );
28//!         Some(CachedResult {
29//!             summary,
30//!             original_tokens_est: estimate_tokens(content),
31//!             summary_tokens_est: 20,
32//!         })
33//!     }
34//!
35//!     fn inline_threshold(&self) -> u32 {
36//!         2_000
37//!     }
38//! }
39//! ```
40
41/// Caches oversized tool results out-of-context.
42///
43/// Implementations store the full content somewhere (file, KV store,
44/// database, …) and return a compact summary for the conversation.
45/// The summary should tell the LLM how to retrieve slices (e.g. via
46/// a `result_cache` tool with a `ref` argument).
47pub trait ToolResultCacher: Send + Sync {
48    /// Store `content` and return a summary for the conversation.
49    ///
50    /// Called only when the (already-processed) result exceeds
51    /// [`inline_threshold`](Self::inline_threshold) tokens.
52    ///
53    /// Return `None` to fall back to keeping the content inline
54    /// (e.g. if storage fails).
55    fn cache(&self, tool_name: &str, content: &str) -> Option<CachedResult>;
56
57    /// Token threshold above which results are offered to the cacher.
58    ///
59    /// Results at or below this size stay inline in the conversation.
60    /// Default: 2 000 tokens (~8 000 chars).
61    fn inline_threshold(&self) -> u32 {
62        2_000
63    }
64}
65
/// The summary returned after caching a tool result.
///
/// Plain data: the replacement text plus two token estimates. Derives
/// `PartialEq`/`Eq` so values can be compared directly (for example with
/// `assert_eq!` in tests) instead of field by field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CachedResult {
    /// Compact text that replaces the full content in the conversation.
    /// Should include a reference ID and instructions for retrieval.
    pub summary: String,
    /// Estimated token count of the original (pre-cache) content.
    pub original_tokens_est: u32,
    /// Estimated token count of the summary.
    pub summary_tokens_est: u32,
}
77
#[cfg(test)]
mod tests {
    use super::*;
    use crate::context::estimate_tokens;

    /// Cacher that always succeeds and reports a caller-chosen threshold.
    struct TestCacher {
        threshold: u32,
    }

    impl ToolResultCacher for TestCacher {
        fn cache(&self, tool_name: &str, content: &str) -> Option<CachedResult> {
            let summary = format!("[cached: {tool_name}, {} chars]", content.len());
            // Estimate before `summary` is moved into the result.
            let summary_tokens = estimate_tokens(&summary);
            Some(CachedResult {
                summary,
                original_tokens_est: estimate_tokens(content),
                summary_tokens_est: summary_tokens,
            })
        }

        fn inline_threshold(&self) -> u32 {
            self.threshold
        }
    }

    /// Cacher that simulates a storage failure. It does not override
    /// `inline_threshold`, so it reports the trait's provided default.
    struct FailingCacher;

    impl ToolResultCacher for FailingCacher {
        fn cache(&self, _tool_name: &str, _content: &str) -> Option<CachedResult> {
            None // storage failed
        }
    }

    #[test]
    fn test_cacher_returns_summary() {
        let test_cacher = TestCacher { threshold: 10 };
        let result = test_cacher.cache("db_sql", "lots of data here").unwrap();
        assert!(result.summary.contains("cached: db_sql"));
        assert!(result.summary.contains("17 chars"));
        assert!(result.original_tokens_est > 0);
        assert!(result.summary_tokens_est > 0);
    }

    #[test]
    fn test_failing_cacher_returns_none() {
        let cacher = FailingCacher;
        assert!(cacher.cache("tool", "data").is_none());
    }

    #[test]
    fn test_default_threshold() {
        // Use an implementor that does NOT override `inline_threshold`, so
        // the assertion checks the trait's provided default rather than a
        // struct field that merely happens to hold the same number.
        let cacher = FailingCacher;
        assert_eq!(cacher.inline_threshold(), 2_000);
    }

    #[test]
    fn test_custom_threshold() {
        let cacher = TestCacher { threshold: 500 };
        assert_eq!(cacher.inline_threshold(), 500);
    }

    #[test]
    fn test_cacher_is_object_safe() {
        // Compiling this coercion is the actual check; the call confirms
        // dispatch through the trait object works.
        let cacher: Box<dyn ToolResultCacher> = Box::new(TestCacher { threshold: 100 });
        let result = cacher.cache("tool", "data");
        assert!(result.is_some());
    }
}
llm_stack/tool/cacher.rs

llm_stack/tool/
cacher.rs