llm_stack/tool/cacher.rs
//! Out-of-context caching for large tool results.
//!
//! The [`ToolResultCacher`] trait lets callers define how oversized tool
//! results are stored outside the conversation context. After the
//! [`ToolResultProcessor`](super::ToolResultProcessor) prunes a result,
//! the loop core checks whether it still exceeds the inline threshold.
//! If so, it hands the content to the cacher, which stores it however
//! it likes (disk, memory, KV store, …) and returns a compact summary
//! the LLM can use to retrieve slices on demand.
//!
//! llm-stack provides the hook and the threshold check. The caller
//! (e.g. chimera) provides the storage implementation.
//!
//! # Example
//!
//! ```rust
//! use llm_stack::tool::{ToolResultCacher, CachedResult};
//! use llm_stack::context::estimate_tokens;
//!
//! struct MemoryCacher;
//!
//! impl ToolResultCacher for MemoryCacher {
//!     fn cache(&self, tool_name: &str, content: &str) -> Option<CachedResult> {
//!         let ref_id = "mem_0001".to_string();
//!         let summary = format!(
//!             "[Cached: {tool_name} result → ref={ref_id}. Use result_cache to inspect.]"
//!         );
//!         Some(CachedResult {
//!             summary,
//!             original_tokens_est: estimate_tokens(content),
//!             summary_tokens_est: 20,
//!         })
//!     }
//!
//!     fn inline_threshold(&self) -> u32 {
//!         2_000
//!     }
//! }
//! ```
40
41/// Caches oversized tool results out-of-context.
42///
43/// Implementations store the full content somewhere (file, KV store,
44/// database, …) and return a compact summary for the conversation.
45/// The summary should tell the LLM how to retrieve slices (e.g. via
46/// a `result_cache` tool with a `ref` argument).
47pub trait ToolResultCacher: Send + Sync {
48 /// Store `content` and return a summary for the conversation.
49 ///
50 /// Called only when the (already-processed) result exceeds
51 /// [`inline_threshold`](Self::inline_threshold) tokens.
52 ///
53 /// Return `None` to fall back to keeping the content inline
54 /// (e.g. if storage fails).
55 fn cache(&self, tool_name: &str, content: &str) -> Option<CachedResult>;
56
57 /// Token threshold above which results are offered to the cacher.
58 ///
59 /// Results at or below this size stay inline in the conversation.
60 /// Default: 2 000 tokens (~8 000 chars).
61 fn inline_threshold(&self) -> u32 {
62 2_000
63 }
64}
65
/// The summary returned after caching a tool result.
///
/// Derives `PartialEq`/`Eq` so two results can be compared directly
/// (e.g. with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CachedResult {
    /// Compact text that replaces the full content in the conversation.
    /// Should include a reference ID and instructions for retrieval.
    pub summary: String,
    /// Estimated token count of the original (pre-cache) content.
    pub original_tokens_est: u32,
    /// Estimated token count of the summary.
    pub summary_tokens_est: u32,
}
77
#[cfg(test)]
mod tests {
    use super::*;
    use crate::context::estimate_tokens;

    /// Cacher that always succeeds and reports a caller-chosen threshold.
    struct TestCacher {
        threshold: u32,
    }

    impl ToolResultCacher for TestCacher {
        fn cache(&self, tool_name: &str, content: &str) -> Option<CachedResult> {
            let summary = format!("[cached: {tool_name}, {} chars]", content.len());
            // Estimate before `summary` is moved into the struct below.
            let summary_tokens = estimate_tokens(&summary);
            Some(CachedResult {
                summary,
                original_tokens_est: estimate_tokens(content),
                summary_tokens_est: summary_tokens,
            })
        }

        fn inline_threshold(&self) -> u32 {
            self.threshold
        }
    }

    /// Cacher that never stores anything and keeps the trait's default
    /// `inline_threshold`.
    struct FailingCacher;

    impl ToolResultCacher for FailingCacher {
        fn cache(&self, _tool_name: &str, _content: &str) -> Option<CachedResult> {
            None // storage failed
        }
    }

    #[test]
    fn test_cacher_returns_summary() {
        let test_cacher = TestCacher { threshold: 10 };
        let result = test_cacher.cache("db_sql", "lots of data here").unwrap();
        assert!(result.summary.contains("cached: db_sql"));
        assert!(result.summary.contains("17 chars"));
        assert!(result.original_tokens_est > 0);
        assert!(result.summary_tokens_est > 0);
    }

    #[test]
    fn test_failing_cacher_returns_none() {
        let cacher = FailingCacher;
        assert!(cacher.cache("tool", "data").is_none());
    }

    #[test]
    fn test_default_threshold() {
        // `FailingCacher` does not override `inline_threshold`, so this
        // exercises the trait's default implementation. Using
        // `TestCacher { threshold: 2_000 }` would only read the field back.
        let cacher = FailingCacher;
        assert_eq!(cacher.inline_threshold(), 2_000);
    }

    #[test]
    fn test_custom_threshold() {
        let cacher = TestCacher { threshold: 500 };
        assert_eq!(cacher.inline_threshold(), 500);
    }

    #[test]
    fn test_cacher_is_object_safe() {
        let cacher: Box<dyn ToolResultCacher> = Box::new(TestCacher { threshold: 100 });
        let result = cacher.cache("tool", "data");
        assert!(result.is_some());
    }
}