llm_stack/tool/
extractor.rs

1//! Async semantic extraction for large tool results.
2//!
3//! The [`ToolResultExtractor`] trait provides an async post-processing stage
4//! that runs **after** structural pruning ([`ToolResultProcessor`]) but
5//! **before** out-of-context caching ([`ToolResultCacher`]).
6//!
7//! Use this for heavyweight transformations that require async work, such as
8//! calling a fast/cheap LLM (Haiku-class) to extract task-relevant information
9//! from large tool results.
10//!
11//! The extractor receives the tool name, the (already-pruned) output, and the
12//! last user message for relevance-guided extraction. It returns an
13//! [`ExtractedResult`] with the condensed content.
14//!
15//! # Pipeline position
16//!
17//! ```text
18//!   Tool executes
19//!        │
20//!   Stage 1: ToolResultProcessor::process()   (sync, structural)
21//!        │
22//!   Stage 2: ToolResultExtractor::extract()   (async, semantic)
23//!        │
24//!   Stage 3: ToolResultCacher::cache()        (sync, overflow)
25//!        │
26//!   Result enters conversation context
27//! ```
28//!
29//! # Example
30//!
31//! ```rust
32//! use llm_stack::tool::{ToolResultExtractor, ExtractedResult};
33//! use llm_stack::context::estimate_tokens;
34//! use std::future::Future;
35//! use std::pin::Pin;
36//!
37//! struct KeywordExtractor;
38//!
39//! impl ToolResultExtractor for KeywordExtractor {
40//!     fn extract<'a>(
41//!         &'a self,
42//!         tool_name: &'a str,
43//!         output: &'a str,
44//!         user_query: &'a str,
45//!     ) -> Pin<Box<dyn Future<Output = Option<ExtractedResult>> + Send + 'a>> {
46//!         Box::pin(async move {
47//!             if tool_name != "web_search" || output.len() < 10_000 {
48//!                 return None;
49//!             }
50//!             let extracted = format!("Extracted relevant info about: {user_query}");
51//!             Some(ExtractedResult {
52//!                 content: extracted.clone(),
53//!                 original_tokens_est: estimate_tokens(output),
54//!                 extracted_tokens_est: estimate_tokens(&extracted),
55//!             })
56//!         })
57//!     }
58//!
59//!     fn extraction_threshold(&self) -> u32 {
60//!         15_000
61//!     }
62//! }
63//! ```
64
65use std::future::Future;
66use std::pin::Pin;
67
68/// Async semantic extractor for oversized tool results.
69///
70/// Implementations run after structural pruning and can perform heavyweight
71/// async work (e.g., calling a fast LLM) to condense large results into
72/// task-relevant summaries.
73///
74/// The extractor receives the last user message as context to guide
75/// relevance-based extraction.
76pub trait ToolResultExtractor: Send + Sync {
77    /// Extract task-relevant information from a tool result.
78    ///
79    /// # Arguments
80    ///
81    /// * `tool_name` — The name of the tool that produced this result.
82    /// * `output` — The (already structurally pruned) output string.
83    /// * `user_query` — The most recent user message, for relevance guidance.
84    ///
85    /// Return `None` to skip extraction (keep the structurally-pruned content).
86    /// The extractor is only called for results exceeding
87    /// [`extraction_threshold`](Self::extraction_threshold) tokens.
88    fn extract<'a>(
89        &'a self,
90        tool_name: &'a str,
91        output: &'a str,
92        user_query: &'a str,
93    ) -> Pin<Box<dyn Future<Output = Option<ExtractedResult>> + Send + 'a>>;
94
95    /// Token threshold above which results are offered to the extractor.
96    ///
97    /// Results at or below this size skip semantic extraction entirely.
98    /// Default: 15 000 tokens (~60 000 chars).
99    fn extraction_threshold(&self) -> u32 {
100        15_000
101    }
102}
103
/// The result of semantic extraction.
///
/// Bundles the condensed text with token estimates taken before and after
/// extraction. Two results compare equal when all three fields are equal,
/// which lets callers and tests compare values directly instead of going
/// through their `Debug` output.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExtractedResult {
    /// The condensed, task-relevant content.
    pub content: String,
    /// Estimated token count of the pre-extraction content.
    pub original_tokens_est: u32,
    /// Estimated token count of the extracted content.
    pub extracted_tokens_est: u32,
}
114
115#[cfg(test)]
116mod tests {
117    use super::*;
118    use crate::context::estimate_tokens;
119
120    struct NoopExtractor;
121
122    impl ToolResultExtractor for NoopExtractor {
123        fn extract<'a>(
124            &'a self,
125            _tool_name: &'a str,
126            _output: &'a str,
127            _user_query: &'a str,
128        ) -> Pin<Box<dyn Future<Output = Option<ExtractedResult>> + Send + 'a>> {
129            Box::pin(async { None })
130        }
131    }
132
133    struct TestExtractor {
134        threshold: u32,
135    }
136
137    impl ToolResultExtractor for TestExtractor {
138        fn extract<'a>(
139            &'a self,
140            tool_name: &'a str,
141            output: &'a str,
142            user_query: &'a str,
143        ) -> Pin<Box<dyn Future<Output = Option<ExtractedResult>> + Send + 'a>> {
144            Box::pin(async move {
145                let extracted = format!(
146                    "[Extracted from {tool_name} for query: {user_query}] \
147                     Summary of {} chars",
148                    output.len()
149                );
150                Some(ExtractedResult {
151                    content: extracted.clone(),
152                    original_tokens_est: estimate_tokens(output),
153                    extracted_tokens_est: estimate_tokens(&extracted),
154                })
155            })
156        }
157
158        fn extraction_threshold(&self) -> u32 {
159            self.threshold
160        }
161    }
162
163    #[test]
164    fn test_extracted_result_debug_clone() {
165        let result = ExtractedResult {
166            content: "test".into(),
167            original_tokens_est: 100,
168            extracted_tokens_est: 10,
169        };
170        let cloned = result.clone();
171        assert_eq!(cloned.content, "test");
172        assert_eq!(format!("{result:?}").len(), format!("{cloned:?}").len());
173    }
174
175    #[test]
176    fn test_default_threshold() {
177        let extractor = NoopExtractor;
178        assert_eq!(extractor.extraction_threshold(), 15_000);
179    }
180
181    #[test]
182    fn test_custom_threshold() {
183        let extractor = TestExtractor { threshold: 5_000 };
184        assert_eq!(extractor.extraction_threshold(), 5_000);
185    }
186
187    #[tokio::test]
188    async fn test_noop_extractor_returns_none() {
189        let extractor = NoopExtractor;
190        let result = extractor.extract("web_search", "content", "query").await;
191        assert!(result.is_none());
192    }
193
194    #[tokio::test]
195    async fn test_extractor_returns_condensed_content() {
196        let extractor = TestExtractor { threshold: 10 };
197        let output = "a".repeat(1000);
198        let result = extractor
199            .extract("web_search", &output, "weather in Tybee")
200            .await;
201        assert!(result.is_some());
202        let extracted = result.unwrap();
203        assert!(extracted.content.contains("web_search"));
204        assert!(extracted.content.contains("weather in Tybee"));
205        assert!(extracted.extracted_tokens_est < extracted.original_tokens_est);
206    }
207
208    #[test]
209    fn test_extractor_is_object_safe() {
210        let extractor: Box<dyn ToolResultExtractor> = Box::new(NoopExtractor);
211        assert_eq!(extractor.extraction_threshold(), 15_000);
212    }
213
214    #[tokio::test]
215    async fn test_extractor_object_safe_extract() {
216        let extractor: Box<dyn ToolResultExtractor> = Box::new(TestExtractor { threshold: 100 });
217        let result = extractor.extract("tool", "data", "query").await;
218        assert!(result.is_some());
219    }
220}
llm_stack/tool/extractor.rs

llm_stack/tool/
extractor.rs