Skip to main content

zeph_context/
tool_result_compress.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Acon tool-result compression (#4021).
5//!
6//! Stateless, pure-function compression pass that enforces per-result and batch-level
7//! token budgets on tool outputs **before** they enter message history. This runs as a
8//! pre-processing step in `zeph-core`'s tier loop, not as part of context assembly.
9//!
10//! # Compression model
11//!
//! - Results at or below `passthrough_threshold`: returned unchanged (`PassThrough`).
//! - Results above `passthrough_threshold`: cut at a char boundary to approximately
//!   `passthrough_threshold` tokens (assuming ~4 bytes per token) with a
//!   `" [...truncated]"` suffix (`Truncated`). The byte budget reserves room for the
//!   suffix, but the cut is heuristic, so `compressed_tokens` only approximates
//!   `passthrough_threshold` — callers must not rely on exact equality.
17//! - LLM summarization is **not** performed here — it is the caller's responsibility in
18//!   `zeph-core`. The caller may pre-summarize a result and then pass the shortened text
19//!   to `compress_single` or `compress_batch`.
20//! - After per-result compression, `compress_batch` enforces the `total_budget` cap by
21//!   proportionally trimming the largest results (`BatchTrimmed`).
22
23use zeph_common::memory::TokenCounting;
24use zeph_config::AconConfig;
25
/// Suffix appended to tool-result text that has been cut short.
const TRUNCATION_MARKER: &str = " [...truncated]";
27
/// Token budgets that drive Acon tool-result compression.
///
/// Built from an [`AconConfig`] (zeph-config) at session init through the [`From`] impl.
#[derive(Clone, Debug)]
pub struct ToolResultCompressionConfig {
    /// Largest token count that still passes through untouched. Default: `2000`.
    ///
    /// Doubles as the truncation target: a result above this value is cut at a char
    /// boundary to approximately this many tokens, `" [...truncated]"` suffix included.
    pub passthrough_threshold: usize,
    /// Token count above which the caller is expected to try LLM summarization before
    /// falling back to truncation. Default: `4000`.
    ///
    /// Informational only — nothing in this module enforces it.
    pub summarize_threshold: usize,
    /// Cap on the combined tokens of all tool results in a single turn. Default: `8000`.
    pub total_budget: usize,
}
45
46impl From<&AconConfig> for ToolResultCompressionConfig {
47    fn from(cfg: &AconConfig) -> Self {
48        Self {
49            passthrough_threshold: cfg.passthrough_threshold,
50            summarize_threshold: cfg.summarize_threshold,
51            total_budget: cfg.total_budget,
52        }
53    }
54}
55
/// A tool result entry before compression, used as input to `compress_batch`.
///
/// The `index` field is used as a deterministic tiebreaker when two results have
/// equal token counts during batch budget enforcement (lower index trimmed first).
///
/// The entry only borrows its strings, so it is cheap to copy.
#[derive(Debug, Clone, Copy)]
pub struct ToolResultEntry<'a> {
    /// Tool name for tracing and logging.
    pub tool_name: &'a str,
    /// Raw tool result text.
    pub text: &'a str,
    /// Position in the original tool call list. Used as tiebreaker in batch trimming.
    pub index: usize,
}
68
/// How a single tool result was handled by the compressor.
///
/// There is deliberately no `Summarized` variant: LLM summarization happens in
/// `zeph-core`, before the text is handed to this module.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum CompressionMethod {
    /// The result fit within `passthrough_threshold` and was returned as-is.
    PassThrough,
    /// The result was cut at a char boundary to approximately `passthrough_threshold`
    /// tokens and the `" [...truncated]"` suffix was appended. The cut is byte-based,
    /// so `compressed_tokens` is not guaranteed to equal `passthrough_threshold`.
    Truncated,
    /// The result was shortened during batch budget enforcement.
    BatchTrimmed,
}
85
86/// Output of compressing a single tool result.
87#[derive(Debug, Clone)]
88pub struct CompressedToolResult {
89    /// Compressed (or unchanged) text.
90    pub text: String,
91    /// Token count before compression.
92    pub original_tokens: usize,
93    /// Token count after compression.
94    pub compressed_tokens: usize,
95    /// Method applied.
96    pub method: CompressionMethod,
97}
98
/// Stateless tool-result compressor for Acon (#4021).
///
/// All methods are pure functions: they take text, a token counter, and a config, and
/// return compressed text with metadata. No I/O, no async, no agent state.
///
/// The type carries no data; it only namespaces the associated functions.
///
/// # Examples
///
/// ```
/// use zeph_context::tool_result_compress::{
///     ToolResultCompressor, ToolResultCompressionConfig, CompressionMethod,
/// };
///
/// struct WordCounter;
/// impl zeph_common::memory::TokenCounting for WordCounter {
///     fn count_tokens(&self, text: &str) -> usize { text.split_whitespace().count() }
///     fn count_tool_schema_tokens(&self, _schema: &serde_json::Value) -> usize { 0 }
/// }
///
/// let config = ToolResultCompressionConfig {
///     passthrough_threshold: 5,
///     summarize_threshold: 10,
///     total_budget: 20,
/// };
/// let tc = WordCounter;
///
/// let result = ToolResultCompressor::compress_single("hello world", &tc, &config);
/// assert_eq!(result.method, CompressionMethod::PassThrough);
/// ```
#[derive(Debug, Clone, Copy, Default)]
pub struct ToolResultCompressor;
128
129impl ToolResultCompressor {
130    /// Compress a single tool result text.
131    ///
132    /// - Below `passthrough_threshold` tokens: returned unchanged.
133    /// - At or above `passthrough_threshold` tokens: char-truncated so that the truncated
134    ///   text has approximately `passthrough_threshold` tokens (using a char-boundary-safe
135    ///   cut at `passthrough_threshold * 4` bytes), with `" [...truncated]"` appended.
136    ///
137    /// # Examples
138    ///
139    /// ```
140    /// use zeph_context::tool_result_compress::{
141    ///     ToolResultCompressor, ToolResultCompressionConfig, CompressionMethod,
142    /// };
143    ///
144    /// struct WordCounter;
145    /// impl zeph_common::memory::TokenCounting for WordCounter {
146    ///     fn count_tokens(&self, text: &str) -> usize { text.split_whitespace().count() }
147    ///     fn count_tool_schema_tokens(&self, _schema: &serde_json::Value) -> usize { 0 }
148    /// }
149    ///
150    /// let config = ToolResultCompressionConfig {
151    ///     passthrough_threshold: 3,
152    ///     summarize_threshold: 10,
153    ///     total_budget: 20,
154    /// };
155    /// let tc = WordCounter;
156    ///
157    /// // Short text passes through.
158    /// let r = ToolResultCompressor::compress_single("one two three", &tc, &config);
159    /// assert_eq!(r.method, CompressionMethod::PassThrough);
160    ///
161    /// // Long text is truncated.
162    /// let r = ToolResultCompressor::compress_single("one two three four five", &tc, &config);
163    /// assert_eq!(r.method, CompressionMethod::Truncated);
164    /// assert!(r.text.ends_with("[...truncated]"));
165    /// ```
166    #[must_use]
167    pub fn compress_single(
168        text: &str,
169        tc: &dyn TokenCounting,
170        config: &ToolResultCompressionConfig,
171    ) -> CompressedToolResult {
172        let original_tokens = tc.count_tokens(text);
173
174        if original_tokens <= config.passthrough_threshold {
175            return CompressedToolResult {
176                text: text.to_owned(),
177                original_tokens,
178                compressed_tokens: original_tokens,
179                method: CompressionMethod::PassThrough,
180            };
181        }
182
183        // Truncate at a char boundary. The heuristic is ~4 bytes per token.
184        // Subtract the marker's byte length so the final result (text + marker) fits within
185        // the passthrough_threshold token budget — without this the marker inflates the count.
186        let byte_limit = config
187            .passthrough_threshold
188            .saturating_mul(4)
189            .saturating_sub(TRUNCATION_MARKER.len());
190        let cut = text.floor_char_boundary(byte_limit.min(text.len()));
191        let truncated = format!("{}{}", &text[..cut], TRUNCATION_MARKER);
192        let compressed_tokens = tc.count_tokens(&truncated);
193
194        CompressedToolResult {
195            text: truncated,
196            original_tokens,
197            compressed_tokens,
198            method: CompressionMethod::Truncated,
199        }
200    }
201
202    /// Compress a batch of tool results, enforcing both per-result and total-budget limits.
203    ///
204    /// 1. Applies `compress_single` to each entry.
205    /// 2. If the total compressed tokens still exceed `total_budget`, trims results in
206    ///    descending token-count order. Ties are broken by `entry.index` (lower index
207    ///    trimmed first) for deterministic output.
208    ///
209    /// Returns one `CompressedToolResult` per input entry, in the same order.
210    ///
211    /// # Examples
212    ///
213    /// ```
214    /// use zeph_context::tool_result_compress::{
215    ///     ToolResultCompressor, ToolResultCompressionConfig, ToolResultEntry, CompressionMethod,
216    /// };
217    ///
218    /// struct WordCounter;
219    /// impl zeph_common::memory::TokenCounting for WordCounter {
220    ///     fn count_tokens(&self, text: &str) -> usize { text.split_whitespace().count() }
221    ///     fn count_tool_schema_tokens(&self, _schema: &serde_json::Value) -> usize { 0 }
222    /// }
223    ///
224    /// let config = ToolResultCompressionConfig {
225    ///     passthrough_threshold: 100,
226    ///     summarize_threshold: 200,
227    ///     total_budget: 5,
228    /// };
229    /// let tc = WordCounter;
230    /// let entries = vec![
231    ///     ToolResultEntry { tool_name: "shell", text: "one two three", index: 0 },
232    ///     ToolResultEntry { tool_name: "fetch", text: "four five six", index: 1 },
233    /// ];
234    /// let results = ToolResultCompressor::compress_batch(&entries, &tc, &config);
235    /// assert_eq!(results.len(), 2);
236    /// // Combined tokens (6) exceed total_budget (5) → at least one is BatchTrimmed.
237    /// assert!(results.iter().any(|r| r.method == CompressionMethod::BatchTrimmed));
238    /// ```
239    #[must_use]
240    pub fn compress_batch(
241        entries: &[ToolResultEntry<'_>],
242        tc: &dyn TokenCounting,
243        config: &ToolResultCompressionConfig,
244    ) -> Vec<CompressedToolResult> {
245        if entries.is_empty() {
246            return Vec::new();
247        }
248
249        // Phase 1: per-result compression.
250        let mut results: Vec<CompressedToolResult> = entries
251            .iter()
252            .map(|e| Self::compress_single(e.text, tc, config))
253            .collect();
254
255        // Phase 2: batch budget enforcement.
256        let total_tokens: usize = results.iter().map(|r| r.compressed_tokens).sum();
257        if total_tokens <= config.total_budget {
258            return results;
259        }
260
261        // Build a sorted list of (compressed_tokens, original_index) to trim the largest first.
262        // Tiebreaker: lower input index is trimmed first (critic note M3).
263        let mut order: Vec<usize> = (0..results.len()).collect();
264        order.sort_unstable_by(|&a, &b| {
265            let ta = results[a].compressed_tokens;
266            let tb = results[b].compressed_tokens;
267            // Descending by tokens, then ascending by index for ties.
268            tb.cmp(&ta)
269                .then_with(|| entries[a].index.cmp(&entries[b].index))
270        });
271
272        let mut remaining = total_tokens;
273        for &idx in &order {
274            if remaining <= config.total_budget {
275                break;
276            }
277            let current = results[idx].compressed_tokens;
278            // Shrink this result proportionally so the total fits.
279            let excess = remaining.saturating_sub(config.total_budget);
280            let target_tokens = current.saturating_sub(excess.min(current));
281            // target_tokens == 0 means we'd remove the entire result — keep a minimal stub.
282            let byte_limit = target_tokens.max(1).saturating_mul(4);
283            let cut = results[idx]
284                .text
285                .floor_char_boundary(byte_limit.min(results[idx].text.len()));
286            let trimmed = format!("{} [...truncated]", &results[idx].text[..cut]);
287            let new_tokens = tc.count_tokens(&trimmed);
288            remaining = remaining.saturating_sub(current).saturating_add(new_tokens);
289            results[idx].compressed_tokens = new_tokens;
290            results[idx].text = trimmed;
291            results[idx].method = CompressionMethod::BatchTrimmed;
292        }
293
294        results
295    }
296}
297
#[cfg(test)]
mod tests {
    use super::*;

    /// Whitespace-delimited word counter standing in for a real tokenizer.
    struct WordCounter;

    impl TokenCounting for WordCounter {
        fn count_tokens(&self, text: &str) -> usize {
            text.split_whitespace().count()
        }

        fn count_tool_schema_tokens(&self, _schema: &serde_json::Value) -> usize {
            0
        }
    }

    /// Shorthand for building a config from its three limits.
    fn cfg(passthrough: usize, summarize: usize, budget: usize) -> ToolResultCompressionConfig {
        ToolResultCompressionConfig {
            passthrough_threshold: passthrough,
            summarize_threshold: summarize,
            total_budget: budget,
        }
    }

    /// Shorthand for building a batch entry from string literals.
    fn entry(tool_name: &'static str, text: &'static str, index: usize) -> ToolResultEntry<'static> {
        ToolResultEntry {
            tool_name,
            text,
            index,
        }
    }

    /// Two entries of three words each — six tokens in total, equal sizes.
    fn tied_pair() -> [ToolResultEntry<'static>; 2] {
        [entry("a", "one two three", 0), entry("b", "four five six", 1)]
    }

    #[test]
    fn compress_single_passthrough_below_threshold() {
        let config = cfg(10, 20, 40);
        let out = ToolResultCompressor::compress_single("one two three", &WordCounter, &config);
        assert_eq!(out.method, CompressionMethod::PassThrough);
        assert_eq!(out.text, "one two three");
        assert_eq!(out.original_tokens, out.compressed_tokens);
    }

    #[test]
    fn compress_single_passthrough_at_exact_threshold() {
        // Three words against a threshold of three: still a pass-through.
        let config = cfg(3, 10, 20);
        let out = ToolResultCompressor::compress_single("a b c", &WordCounter, &config);
        assert_eq!(out.method, CompressionMethod::PassThrough);
    }

    #[test]
    fn compress_single_truncated_above_threshold() {
        let config = cfg(2, 10, 20);
        let input = "one two three four five";
        let out = ToolResultCompressor::compress_single(input, &WordCounter, &config);
        assert_eq!(out.method, CompressionMethod::Truncated);
        assert!(out.text.ends_with("[...truncated]"));
        assert!(out.compressed_tokens <= out.original_tokens);
    }

    #[test]
    fn compress_single_empty_text_passthrough() {
        let config = cfg(5, 10, 20);
        let out = ToolResultCompressor::compress_single("", &WordCounter, &config);
        assert_eq!(out.method, CompressionMethod::PassThrough);
        assert_eq!(out.text, "");
    }

    #[test]
    fn compress_batch_empty_input() {
        let config = cfg(5, 10, 20);
        let out = ToolResultCompressor::compress_batch(&[], &WordCounter, &config);
        assert!(out.is_empty());
    }

    #[test]
    fn compress_batch_within_budget_no_batch_trim() {
        let config = cfg(100, 200, 1000);
        let batch = [entry("a", "one two", 0), entry("b", "three four", 1)];
        let out = ToolResultCompressor::compress_batch(&batch, &WordCounter, &config);
        let any_trimmed = out
            .iter()
            .any(|r| r.method == CompressionMethod::BatchTrimmed);
        assert!(!any_trimmed, "no batch trimming expected within budget");
    }

    #[test]
    fn compress_batch_exceeds_budget_trims_largest_first() {
        // Six words in total against a budget of three.
        let config = cfg(100, 200, 3);
        let batch = tied_pair();
        let out = ToolResultCompressor::compress_batch(&batch, &WordCounter, &config);
        assert_eq!(out.len(), 2);

        // At least one result has to be batch-trimmed.
        assert!(
            out.iter()
                .any(|r| r.method == CompressionMethod::BatchTrimmed)
        );

        // The total lands near the budget; the truncation marker allows a little slack.
        let total: usize = out.iter().map(|r| r.compressed_tokens).sum();
        assert!(
            total <= config.total_budget + 3,
            "total {total} should be near budget {}",
            config.total_budget
        );
    }

    #[test]
    fn compress_batch_tiebreaker_lower_index_trimmed_first() {
        // Equal token counts, budget 3 < 6 total: index 0 must be the first one trimmed.
        let config = cfg(100, 200, 3);
        let batch = tied_pair();
        let out = ToolResultCompressor::compress_batch(&batch, &WordCounter, &config);
        assert_eq!(
            out[0].method,
            CompressionMethod::BatchTrimmed,
            "lower index must be trimmed first on equal token counts"
        );
    }

    #[test]
    fn acon_config_default_into_compression_config() {
        let acon = AconConfig::default();
        let converted = ToolResultCompressionConfig::from(&acon);
        assert_eq!(converted.passthrough_threshold, 2000);
        assert_eq!(converted.summarize_threshold, 4000);
        assert_eq!(converted.total_budget, 8000);
    }
}