Skip to main content

zeph_context/
tool_result_compress.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Acon tool-result compression (#4021).
5//!
6//! Stateless, pure-function compression pass that enforces per-result and batch-level
7//! token budgets on tool outputs **before** they enter message history. This runs as a
8//! pre-processing step in `zeph-core`'s tier loop, not as part of context assembly.
9//!
10//! # Compression model
11//!
//! - Results at or below `passthrough_threshold`: returned unchanged (`PassThrough`).
//! - Results above `passthrough_threshold`: char-truncated to approximately
//!   `passthrough_threshold` tokens with a `" [...truncated]"` suffix (`Truncated`).
//!   The byte budget reserves room for the suffix, but the cut relies on a
//!   ~4-bytes-per-token estimate, so `compressed_tokens` may still land above or below
//!   `passthrough_threshold` — callers must not rely on exact equality.
17//! - LLM summarization is **not** performed here — it is the caller's responsibility in
18//!   `zeph-core`. The caller may pre-summarize a result and then pass the shortened text
19//!   to `compress_single` or `compress_batch`.
20//! - After per-result compression, `compress_batch` enforces the `total_budget` cap by
21//!   proportionally trimming the largest results (`BatchTrimmed`).
22
23use zeph_common::memory::TokenCounting;
24use zeph_config::AconConfig;
25
/// Suffix appended to every result that has been cut short.
///
/// The leading space separates the marker from the retained text so it is counted as its
/// own word by whitespace-based token counters.
const TRUNCATION_MARKER: &str = " [...truncated]";
27
/// Configuration for Acon tool-result compression.
///
/// Constructed from [`AconConfig`] (zeph-config) at session init via the [`From`] impl.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ToolResultCompressionConfig {
    /// Token count at or below which results pass through unchanged.
    /// Also the approximate truncation target: results above this are char-truncated to
    /// approximately this many tokens, `" [...truncated]"` suffix included.
    /// Default: `2000`.
    pub passthrough_threshold: usize,
    /// Token count above which the caller should attempt LLM summarization before
    /// falling back to truncation. Not enforced here — informational for the caller.
    /// Default: `4000`.
    pub summarize_threshold: usize,
    /// Maximum total tokens for all tool results combined in a single turn. Default: `8000`.
    pub total_budget: usize,
}
45
46impl From<&AconConfig> for ToolResultCompressionConfig {
47    fn from(cfg: &AconConfig) -> Self {
48        Self {
49            passthrough_threshold: cfg.passthrough_threshold,
50            summarize_threshold: cfg.summarize_threshold,
51            total_budget: cfg.total_budget,
52        }
53    }
54}
55
/// A tool result entry before compression, used as input to `compress_batch`.
///
/// The `index` field is used as a deterministic tiebreaker when two results have
/// equal token counts during batch budget enforcement (lower index trimmed first).
///
/// The entry only borrows its strings, so it is cheap to copy.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ToolResultEntry<'a> {
    /// Tool name for tracing and logging.
    pub tool_name: &'a str,
    /// Raw tool result text.
    pub text: &'a str,
    /// Position in the original tool call list. Used as tiebreaker in batch trimming.
    pub index: usize,
}
68
/// Method applied when compressing a single tool result.
///
/// Does NOT include a `Summarized` variant — LLM summarization is the caller's
/// responsibility in `zeph-core` before calling these methods.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionMethod {
    /// Result was at or below `passthrough_threshold` — returned unchanged.
    PassThrough,
    /// Result was cut at a char boundary to approximately `passthrough_threshold` tokens
    /// and suffixed with `" [...truncated]"`. The token count is re-measured after the
    /// cut, so `compressed_tokens` need not equal `passthrough_threshold`.
    Truncated,
    /// Result was trimmed further during batch budget enforcement.
    BatchTrimmed,
}
84
85/// Output of compressing a single tool result.
86#[derive(Debug, Clone)]
87pub struct CompressedToolResult {
88    /// Compressed (or unchanged) text.
89    pub text: String,
90    /// Token count before compression.
91    pub original_tokens: usize,
92    /// Token count after compression.
93    pub compressed_tokens: usize,
94    /// Method applied.
95    pub method: CompressionMethod,
96}
97
/// Stateless tool-result compressor for Acon (#4021).
///
/// All methods are pure functions: they take text, a token counter, and a config, and
/// return compressed text with metadata. No I/O, no async, no agent state.
///
/// The type is a zero-sized namespace for those functions; it carries no data.
///
/// # Examples
///
/// ```
/// use zeph_context::tool_result_compress::{
///     ToolResultCompressor, ToolResultCompressionConfig, CompressionMethod,
/// };
///
/// struct WordCounter;
/// impl zeph_common::memory::TokenCounting for WordCounter {
///     fn count_tokens(&self, text: &str) -> usize { text.split_whitespace().count() }
///     fn count_tool_schema_tokens(&self, _schema: &serde_json::Value) -> usize { 0 }
/// }
///
/// let config = ToolResultCompressionConfig {
///     passthrough_threshold: 5,
///     summarize_threshold: 10,
///     total_budget: 20,
/// };
/// let tc = WordCounter;
///
/// let result = ToolResultCompressor::compress_single("hello world", &tc, &config);
/// assert_eq!(result.method, CompressionMethod::PassThrough);
/// ```
#[derive(Debug, Clone, Copy, Default)]
pub struct ToolResultCompressor;
127
128impl ToolResultCompressor {
129    /// Compress a single tool result text.
130    ///
131    /// - Below `passthrough_threshold` tokens: returned unchanged.
132    /// - At or above `passthrough_threshold` tokens: char-truncated so that the truncated
133    ///   text has approximately `passthrough_threshold` tokens (using a char-boundary-safe
134    ///   cut at `passthrough_threshold * 4` bytes), with `" [...truncated]"` appended.
135    ///
136    /// # Examples
137    ///
138    /// ```
139    /// use zeph_context::tool_result_compress::{
140    ///     ToolResultCompressor, ToolResultCompressionConfig, CompressionMethod,
141    /// };
142    ///
143    /// struct WordCounter;
144    /// impl zeph_common::memory::TokenCounting for WordCounter {
145    ///     fn count_tokens(&self, text: &str) -> usize { text.split_whitespace().count() }
146    ///     fn count_tool_schema_tokens(&self, _schema: &serde_json::Value) -> usize { 0 }
147    /// }
148    ///
149    /// let config = ToolResultCompressionConfig {
150    ///     passthrough_threshold: 3,
151    ///     summarize_threshold: 10,
152    ///     total_budget: 20,
153    /// };
154    /// let tc = WordCounter;
155    ///
156    /// // Short text passes through.
157    /// let r = ToolResultCompressor::compress_single("one two three", &tc, &config);
158    /// assert_eq!(r.method, CompressionMethod::PassThrough);
159    ///
160    /// // Long text is truncated.
161    /// let r = ToolResultCompressor::compress_single("one two three four five", &tc, &config);
162    /// assert_eq!(r.method, CompressionMethod::Truncated);
163    /// assert!(r.text.ends_with("[...truncated]"));
164    /// ```
165    #[must_use]
166    pub fn compress_single(
167        text: &str,
168        tc: &dyn TokenCounting,
169        config: &ToolResultCompressionConfig,
170    ) -> CompressedToolResult {
171        let original_tokens = tc.count_tokens(text);
172
173        if original_tokens <= config.passthrough_threshold {
174            return CompressedToolResult {
175                text: text.to_owned(),
176                original_tokens,
177                compressed_tokens: original_tokens,
178                method: CompressionMethod::PassThrough,
179            };
180        }
181
182        // Truncate at a char boundary. The heuristic is ~4 bytes per token.
183        // Subtract the marker's byte length so the final result (text + marker) fits within
184        // the passthrough_threshold token budget — without this the marker inflates the count.
185        let byte_limit = config
186            .passthrough_threshold
187            .saturating_mul(4)
188            .saturating_sub(TRUNCATION_MARKER.len());
189        let cut = text.floor_char_boundary(byte_limit.min(text.len()));
190        let truncated = format!("{}{}", &text[..cut], TRUNCATION_MARKER);
191        let compressed_tokens = tc.count_tokens(&truncated);
192
193        CompressedToolResult {
194            text: truncated,
195            original_tokens,
196            compressed_tokens,
197            method: CompressionMethod::Truncated,
198        }
199    }
200
201    /// Compress a batch of tool results, enforcing both per-result and total-budget limits.
202    ///
203    /// 1. Applies `compress_single` to each entry.
204    /// 2. If the total compressed tokens still exceed `total_budget`, trims results in
205    ///    descending token-count order. Ties are broken by `entry.index` (lower index
206    ///    trimmed first) for deterministic output.
207    ///
208    /// Returns one `CompressedToolResult` per input entry, in the same order.
209    ///
210    /// # Examples
211    ///
212    /// ```
213    /// use zeph_context::tool_result_compress::{
214    ///     ToolResultCompressor, ToolResultCompressionConfig, ToolResultEntry, CompressionMethod,
215    /// };
216    ///
217    /// struct WordCounter;
218    /// impl zeph_common::memory::TokenCounting for WordCounter {
219    ///     fn count_tokens(&self, text: &str) -> usize { text.split_whitespace().count() }
220    ///     fn count_tool_schema_tokens(&self, _schema: &serde_json::Value) -> usize { 0 }
221    /// }
222    ///
223    /// let config = ToolResultCompressionConfig {
224    ///     passthrough_threshold: 100,
225    ///     summarize_threshold: 200,
226    ///     total_budget: 5,
227    /// };
228    /// let tc = WordCounter;
229    /// let entries = vec![
230    ///     ToolResultEntry { tool_name: "shell", text: "one two three", index: 0 },
231    ///     ToolResultEntry { tool_name: "fetch", text: "four five six", index: 1 },
232    /// ];
233    /// let results = ToolResultCompressor::compress_batch(&entries, &tc, &config);
234    /// assert_eq!(results.len(), 2);
235    /// // Combined tokens (6) exceed total_budget (5) → at least one is BatchTrimmed.
236    /// assert!(results.iter().any(|r| r.method == CompressionMethod::BatchTrimmed));
237    /// ```
238    #[must_use]
239    pub fn compress_batch(
240        entries: &[ToolResultEntry<'_>],
241        tc: &dyn TokenCounting,
242        config: &ToolResultCompressionConfig,
243    ) -> Vec<CompressedToolResult> {
244        if entries.is_empty() {
245            return Vec::new();
246        }
247
248        // Phase 1: per-result compression.
249        let mut results: Vec<CompressedToolResult> = entries
250            .iter()
251            .map(|e| Self::compress_single(e.text, tc, config))
252            .collect();
253
254        // Phase 2: batch budget enforcement.
255        let total_tokens: usize = results.iter().map(|r| r.compressed_tokens).sum();
256        if total_tokens <= config.total_budget {
257            return results;
258        }
259
260        // Build a sorted list of (compressed_tokens, original_index) to trim the largest first.
261        // Tiebreaker: lower input index is trimmed first (critic note M3).
262        let mut order: Vec<usize> = (0..results.len()).collect();
263        order.sort_unstable_by(|&a, &b| {
264            let ta = results[a].compressed_tokens;
265            let tb = results[b].compressed_tokens;
266            // Descending by tokens, then ascending by index for ties.
267            tb.cmp(&ta)
268                .then_with(|| entries[a].index.cmp(&entries[b].index))
269        });
270
271        let mut remaining = total_tokens;
272        for &idx in &order {
273            if remaining <= config.total_budget {
274                break;
275            }
276            let current = results[idx].compressed_tokens;
277            // Shrink this result proportionally so the total fits.
278            let excess = remaining.saturating_sub(config.total_budget);
279            let target_tokens = current.saturating_sub(excess.min(current));
280            // target_tokens == 0 means we'd remove the entire result — keep a minimal stub.
281            let byte_limit = target_tokens.max(1).saturating_mul(4);
282            let cut = results[idx]
283                .text
284                .floor_char_boundary(byte_limit.min(results[idx].text.len()));
285            let trimmed = format!("{} [...truncated]", &results[idx].text[..cut]);
286            let new_tokens = tc.count_tokens(&trimmed);
287            remaining = remaining.saturating_sub(current).saturating_add(new_tokens);
288            results[idx].compressed_tokens = new_tokens;
289            results[idx].text = trimmed;
290            results[idx].method = CompressionMethod::BatchTrimmed;
291        }
292
293        results
294    }
295}
296
#[cfg(test)]
mod tests {
    use super::*;

    /// Whitespace-delimited word counter: deterministic and easy to reason about in tests.
    struct WordCounter;
    impl TokenCounting for WordCounter {
        fn count_tokens(&self, text: &str) -> usize {
            text.split_whitespace().count()
        }

        fn count_tool_schema_tokens(&self, _schema: &serde_json::Value) -> usize {
            0
        }
    }

    /// Shorthand for building a config from its three limits.
    fn cfg(passthrough: usize, summarize: usize, budget: usize) -> ToolResultCompressionConfig {
        ToolResultCompressionConfig {
            passthrough_threshold: passthrough,
            summarize_threshold: summarize,
            total_budget: budget,
        }
    }

    /// Shorthand for building a batch entry.
    fn entry<'a>(tool_name: &'a str, text: &'a str, index: usize) -> ToolResultEntry<'a> {
        ToolResultEntry {
            tool_name,
            text,
            index,
        }
    }

    /// Two three-word entries (six tokens in total) shared by the over-budget tests.
    fn two_three_word_entries() -> Vec<ToolResultEntry<'static>> {
        vec![
            entry("a", "one two three", 0),
            entry("b", "four five six", 1),
        ]
    }

    #[test]
    fn compress_single_passthrough_below_threshold() {
        let tc = WordCounter;
        let config = cfg(10, 20, 40);
        let r = ToolResultCompressor::compress_single("one two three", &tc, &config);
        assert_eq!(r.method, CompressionMethod::PassThrough);
        assert_eq!(r.text, "one two three");
        assert_eq!(r.original_tokens, r.compressed_tokens);
    }

    #[test]
    fn compress_single_passthrough_at_exact_threshold() {
        let tc = WordCounter;
        // "a b c" = 3 words; threshold = 3 → passthrough.
        let config = cfg(3, 10, 20);
        let r = ToolResultCompressor::compress_single("a b c", &tc, &config);
        assert_eq!(r.method, CompressionMethod::PassThrough);
    }

    #[test]
    fn compress_single_truncated_above_threshold() {
        let tc = WordCounter;
        let config = cfg(2, 10, 20);
        let text = "one two three four five";
        let r = ToolResultCompressor::compress_single(text, &tc, &config);
        assert_eq!(r.method, CompressionMethod::Truncated);
        assert!(r.text.ends_with("[...truncated]"));
        assert_eq!(r.original_tokens, 5);
        assert!(r.compressed_tokens <= r.original_tokens);
    }

    #[test]
    fn compress_single_empty_text_passthrough() {
        let tc = WordCounter;
        let config = cfg(5, 10, 20);
        let r = ToolResultCompressor::compress_single("", &tc, &config);
        assert_eq!(r.method, CompressionMethod::PassThrough);
        assert_eq!(r.text, "");
    }

    #[test]
    fn compress_batch_empty_input() {
        let tc = WordCounter;
        let config = cfg(5, 10, 20);
        let results = ToolResultCompressor::compress_batch(&[], &tc, &config);
        assert!(results.is_empty());
    }

    #[test]
    fn compress_batch_within_budget_no_batch_trim() {
        let tc = WordCounter;
        let config = cfg(100, 200, 1000);
        let entries = vec![entry("a", "one two", 0), entry("b", "three four", 1)];
        let results = ToolResultCompressor::compress_batch(&entries, &tc, &config);
        assert!(
            results
                .iter()
                .all(|r| r.method != CompressionMethod::BatchTrimmed),
            "no batch trimming expected within budget"
        );
        // Output order follows input order and the texts are untouched.
        assert_eq!(results[0].text, "one two");
        assert_eq!(results[1].text, "three four");
    }

    #[test]
    fn compress_batch_exceeds_budget_trims_largest_first() {
        let tc = WordCounter;
        // budget = 3; two entries of 3 tokens each, 6 in total.
        let config = cfg(100, 200, 3);
        let entries = two_three_word_entries();
        let results = ToolResultCompressor::compress_batch(&entries, &tc, &config);
        assert_eq!(results.len(), 2);
        // At least one must be BatchTrimmed.
        assert!(
            results
                .iter()
                .any(|r| r.method == CompressionMethod::BatchTrimmed)
        );
        // Total must not exceed budget (within rounding from the truncation marker).
        let total: usize = results.iter().map(|r| r.compressed_tokens).sum();
        assert!(
            total <= config.total_budget + 3,
            "total {total} should be near budget {}",
            config.total_budget
        );
    }

    #[test]
    fn compress_batch_tiebreaker_lower_index_trimmed_first() {
        let tc = WordCounter;
        // Both entries have the same token count. budget = 3 < 6 total.
        // Lower index (0) should be trimmed first.
        let config = cfg(100, 200, 3);
        let entries = two_three_word_entries();
        let results = ToolResultCompressor::compress_batch(&entries, &tc, &config);
        // Index 0 should be BatchTrimmed (lower index trimmed first on tie).
        assert_eq!(
            results[0].method,
            CompressionMethod::BatchTrimmed,
            "lower index must be trimmed first on equal token counts"
        );
    }

    #[test]
    fn acon_config_default_into_compression_config() {
        let acon = AconConfig::default();
        let cfg = ToolResultCompressionConfig::from(&acon);
        assert_eq!(cfg.passthrough_threshold, 2000);
        assert_eq!(cfg.summarize_threshold, 4000);
        assert_eq!(cfg.total_budget, 8000);
    }
}