zeph_context/tool_result_compress.rs
1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
//! Acon tool-result compression (#4021).
//!
//! Stateless, pure-function compression pass that enforces per-result and batch-level
//! token budgets on tool outputs **before** they enter message history. This runs as a
//! pre-processing step in `zeph-core`'s tier loop, not as part of context assembly.
//!
//! # Compression model
//!
//! - Results at or below `passthrough_threshold` tokens are returned unchanged
//!   (`PassThrough`).
//! - Results above `passthrough_threshold` are cut at a char boundary to approximately
//!   `passthrough_threshold` tokens and suffixed with `" [...truncated]"` (`Truncated`).
//!   The cut is sized with a ~4-bytes-per-token estimate, so `compressed_tokens` only
//!   approximates `passthrough_threshold` — callers must not rely on exact equality.
//! - LLM summarization is **not** performed here — it is the caller's responsibility in
//!   `zeph-core`. The caller may pre-summarize a result and then pass the shortened text
//!   to `compress_single` or `compress_batch`.
//! - After per-result compression, `compress_batch` enforces the `total_budget` cap by
//!   trimming results in descending size order (`BatchTrimmed`).
22
23use zeph_common::memory::TokenCounting;
24use zeph_config::AconConfig;
25
/// Suffix that marks a tool result as cut short; appended by `compress_single`.
const TRUNCATION_MARKER: &str = " [...truncated]";
27
/// Token thresholds that drive Acon tool-result compression.
///
/// Built from an [`AconConfig`] (zeph-config) through the [`From`] impl.
#[derive(Clone, Debug)]
pub struct ToolResultCompressionConfig {
    /// Largest token count a result may have and still pass through untouched.
    ///
    /// Doubles as the approximate size that longer results are truncated to. The cut is
    /// byte-based, so the resulting token count is an estimate. Default: `2000`.
    pub passthrough_threshold: usize,
    /// Token count above which the caller should try LLM summarization before falling
    /// back to truncation. Informational only — the compressor does not enforce it.
    /// Default: `4000`.
    pub summarize_threshold: usize,
    /// Cap on the combined token count of all tool results in one turn. Default: `8000`.
    pub total_budget: usize,
}
45
46impl From<&AconConfig> for ToolResultCompressionConfig {
47 fn from(cfg: &AconConfig) -> Self {
48 Self {
49 passthrough_threshold: cfg.passthrough_threshold,
50 summarize_threshold: cfg.summarize_threshold,
51 total_budget: cfg.total_budget,
52 }
53 }
54}
55
/// One raw tool result, borrowed from the caller, as fed to `compress_batch`.
///
/// When two results have the same token count during batch budget enforcement, the one
/// with the lower `index` is trimmed first, which keeps the output deterministic.
pub struct ToolResultEntry<'a> {
    /// Name of the tool that produced the result (for tracing and logging).
    pub tool_name: &'a str,
    /// The tool's raw, uncompressed output.
    pub text: &'a str,
    /// Position of the call in the original tool call list; the batch-trimming tiebreaker.
    pub index: usize,
}
68
/// How a single tool result ended up in its final form.
///
/// There is deliberately no `Summarized` variant: LLM summarization is done by the
/// caller in `zeph-core` before the compressor is invoked.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum CompressionMethod {
    /// The result fit within `passthrough_threshold` and was returned unchanged.
    PassThrough,
    /// The result was cut at a char boundary to roughly `passthrough_threshold` tokens
    /// and suffixed with `" [...truncated]"`. The cut is byte-based, so
    /// `compressed_tokens` is not guaranteed to equal the threshold.
    Truncated,
    /// The result was shortened while enforcing the batch-wide `total_budget`.
    BatchTrimmed,
}
85
86/// Output of compressing a single tool result.
87#[derive(Debug, Clone)]
88pub struct CompressedToolResult {
89 /// Compressed (or unchanged) text.
90 pub text: String,
91 /// Token count before compression.
92 pub original_tokens: usize,
93 /// Token count after compression.
94 pub compressed_tokens: usize,
95 /// Method applied.
96 pub method: CompressionMethod,
97}
98
/// Namespace for the Acon (#4021) tool-result compression functions.
///
/// The type carries no state. Its associated functions take the text, a token counter
/// and a config, and hand back the compressed text with metadata; they are synchronous
/// and touch no agent state.
///
/// # Examples
///
/// ```
/// use zeph_context::tool_result_compress::{
///     ToolResultCompressor, ToolResultCompressionConfig, CompressionMethod,
/// };
///
/// struct WordCounter;
/// impl zeph_common::memory::TokenCounting for WordCounter {
///     fn count_tokens(&self, text: &str) -> usize { text.split_whitespace().count() }
///     fn count_tool_schema_tokens(&self, _schema: &serde_json::Value) -> usize { 0 }
/// }
///
/// let config = ToolResultCompressionConfig {
///     passthrough_threshold: 5,
///     summarize_threshold: 10,
///     total_budget: 20,
/// };
/// let tc = WordCounter;
///
/// let result = ToolResultCompressor::compress_single("hello world", &tc, &config);
/// assert_eq!(result.method, CompressionMethod::PassThrough);
/// ```
pub struct ToolResultCompressor;
128
129impl ToolResultCompressor {
130 /// Compress a single tool result text.
131 ///
132 /// - Below `passthrough_threshold` tokens: returned unchanged.
133 /// - At or above `passthrough_threshold` tokens: char-truncated so that the truncated
134 /// text has approximately `passthrough_threshold` tokens (using a char-boundary-safe
135 /// cut at `passthrough_threshold * 4` bytes), with `" [...truncated]"` appended.
136 ///
137 /// # Examples
138 ///
139 /// ```
140 /// use zeph_context::tool_result_compress::{
141 /// ToolResultCompressor, ToolResultCompressionConfig, CompressionMethod,
142 /// };
143 ///
144 /// struct WordCounter;
145 /// impl zeph_common::memory::TokenCounting for WordCounter {
146 /// fn count_tokens(&self, text: &str) -> usize { text.split_whitespace().count() }
147 /// fn count_tool_schema_tokens(&self, _schema: &serde_json::Value) -> usize { 0 }
148 /// }
149 ///
150 /// let config = ToolResultCompressionConfig {
151 /// passthrough_threshold: 3,
152 /// summarize_threshold: 10,
153 /// total_budget: 20,
154 /// };
155 /// let tc = WordCounter;
156 ///
157 /// // Short text passes through.
158 /// let r = ToolResultCompressor::compress_single("one two three", &tc, &config);
159 /// assert_eq!(r.method, CompressionMethod::PassThrough);
160 ///
161 /// // Long text is truncated.
162 /// let r = ToolResultCompressor::compress_single("one two three four five", &tc, &config);
163 /// assert_eq!(r.method, CompressionMethod::Truncated);
164 /// assert!(r.text.ends_with("[...truncated]"));
165 /// ```
166 #[must_use]
167 pub fn compress_single(
168 text: &str,
169 tc: &dyn TokenCounting,
170 config: &ToolResultCompressionConfig,
171 ) -> CompressedToolResult {
172 let original_tokens = tc.count_tokens(text);
173
174 if original_tokens <= config.passthrough_threshold {
175 return CompressedToolResult {
176 text: text.to_owned(),
177 original_tokens,
178 compressed_tokens: original_tokens,
179 method: CompressionMethod::PassThrough,
180 };
181 }
182
183 // Truncate at a char boundary. The heuristic is ~4 bytes per token.
184 // Subtract the marker's byte length so the final result (text + marker) fits within
185 // the passthrough_threshold token budget — without this the marker inflates the count.
186 let byte_limit = config
187 .passthrough_threshold
188 .saturating_mul(4)
189 .saturating_sub(TRUNCATION_MARKER.len());
190 let cut = text.floor_char_boundary(byte_limit.min(text.len()));
191 let truncated = format!("{}{}", &text[..cut], TRUNCATION_MARKER);
192 let compressed_tokens = tc.count_tokens(&truncated);
193
194 CompressedToolResult {
195 text: truncated,
196 original_tokens,
197 compressed_tokens,
198 method: CompressionMethod::Truncated,
199 }
200 }
201
202 /// Compress a batch of tool results, enforcing both per-result and total-budget limits.
203 ///
204 /// 1. Applies `compress_single` to each entry.
205 /// 2. If the total compressed tokens still exceed `total_budget`, trims results in
206 /// descending token-count order. Ties are broken by `entry.index` (lower index
207 /// trimmed first) for deterministic output.
208 ///
209 /// Returns one `CompressedToolResult` per input entry, in the same order.
210 ///
211 /// # Examples
212 ///
213 /// ```
214 /// use zeph_context::tool_result_compress::{
215 /// ToolResultCompressor, ToolResultCompressionConfig, ToolResultEntry, CompressionMethod,
216 /// };
217 ///
218 /// struct WordCounter;
219 /// impl zeph_common::memory::TokenCounting for WordCounter {
220 /// fn count_tokens(&self, text: &str) -> usize { text.split_whitespace().count() }
221 /// fn count_tool_schema_tokens(&self, _schema: &serde_json::Value) -> usize { 0 }
222 /// }
223 ///
224 /// let config = ToolResultCompressionConfig {
225 /// passthrough_threshold: 100,
226 /// summarize_threshold: 200,
227 /// total_budget: 5,
228 /// };
229 /// let tc = WordCounter;
230 /// let entries = vec![
231 /// ToolResultEntry { tool_name: "shell", text: "one two three", index: 0 },
232 /// ToolResultEntry { tool_name: "fetch", text: "four five six", index: 1 },
233 /// ];
234 /// let results = ToolResultCompressor::compress_batch(&entries, &tc, &config);
235 /// assert_eq!(results.len(), 2);
236 /// // Combined tokens (6) exceed total_budget (5) → at least one is BatchTrimmed.
237 /// assert!(results.iter().any(|r| r.method == CompressionMethod::BatchTrimmed));
238 /// ```
239 #[must_use]
240 pub fn compress_batch(
241 entries: &[ToolResultEntry<'_>],
242 tc: &dyn TokenCounting,
243 config: &ToolResultCompressionConfig,
244 ) -> Vec<CompressedToolResult> {
245 if entries.is_empty() {
246 return Vec::new();
247 }
248
249 // Phase 1: per-result compression.
250 let mut results: Vec<CompressedToolResult> = entries
251 .iter()
252 .map(|e| Self::compress_single(e.text, tc, config))
253 .collect();
254
255 // Phase 2: batch budget enforcement.
256 let total_tokens: usize = results.iter().map(|r| r.compressed_tokens).sum();
257 if total_tokens <= config.total_budget {
258 return results;
259 }
260
261 // Build a sorted list of (compressed_tokens, original_index) to trim the largest first.
262 // Tiebreaker: lower input index is trimmed first (critic note M3).
263 let mut order: Vec<usize> = (0..results.len()).collect();
264 order.sort_unstable_by(|&a, &b| {
265 let ta = results[a].compressed_tokens;
266 let tb = results[b].compressed_tokens;
267 // Descending by tokens, then ascending by index for ties.
268 tb.cmp(&ta)
269 .then_with(|| entries[a].index.cmp(&entries[b].index))
270 });
271
272 let mut remaining = total_tokens;
273 for &idx in &order {
274 if remaining <= config.total_budget {
275 break;
276 }
277 let current = results[idx].compressed_tokens;
278 // Shrink this result proportionally so the total fits.
279 let excess = remaining.saturating_sub(config.total_budget);
280 let target_tokens = current.saturating_sub(excess.min(current));
281 // target_tokens == 0 means we'd remove the entire result — keep a minimal stub.
282 let byte_limit = target_tokens.max(1).saturating_mul(4);
283 let cut = results[idx]
284 .text
285 .floor_char_boundary(byte_limit.min(results[idx].text.len()));
286 let trimmed = format!("{} [...truncated]", &results[idx].text[..cut]);
287 let new_tokens = tc.count_tokens(&trimmed);
288 remaining = remaining.saturating_sub(current).saturating_add(new_tokens);
289 results[idx].compressed_tokens = new_tokens;
290 results[idx].text = trimmed;
291 results[idx].method = CompressionMethod::BatchTrimmed;
292 }
293
294 results
295 }
296}
297
#[cfg(test)]
mod tests {
    use super::*;

    /// Token counter that treats every whitespace-separated word as one token.
    struct WordCounter;

    impl TokenCounting for WordCounter {
        fn count_tokens(&self, text: &str) -> usize {
            text.split_whitespace().count()
        }

        fn count_tool_schema_tokens(&self, _schema: &serde_json::Value) -> usize {
            0
        }
    }

    /// Builds a compression config from the three thresholds.
    fn limits(
        passthrough_threshold: usize,
        summarize_threshold: usize,
        total_budget: usize,
    ) -> ToolResultCompressionConfig {
        ToolResultCompressionConfig {
            passthrough_threshold,
            summarize_threshold,
            total_budget,
        }
    }

    /// Builds a batch entry.
    fn entry<'a>(tool_name: &'a str, text: &'a str, index: usize) -> ToolResultEntry<'a> {
        ToolResultEntry {
            tool_name,
            text,
            index,
        }
    }

    #[test]
    fn compress_single_passthrough_below_threshold() {
        let outcome =
            ToolResultCompressor::compress_single("one two three", &WordCounter, &limits(10, 20, 40));
        assert_eq!(outcome.method, CompressionMethod::PassThrough);
        assert_eq!(outcome.text, "one two three");
        assert_eq!(outcome.original_tokens, outcome.compressed_tokens);
    }

    #[test]
    fn compress_single_passthrough_at_exact_threshold() {
        // Three words against a threshold of three: still a pass-through.
        let outcome = ToolResultCompressor::compress_single("a b c", &WordCounter, &limits(3, 10, 20));
        assert_eq!(outcome.method, CompressionMethod::PassThrough);
    }

    #[test]
    fn compress_single_truncated_above_threshold() {
        let outcome = ToolResultCompressor::compress_single(
            "one two three four five",
            &WordCounter,
            &limits(2, 10, 20),
        );
        assert_eq!(outcome.method, CompressionMethod::Truncated);
        assert!(outcome.text.ends_with("[...truncated]"));
        assert!(outcome.compressed_tokens <= outcome.original_tokens);
    }

    #[test]
    fn compress_single_empty_text_passthrough() {
        let outcome = ToolResultCompressor::compress_single("", &WordCounter, &limits(5, 10, 20));
        assert_eq!(outcome.method, CompressionMethod::PassThrough);
        assert_eq!(outcome.text, "");
    }

    #[test]
    fn compress_batch_empty_input() {
        let outcome = ToolResultCompressor::compress_batch(&[], &WordCounter, &limits(5, 10, 20));
        assert!(outcome.is_empty());
    }

    #[test]
    fn compress_batch_within_budget_no_batch_trim() {
        let batch = [entry("a", "one two", 0), entry("b", "three four", 1)];
        let outcome =
            ToolResultCompressor::compress_batch(&batch, &WordCounter, &limits(100, 200, 1000));
        let any_trimmed = outcome
            .iter()
            .any(|r| r.method == CompressionMethod::BatchTrimmed);
        assert!(!any_trimmed, "no batch trimming expected within budget");
    }

    #[test]
    fn compress_batch_exceeds_budget_trims_largest_first() {
        // Two three-word entries (six tokens) against a budget of three.
        let config = limits(100, 200, 3);
        let batch = [
            entry("a", "one two three", 0),
            entry("b", "four five six", 1),
        ];
        let outcome = ToolResultCompressor::compress_batch(&batch, &WordCounter, &config);
        assert_eq!(outcome.len(), 2);

        // Something has to give.
        let any_trimmed = outcome
            .iter()
            .any(|r| r.method == CompressionMethod::BatchTrimmed);
        assert!(any_trimmed);

        // The total may overshoot slightly because of the truncation marker.
        let total: usize = outcome.iter().map(|r| r.compressed_tokens).sum();
        assert!(
            total <= config.total_budget + 3,
            "total {total} should be near budget {}",
            config.total_budget
        );
    }

    #[test]
    fn compress_batch_tiebreaker_lower_index_trimmed_first() {
        // Equal token counts and a budget (3) below the total (6): index 0 goes first.
        let batch = [
            entry("a", "one two three", 0),
            entry("b", "four five six", 1),
        ];
        let outcome =
            ToolResultCompressor::compress_batch(&batch, &WordCounter, &limits(100, 200, 3));
        assert_eq!(
            outcome[0].method,
            CompressionMethod::BatchTrimmed,
            "lower index must be trimmed first on equal token counts"
        );
    }

    #[test]
    fn acon_config_default_into_compression_config() {
        let converted = ToolResultCompressionConfig::from(&AconConfig::default());
        assert_eq!(converted.passthrough_threshold, 2000);
        assert_eq!(converted.summarize_threshold, 4000);
        assert_eq!(converted.total_budget, 8000);
    }
}