zeph_context/tool_result_compress.rs
1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Acon tool-result compression (#4021).
5//!
6//! Stateless, pure-function compression pass that enforces per-result and batch-level
7//! token budgets on tool outputs **before** they enter message history. This runs as a
8//! pre-processing step in `zeph-core`'s tier loop, not as part of context assembly.
9//!
10//! # Compression model
11//!
//! - Results at or below `passthrough_threshold`: returned unchanged (`PassThrough`).
//! - Results above `passthrough_threshold`: char-truncated to approximately
//!   `passthrough_threshold` tokens with a `" [...truncated]"` suffix (`Truncated`).
//!   The cut point comes from a ~4-bytes-per-token heuristic, so `compressed_tokens`
//!   is only approximate and may exceed `passthrough_threshold` — callers must not
//!   rely on exact equality.
17//! - LLM summarization is **not** performed here — it is the caller's responsibility in
18//! `zeph-core`. The caller may pre-summarize a result and then pass the shortened text
19//! to `compress_single` or `compress_batch`.
20//! - After per-result compression, `compress_batch` enforces the `total_budget` cap by
21//! proportionally trimming the largest results (`BatchTrimmed`).
22
23use zeph_common::memory::TokenCounting;
24use zeph_config::AconConfig;
25
/// Suffix appended to every shortened tool result so readers can tell that content
/// was cut. The leading space keeps it separated from the preceding text.
const TRUNCATION_MARKER: &str = " [...truncated]";
27
/// Token thresholds that drive Acon tool-result compression.
///
/// Built from [`AconConfig`] (zeph-config) through the [`From`] impl in this module.
#[derive(Debug, Clone)]
pub struct ToolResultCompressionConfig {
    /// Results whose token count is at or below this value pass through unchanged.
    ///
    /// It doubles as the truncation target: larger results are char-truncated to
    /// roughly this many tokens, with a `" [...truncated]"` suffix appended.
    /// Default: `2000`.
    pub passthrough_threshold: usize,
    /// Token count above which the caller should try LLM summarization before falling
    /// back to truncation. Informational only — nothing in this module reads it.
    /// Default: `4000`.
    pub summarize_threshold: usize,
    /// Cap on the combined token count of all tool results in a single turn.
    /// Default: `8000`.
    pub total_budget: usize,
}
45
46impl From<&AconConfig> for ToolResultCompressionConfig {
47 fn from(cfg: &AconConfig) -> Self {
48 Self {
49 passthrough_threshold: cfg.passthrough_threshold,
50 summarize_threshold: cfg.summarize_threshold,
51 total_budget: cfg.total_budget,
52 }
53 }
54}
55
/// A tool result entry before compression, used as input to `compress_batch`.
///
/// The `index` field is a deterministic tiebreaker when two results have equal token
/// counts during batch budget enforcement (the lower index is trimmed first).
///
/// The entry only borrows its strings, so it is cheap to copy.
#[derive(Debug, Clone, Copy)]
pub struct ToolResultEntry<'a> {
    /// Tool name for tracing and logging.
    pub tool_name: &'a str,
    /// Raw tool result text.
    pub text: &'a str,
    /// Position in the original tool call list. Used as tiebreaker in batch trimming.
    pub index: usize,
}
68
/// How a single tool result was handled by the compressor.
///
/// There is deliberately no `Summarized` variant: LLM summarization is done by the
/// caller in `zeph-core` before these methods are invoked.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionMethod {
    /// The result was within `passthrough_threshold` and is returned unchanged.
    PassThrough,
    /// The result was cut at a char boundary to approximately `passthrough_threshold`
    /// tokens and suffixed with `" [...truncated]"`. The token count is approximate, so
    /// `compressed_tokens` is not guaranteed to equal `passthrough_threshold`.
    Truncated,
    /// The result was shortened further while enforcing the batch-wide token budget.
    BatchTrimmed,
}
84
85/// Output of compressing a single tool result.
86#[derive(Debug, Clone)]
87pub struct CompressedToolResult {
88 /// Compressed (or unchanged) text.
89 pub text: String,
90 /// Token count before compression.
91 pub original_tokens: usize,
92 /// Token count after compression.
93 pub compressed_tokens: usize,
94 /// Method applied.
95 pub method: CompressionMethod,
96}
97
/// Namespace type for the Acon tool-result compression functions (#4021).
///
/// The type carries no data. Its associated functions take the text, a token counter
/// and a config, and hand back the (possibly shortened) text plus metadata — they do
/// no I/O, are not async, and hold no agent state.
///
/// # Examples
///
/// ```
/// use zeph_context::tool_result_compress::{
///     ToolResultCompressor, ToolResultCompressionConfig, CompressionMethod,
/// };
///
/// struct WordCounter;
/// impl zeph_common::memory::TokenCounting for WordCounter {
///     fn count_tokens(&self, text: &str) -> usize { text.split_whitespace().count() }
///     fn count_tool_schema_tokens(&self, _schema: &serde_json::Value) -> usize { 0 }
/// }
///
/// let config = ToolResultCompressionConfig {
///     passthrough_threshold: 5,
///     summarize_threshold: 10,
///     total_budget: 20,
/// };
/// let tc = WordCounter;
///
/// let result = ToolResultCompressor::compress_single("hello world", &tc, &config);
/// assert_eq!(result.method, CompressionMethod::PassThrough);
/// ```
pub struct ToolResultCompressor;
127
128impl ToolResultCompressor {
129 /// Compress a single tool result text.
130 ///
131 /// - Below `passthrough_threshold` tokens: returned unchanged.
132 /// - At or above `passthrough_threshold` tokens: char-truncated so that the truncated
133 /// text has approximately `passthrough_threshold` tokens (using a char-boundary-safe
134 /// cut at `passthrough_threshold * 4` bytes), with `" [...truncated]"` appended.
135 ///
136 /// # Examples
137 ///
138 /// ```
139 /// use zeph_context::tool_result_compress::{
140 /// ToolResultCompressor, ToolResultCompressionConfig, CompressionMethod,
141 /// };
142 ///
143 /// struct WordCounter;
144 /// impl zeph_common::memory::TokenCounting for WordCounter {
145 /// fn count_tokens(&self, text: &str) -> usize { text.split_whitespace().count() }
146 /// fn count_tool_schema_tokens(&self, _schema: &serde_json::Value) -> usize { 0 }
147 /// }
148 ///
149 /// let config = ToolResultCompressionConfig {
150 /// passthrough_threshold: 3,
151 /// summarize_threshold: 10,
152 /// total_budget: 20,
153 /// };
154 /// let tc = WordCounter;
155 ///
156 /// // Short text passes through.
157 /// let r = ToolResultCompressor::compress_single("one two three", &tc, &config);
158 /// assert_eq!(r.method, CompressionMethod::PassThrough);
159 ///
160 /// // Long text is truncated.
161 /// let r = ToolResultCompressor::compress_single("one two three four five", &tc, &config);
162 /// assert_eq!(r.method, CompressionMethod::Truncated);
163 /// assert!(r.text.ends_with("[...truncated]"));
164 /// ```
165 #[must_use]
166 pub fn compress_single(
167 text: &str,
168 tc: &dyn TokenCounting,
169 config: &ToolResultCompressionConfig,
170 ) -> CompressedToolResult {
171 let original_tokens = tc.count_tokens(text);
172
173 if original_tokens <= config.passthrough_threshold {
174 return CompressedToolResult {
175 text: text.to_owned(),
176 original_tokens,
177 compressed_tokens: original_tokens,
178 method: CompressionMethod::PassThrough,
179 };
180 }
181
182 // Truncate at a char boundary. The heuristic is ~4 bytes per token.
183 // Subtract the marker's byte length so the final result (text + marker) fits within
184 // the passthrough_threshold token budget — without this the marker inflates the count.
185 let byte_limit = config
186 .passthrough_threshold
187 .saturating_mul(4)
188 .saturating_sub(TRUNCATION_MARKER.len());
189 let cut = text.floor_char_boundary(byte_limit.min(text.len()));
190 let truncated = format!("{}{}", &text[..cut], TRUNCATION_MARKER);
191 let compressed_tokens = tc.count_tokens(&truncated);
192
193 CompressedToolResult {
194 text: truncated,
195 original_tokens,
196 compressed_tokens,
197 method: CompressionMethod::Truncated,
198 }
199 }
200
201 /// Compress a batch of tool results, enforcing both per-result and total-budget limits.
202 ///
203 /// 1. Applies `compress_single` to each entry.
204 /// 2. If the total compressed tokens still exceed `total_budget`, trims results in
205 /// descending token-count order. Ties are broken by `entry.index` (lower index
206 /// trimmed first) for deterministic output.
207 ///
208 /// Returns one `CompressedToolResult` per input entry, in the same order.
209 ///
210 /// # Examples
211 ///
212 /// ```
213 /// use zeph_context::tool_result_compress::{
214 /// ToolResultCompressor, ToolResultCompressionConfig, ToolResultEntry, CompressionMethod,
215 /// };
216 ///
217 /// struct WordCounter;
218 /// impl zeph_common::memory::TokenCounting for WordCounter {
219 /// fn count_tokens(&self, text: &str) -> usize { text.split_whitespace().count() }
220 /// fn count_tool_schema_tokens(&self, _schema: &serde_json::Value) -> usize { 0 }
221 /// }
222 ///
223 /// let config = ToolResultCompressionConfig {
224 /// passthrough_threshold: 100,
225 /// summarize_threshold: 200,
226 /// total_budget: 5,
227 /// };
228 /// let tc = WordCounter;
229 /// let entries = vec![
230 /// ToolResultEntry { tool_name: "shell", text: "one two three", index: 0 },
231 /// ToolResultEntry { tool_name: "fetch", text: "four five six", index: 1 },
232 /// ];
233 /// let results = ToolResultCompressor::compress_batch(&entries, &tc, &config);
234 /// assert_eq!(results.len(), 2);
235 /// // Combined tokens (6) exceed total_budget (5) → at least one is BatchTrimmed.
236 /// assert!(results.iter().any(|r| r.method == CompressionMethod::BatchTrimmed));
237 /// ```
238 #[must_use]
239 pub fn compress_batch(
240 entries: &[ToolResultEntry<'_>],
241 tc: &dyn TokenCounting,
242 config: &ToolResultCompressionConfig,
243 ) -> Vec<CompressedToolResult> {
244 if entries.is_empty() {
245 return Vec::new();
246 }
247
248 // Phase 1: per-result compression.
249 let mut results: Vec<CompressedToolResult> = entries
250 .iter()
251 .map(|e| Self::compress_single(e.text, tc, config))
252 .collect();
253
254 // Phase 2: batch budget enforcement.
255 let total_tokens: usize = results.iter().map(|r| r.compressed_tokens).sum();
256 if total_tokens <= config.total_budget {
257 return results;
258 }
259
260 // Build a sorted list of (compressed_tokens, original_index) to trim the largest first.
261 // Tiebreaker: lower input index is trimmed first (critic note M3).
262 let mut order: Vec<usize> = (0..results.len()).collect();
263 order.sort_unstable_by(|&a, &b| {
264 let ta = results[a].compressed_tokens;
265 let tb = results[b].compressed_tokens;
266 // Descending by tokens, then ascending by index for ties.
267 tb.cmp(&ta)
268 .then_with(|| entries[a].index.cmp(&entries[b].index))
269 });
270
271 let mut remaining = total_tokens;
272 for &idx in &order {
273 if remaining <= config.total_budget {
274 break;
275 }
276 let current = results[idx].compressed_tokens;
277 // Shrink this result proportionally so the total fits.
278 let excess = remaining.saturating_sub(config.total_budget);
279 let target_tokens = current.saturating_sub(excess.min(current));
280 // target_tokens == 0 means we'd remove the entire result — keep a minimal stub.
281 let byte_limit = target_tokens.max(1).saturating_mul(4);
282 let cut = results[idx]
283 .text
284 .floor_char_boundary(byte_limit.min(results[idx].text.len()));
285 let trimmed = format!("{} [...truncated]", &results[idx].text[..cut]);
286 let new_tokens = tc.count_tokens(&trimmed);
287 remaining = remaining.saturating_sub(current).saturating_add(new_tokens);
288 results[idx].compressed_tokens = new_tokens;
289 results[idx].text = trimmed;
290 results[idx].method = CompressionMethod::BatchTrimmed;
291 }
292
293 results
294 }
295}
296
#[cfg(test)]
mod tests {
    use super::*;

    /// Test double that treats every whitespace-separated word as one token.
    struct WordCounter;

    impl TokenCounting for WordCounter {
        fn count_tokens(&self, text: &str) -> usize {
            text.split_whitespace().count()
        }

        fn count_tool_schema_tokens(&self, _schema: &serde_json::Value) -> usize {
            0
        }
    }

    /// Shorthand for building a config from its three thresholds.
    fn cfg(passthrough: usize, summarize: usize, budget: usize) -> ToolResultCompressionConfig {
        ToolResultCompressionConfig {
            passthrough_threshold: passthrough,
            summarize_threshold: summarize,
            total_budget: budget,
        }
    }

    /// Two entries of three words each (six tokens in total under `WordCounter`).
    fn three_word_pair() -> [ToolResultEntry<'static>; 2] {
        [
            ToolResultEntry {
                tool_name: "a",
                text: "one two three",
                index: 0,
            },
            ToolResultEntry {
                tool_name: "b",
                text: "four five six",
                index: 1,
            },
        ]
    }

    #[test]
    fn compress_single_passthrough_below_threshold() {
        let config = cfg(10, 20, 40);
        let out = ToolResultCompressor::compress_single("one two three", &WordCounter, &config);
        assert_eq!(out.method, CompressionMethod::PassThrough);
        assert_eq!(out.text, "one two three");
        assert_eq!(out.original_tokens, out.compressed_tokens);
    }

    #[test]
    fn compress_single_passthrough_at_exact_threshold() {
        // Three words against a threshold of three: still a pass-through.
        let config = cfg(3, 10, 20);
        let out = ToolResultCompressor::compress_single("a b c", &WordCounter, &config);
        assert_eq!(out.method, CompressionMethod::PassThrough);
    }

    #[test]
    fn compress_single_truncated_above_threshold() {
        let config = cfg(2, 10, 20);
        let out = ToolResultCompressor::compress_single(
            "one two three four five",
            &WordCounter,
            &config,
        );
        assert_eq!(out.method, CompressionMethod::Truncated);
        assert!(out.text.ends_with("[...truncated]"));
        assert!(out.compressed_tokens <= out.original_tokens);
    }

    #[test]
    fn compress_single_empty_text_passthrough() {
        let config = cfg(5, 10, 20);
        let out = ToolResultCompressor::compress_single("", &WordCounter, &config);
        assert_eq!(out.method, CompressionMethod::PassThrough);
        assert_eq!(out.text, "");
    }

    #[test]
    fn compress_batch_empty_input() {
        let config = cfg(5, 10, 20);
        let out = ToolResultCompressor::compress_batch(&[], &WordCounter, &config);
        assert!(out.is_empty());
    }

    #[test]
    fn compress_batch_within_budget_no_batch_trim() {
        let config = cfg(100, 200, 1000);
        let entries = [
            ToolResultEntry {
                tool_name: "a",
                text: "one two",
                index: 0,
            },
            ToolResultEntry {
                tool_name: "b",
                text: "three four",
                index: 1,
            },
        ];
        let out = ToolResultCompressor::compress_batch(&entries, &WordCounter, &config);
        assert!(
            out.iter()
                .all(|r| r.method != CompressionMethod::BatchTrimmed),
            "no batch trimming expected within budget"
        );
    }

    #[test]
    fn compress_batch_exceeds_budget_trims_largest_first() {
        // Six tokens of input against a budget of three.
        let config = cfg(100, 200, 3);
        let entries = three_word_pair();
        let out = ToolResultCompressor::compress_batch(&entries, &WordCounter, &config);
        assert_eq!(out.len(), 2);

        // At least one result must have been batch-trimmed.
        assert!(
            out.iter()
                .any(|r| r.method == CompressionMethod::BatchTrimmed)
        );

        // The total stays near the budget; the slack covers the truncation marker.
        let total: usize = out.iter().map(|r| r.compressed_tokens).sum();
        assert!(
            total <= config.total_budget + 3,
            "total {total} should be near budget {}",
            config.total_budget
        );
    }

    #[test]
    fn compress_batch_tiebreaker_lower_index_trimmed_first() {
        // Equal token counts and a budget (3) below the total (6): index 0 goes first.
        let config = cfg(100, 200, 3);
        let entries = three_word_pair();
        let out = ToolResultCompressor::compress_batch(&entries, &WordCounter, &config);
        assert_eq!(
            out[0].method,
            CompressionMethod::BatchTrimmed,
            "lower index must be trimmed first on equal token counts"
        );
    }

    #[test]
    fn acon_config_default_into_compression_config() {
        let converted = ToolResultCompressionConfig::from(&AconConfig::default());
        assert_eq!(converted.passthrough_threshold, 2000);
        assert_eq!(converted.summarize_threshold, 4000);
        assert_eq!(converted.total_budget, 8000);
    }
}