codetether_agent/session/helper/experimental/mod.rs
1//! Experimental context-management strategies applied before RLM compaction.
2//!
3//! The agentic loop re-sends the entire conversation every step, which
4//! means two structural wastes dominate token usage:
5//!
6//! 1. **Duplicate tool outputs.** Agents frequently re-read the same
7//! file, re-run the same `ls`, or re-grep for the same pattern
8//! across many steps. The verbatim content appears multiple times
9//! in the history. See [`dedup`].
10//! 2. **Stale oversized tool outputs.** A 40 KB `read_file` result from
11//! step 2 is rarely relevant at step 30, yet it still costs full
12//! input tokens every turn. See [`snippet`].
13//!
14//! Both strategies are **lossy** in the strict sense but preserve
15//! referenceability: the model can always ask the agent to re-run the
16//! original tool call if it needs the full output back.
17//!
18//! # Composition
19//!
//! [`apply_all`] runs every strategy in a fixed order against the live
//! [`Message`] buffer, mutating it in place: [`thinking_prune`],
//! [`tool_call_dedup`], [`dedup`], [`snippet`], [`lingua`], and
//! [`streaming_llm`], followed by a final [`pairing`] repair pass.
//! Callers (the two prompt loops) invoke it immediately before
//! [`enforce_context_window`](super::compression::enforce_context_window)
//! so the RLM compaction pass sees the already-shrunken buffer. The
//! returned [`ExperimentalStats`] is logged at `info` level whenever
//! any bytes were saved, for observability.
27//!
28//! # Default-on, no config
29//!
30//! These strategies are always active — there is intentionally no env
31//! flag to disable them. If a future regression requires an escape
32//! hatch, add a field to [`crate::config::Config`] rather than a magic
33//! env var so the setting is discoverable.
34//!
35//! # Examples
36//!
37//! ```rust
38//! use codetether_agent::provider::{ContentPart, Message, Role};
39//! use codetether_agent::session::helper::experimental::apply_all;
40//!
41//! let tool_result = ContentPart::ToolResult {
42//! tool_call_id: "call_a".into(),
43//! content: "file contents: hello world".repeat(40),
44//! };
45//! let duplicate = ContentPart::ToolResult {
46//! tool_call_id: "call_b".into(),
47//! content: "file contents: hello world".repeat(40),
48//! };
49//!
50//! let mut msgs = vec![
51//! Message { role: Role::Tool, content: vec![tool_result] },
52//! Message { role: Role::Tool, content: vec![duplicate] },
53//! ];
54//!
55//! let stats = apply_all(&mut msgs);
56//! assert!(stats.total_bytes_saved > 0);
57//! assert!(stats.dedup_hits >= 1);
58//! ```
59
60pub mod dedup;
61pub mod lingua;
62pub mod pairing;
63pub mod snippet;
64pub mod streaming_llm;
65pub mod thinking_prune;
66pub mod tool_call_dedup;
67
68use crate::provider::Message;
69
/// Combined counters reported by the strategies that [`apply_all`] runs.
///
/// Every field is a plain running total: [`apply_all`] starts from
/// `ExperimentalStats::default()` (all zeros) and adds each strategy's
/// result on top, so the value it returns describes the whole pipeline
/// rather than any single pass.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ExperimentalStats {
    /// How many tool-result content blocks were swapped for a dedup
    /// marker.
    pub dedup_hits: usize,
    /// How many tool-result content blocks were cut down to a
    /// head/tail snippet.
    pub snippet_hits: usize,
    /// Bytes removed from the `Vec<Message>`, summed over all
    /// strategies.
    pub total_bytes_saved: usize,
}
80
81impl ExperimentalStats {
82 fn merge(&mut self, other: ExperimentalStats) {
83 self.dedup_hits += other.dedup_hits;
84 self.snippet_hits += other.snippet_hits;
85 self.total_bytes_saved += other.total_bytes_saved;
86 }
87}
88
89/// Apply every experimental strategy in order, mutating `messages` in
90/// place. Returns aggregate statistics suitable for logging.
91///
92/// Order matters:
93///
94/// 1. [`dedup::dedup_tool_outputs`] runs first because it can eliminate
95/// a duplicate in full before [`snippet`] has to think about it.
96/// 2. [`snippet::snippet_stale_tool_outputs`] runs second, snipping any
97/// remaining oversized tool outputs older than the recency window.
98///
99/// # Examples
100///
101/// ```rust
102/// use codetether_agent::provider::{ContentPart, Message, Role};
103/// use codetether_agent::session::helper::experimental::apply_all;
104///
105/// let mut msgs: Vec<Message> = Vec::new();
106/// let stats = apply_all(&mut msgs);
107/// assert_eq!(stats.total_bytes_saved, 0);
108/// ```
109pub fn apply_all(messages: &mut Vec<Message>) -> ExperimentalStats {
110 let mut stats = ExperimentalStats::default();
111 stats.merge(thinking_prune::prune_thinking(messages));
112 stats.merge(tool_call_dedup::collapse_duplicate_calls(messages));
113 stats.merge(dedup::dedup_tool_outputs(messages));
114 stats.merge(snippet::snippet_stale_tool_outputs(messages));
115 stats.merge(lingua::prune_low_entropy(messages));
116 stats.merge(streaming_llm::trim_middle(messages));
117 // Correctness pass: repair any orphaned tool_call/tool_result
118 // pairs broken by the strategies above. Must run LAST.
119 stats.merge(pairing::repair_orphans(messages));
120 if stats.total_bytes_saved > 0 {
121 tracing::info!(
122 dedup_hits = stats.dedup_hits,
123 snippet_hits = stats.snippet_hits,
124 bytes_saved = stats.total_bytes_saved,
125 "experimental context strategies applied"
126 );
127 }
128 stats
129}