codetether_agent/session/helper/experimental/mod.rs
1//! Experimental context-management strategies applied before RLM compaction.
2//!
3//! The agentic loop re-sends the entire conversation every step, which
4//! means two structural wastes dominate token usage:
5//!
6//! 1. **Duplicate tool outputs.** Agents frequently re-read the same
7//! file, re-run the same `ls`, or re-grep for the same pattern
8//! across many steps. The verbatim content appears multiple times
9//! in the history. See [`dedup`].
10//! 2. **Stale oversized tool outputs.** A 40 KB `read_file` result from
11//! step 2 is rarely relevant at step 30, yet it still costs full
12//! input tokens every turn. See [`snippet`].
13//!
14//! Both strategies are **lossy** in the strict sense but preserve
15//! referenceability: the model can always ask the agent to re-run the
16//! original tool call if it needs the full output back.
17//!
18//! # Composition
19//!
//! [`apply_all`] runs the default-on strategies in a fixed order against
//! the live [`Message`] buffer, mutating it in place. Callers (the two
//! prompt loops) invoke it immediately before
//! [`enforce_context_window`](super::compression::enforce_context_window)
//! so the RLM compaction pass sees the already-shrunken buffer. The
//! returned [`ExperimentalStats`] is logged at `info` level, for
//! observability, whenever at least one byte was saved.
27//!
28//! # Default-on, no config
29//!
30//! The conservative strategies in [`apply_all`] are always active —
31//! there is intentionally no env flag to disable them. More aggressive
32//! transforms should stay opt-in until they prove they do not erase
33//! still-relevant chat state.
34//!
35//! # Examples
36//!
37//! ```rust
38//! use codetether_agent::provider::{ContentPart, Message, Role};
39//! use codetether_agent::session::helper::experimental::apply_all;
40//!
41//! let tool_result = ContentPart::ToolResult {
42//! tool_call_id: "call_a".into(),
43//! content: "file contents: hello world".repeat(40),
44//! };
45//! let duplicate = ContentPart::ToolResult {
46//! tool_call_id: "call_b".into(),
47//! content: "file contents: hello world".repeat(40),
48//! };
49//!
50//! let mut msgs = vec![
51//! Message { role: Role::Tool, content: vec![tool_result] },
52//! Message { role: Role::Tool, content: vec![duplicate] },
53//! ];
54//!
55//! let stats = apply_all(&mut msgs);
56//! assert!(stats.total_bytes_saved > 0);
57//! assert!(stats.dedup_hits >= 1);
58//! ```
59
60pub mod dedup;
61pub mod lingua;
62pub mod pairing;
63#[allow(dead_code)]
64pub mod snippet;
65#[allow(dead_code)]
66pub mod streaming_llm;
67pub mod thinking_prune;
68#[allow(dead_code)]
69pub mod tool_call_dedup;
70
71use crate::provider::Message;
72
/// Combined result of all the context-shrinking passes executed by
/// [`apply_all`].
///
/// Every counter starts at zero (via [`Default`]) and only ever grows as
/// the per-strategy results are folded in.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct ExperimentalStats {
    /// How many tool-result content blocks were swapped for a dedup marker.
    pub dedup_hits: usize,
    /// How many tool-result content blocks were cut down to a head/tail snippet.
    pub snippet_hits: usize,
    /// Sum of the bytes removed from the `Vec<Message>` by all strategies.
    pub total_bytes_saved: usize,
}

impl ExperimentalStats {
    /// Fold the counters of `other` into `self`, one field at a time.
    ///
    /// `other` is consumed; destructuring it exhaustively means a newly
    /// added field cannot be silently left out of the merge.
    fn merge(&mut self, other: ExperimentalStats) {
        let ExperimentalStats {
            dedup_hits,
            snippet_hits,
            total_bytes_saved,
        } = other;

        self.dedup_hits += dedup_hits;
        self.snippet_hits += snippet_hits;
        self.total_bytes_saved += total_bytes_saved;
    }
}
91
92/// Apply the default-safe experimental strategies in order, mutating
93/// `messages` in place. Returns aggregate statistics suitable for
94/// logging.
95///
96/// Order matters:
97///
98/// 1. [`dedup::dedup_tool_outputs`] runs before text cleanup so repeated
99/// tool outputs collapse against the original bytes.
100/// 2. [`lingua::prune_low_entropy`] runs after that to strip formatting
101/// noise from older assistant text without touching semantics.
102///
103/// # Examples
104///
105/// ```rust
106/// use codetether_agent::provider::{ContentPart, Message, Role};
107/// use codetether_agent::session::helper::experimental::apply_all;
108///
109/// let mut msgs: Vec<Message> = Vec::new();
110/// let stats = apply_all(&mut msgs);
111/// assert_eq!(stats.total_bytes_saved, 0);
112/// ```
113pub fn apply_all(messages: &mut Vec<Message>) -> ExperimentalStats {
114 let mut stats = ExperimentalStats::default();
115 stats.merge(thinking_prune::prune_thinking(messages));
116 stats.merge(dedup::dedup_tool_outputs(messages));
117 stats.merge(lingua::prune_low_entropy(messages));
118 // Correctness pass: repair any orphaned tool_call/tool_result
119 // pairs broken by the strategies above. Must run LAST.
120 stats.merge(pairing::repair_orphans(messages));
121 if stats.total_bytes_saved > 0 {
122 tracing::info!(
123 dedup_hits = stats.dedup_hits,
124 snippet_hits = stats.snippet_hits,
125 bytes_saved = stats.total_bytes_saved,
126 "experimental context strategies applied"
127 );
128 }
129 stats
130}
131
132#[cfg(test)]
133mod apply_all_tests;
134#[cfg(test)]
135mod apply_all_tool_history_tests;