Skip to main content

codetether_agent/session/helper/experimental/
mod.rs

1//! Experimental context-management strategies applied before RLM compaction.
2//!
3//! The agentic loop re-sends the entire conversation every step, which
4//! means two structural wastes dominate token usage:
5//!
6//! 1. **Duplicate tool outputs.** Agents frequently re-read the same
7//!    file, re-run the same `ls`, or re-grep for the same pattern
8//!    across many steps. The verbatim content appears multiple times
9//!    in the history. See [`dedup`].
10//! 2. **Stale oversized tool outputs.** A 40 KB `read_file` result from
11//!    step 2 is rarely relevant at step 30, yet it still costs full
12//!    input tokens every turn. See [`snippet`].
13//!
14//! Both strategies are **lossy** in the strict sense but preserve
15//! referenceability: the model can always ask the agent to re-run the
16//! original tool call if it needs the full output back.
17//!
18//! # Composition
19//!
20//! [`apply_all`] runs every strategy in a fixed order against the live
21//! [`Message`] buffer, mutating in place. Callers (the two prompt loops)
22//! invoke it immediately before
23//! [`enforce_context_window`](super::compression::enforce_context_window)
24//! so the RLM compaction pass sees the already-shrunken buffer. The
25//! returned [`ExperimentalStats`] is logged at `info` level for
26//! observability.
27//!
28//! # Default-on, no config
29//!
30//! The conservative strategies in [`apply_all`] are always active —
31//! there is intentionally no env flag to disable them. More aggressive
32//! transforms should stay opt-in until they prove they do not erase
33//! still-relevant chat state.
34//!
35//! # Examples
36//!
37//! ```rust
38//! use codetether_agent::provider::{ContentPart, Message, Role};
39//! use codetether_agent::session::helper::experimental::apply_all;
40//!
41//! let tool_result = ContentPart::ToolResult {
42//!     tool_call_id: "call_a".into(),
43//!     content: "file contents: hello world".repeat(40),
44//! };
45//! let duplicate = ContentPart::ToolResult {
46//!     tool_call_id: "call_b".into(),
47//!     content: "file contents: hello world".repeat(40),
48//! };
49//!
50//! let mut msgs = vec![
51//!     Message { role: Role::Tool, content: vec![tool_result] },
52//!     Message { role: Role::Tool, content: vec![duplicate] },
53//! ];
54//!
55//! let stats = apply_all(&mut msgs);
56//! assert!(stats.total_bytes_saved > 0);
57//! assert!(stats.dedup_hits >= 1);
58//! ```
59
60pub mod dedup;
61pub mod lingua;
62pub mod pairing;
63#[allow(dead_code)]
64pub mod snippet;
65#[allow(dead_code)]
66pub mod streaming_llm;
67pub mod thinking_prune;
68#[allow(dead_code)]
69pub mod tool_call_dedup;
70
71use crate::provider::Message;
72
/// Aggregate outcome of every strategy in [`apply_all`].
///
/// All counters start at zero (via [`Default`]) and are accumulated
/// field-by-field as each strategy reports its own result.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ExperimentalStats {
    /// Number of tool-result content blocks replaced by a dedup marker.
    pub dedup_hits: usize,
    /// Number of tool-result content blocks head/tail-snipped.
    pub snippet_hits: usize,
    /// Total bytes removed from the `Vec<Message>` across all strategies.
    pub total_bytes_saved: usize,
}

impl ExperimentalStats {
    /// Fold `other` into `self` by adding every counter field-wise.
    ///
    /// `other` is destructured exhaustively on purpose: adding a new
    /// counter to the struct without also summing it here becomes a
    /// compile error instead of a silently dropped statistic.
    fn merge(&mut self, other: ExperimentalStats) {
        let ExperimentalStats {
            dedup_hits,
            snippet_hits,
            total_bytes_saved,
        } = other;
        self.dedup_hits += dedup_hits;
        self.snippet_hits += snippet_hits;
        self.total_bytes_saved += total_bytes_saved;
    }
}
91
92/// Apply the default-safe experimental strategies in order, mutating
93/// `messages` in place. Returns aggregate statistics suitable for
94/// logging.
95///
96/// Order matters:
97///
98/// 1. [`dedup::dedup_tool_outputs`] runs before text cleanup so repeated
99///    tool outputs collapse against the original bytes.
100/// 2. [`lingua::prune_low_entropy`] runs after that to strip formatting
101///    noise from older assistant text without touching semantics.
102///
103/// # Examples
104///
105/// ```rust
106/// use codetether_agent::provider::{ContentPart, Message, Role};
107/// use codetether_agent::session::helper::experimental::apply_all;
108///
109/// let mut msgs: Vec<Message> = Vec::new();
110/// let stats = apply_all(&mut msgs);
111/// assert_eq!(stats.total_bytes_saved, 0);
112/// ```
113pub fn apply_all(messages: &mut Vec<Message>) -> ExperimentalStats {
114    let mut stats = ExperimentalStats::default();
115    stats.merge(thinking_prune::prune_thinking(messages));
116    stats.merge(dedup::dedup_tool_outputs(messages));
117    stats.merge(lingua::prune_low_entropy(messages));
118    // Correctness pass: repair any orphaned tool_call/tool_result
119    // pairs broken by the strategies above. Must run LAST.
120    stats.merge(pairing::repair_orphans(messages));
121    if stats.total_bytes_saved > 0 {
122        tracing::info!(
123            dedup_hits = stats.dedup_hits,
124            snippet_hits = stats.snippet_hits,
125            bytes_saved = stats.total_bytes_saved,
126            "experimental context strategies applied"
127        );
128    }
129    stats
130}
131
132#[cfg(test)]
133mod apply_all_tests;
134#[cfg(test)]
135mod apply_all_tool_history_tests;