// codetether_agent/session/helper/experimental/thinking_prune.rs
1//! Strip extended-thinking blocks from older messages.
2//!
3//! Modern reasoning models (Claude extended thinking, DeepSeek R1,
4//! GPT-5 reasoning, Gemini thought summaries) emit `Thinking` content
5//! parts that can be **10-100×** larger than the assistant's actual
6//! reply. These blocks help the *current* turn's decision but carry
7//! almost no value once the turn has produced its tool calls and the
8//! loop has moved on — the final answer/action already reflects them.
9//!
10//! This module removes [`ContentPart::Thinking`] from every message
11//! older than [`KEEP_LAST_MESSAGES`]. Recent thinking is preserved so
12//! the model can still reference its own recent chain-of-thought.
13//!
14//! # Safety
15//!
16//! * Providers that inject thinking for correctness (cache-coherent
17//!   thought signatures on Gemini `ToolCall`) are unaffected — those
18//!   signatures live on `ContentPart::ToolCall::thought_signature`,
19//!   not on `Thinking` blocks.
20//! * An assistant message whose *only* content was a thinking block
21//!   becomes empty; such messages are removed entirely to keep the
22//!   buffer a valid provider-consumable shape.
23//!
24//! # Always-on
25//!
26//! No config. Thinking blocks are known to be non-essential after the
27//! turn completes; stripping them is the single highest-ROI shrink for
28//! reasoning-heavy agent loops.
29
30use super::ExperimentalStats;
31use crate::provider::{ContentPart, Message, Role};
32
/// Keep thinking blocks in this many trailing messages.
///
/// The last `KEEP_LAST_MESSAGES` entries of the buffer are left
/// untouched so the model can still reference its own recent
/// chain-of-thought; everything older is eligible for stripping.
pub const KEEP_LAST_MESSAGES: usize = 4;
35
36/// Strip `Thinking` parts from older messages and drop any messages
37/// that become empty as a result.
38///
39/// # Examples
40///
41/// ```rust
42/// use codetether_agent::provider::{ContentPart, Message, Role};
43/// use codetether_agent::session::helper::experimental::thinking_prune::{
44///     prune_thinking, KEEP_LAST_MESSAGES,
45/// };
46///
47/// let thinking = ContentPart::Thinking {
48///     text: "Let me consider every option...".repeat(100),
49/// };
50/// let reply = ContentPart::Text { text: "Here is my answer.".into() };
51///
52/// let mut msgs = vec![
53///     Message { role: Role::Assistant, content: vec![thinking.clone(), reply.clone()] },
54///     Message { role: Role::Assistant, content: vec![thinking.clone()] }, // thinking-only
55/// ];
56/// for i in 0..KEEP_LAST_MESSAGES + 1 {
57///     msgs.push(Message {
58///         role: Role::User,
59///         content: vec![ContentPart::Text { text: format!("q{i}") }],
60///     });
61/// }
62///
63/// let stats = prune_thinking(&mut msgs);
64/// assert!(stats.total_bytes_saved > 1000);
65/// // Thinking-only message was removed entirely.
66/// // Remaining first message retains the Text reply.
67/// assert!(matches!(&msgs[0].content[..], [ContentPart::Text { .. }]));
68/// ```
69pub fn prune_thinking(messages: &mut Vec<Message>) -> ExperimentalStats {
70    let mut stats = ExperimentalStats::default();
71    let total = messages.len();
72    if total <= KEEP_LAST_MESSAGES {
73        return stats;
74    }
75    let eligible = total - KEEP_LAST_MESSAGES;
76
77    // Prune parts in-place.
78    for msg in messages[..eligible].iter_mut() {
79        if msg.role != Role::Assistant {
80            continue;
81        }
82        let before: usize = msg.content.iter().map(thinking_bytes).sum();
83        msg.content
84            .retain(|p| !matches!(p, ContentPart::Thinking { .. }));
85        let after: usize = msg.content.iter().map(thinking_bytes).sum();
86        let saved = before.saturating_sub(after);
87        if saved > 0 {
88            stats.total_bytes_saved += saved;
89            stats.snippet_hits += 1;
90        }
91    }
92
93    // Drop assistant messages that became empty. Indices in [0, eligible).
94    let mut write = 0;
95    for read in 0..messages.len() {
96        let drop = read < eligible
97            && messages[read].role == Role::Assistant
98            && messages[read].content.is_empty();
99        if !drop {
100            if write != read {
101                messages.swap(write, read);
102            }
103            write += 1;
104        }
105    }
106    messages.truncate(write);
107
108    stats
109}
110
111fn thinking_bytes(p: &ContentPart) -> usize {
112    match p {
113        ContentPart::Thinking { text } => text.len(),
114        _ => 0,
115    }
116}
117
118#[cfg(test)]
119mod tests {
120    use super::*;
121
122    #[test]
123    fn recent_thinking_preserved() {
124        let mut msgs = vec![Message {
125            role: Role::Assistant,
126            content: vec![ContentPart::Thinking {
127                text: "x".repeat(5000),
128            }],
129        }];
130        let stats = prune_thinking(&mut msgs);
131        assert_eq!(stats.total_bytes_saved, 0);
132    }
133
134    #[test]
135    fn empty_assistant_dropped() {
136        let mut msgs = vec![Message {
137            role: Role::Assistant,
138            content: vec![ContentPart::Thinking {
139                text: "x".repeat(1000),
140            }],
141        }];
142        for i in 0..KEEP_LAST_MESSAGES + 1 {
143            msgs.push(Message {
144                role: Role::User,
145                content: vec![ContentPart::Text {
146                    text: format!("q{i}"),
147                }],
148            });
149        }
150        let before = msgs.len();
151        prune_thinking(&mut msgs);
152        assert_eq!(msgs.len(), before - 1);
153    }
154
155    #[test]
156    fn user_thinking_untouched() {
157        // Users can't really emit thinking, but defense-in-depth.
158        let mut msgs = vec![Message {
159            role: Role::User,
160            content: vec![ContentPart::Thinking {
161                text: "user-thought".repeat(100),
162            }],
163        }];
164        for i in 0..KEEP_LAST_MESSAGES + 1 {
165            msgs.push(Message {
166                role: Role::Assistant,
167                content: vec![ContentPart::Text {
168                    text: format!("r{i}"),
169                }],
170            });
171        }
172        let stats = prune_thinking(&mut msgs);
173        assert_eq!(stats.total_bytes_saved, 0);
174    }
175}