1use crate::protocol::{ChatMessage, Role};
2
/// Token limits used when deciding how much conversation history can be sent.
///
/// `max_context_tokens` is the total capacity shared by input and output;
/// `reserve_output_tokens` is the slice of it held back for output. Their
/// difference is exposed through [`ContextConfig::input_budget`].
#[derive(Debug, Clone)]
pub struct ContextConfig {
    /// Total token capacity that input and reserved output share.
    pub max_context_tokens: usize,
    /// Tokens subtracted from `max_context_tokens` to leave room for output.
    pub reserve_output_tokens: usize,
}

impl Default for ContextConfig {
    /// Returns a configuration with a 200,000-token limit and 8,192 tokens
    /// reserved for output.
    fn default() -> Self {
        let max_context_tokens = 200_000;
        let reserve_output_tokens = 8_192;
        Self {
            max_context_tokens,
            reserve_output_tokens,
        }
    }
}

impl ContextConfig {
    /// Number of tokens available for input messages.
    ///
    /// This is `max_context_tokens - reserve_output_tokens`, clamped at zero
    /// when the reservation is at least as large as the whole limit.
    pub fn input_budget(&self) -> usize {
        let Self {
            max_context_tokens: limit,
            reserve_output_tokens: reserved,
        } = *self;
        limit.saturating_sub(reserved)
    }
}
30
31#[derive(Debug, Clone)]
33pub struct CompactionResult {
34 pub messages: Vec<ChatMessage>,
36 pub dropped_count: usize,
38 pub tokens_before: usize,
40 pub tokens_after: usize,
42}
43
/// Cheap token estimate for one message body.
///
/// Counts one token per four bytes of `text` (UTF-8 byte length, rounded up)
/// and adds a flat four-token overhead, so an empty string estimates to 4.
/// This is a heuristic, not a real tokenizer.
pub fn estimate_tokens(text: &str) -> usize {
    // Bytes, not characters: multi-byte UTF-8 text counts by its encoded size.
    const BYTES_PER_TOKEN: usize = 4;
    // Flat amount added to every message regardless of its length.
    const FIXED_OVERHEAD: usize = 4;

    FIXED_OVERHEAD + text.len().div_ceil(BYTES_PER_TOKEN)
}
55
56pub fn estimate_total_tokens(messages: &[ChatMessage]) -> usize {
58 messages.iter().map(|m| estimate_tokens(&m.content)).sum()
59}
60
61pub fn compact_messages(
71 messages: &[ChatMessage],
72 config: &ContextConfig,
73) -> Option<CompactionResult> {
74 let budget = config.input_budget();
75 let tokens_before = estimate_total_tokens(messages);
76
77 if tokens_before <= budget {
78 return None;
79 }
80
81 let mut system_msgs: Vec<(usize, &ChatMessage)> = Vec::new();
83 let mut other_msgs: Vec<(usize, &ChatMessage)> = Vec::new();
84
85 for (i, msg) in messages.iter().enumerate() {
86 if msg.role == Role::System {
87 system_msgs.push((i, msg));
88 } else {
89 other_msgs.push((i, msg));
90 }
91 }
92
93 let last_user = other_msgs.iter().rposition(|(_i, m)| m.role == Role::User);
97
98 let mut keep_indices: Vec<usize> = system_msgs.iter().map(|(i, _)| *i).collect();
99
100 if let Some(last_user_pos) = last_user {
101 keep_indices.push(other_msgs[last_user_pos].0);
102 }
103
104 let fixed_tokens: usize = keep_indices
106 .iter()
107 .map(|&i| estimate_tokens(&messages[i].content))
108 .sum();
109
110 let mut remaining_budget = budget.saturating_sub(fixed_tokens);
111
112 let mut candidate_indices: Vec<usize> = other_msgs
114 .iter()
115 .map(|(i, _)| *i)
116 .filter(|i| !keep_indices.contains(i))
117 .collect();
118
119 candidate_indices.reverse();
121
122 let mut accepted: Vec<usize> = Vec::new();
123 for idx in &candidate_indices {
124 let msg_tokens = estimate_tokens(&messages[*idx].content);
125 if msg_tokens <= remaining_budget {
126 accepted.push(*idx);
127 remaining_budget = remaining_budget.saturating_sub(msg_tokens);
128 }
129 }
131
132 keep_indices.extend(accepted);
134 keep_indices.sort_unstable();
135 keep_indices.dedup();
136
137 let dropped_count = messages.len() - keep_indices.len();
138 if dropped_count == 0 {
139 return None;
140 }
141
142 let compacted: Vec<ChatMessage> = keep_indices.iter().map(|&i| messages[i].clone()).collect();
143
144 let tokens_after = estimate_total_tokens(&compacted);
145
146 Some(CompactionResult {
151 messages: compacted,
152 dropped_count,
153 tokens_before,
154 tokens_after,
155 })
156}
157
// Unit tests for the token estimator, the default configuration and
// `compact_messages`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::protocol::ChatMessage;

    // Builds a message of the given role via the matching `ChatMessage` constructor.
    fn make_msg(role: Role, content: &str) -> ChatMessage {
        match role {
            Role::System => ChatMessage::system(content),
            Role::User => ChatMessage::user(content),
            Role::Assistant => ChatMessage::assistant(content),
            Role::Tool => ChatMessage::tool(content),
        }
    }

    // A short conversation under a large budget must be left alone (`None`).
    #[test]
    fn no_compaction_when_within_budget() {
        let messages = vec![
            make_msg(Role::System, "You are an agent."),
            make_msg(Role::User, "Hello"),
            make_msg(Role::Assistant, "Hi there!"),
        ];
        let config = ContextConfig {
            max_context_tokens: 100_000,
            reserve_output_tokens: 4_096,
        };
        assert!(compact_messages(&messages, &config).is_none());
    }

    // A long history under a 1,500-token input budget must shrink to fit while
    // keeping the system prompt and the current question.
    #[test]
    fn compaction_drops_oldest_messages() {
        let mut messages = vec![make_msg(Role::System, "sys")];
        // 50 turns, each a short user message followed by a 1,900-byte reply.
        for i in 0..50 {
            messages.push(make_msg(Role::User, &format!("user message {i}")));
            messages.push(make_msg(
                Role::Assistant,
                &"long response text ".repeat(100),
            ));
        }
        messages.push(make_msg(Role::User, "current question"));

        let config = ContextConfig {
            max_context_tokens: 2_000,
            reserve_output_tokens: 500,
        };

        let result = compact_messages(&messages, &config).expect("should compact");
        assert!(result.dropped_count > 0);
        assert!(result.tokens_after <= config.input_budget());

        // The system message stays first because output keeps the original order.
        assert_eq!(result.messages[0].role, Role::System);
        assert_eq!(result.messages[0].content, "sys");

        // The latest user message is pinned and must be present.
        assert!(
            result
                .messages
                .iter()
                .any(|m| m.content == "current question")
        );
    }

    // Both system messages must survive even though the two long messages
    // (4,500 and 5,500 bytes) are far beyond the 400-token input budget.
    #[test]
    fn system_messages_always_preserved() {
        let messages = vec![
            make_msg(Role::System, "system prompt 1"),
            make_msg(Role::System, "system prompt 2"),
            make_msg(Role::User, &"long msg ".repeat(500)),
            make_msg(Role::Assistant, &"long reply ".repeat(500)),
            make_msg(Role::User, "current"),
        ];

        let config = ContextConfig {
            max_context_tokens: 500,
            reserve_output_tokens: 100,
        };

        let result = compact_messages(&messages, &config).expect("should compact");

        let system_count = result
            .messages
            .iter()
            .filter(|m| m.role == Role::System)
            .count();
        assert_eq!(system_count, 2);
    }

    // The most recent user message must be kept and, being last in the input,
    // must also be last in the output.
    #[test]
    fn last_user_message_always_preserved() {
        let messages = vec![
            make_msg(Role::System, "sys"),
            make_msg(Role::User, &"old ".repeat(500)),
            make_msg(Role::Assistant, &"reply ".repeat(500)),
            make_msg(Role::User, "latest question"),
        ];

        let config = ContextConfig {
            max_context_tokens: 200,
            reserve_output_tokens: 50,
        };

        let result = compact_messages(&messages, &config).expect("should compact");
        let last = result.messages.last().expect("should have messages");
        assert_eq!(last.content, "latest question");
    }

    // With equally sized turns, the newest turn must be kept in preference to
    // the oldest one.
    #[test]
    fn recency_bias_keeps_newer_messages() {
        let mut messages = vec![make_msg(Role::System, "sys")];
        for i in 0..20 {
            // Each message carries a 200-character filler so all turns cost about the same.
            messages.push(make_msg(
                Role::User,
                &format!("question {i} {}", "q".repeat(200)),
            ));
            messages.push(make_msg(
                Role::Assistant,
                &format!("answer {i} {}", "x".repeat(200)),
            ));
        }
        messages.push(make_msg(Role::User, "final"));

        let config = ContextConfig {
            max_context_tokens: 1_000,
            reserve_output_tokens: 200,
        };

        let result = compact_messages(&messages, &config).expect("should compact");

        // "answer 0" is followed by a space in the fixture, so this substring
        // matches only turn 0 (not "answer 10" and so on).
        let has_recent = result
            .messages
            .iter()
            .any(|m| m.content.contains("answer 19"));
        let has_old = result
            .messages
            .iter()
            .any(|m| m.content.contains("answer 0"));

        assert!(has_recent, "Recent messages should be kept");
        // Only insist the oldest answer is gone once more than two messages were dropped.
        if result.dropped_count > 2 {
            assert!(!has_old, "Old messages should be dropped first");
        }
    }

    // An empty conversation has an estimate of 0 and is never compacted.
    #[test]
    fn empty_messages_no_compaction() {
        let messages: Vec<ChatMessage> = Vec::new();
        let config = ContextConfig::default();
        assert!(compact_messages(&messages, &config).is_none());
    }

    // A single short user message under the default budget is left alone.
    #[test]
    fn single_user_message_no_compaction_if_within_budget() {
        let messages = vec![make_msg(Role::User, "hello")];
        let config = ContextConfig::default();
        assert!(compact_messages(&messages, &config).is_none());
    }

    // Sanity bounds on the estimator: content cost plus the flat overhead.
    #[test]
    fn estimate_tokens_reasonable() {
        let tokens = estimate_tokens("hello");
        // 5 bytes round up to 2 content tokens, plus 4 overhead.
        assert!(
            tokens >= 5,
            "Should have at least 5 tokens for 'hello' + overhead"
        );
        assert!(tokens <= 10, "Should not be excessive");

        let empty = estimate_tokens("");
        // With no content, only the flat overhead remains.
        assert!(empty >= 4, "Should have overhead");

        let long = estimate_tokens(&"a".repeat(1000));
        // 1,000 bytes give 250 content tokens, plus 4 overhead.
        assert!(long >= 250);
        assert!(long <= 260);
    }

    // Pins the default limits and checks the derived input budget.
    #[test]
    fn default_config_reasonable() {
        let config = ContextConfig::default();
        assert_eq!(config.max_context_tokens, 200_000);
        assert_eq!(config.reserve_output_tokens, 8_192);
        assert!(config.input_budget() > 190_000);
    }

    // Kept plus dropped must equal the input length, and the estimate must shrink.
    #[test]
    fn compaction_result_reports_accurate_counts() {
        let mut messages = vec![make_msg(Role::System, "sys")];
        for i in 0..10 {
            messages.push(make_msg(Role::User, &format!("q{i}")));
            messages.push(make_msg(Role::Assistant, &"x".repeat(200)));
        }
        messages.push(make_msg(Role::User, "final"));

        let config = ContextConfig {
            max_context_tokens: 300,
            reserve_output_tokens: 50,
        };

        let result = compact_messages(&messages, &config).expect("should compact");
        assert_eq!(result.messages.len() + result.dropped_count, messages.len());
        assert!(result.tokens_before > result.tokens_after);
    }

    // Tool output is not pinned: an oversized tool result is dropped while the
    // current user message is kept.
    #[test]
    fn tool_messages_can_be_dropped() {
        let messages = vec![
            make_msg(Role::System, "sys"),
            make_msg(Role::User, "q1"),
            make_msg(Role::Assistant, "calling tool"),
            // Assumes `tool_result` stores its second argument as `content`; if so,
            // 500 bytes estimate to more than the 80-token input budget. TODO confirm.
            ChatMessage::tool_result("call-1", "x".repeat(500)),
            make_msg(Role::User, "current"),
        ];

        let config = ContextConfig {
            max_context_tokens: 100,
            reserve_output_tokens: 20,
        };

        let result = compact_messages(&messages, &config).expect("should compact");
        assert!(result.dropped_count > 0);
        // The pinned latest user message must still be present.
        assert!(result.messages.iter().any(|m| m.content == "current"));
    }
}