1use anyhow::{Context, Result};
2use chrono::{DateTime, Utc};
3use std::fmt::Write;
4use vtcode_config::constants::context::DEFAULT_COMPACTION_TRIGGER_RATIO;
5
6use crate::llm::provider::{LLMProvider, LLMRequest, Message, MessageRole};
7use crate::llm::utils::truncate_to_token_limit;
8
9pub mod summarizer;
10
/// Default for [`CompactionConfig::target_threshold`].
const DEFAULT_COMPACTION_TARGET_THRESHOLD: f64 = 0.5;
/// Default for [`CompactionConfig::keep_last_messages`].
const DEFAULT_COMPACTION_KEEP_LAST_MESSAGES: usize = 10;
/// Default for [`CompactionConfig::retained_user_message_tokens`].
const DEFAULT_RETAINED_USER_MESSAGE_TOKENS: usize = 20_000;
/// Default for [`CompactionConfig::retained_user_messages`].
const DEFAULT_RETAINED_USER_MESSAGES: usize = 4;
/// Text placed in front of the generated summary inside the system message that opens a locally
/// compacted history.
const SUMMARY_PREFIX: &str = "Previous conversation summary:\n";
16
/// Settings that control how [`compact_history`] shrinks a conversation.
#[derive(Debug, Clone)]
pub struct CompactionConfig {
    /// Threshold at which compaction should be triggered.
    ///
    /// NOTE(review): not read inside this module; presumably a context-usage ratio consulted by
    /// the caller before invoking compaction — confirm.
    pub trigger_threshold: f64,
    /// Threshold compaction aims for.
    ///
    /// NOTE(review): not read inside this module; presumably a context-usage ratio — confirm.
    pub target_threshold: f64,
    /// Instructions placed at the top of the prompt sent to the model when a summary is
    /// generated locally.
    pub summary_prompt: String,
    /// Histories with at most this many messages are returned unchanged, unless
    /// `always_summarize` is set.
    pub keep_last_messages: usize,
    /// Total estimated-token budget for the user messages kept after the summary.
    pub retained_user_message_tokens: usize,
    /// Maximum number of user messages kept after the summary.
    pub retained_user_messages: usize,
    /// When `true`, skips both the short-history shortcut and provider-native compaction so a
    /// locally generated summary is always produced for non-empty histories.
    pub always_summarize: bool,
}
35
36impl Default for CompactionConfig {
37 fn default() -> Self {
38 Self {
39 trigger_threshold: DEFAULT_COMPACTION_TRIGGER_RATIO,
40 target_threshold: DEFAULT_COMPACTION_TARGET_THRESHOLD,
41 summary_prompt: "Summarize the conversation so far using this exact structure:\n\n## Goal\n[What the user is trying to accomplish]\n\n## Constraints & Preferences\n- [Requirements, preferences, or constraints from the user]\n\n## Progress\n### Done\n- [Completed work]\n\n### In Progress\n- [Current work]\n\n### Blocked\n- [Blocking issues, if any]\n\n## Key Decisions\n- **[Decision]**: [Reason]\n\n## Next Steps\n1. [Most important next step]\n\n## Critical Context\n- [Facts needed to continue]\n\nKeep it concise and actionable. Always preserve the current task objective and acceptance criteria, file paths that were read or modified, test results and error messages, and decisions with their reasoning."
42 .to_string(),
43 keep_last_messages: DEFAULT_COMPACTION_KEEP_LAST_MESSAGES,
44 retained_user_message_tokens: DEFAULT_RETAINED_USER_MESSAGE_TOKENS,
45 retained_user_messages: DEFAULT_RETAINED_USER_MESSAGES,
46 always_summarize: false,
47 }
48 }
49}
50
51pub async fn compact_history(
53 provider: &dyn LLMProvider,
54 model: &str,
55 history: &[Message],
56 config: &CompactionConfig,
57) -> Result<Vec<Message>> {
58 if history.is_empty() {
59 return Ok(Vec::new());
60 }
61
62 if !config.always_summarize && history.len() <= config.keep_last_messages {
63 return Ok(history.to_vec());
64 }
65
66 if !config.always_summarize && provider.supports_responses_compaction(model) {
67 return provider
68 .compact_history(model, history)
69 .await
70 .context("Failed to compact history via Responses compact endpoint");
71 }
72
73 let summary_prompt = build_summary_prompt(history, &config.summary_prompt);
74 let request = LLMRequest {
75 messages: vec![Message::user(summary_prompt)],
76 model: model.to_string(),
77 ..Default::default()
78 };
79
80 let response = provider
81 .generate(request)
82 .await
83 .context("Failed to generate compaction summary")?;
84
85 let summary = response.content.unwrap_or_default().trim().to_string();
86 Ok(build_local_compacted_history(
87 history,
88 &summary,
89 config.retained_user_message_tokens,
90 config.retained_user_messages,
91 ))
92}
93
94fn build_summary_prompt(history: &[Message], instructions: &str) -> String {
95 let mut formatted = String::new();
96 let now: DateTime<Utc> = Utc::now();
97 let _ = writeln!(
98 &mut formatted,
99 "Summary requested at {}.\n{}",
100 now.to_rfc3339(),
101 instructions
102 );
103
104 for message in history {
105 let role = match message.role {
106 MessageRole::System => "system",
107 MessageRole::User => "user",
108 MessageRole::Assistant => "assistant",
109 MessageRole::Tool => "tool",
110 };
111 let content = message.content.as_text();
112 if content.trim().is_empty() {
113 continue;
114 }
115 let _ = writeln!(&mut formatted, "\n[{}]\n{}", role, content.trim());
116 }
117
118 formatted
119}
120
121fn build_local_compacted_history(
122 history: &[Message],
123 summary: &str,
124 retained_user_message_tokens: usize,
125 retained_user_messages: usize,
126) -> Vec<Message> {
127 let retained_users = collect_retained_user_messages(
128 history,
129 retained_user_message_tokens,
130 retained_user_messages,
131 );
132 let mut new_history = Vec::with_capacity(retained_users.len().saturating_add(1));
133 new_history.push(Message::system(format!(
134 "{SUMMARY_PREFIX}{}",
135 summary.trim()
136 )));
137 new_history.extend(retained_users);
138 new_history
139}
140
141fn collect_retained_user_messages(
142 history: &[Message],
143 token_budget: usize,
144 max_messages: usize,
145) -> Vec<Message> {
146 if token_budget == 0 || max_messages == 0 {
147 return Vec::new();
148 }
149
150 let mut kept = Vec::new();
151 let mut remaining = token_budget;
152
153 for message in history.iter().rev() {
154 if kept.len() >= max_messages {
155 break;
156 }
157 if !is_real_user_message(message) {
158 continue;
159 }
160
161 let estimated = message.estimate_tokens();
162 if estimated <= remaining {
163 kept.push(message.clone());
164 remaining = remaining.saturating_sub(estimated);
165 continue;
166 }
167
168 if let Some(truncated) = truncate_user_message(message, remaining) {
169 kept.push(truncated);
170 }
171 break;
172 }
173
174 kept.reverse();
175 kept
176}
177
178fn is_real_user_message(message: &Message) -> bool {
179 message.role == MessageRole::User && !message.content.trim().is_empty()
180}
181
182fn truncate_user_message(message: &Message, token_budget: usize) -> Option<Message> {
183 if token_budget <= 4 {
184 return None;
185 }
186
187 let available_content_tokens = token_budget.saturating_sub(4);
188 let truncated =
189 truncate_to_token_limit(message.content.as_text().as_ref(), available_content_tokens);
190 let trimmed = truncated.trim();
191 if trimmed.is_empty() {
192 return None;
193 }
194
195 Some(Message::user(trimmed.to_string()))
196}
197
#[cfg(test)]
mod tests {
    use super::{CompactionConfig, compact_history};
    use crate::llm::provider::{
        LLMError, LLMProvider, LLMRequest, LLMResponse, Message, MessageRole,
    };
    use async_trait::async_trait;

    /// Provider whose `generate` always answers with the text "summary". It does not override
    /// `supports_responses_compaction`, so the trait's default applies.
    struct StubProvider;

    /// Provider that advertises native compaction and returns a fixed marker message from it.
    struct NativeCompactionProvider;

    #[async_trait]
    impl LLMProvider for StubProvider {
        fn name(&self) -> &str {
            "stub"
        }

        async fn generate(&self, _request: LLMRequest) -> Result<LLMResponse, LLMError> {
            Ok(LLMResponse::new("stub-model", "summary"))
        }

        fn supported_models(&self) -> Vec<String> {
            vec!["stub-model".to_string()]
        }

        fn validate_request(&self, _request: &LLMRequest) -> Result<(), LLMError> {
            Ok(())
        }
    }

    #[async_trait]
    impl LLMProvider for NativeCompactionProvider {
        fn name(&self) -> &str {
            "native"
        }

        async fn generate(&self, _request: LLMRequest) -> Result<LLMResponse, LLMError> {
            Ok(LLMResponse::new("stub-model", "summary"))
        }

        fn supported_models(&self) -> Vec<String> {
            vec!["stub-model".to_string()]
        }

        fn validate_request(&self, _request: &LLMRequest) -> Result<(), LLMError> {
            Ok(())
        }

        fn supports_responses_compaction(&self, _model: &str) -> bool {
            true
        }

        async fn compact_history(
            &self,
            _model: &str,
            _history: &[Message],
        ) -> Result<Vec<Message>, LLMError> {
            Ok(vec![Message::system("provider compacted".to_string())])
        }
    }

    /// Default configuration with local summarization forced on.
    fn forced_summary_config() -> CompactionConfig {
        CompactionConfig {
            always_summarize: true,
            ..CompactionConfig::default()
        }
    }

    #[tokio::test]
    async fn compact_history_returns_empty_for_empty_history() {
        let compacted = compact_history(&StubProvider, "stub-model", &[], &forced_summary_config())
            .await
            .expect("compacted history");

        assert!(compacted.is_empty());
    }

    #[tokio::test]
    async fn compact_history_returns_short_history_unchanged() {
        let history = vec![
            Message::user("first request".to_string()),
            Message::assistant("working".to_string()),
        ];
        let config = CompactionConfig::default();

        let compacted = compact_history(&StubProvider, "stub-model", &history, &config)
            .await
            .expect("compacted history");

        assert_eq!(compacted.len(), 2);
        assert_eq!(compacted[0].content.as_text(), "first request");
        assert_eq!(compacted[1].content.as_text(), "working");
    }

    #[tokio::test]
    async fn compact_history_delegates_to_provider_native_compaction() {
        let history = vec![
            Message::user("first request".to_string()),
            Message::assistant("working".to_string()),
            Message::user("second request".to_string()),
        ];
        // `keep_last_messages: 0` disables the short-history shortcut so the native path is hit.
        let config = CompactionConfig {
            keep_last_messages: 0,
            ..CompactionConfig::default()
        };

        let compacted = compact_history(&NativeCompactionProvider, "stub-model", &history, &config)
            .await
            .expect("compacted history");

        assert_eq!(compacted.len(), 1);
        assert!(compacted[0].role == MessageRole::System);
        assert_eq!(compacted[0].content.as_text(), "provider compacted");
    }

    #[tokio::test]
    async fn compact_history_rebuilds_history_around_summary_and_users() {
        let history = vec![
            Message::assistant("setup".to_string()),
            Message::user("first request".to_string()),
            Message::assistant("working".to_string()),
            Message::tool_response("call-1".to_string(), "done".to_string()),
            Message::user("second request".to_string()),
            Message::assistant("final reply".to_string()),
        ];
        let config = forced_summary_config();

        let compacted = compact_history(&StubProvider, "stub-model", &history, &config)
            .await
            .expect("compacted history");

        assert_eq!(compacted.len(), 3);
        assert_eq!(
            compacted[0].content.as_text(),
            "Previous conversation summary:\nsummary"
        );
        assert_eq!(compacted[1].content.as_text(), "first request");
        assert_eq!(compacted[2].content.as_text(), "second request");
        assert!(compacted.iter().all(|message| {
            message.role == MessageRole::System || message.role == MessageRole::User
        }));
    }

    #[tokio::test]
    async fn compact_history_drops_older_user_message_when_budget_is_exhausted() {
        let history = vec![
            Message::user("alpha beta gamma delta epsilon zeta".to_string()),
            Message::assistant("ack".to_string()),
            Message::user("newest request".to_string()),
        ];
        let config = CompactionConfig {
            retained_user_message_tokens: 8,
            ..forced_summary_config()
        };

        let compacted = compact_history(&StubProvider, "stub-model", &history, &config)
            .await
            .expect("compacted history");

        assert_eq!(compacted.len(), 2);
        assert_eq!(compacted[1].content.as_text(), "newest request");
    }

    #[tokio::test]
    async fn compact_history_caps_retained_user_message_count() {
        let history = vec![
            Message::user("first request".to_string()),
            Message::assistant("ack".to_string()),
            Message::user("second request".to_string()),
            Message::assistant("ack".to_string()),
            Message::user("third request".to_string()),
            Message::assistant("ack".to_string()),
            Message::user("fourth request".to_string()),
            Message::assistant("ack".to_string()),
            Message::user("fifth request".to_string()),
        ];
        let config = CompactionConfig {
            retained_user_messages: 4,
            ..forced_summary_config()
        };

        let compacted = compact_history(&StubProvider, "stub-model", &history, &config)
            .await
            .expect("compacted history");

        let retained = compacted
            .iter()
            .skip(1)
            .map(|message| message.content.as_text().to_string())
            .collect::<Vec<_>>();
        assert_eq!(
            retained,
            vec![
                "second request".to_string(),
                "third request".to_string(),
                "fourth request".to_string(),
                "fifth request".to_string(),
            ]
        );
    }

    #[tokio::test]
    async fn compact_history_forces_local_summary_when_always_summarize_is_enabled() {
        let history = vec![
            Message::user("first request".to_string()),
            Message::assistant("working".to_string()),
            Message::user("second request".to_string()),
        ];
        let config = forced_summary_config();

        let compacted = compact_history(&NativeCompactionProvider, "stub-model", &history, &config)
            .await
            .expect("compacted history");

        assert_eq!(compacted.len(), 3);
        assert_eq!(
            compacted[0].content.as_text(),
            "Previous conversation summary:\nsummary"
        );
        assert_eq!(compacted[1].content.as_text(), "first request");
        assert_eq!(compacted[2].content.as_text(), "second request");
    }

    #[test]
    fn default_summary_prompt_preserves_required_compaction_context() {
        let prompt = CompactionConfig::default().summary_prompt;

        assert!(prompt.contains("acceptance criteria"));
        assert!(prompt.contains("file paths that were read or modified"));
        assert!(prompt.contains("test results and error messages"));
        assert!(prompt.contains("decisions with their reasoning"));
    }
}