vtcode_core/compaction/
mod.rs

1use anyhow::{Context, Result};
2use chrono::{DateTime, Utc};
3use std::fmt::Write;
4use vtcode_config::constants::context::DEFAULT_COMPACTION_TRIGGER_RATIO;
5
6use crate::llm::provider::{LLMProvider, LLMRequest, Message, MessageRole};
7use crate::llm::utils::truncate_to_token_limit;
8
9pub mod summarizer;
10
// Stock values used by `CompactionConfig::default()`.

/// Default for `CompactionConfig::target_threshold`.
const DEFAULT_COMPACTION_TARGET_THRESHOLD: f64 = 0.50;
/// Default for `CompactionConfig::keep_last_messages`.
const DEFAULT_COMPACTION_KEEP_LAST_MESSAGES: usize = 10;
/// Default for `CompactionConfig::retained_user_message_tokens`.
const DEFAULT_RETAINED_USER_MESSAGE_TOKENS: usize = 20_000;
/// Default for `CompactionConfig::retained_user_messages`.
const DEFAULT_RETAINED_USER_MESSAGES: usize = 4;
/// Text placed in front of the generated summary in the system message that
/// heads a locally compacted history.
const SUMMARY_PREFIX: &str = "Previous conversation summary:\n";
16
/// Compaction configuration for context window management.
///
/// Passed to `compact_history`, which reads `summary_prompt`,
/// `keep_last_messages`, `retained_user_message_tokens`,
/// `retained_user_messages` and `always_summarize`.
#[derive(Debug, Clone)]
pub struct CompactionConfig {
    /// Threshold (0.0-1.0) at which to trigger compaction.
    ///
    /// NOTE(review): not read by the code visible in this file; presumably
    /// consulted by whichever caller decides when to compact — confirm.
    pub trigger_threshold: f64,
    /// Target usage ratio (0.0-1.0) after compaction.
    ///
    /// NOTE(review): not read by the code visible in this file — confirm where
    /// it is enforced.
    pub target_threshold: f64,
    /// Prompt for summarization.
    ///
    /// Written at the top of the summarization request, ahead of the
    /// role-labelled transcript of the history.
    pub summary_prompt: String,
    /// Legacy short-circuit used to skip local compaction for tiny histories.
    ///
    /// Histories with at most this many messages are returned unchanged unless
    /// `always_summarize` is set.
    pub keep_last_messages: usize,
    /// Total token budget reserved for retaining real user messages verbatim.
    ///
    /// Measured with `Message::estimate_tokens`; `0` disables retention.
    pub retained_user_message_tokens: usize,
    /// Maximum number of recent user messages to retain verbatim.
    ///
    /// `0` disables retention.
    pub retained_user_messages: usize,
    /// Force local summarization even for short histories and providers with native compaction.
    pub always_summarize: bool,
}
35
36impl Default for CompactionConfig {
37    fn default() -> Self {
38        Self {
39            trigger_threshold: DEFAULT_COMPACTION_TRIGGER_RATIO,
40            target_threshold: DEFAULT_COMPACTION_TARGET_THRESHOLD,
41            summary_prompt: "Summarize the conversation so far using this exact structure:\n\n## Goal\n[What the user is trying to accomplish]\n\n## Constraints & Preferences\n- [Requirements, preferences, or constraints from the user]\n\n## Progress\n### Done\n- [Completed work]\n\n### In Progress\n- [Current work]\n\n### Blocked\n- [Blocking issues, if any]\n\n## Key Decisions\n- **[Decision]**: [Reason]\n\n## Next Steps\n1. [Most important next step]\n\n## Critical Context\n- [Facts needed to continue]\n\nKeep it concise and actionable. Always preserve the current task objective and acceptance criteria, file paths that were read or modified, test results and error messages, and decisions with their reasoning."
42                .to_string(),
43            keep_last_messages: DEFAULT_COMPACTION_KEEP_LAST_MESSAGES,
44            retained_user_message_tokens: DEFAULT_RETAINED_USER_MESSAGE_TOKENS,
45            retained_user_messages: DEFAULT_RETAINED_USER_MESSAGES,
46            always_summarize: false,
47        }
48    }
49}
50
51/// Compact conversation history using the configured summarizer.
52pub async fn compact_history(
53    provider: &dyn LLMProvider,
54    model: &str,
55    history: &[Message],
56    config: &CompactionConfig,
57) -> Result<Vec<Message>> {
58    if history.is_empty() {
59        return Ok(Vec::new());
60    }
61
62    if !config.always_summarize && history.len() <= config.keep_last_messages {
63        return Ok(history.to_vec());
64    }
65
66    if !config.always_summarize && provider.supports_responses_compaction(model) {
67        return provider
68            .compact_history(model, history)
69            .await
70            .context("Failed to compact history via Responses compact endpoint");
71    }
72
73    let summary_prompt = build_summary_prompt(history, &config.summary_prompt);
74    let request = LLMRequest {
75        messages: vec![Message::user(summary_prompt)],
76        model: model.to_string(),
77        ..Default::default()
78    };
79
80    let response = provider
81        .generate(request)
82        .await
83        .context("Failed to generate compaction summary")?;
84
85    let summary = response.content.unwrap_or_default().trim().to_string();
86    Ok(build_local_compacted_history(
87        history,
88        &summary,
89        config.retained_user_message_tokens,
90        config.retained_user_messages,
91    ))
92}
93
94fn build_summary_prompt(history: &[Message], instructions: &str) -> String {
95    let mut formatted = String::new();
96    let now: DateTime<Utc> = Utc::now();
97    let _ = writeln!(
98        &mut formatted,
99        "Summary requested at {}.\n{}",
100        now.to_rfc3339(),
101        instructions
102    );
103
104    for message in history {
105        let role = match message.role {
106            MessageRole::System => "system",
107            MessageRole::User => "user",
108            MessageRole::Assistant => "assistant",
109            MessageRole::Tool => "tool",
110        };
111        let content = message.content.as_text();
112        if content.trim().is_empty() {
113            continue;
114        }
115        let _ = writeln!(&mut formatted, "\n[{}]\n{}", role, content.trim());
116    }
117
118    formatted
119}
120
121fn build_local_compacted_history(
122    history: &[Message],
123    summary: &str,
124    retained_user_message_tokens: usize,
125    retained_user_messages: usize,
126) -> Vec<Message> {
127    let retained_users = collect_retained_user_messages(
128        history,
129        retained_user_message_tokens,
130        retained_user_messages,
131    );
132    let mut new_history = Vec::with_capacity(retained_users.len().saturating_add(1));
133    new_history.push(Message::system(format!(
134        "{SUMMARY_PREFIX}{}",
135        summary.trim()
136    )));
137    new_history.extend(retained_users);
138    new_history
139}
140
141fn collect_retained_user_messages(
142    history: &[Message],
143    token_budget: usize,
144    max_messages: usize,
145) -> Vec<Message> {
146    if token_budget == 0 || max_messages == 0 {
147        return Vec::new();
148    }
149
150    let mut kept = Vec::new();
151    let mut remaining = token_budget;
152
153    for message in history.iter().rev() {
154        if kept.len() >= max_messages {
155            break;
156        }
157        if !is_real_user_message(message) {
158            continue;
159        }
160
161        let estimated = message.estimate_tokens();
162        if estimated <= remaining {
163            kept.push(message.clone());
164            remaining = remaining.saturating_sub(estimated);
165            continue;
166        }
167
168        if let Some(truncated) = truncate_user_message(message, remaining) {
169            kept.push(truncated);
170        }
171        break;
172    }
173
174    kept.reverse();
175    kept
176}
177
178fn is_real_user_message(message: &Message) -> bool {
179    message.role == MessageRole::User && !message.content.trim().is_empty()
180}
181
182fn truncate_user_message(message: &Message, token_budget: usize) -> Option<Message> {
183    if token_budget <= 4 {
184        return None;
185    }
186
187    let available_content_tokens = token_budget.saturating_sub(4);
188    let truncated =
189        truncate_to_token_limit(message.content.as_text().as_ref(), available_content_tokens);
190    let trimmed = truncated.trim();
191    if trimmed.is_empty() {
192        return None;
193    }
194
195    Some(Message::user(trimmed.to_string()))
196}
197
#[cfg(test)]
mod tests {
    use super::{CompactionConfig, compact_history};
    use crate::llm::provider::{
        LLMError, LLMProvider, LLMRequest, LLMResponse, Message, MessageRole,
    };
    use async_trait::async_trait;

    // Provider stub whose `generate` always answers with the text "summary".
    // It does not override `supports_responses_compaction` or
    // `compact_history`, so it uses whatever the trait provides for those.
    struct StubProvider;

    // Provider stub that reports native compaction support for every model and
    // compacts any history to a single "provider compacted" system message.
    struct NativeCompactionProvider;

    #[async_trait]
    impl LLMProvider for StubProvider {
        fn name(&self) -> &str {
            "stub"
        }

        async fn generate(&self, _request: LLMRequest) -> Result<LLMResponse, LLMError> {
            Ok(LLMResponse::new("stub-model", "summary"))
        }

        fn supported_models(&self) -> Vec<String> {
            vec!["stub-model".to_string()]
        }

        fn validate_request(&self, _request: &LLMRequest) -> Result<(), LLMError> {
            Ok(())
        }
    }

    #[async_trait]
    impl LLMProvider for NativeCompactionProvider {
        fn name(&self) -> &str {
            "native"
        }

        async fn generate(&self, _request: LLMRequest) -> Result<LLMResponse, LLMError> {
            Ok(LLMResponse::new("stub-model", "summary"))
        }

        fn supported_models(&self) -> Vec<String> {
            vec!["stub-model".to_string()]
        }

        fn validate_request(&self, _request: &LLMRequest) -> Result<(), LLMError> {
            Ok(())
        }

        fn supports_responses_compaction(&self, _model: &str) -> bool {
            true
        }

        async fn compact_history(
            &self,
            _model: &str,
            _history: &[Message],
        ) -> Result<Vec<Message>, LLMError> {
            Ok(vec![Message::system("provider compacted".to_string())])
        }
    }

    // Local compaction keeps only the summary system message plus the user
    // messages; assistant and tool messages are gone from the result.
    #[tokio::test]
    async fn compact_history_rebuilds_history_around_summary_and_users() {
        let history = vec![
            Message::assistant("setup".to_string()),
            Message::user("first request".to_string()),
            Message::assistant("working".to_string()),
            Message::tool_response("call-1".to_string(), "done".to_string()),
            Message::user("second request".to_string()),
            Message::assistant("final reply".to_string()),
        ];
        // `always_summarize` is needed because six messages is below the
        // default `keep_last_messages`, which would return the history as-is.
        let config = CompactionConfig {
            always_summarize: true,
            ..CompactionConfig::default()
        };

        let compacted = compact_history(&StubProvider, "stub-model", &history, &config)
            .await
            .expect("compacted history");

        assert_eq!(compacted.len(), 3);
        assert_eq!(
            compacted[0].content.as_text(),
            "Previous conversation summary:\nsummary"
        );
        assert_eq!(compacted[1].content.as_text(), "first request");
        assert_eq!(compacted[2].content.as_text(), "second request");
        assert!(compacted.iter().all(|message| {
            message.role == MessageRole::System || message.role == MessageRole::User
        }));
    }

    // With an 8-token retention budget only the newest user message survives.
    // NOTE(review): despite the test name, the assertions only show that the
    // older user message is absent (summary + newest = 2 messages); presumably
    // the budget left after "newest request" is too small to truncate into —
    // confirm against `Message::estimate_tokens`.
    #[tokio::test]
    async fn compact_history_truncates_oldest_retained_user_message_to_budget() {
        let history = vec![
            Message::user("alpha beta gamma delta epsilon zeta".to_string()),
            Message::assistant("ack".to_string()),
            Message::user("newest request".to_string()),
        ];
        let config = CompactionConfig {
            always_summarize: true,
            retained_user_message_tokens: 8,
            ..CompactionConfig::default()
        };

        let compacted = compact_history(&StubProvider, "stub-model", &history, &config)
            .await
            .expect("compacted history");

        assert_eq!(compacted.len(), 2);
        assert_eq!(compacted[1].content.as_text(), "newest request");
    }

    // Five user messages with a cap of four: the oldest one is dropped and the
    // remaining four stay in chronological order.
    #[tokio::test]
    async fn compact_history_caps_retained_user_message_count() {
        let history = vec![
            Message::user("first request".to_string()),
            Message::assistant("ack".to_string()),
            Message::user("second request".to_string()),
            Message::assistant("ack".to_string()),
            Message::user("third request".to_string()),
            Message::assistant("ack".to_string()),
            Message::user("fourth request".to_string()),
            Message::assistant("ack".to_string()),
            Message::user("fifth request".to_string()),
        ];
        let config = CompactionConfig {
            always_summarize: true,
            retained_user_messages: 4,
            ..CompactionConfig::default()
        };

        let compacted = compact_history(&StubProvider, "stub-model", &history, &config)
            .await
            .expect("compacted history");

        // Skip the leading summary message; only the retained users follow it.
        let retained = compacted
            .iter()
            .skip(1)
            .map(|message| message.content.as_text().to_string())
            .collect::<Vec<_>>();
        assert_eq!(
            retained,
            vec![
                "second request".to_string(),
                "third request".to_string(),
                "fourth request".to_string(),
                "fifth request".to_string(),
            ]
        );
    }

    // Even when the provider advertises native compaction, `always_summarize`
    // must produce the locally built history rather than the provider's
    // "provider compacted" message.
    #[tokio::test]
    async fn compact_history_forces_local_summary_when_always_summarize_is_enabled() {
        let history = vec![
            Message::user("first request".to_string()),
            Message::assistant("working".to_string()),
            Message::user("second request".to_string()),
        ];
        let config = CompactionConfig {
            always_summarize: true,
            ..CompactionConfig::default()
        };

        let compacted = compact_history(&NativeCompactionProvider, "stub-model", &history, &config)
            .await
            .expect("compacted history");

        assert_eq!(compacted.len(), 3);
        assert_eq!(
            compacted[0].content.as_text(),
            "Previous conversation summary:\nsummary"
        );
        assert_eq!(compacted[1].content.as_text(), "first request");
        assert_eq!(compacted[2].content.as_text(), "second request");
    }

    // Guards the phrases in the default prompt that tell the summarizer which
    // details must survive compaction.
    #[test]
    fn default_summary_prompt_preserves_required_compaction_context() {
        let prompt = CompactionConfig::default().summary_prompt;

        assert!(prompt.contains("acceptance criteria"));
        assert!(prompt.contains("file paths that were read or modified"));
        assert!(prompt.contains("test results and error messages"));
        assert!(prompt.contains("decisions with their reasoning"));
    }
}
vtcode_core/compaction/mod.rs

vtcode_core/compaction/
mod.rs