1use crate::config::ModelSettings;
34use crate::db::Database;
35use crate::persistence::Persistence;
36use crate::providers::{ChatMessage, LlmProvider};
37use anyhow::{Result, bail};
38use std::sync::Arc;
39use std::sync::atomic::{AtomicU32, Ordering};
40use tokio::sync::RwLock;
41
/// Minimum number of most-recent messages preserved verbatim after compaction.
pub const COMPACT_PRESERVE_COUNT: usize = 4;

/// Fraction of the history that gets compacted (rather than preserved) once a
/// session is long enough to cross `PARTIAL_COMPACT_THRESHOLD`.
const PARTIAL_COMPACT_FRACTION: f64 = 0.5;

/// Message count at which fractional (partial) compaction kicks in; shorter
/// sessions keep only `COMPACT_PRESERVE_COUNT` messages.
const PARTIAL_COMPACT_THRESHOLD: usize = 12;
50
/// Number of consecutive compaction failures after which the circuit opens.
const MAX_CONSECUTIVE_FAILURES: u32 = 3;

/// Process-wide streak of consecutive compaction failures; cleared on success.
static CONSECUTIVE_FAILURES: AtomicU32 = AtomicU32::new(0);

/// Resets the failure streak, closing the compaction circuit breaker.
pub fn reset_compact_failures() {
    CONSECUTIVE_FAILURES.store(0, Ordering::Relaxed);
}

/// Returns `true` when enough consecutive failures have accumulated that
/// further compaction attempts should be skipped.
pub fn is_compact_circuit_broken() -> bool {
    CONSECUTIVE_FAILURES.load(Ordering::Relaxed) >= MAX_CONSECUTIVE_FAILURES
}

/// Records one compaction failure; returns `true` if the circuit is now
/// (or already was) at/over the failure threshold.
///
/// Uses a saturating increment: a plain `fetch_add` would, after enough
/// additional failures, wrap past `u32::MAX` back to 0 and silently close
/// an open circuit.
pub fn record_compact_failure() -> bool {
    let prev = CONSECUTIVE_FAILURES
        .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |count| {
            Some(count.saturating_add(1))
        })
        .expect("fetch_update closure always returns Some");
    prev.saturating_add(1) >= MAX_CONSECUTIVE_FAILURES
}

/// Clears the failure streak after a successful compaction.
fn record_compact_success() {
    reset_compact_failures();
}
79
/// Maximum number of progressively larger truncation attempts made by
/// `truncate_until_fits` before giving up.
const MAX_TRUNCATION_RETRIES: usize = 3;

/// Fraction of the remaining summarizable messages dropped on each
/// truncation attempt.
const TRUNCATION_DROP_FRACTION: f64 = 0.2;
85
/// Outcome of a successful compaction run.
#[derive(Debug)]
pub struct CompactResult {
    /// Number of history rows deleted (replaced by the summary message).
    pub deleted: usize,
    /// Rough token-count estimate of the generated summary.
    pub summary_tokens: usize,
}
94
/// Reasons compaction was intentionally skipped (not an error).
#[derive(Debug)]
pub enum CompactSkip {
    /// The session has unresolved tool calls; compaction is deferred until
    /// the round-trip completes.
    PendingToolCalls,
    /// Too few messages to be worth compacting; carries the current count.
    TooShort(usize),
    /// Even after truncation the history would not fit the summary budget.
    HistoryTooLarge,
}
106
107pub async fn compact_session(
112 db: &Database,
113 session_id: &str,
114 max_context_tokens: usize,
115 model_settings: &crate::config::ModelSettings,
116 provider: &Arc<RwLock<Box<dyn LlmProvider>>>,
117) -> Result<std::result::Result<CompactResult, CompactSkip>> {
118 let prov = provider.read().await;
119 compact_session_with_provider(db, session_id, max_context_tokens, model_settings, &**prov).await
120}
121
122pub async fn compact_session_with_provider(
131 db: &Database,
132 session_id: &str,
133 max_context_tokens: usize,
134 model_settings: &crate::config::ModelSettings,
135 provider: &dyn LlmProvider,
136) -> Result<std::result::Result<CompactResult, CompactSkip>> {
137 if db.has_pending_tool_calls(session_id).await.unwrap_or(false) {
139 return Ok(Err(CompactSkip::PendingToolCalls));
140 }
141
142 let history = db.load_context(session_id).await?;
143
144 if history.len() < 4 {
145 return Ok(Err(CompactSkip::TooShort(history.len())));
146 }
147
148 let preserve_count = compute_preserve_count(history.len());
152
153 let compact_count = history.len().saturating_sub(preserve_count);
154 if compact_count == 0 {
155 return Ok(Err(CompactSkip::TooShort(history.len())));
156 }
157
158 let to_compact = &history[..compact_count];
160 let conversation_text = build_conversation_text(to_compact);
161
162 tracing::info!(
163 "Compacting {compact_count}/{} messages (preserving {preserve_count})",
164 history.len(),
165 );
166
167 let text_tokens = (conversation_text.len() as f64 / crate::inference_helpers::CHARS_PER_TOKEN)
170 as usize
171 + crate::inference_helpers::SYSTEM_PROMPT_OVERHEAD;
172 let available = max_context_tokens.saturating_sub(4096);
173
174 let final_text = if text_tokens <= available {
177 conversation_text
178 } else {
179 match truncate_until_fits(to_compact, available) {
180 Some(text) => text,
181 None => return Ok(Err(CompactSkip::HistoryTooLarge)),
182 }
183 };
184
185 let summary_prompt = build_summary_prompt(&final_text);
186
187 let messages = vec![ChatMessage::text("user", &summary_prompt)];
188 let compact_settings = ModelSettings {
192 model: model_settings.model.clone(),
193 max_tokens: Some(4096),
194 temperature: Some(0.3),
195 thinking_budget: None,
196 reasoning_effort: None,
197 max_context_tokens: model_settings.max_context_tokens,
198 };
199 let response = provider.chat(&messages, &[], &compact_settings).await?;
200
201 let summary = match response.content {
202 Some(text) if !text.trim().is_empty() => text,
203 _ => bail!("LLM returned an empty summary"),
204 };
205
206 let summary = strip_analysis_block(&summary);
207 let compact_message = format!("[Compacted conversation summary]\n\n{summary}");
208 let deleted = db
209 .compact_session(session_id, &compact_message, preserve_count)
210 .await?;
211
212 record_compact_success();
213
214 Ok(Ok(CompactResult {
215 deleted,
216 summary_tokens: summary.len() / 4,
217 }))
218}
219
220fn compute_preserve_count(total: usize) -> usize {
228 if total < PARTIAL_COMPACT_THRESHOLD {
229 COMPACT_PRESERVE_COUNT
230 } else {
231 let keep = (total as f64 * (1.0 - PARTIAL_COMPACT_FRACTION)).ceil() as usize;
232 keep.max(COMPACT_PRESERVE_COUNT)
233 }
234}
235
/// Builds the summarization prompt sent to the LLM.
///
/// The instructions request an `<analysis>` scratchpad followed by a
/// `<summary>` block (the analysis is later removed by
/// `strip_analysis_block`), and explicitly forbid tool calls. The
/// conversation text is appended verbatim after a `---` separator.
fn build_summary_prompt(conversation_text: &str) -> String {
    format!(
        "CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.\n\
         Tool calls will be REJECTED and will waste your only turn.\n\
         Your entire response must be plain text: an <analysis> block followed by a <summary> block.\n\
         \n\
         Your task is to create a detailed summary of the conversation so far, paying close \n\
         attention to the user's explicit requests and your previous actions.\n\
         This summary should be thorough in capturing technical details, code patterns, and \n\
         architectural decisions that would be essential for continuing development work \n\
         without losing context.\n\
         \n\
         Before providing your final summary, wrap your analysis in <analysis> tags to \n\
         organize your thoughts and ensure you've covered all necessary points. In your analysis:\n\
         \n\
         1. Chronologically analyze each message. For each section thoroughly identify:\n\
            - The user's explicit requests and intents\n\
            - Your approach to addressing them\n\
            - Key decisions, technical concepts and code patterns\n\
            - Specific details: file names, code snippets, function signatures, file edits\n\
            - Errors encountered and how they were fixed\n\
            - Specific user feedback, especially corrections\n\
         2. Double-check for technical accuracy and completeness.\n\
         \n\
         Your summary should include these sections:\n\
         \n\
         1. **Primary Request and Intent**: Capture ALL of the user's explicit requests in detail.\n\
         2. **Key Technical Concepts**: List all important technologies and frameworks discussed.\n\
         3. **Files and Code Sections**: Enumerate specific files examined, modified, or created. \n\
            Include code snippets where applicable and a summary of why each file matters.\n\
            **Be exhaustive about file paths** — once compaction runs, the only record of\n\
            files touched in the compacted range is this summary, so missing a path means\n\
            losing it for the rest of the session. Group as: created / modified / deleted.\n\
         4. **Errors and Fixes**: List all errors and how they were resolved. Note user feedback.\n\
         5. **Problem Solving**: Document problems solved and ongoing troubleshooting.\n\
         6. **All User Messages**: List ALL user messages (not tool results). Critical for \n\
            preserving feedback and changing intent.\n\
         7. **Pending Tasks**: Outline anything unfinished or deferred.\n\
            **Preserve every outstanding TodoWrite item verbatim** with its current status\n\
            (pending / in_progress). Compaction is the only mechanism that defends plan\n\
            continuity across context-window pressure (`DESIGN.md § Progress Tracking:\n\
            Model-Owned, History-Persisted, Engine-Surfaced`) — the system prompt does NOT\n\
            re-inject the todo list, so anything dropped here is gone.\n\
         8. **Current Work**: Describe precisely what was being worked on immediately before \n\
            this summary. Include file names and code snippets.\n\
         9. **Optional Next Step**: Only if directly in line with the user's most recent \n\
            explicit request. Include direct quotes from the conversation to prevent drift.\n\
         \n\
         Format your response as:\n\
         \n\
         <analysis>\n\
         [Your thought process ensuring all points are covered]\n\
         </analysis>\n\
         \n\
         <summary>\n\
         1. Primary Request and Intent:\n\
         [Detailed description]\n\
         ...\n\
         </summary>\n\
         \n\
         REMINDER: Do NOT call any tools. Respond with plain text only.\n\
         \n\
         ---\n\n{conversation_text}"
    )
}
305
/// Extracts the final summary from an LLM response.
///
/// Removes a complete `<analysis>…</analysis>` span, keeps only the interior
/// of a complete `<summary>…</summary>` pair when both tags are present, and
/// collapses runs of blank lines. Text with missing/unclosed tags passes
/// through unchanged (aside from blank-line collapsing and trimming).
pub fn strip_analysis_block(summary: &str) -> String {
    // Drop the analysis scratchpad only when both tags exist.
    let without_analysis = match (summary.find("<analysis>"), summary.find("</analysis>")) {
        (Some(open), Some(close)) => {
            let resume = close + "</analysis>".len();
            format!("{}{}", &summary[..open], &summary[resume..])
        }
        _ => summary.to_string(),
    };

    // Keep only the interior of a complete <summary> pair, if present.
    let body = match (
        without_analysis.find("<summary>"),
        without_analysis.find("</summary>"),
    ) {
        (Some(open), Some(close)) => without_analysis[open + "<summary>".len()..close]
            .trim()
            .to_string(),
        _ => without_analysis,
    };

    // Collapse consecutive blank lines down to a single blank line.
    let mut kept: Vec<&str> = Vec::new();
    for line in body.lines() {
        let blank = line.trim().is_empty();
        if blank && kept.last().map_or(false, |prev| prev.trim().is_empty()) {
            continue;
        }
        kept.push(line);
    }
    kept.join("\n").trim().to_string()
}
365
366fn truncate_until_fits(history: &[crate::db::Message], available_tokens: usize) -> Option<String> {
370 let total = history.len();
371 let min_keep = COMPACT_PRESERVE_COUNT + 1;
373 if total <= min_keep {
374 return None;
375 }
376
377 let mut drop_count = 0usize;
378 for attempt in 0..MAX_TRUNCATION_RETRIES {
379 let summarizable = total.saturating_sub(drop_count);
381 let to_drop = (summarizable as f64 * TRUNCATION_DROP_FRACTION).ceil() as usize;
382 drop_count += to_drop.max(1); if total.saturating_sub(drop_count) < min_keep {
386 drop_count = total - min_keep;
387 }
388
389 let truncated = &history[drop_count..];
390 let text = build_conversation_text(truncated);
391 let text_tokens = (text.len() as f64 / crate::inference_helpers::CHARS_PER_TOKEN) as usize
392 + crate::inference_helpers::SYSTEM_PROMPT_OVERHEAD;
393
394 tracing::info!(
395 "Truncation attempt {}: dropped {drop_count}/{total} messages, \
396 ~{text_tokens} tokens (budget: {available_tokens})",
397 attempt + 1,
398 );
399
400 if text_tokens <= available_tokens {
401 return Some(text);
402 }
403 }
404
405 None
406}
407
408fn build_conversation_text(history: &[crate::db::Message]) -> String {
415 let mut text = String::new();
416 for msg in history {
417 let role = msg.role.as_str();
418 if let Some(ref content) = msg.content {
419 let truncated: String = content.chars().take(2000).collect();
420 text.push_str(&format!("[{role}]: {truncated}\n\n"));
421 }
422 if let Some(ref tool_calls) = msg.tool_calls {
423 let truncated: String = tool_calls.chars().take(500).collect();
424 text.push_str(&format!("[{role} tool_calls]: {truncated}\n\n"));
425 }
426 }
427 text
428}
429
#[cfg(test)]
mod tests {
    use super::*;
    use crate::db::Message;

    /// Builds a minimal `Message`, defaulting every field the compaction
    /// code does not read.
    fn make_msg(role: &str, content: Option<&str>, tool_calls: Option<&str>) -> Message {
        Message {
            id: 0,
            session_id: String::new(),
            role: role.parse().unwrap_or(crate::db::Role::User),
            content: content.map(String::from),
            full_content: None,
            tool_calls: tool_calls.map(String::from),
            tool_call_id: None,
            prompt_tokens: None,
            completion_tokens: None,
            cache_read_tokens: None,
            cache_creation_tokens: None,
            thinking_tokens: None,
            thinking_content: None,
            created_at: None,
        }
    }

    #[test]
    fn test_circuit_breaker() {
        reset_compact_failures();
        assert!(!is_compact_circuit_broken());

        // Failures 1 and 2: circuit stays closed.
        assert!(!record_compact_failure());
        assert!(!is_compact_circuit_broken());

        assert!(!record_compact_failure());
        assert!(!is_compact_circuit_broken());

        // Third failure trips the breaker.
        assert!(record_compact_failure());
        assert!(is_compact_circuit_broken());

        reset_compact_failures();
        assert!(!is_compact_circuit_broken());
    }

    #[test]
    fn test_empty_history() {
        assert_eq!(build_conversation_text(&[]), "");
    }

    #[test]
    fn test_basic_conversation() {
        let msgs = vec![
            make_msg("user", Some("hello"), None),
            make_msg("assistant", Some("hi"), None),
        ];
        let text = build_conversation_text(&msgs);
        assert!(text.contains("[user]: hello"));
        assert!(text.contains("[assistant]: hi"));
    }

    #[test]
    fn test_truncates_long_content_per_message() {
        let long = "x".repeat(3000);
        let msgs = vec![make_msg("user", Some(&long), None)];
        let text = build_conversation_text(&msgs);
        // 2000-char cap plus role prefix and separators.
        assert!(text.len() < 2100);
    }

    #[test]
    fn test_no_total_cap() {
        let content = "y".repeat(500);
        let msgs: Vec<_> = (0..50)
            .map(|_| make_msg("user", Some(&content), None))
            .collect();
        let text = build_conversation_text(&msgs);
        // Truncation is per-message only; no global size limit.
        assert!(text.len() > 20_000);
        assert!(!text.contains("truncated"));
    }

    #[test]
    fn test_multibyte_boundary_safe() {
        let mut content = "a".repeat(1999);
        // Emoji straddles the 2000-char cap; chars() must not split it.
        content.push('\u{1f43b}');
        content.push_str("after");
        let msgs = vec![make_msg("user", Some(&content), None)];
        let text = build_conversation_text(&msgs);
        assert!(text.contains("\u{1f43b}") || !text.contains("after"));
    }

    #[test]
    fn test_tool_calls_included() {
        let msgs = vec![make_msg("assistant", None, Some("{\"name\": \"Read\"}"))];
        let text = build_conversation_text(&msgs);
        assert!(text.contains("tool_calls"));
        assert!(text.contains("Read"));
    }

    #[test]
    fn test_none_content_skipped() {
        let msgs = vec![make_msg("tool", None, None)];
        let text = build_conversation_text(&msgs);
        assert_eq!(text, "");
    }

    #[test]
    fn test_strip_analysis_block() {
        let input = "<analysis>\nthinking here\n</analysis>\n\n<summary>\n1. Primary Request:\n Build a thing\n</summary>";
        let result = strip_analysis_block(input);
        assert!(result.contains("Primary Request"));
        assert!(!result.contains("<analysis>"));
        assert!(!result.contains("thinking here"));
        assert!(!result.contains("<summary>"));
    }

    #[test]
    fn test_strip_analysis_no_tags() {
        let input = "Just a plain summary";
        assert_eq!(strip_analysis_block(input), "Just a plain summary");
    }

    #[test]
    fn test_strip_analysis_only_summary_tags() {
        let input = "<summary>\nThe good stuff\n</summary>";
        let result = strip_analysis_block(input);
        assert_eq!(result, "The good stuff");
    }

    #[test]
    fn test_truncate_until_fits_drops_oldest() {
        let msgs: Vec<_> = (0..20)
            .map(|i| {
                make_msg(
                    "user",
                    Some(&format!("Message number {i} with some padding text here")),
                    None,
                )
            })
            .collect();

        let result = truncate_until_fits(&msgs, 250);
        assert!(result.is_some(), "should succeed after truncation");
        let text = result.unwrap();
        // Newest messages survive; oldest are dropped first.
        assert!(text.contains("Message number 19"));
        assert!(!text.contains("Message number 0"));
    }

    #[test]
    fn test_truncate_until_fits_too_few_messages() {
        let msgs: Vec<_> = (0..5)
            .map(|_| make_msg("user", Some(&"x".repeat(10_000)), None))
            .collect();
        let result = truncate_until_fits(&msgs, 10);
        assert!(result.is_none());
    }

    #[test]
    fn test_truncate_until_fits_already_fits() {
        let msgs: Vec<_> = (0..10)
            .map(|i| make_msg("user", Some(&format!("Short {i}")), None))
            .collect();
        let result = truncate_until_fits(&msgs, 100_000);
        assert!(result.is_some());
        let text = result.unwrap();
        assert!(text.contains("Short 9"));
    }

    #[test]
    fn test_compute_preserve_count_short_sessions() {
        assert_eq!(compute_preserve_count(4), 4);
        assert_eq!(compute_preserve_count(8), 4);
        assert_eq!(compute_preserve_count(11), 4);
    }

    #[test]
    fn test_compute_preserve_count_partial() {
        assert_eq!(compute_preserve_count(12), 6);
        assert_eq!(compute_preserve_count(20), 10);
        assert_eq!(compute_preserve_count(50), 25);
        assert_eq!(compute_preserve_count(100), 50);
    }

    #[test]
    fn test_compute_preserve_count_never_below_minimum() {
        for n in 0..200 {
            assert!(compute_preserve_count(n) >= COMPACT_PRESERVE_COUNT);
        }
    }

    #[test]
    fn test_build_summary_prompt_embeds_conversation() {
        let text = build_summary_prompt("[user]: hello\n\n[assistant]: hi\n\n");
        assert!(
            text.contains("[user]: hello"),
            "prompt should embed the conversation text verbatim"
        );
        assert!(text.contains("[assistant]: hi"));
    }

    #[test]
    fn test_build_summary_prompt_instructs_no_tool_calls() {
        let text = build_summary_prompt("some conversation");
        assert!(
            text.contains("Do NOT call any tools"),
            "prompt must forbid tool calls"
        );
        assert!(text.contains("CRITICAL"));
    }

    #[test]
    fn test_build_summary_prompt_requests_analysis_and_summary_tags() {
        let text = build_summary_prompt("some conversation");
        assert!(
            text.contains("<analysis>"),
            "prompt should ask for <analysis> block"
        );
        assert!(
            text.contains("<summary>"),
            "prompt should ask for <summary> block"
        );
    }

    #[test]
    fn test_build_conversation_text_tool_calls_truncated_at_500() {
        let long_tc = "T".repeat(600);
        let msgs = vec![make_msg("assistant", None, Some(&long_tc))];
        let text = build_conversation_text(&msgs);
        assert!(
            text.len() <= 550,
            "tool_calls should be capped at 500 chars"
        );
    }

    #[test]
    fn test_build_conversation_text_both_content_and_tool_calls() {
        let msgs = vec![make_msg(
            "assistant",
            Some("I will read the file"),
            Some("{\"name\": \"Read\"}"),
        )];
        let text = build_conversation_text(&msgs);
        assert!(text.contains("I will read the file"));
        assert!(text.contains("tool_calls"));
    }

    #[test]
    fn test_strip_analysis_unclosed_tag_passthrough() {
        let input = "<analysis>\nthinking...\n1. Primary Request: build a thing";
        let result = strip_analysis_block(input);
        assert!(
            result.contains("thinking"),
            "unclosed analysis tag should leave text intact"
        );
    }

    #[test]
    fn test_strip_analysis_trims_extra_whitespace() {
        let input = "<analysis>\nthink\n</analysis>\n\n\n\n<summary>\nClean content\n</summary>";
        let result = strip_analysis_block(input);
        assert!(!result.starts_with('\n'));
        assert!(!result.ends_with('\n'));
        assert_eq!(result, "Clean content");
    }

}