1use crate::config::ModelSettings;
34use crate::db::Database;
35use crate::persistence::Persistence;
36use crate::providers::{ChatMessage, LlmProvider};
37use anyhow::{Result, bail};
38use std::sync::Arc;
39use std::sync::atomic::{AtomicU32, Ordering};
40use tokio::sync::RwLock;
41
/// Number of most-recent messages always kept verbatim when compacting.
pub const COMPACT_PRESERVE_COUNT: usize = 4;

/// Fraction of the history summarized away during a partial compact
/// (the remainder is preserved).
const PARTIAL_COMPACT_FRACTION: f64 = 0.5;

/// Minimum history length (in messages) at which partial compaction applies;
/// shorter sessions preserve only `COMPACT_PRESERVE_COUNT` messages.
const PARTIAL_COMPACT_THRESHOLD: usize = 12;

/// Consecutive compaction failures after which the circuit breaker trips.
const MAX_CONSECUTIVE_FAILURES: u32 = 3;

/// Process-wide count of consecutive compaction failures; cleared on success
/// via `record_compact_success` / `reset_compact_failures`.
static CONSECUTIVE_FAILURES: AtomicU32 = AtomicU32::new(0);
58
/// Clears the consecutive-failure counter, re-arming the compaction circuit breaker.
pub fn reset_compact_failures() {
    CONSECUTIVE_FAILURES.store(0, Ordering::Relaxed);
}
63
/// Returns `true` once compaction has failed `MAX_CONSECUTIVE_FAILURES` times
/// in a row; stays tripped until `reset_compact_failures` is called.
pub fn is_compact_circuit_broken() -> bool {
    CONSECUTIVE_FAILURES.load(Ordering::Relaxed) >= MAX_CONSECUTIVE_FAILURES
}
68
69pub fn record_compact_failure() -> bool {
71 let prev = CONSECUTIVE_FAILURES.fetch_add(1, Ordering::Relaxed);
72 prev + 1 >= MAX_CONSECUTIVE_FAILURES
73}
74
/// Marks a successful compaction by clearing the failure streak.
fn record_compact_success() {
    reset_compact_failures();
}
79
/// Maximum number of progressively harsher truncation attempts before giving up.
const MAX_TRUNCATION_RETRIES: usize = 3;

/// Fraction of the remaining summarizable messages dropped per truncation attempt.
const TRUNCATION_DROP_FRACTION: f64 = 0.2;
85
/// Outcome of a successful compaction.
#[derive(Debug)]
pub struct CompactResult {
    // Number of messages removed from the session history.
    pub deleted: usize,
    // Rough token count of the stored summary (chars / 4 heuristic).
    pub summary_tokens: usize,
}
94
/// Benign reasons compaction was skipped (not errors).
#[derive(Debug)]
pub enum CompactSkip {
    // The session has tool calls awaiting results; compacting now would orphan them.
    PendingToolCalls,
    // History has too few messages to be worth compacting (payload: current length).
    TooShort(usize),
    // Even after truncation the history cannot fit the summarization budget.
    HistoryTooLarge,
}
106
107pub async fn compact_session(
112 db: &Database,
113 session_id: &str,
114 max_context_tokens: usize,
115 model_settings: &crate::config::ModelSettings,
116 provider: &Arc<RwLock<Box<dyn LlmProvider>>>,
117) -> Result<std::result::Result<CompactResult, CompactSkip>> {
118 let prov = provider.read().await;
119 compact_session_with_provider(db, session_id, max_context_tokens, model_settings, &**prov).await
120}
121
122pub async fn compact_session_with_provider(
131 db: &Database,
132 session_id: &str,
133 max_context_tokens: usize,
134 model_settings: &crate::config::ModelSettings,
135 provider: &dyn LlmProvider,
136) -> Result<std::result::Result<CompactResult, CompactSkip>> {
137 if db.has_pending_tool_calls(session_id).await.unwrap_or(false) {
139 return Ok(Err(CompactSkip::PendingToolCalls));
140 }
141
142 let history = db.load_context(session_id).await?;
143
144 if history.len() < 4 {
145 return Ok(Err(CompactSkip::TooShort(history.len())));
146 }
147
148 let preserve_count = compute_preserve_count(history.len());
152
153 let compact_count = history.len().saturating_sub(preserve_count);
154 if compact_count == 0 {
155 return Ok(Err(CompactSkip::TooShort(history.len())));
156 }
157
158 let to_compact = &history[..compact_count];
160 let conversation_text = build_conversation_text(to_compact);
161
162 tracing::info!(
163 "Compacting {compact_count}/{} messages (preserving {preserve_count})",
164 history.len(),
165 );
166
167 let text_tokens = (conversation_text.len() as f64 / crate::inference_helpers::CHARS_PER_TOKEN)
170 as usize
171 + crate::inference_helpers::SYSTEM_PROMPT_OVERHEAD;
172 let available = max_context_tokens.saturating_sub(4096);
173
174 let final_text = if text_tokens <= available {
177 conversation_text
178 } else {
179 match truncate_until_fits(to_compact, available) {
180 Some(text) => text,
181 None => return Ok(Err(CompactSkip::HistoryTooLarge)),
182 }
183 };
184
185 let summary_prompt = build_summary_prompt(&final_text);
186
187 let messages = vec![ChatMessage::text("user", &summary_prompt)];
188 let compact_settings = ModelSettings {
192 model: model_settings.model.clone(),
193 max_tokens: Some(4096),
194 temperature: Some(0.3),
195 thinking_budget: None,
196 reasoning_effort: None,
197 max_context_tokens: model_settings.max_context_tokens,
198 };
199 let response = provider.chat(&messages, &[], &compact_settings).await?;
200
201 let summary = match response.content {
202 Some(text) if !text.trim().is_empty() => text,
203 _ => bail!("LLM returned an empty summary"),
204 };
205
206 let summary = strip_analysis_block(&summary);
207 let compact_message = format!("[Compacted conversation summary]\n\n{summary}");
208 let deleted = db
209 .compact_session(session_id, &compact_message, preserve_count)
210 .await?;
211
212 record_compact_success();
213
214 Ok(Ok(CompactResult {
215 deleted,
216 summary_tokens: summary.len() / 4,
217 }))
218}
219
220fn compute_preserve_count(total: usize) -> usize {
228 if total < PARTIAL_COMPACT_THRESHOLD {
229 COMPACT_PRESERVE_COUNT
230 } else {
231 let keep = (total as f64 * (1.0 - PARTIAL_COMPACT_FRACTION)).ceil() as usize;
232 keep.max(COMPACT_PRESERVE_COUNT)
233 }
234}
235
/// Builds the one-shot summarization prompt sent to the LLM.
///
/// The prompt forbids tool calls, requests an `<analysis>` block followed by
/// a `<summary>` block (later unwrapped by `strip_analysis_block`), and
/// appends the conversation transcript after a `---` separator.
fn build_summary_prompt(conversation_text: &str) -> String {
    format!(
        "CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.\n\
         Tool calls will be REJECTED and will waste your only turn.\n\
         Your entire response must be plain text: an <analysis> block followed by a <summary> block.\n\
         \n\
         Your task is to create a detailed summary of the conversation so far, paying close \n\
         attention to the user's explicit requests and your previous actions.\n\
         This summary should be thorough in capturing technical details, code patterns, and \n\
         architectural decisions that would be essential for continuing development work \n\
         without losing context.\n\
         \n\
         Before providing your final summary, wrap your analysis in <analysis> tags to \n\
         organize your thoughts and ensure you've covered all necessary points. In your analysis:\n\
         \n\
         1. Chronologically analyze each message. For each section thoroughly identify:\n\
         - The user's explicit requests and intents\n\
         - Your approach to addressing them\n\
         - Key decisions, technical concepts and code patterns\n\
         - Specific details: file names, code snippets, function signatures, file edits\n\
         - Errors encountered and how they were fixed\n\
         - Specific user feedback, especially corrections\n\
         2. Double-check for technical accuracy and completeness.\n\
         \n\
         Your summary should include these sections:\n\
         \n\
         1. **Primary Request and Intent**: Capture ALL of the user's explicit requests in detail.\n\
         2. **Key Technical Concepts**: List all important technologies and frameworks discussed.\n\
         3. **Files and Code Sections**: Enumerate specific files examined, modified, or created. \n\
         Include code snippets where applicable and a summary of why each file matters.\n\
         4. **Errors and Fixes**: List all errors and how they were resolved. Note user feedback.\n\
         5. **Problem Solving**: Document problems solved and ongoing troubleshooting.\n\
         6. **All User Messages**: List ALL user messages (not tool results). Critical for \n\
         preserving feedback and changing intent.\n\
         7. **Pending Tasks**: Outline anything unfinished or deferred.\n\
         8. **Current Work**: Describe precisely what was being worked on immediately before \n\
         this summary. Include file names and code snippets.\n\
         9. **Optional Next Step**: Only if directly in line with the user's most recent \n\
         explicit request. Include direct quotes from the conversation to prevent drift.\n\
         \n\
         Format your response as:\n\
         \n\
         <analysis>\n\
         [Your thought process ensuring all points are covered]\n\
         </analysis>\n\
         \n\
         <summary>\n\
         1. Primary Request and Intent:\n\
         [Detailed description]\n\
         ...\n\
         </summary>\n\
         \n\
         REMINDER: Do NOT call any tools. Respond with plain text only.\n\
         \n\
         ---\n\n{conversation_text}"
    )
}
297
/// Extracts the final summary from an LLM response: removes the
/// `<analysis>…</analysis>` scratch block, unwraps `<summary>…</summary>`
/// if present, and collapses runs of blank lines. Missing or unclosed tags
/// leave the text untouched, so plain-text responses pass through unchanged.
///
/// Fix: close tags are now searched only *after* their open tag. The
/// previous global `find("</summary>")` could produce an inverted range
/// (`content_start > end`) for input like `"</summary> x <summary>y</summary>"`,
/// panicking on the slice.
pub fn strip_analysis_block(summary: &str) -> String {
    // Remove the first well-formed <analysis>…</analysis> block.
    let stripped = if let Some(start) = summary.find("<analysis>") {
        match summary[start..].find("</analysis>") {
            Some(rel) => {
                let after = start + rel + "</analysis>".len();
                format!("{}{}", &summary[..start], &summary[after..])
            }
            // Unclosed tag: leave the text intact.
            None => summary.to_string(),
        }
    } else {
        summary.to_string()
    };

    // Unwrap the <summary>…</summary> block, if both tags are present.
    let stripped = if let Some(start) = stripped.find("<summary>") {
        let content_start = start + "<summary>".len();
        // Computed before the match so no borrow of `stripped` is held
        // when the None arm moves it out.
        let close = stripped[content_start..].find("</summary>");
        match close {
            Some(rel) => stripped[content_start..content_start + rel].trim().to_string(),
            None => stripped,
        }
    } else {
        stripped
    };

    // Collapse consecutive blank lines into a single blank line.
    let mut result = String::new();
    let mut prev_empty = false;
    for line in stripped.lines() {
        let is_empty = line.trim().is_empty();
        if is_empty && prev_empty {
            continue;
        }
        if !result.is_empty() {
            result.push('\n');
        }
        result.push_str(line);
        prev_empty = is_empty;
    }
    result.trim().to_string()
}
357
358fn truncate_until_fits(history: &[crate::db::Message], available_tokens: usize) -> Option<String> {
362 let total = history.len();
363 let min_keep = COMPACT_PRESERVE_COUNT + 1;
365 if total <= min_keep {
366 return None;
367 }
368
369 let mut drop_count = 0usize;
370 for attempt in 0..MAX_TRUNCATION_RETRIES {
371 let summarizable = total.saturating_sub(drop_count);
373 let to_drop = (summarizable as f64 * TRUNCATION_DROP_FRACTION).ceil() as usize;
374 drop_count += to_drop.max(1); if total.saturating_sub(drop_count) < min_keep {
378 drop_count = total - min_keep;
379 }
380
381 let truncated = &history[drop_count..];
382 let text = build_conversation_text(truncated);
383 let text_tokens = (text.len() as f64 / crate::inference_helpers::CHARS_PER_TOKEN) as usize
384 + crate::inference_helpers::SYSTEM_PROMPT_OVERHEAD;
385
386 tracing::info!(
387 "Truncation attempt {}: dropped {drop_count}/{total} messages, \
388 ~{text_tokens} tokens (budget: {available_tokens})",
389 attempt + 1,
390 );
391
392 if text_tokens <= available_tokens {
393 return Some(text);
394 }
395 }
396
397 None
398}
399
400fn build_conversation_text(history: &[crate::db::Message]) -> String {
407 let mut text = String::new();
408 for msg in history {
409 let role = msg.role.as_str();
410 if let Some(ref content) = msg.content {
411 let truncated: String = content.chars().take(2000).collect();
412 text.push_str(&format!("[{role}]: {truncated}\n\n"));
413 }
414 if let Some(ref tool_calls) = msg.tool_calls {
415 let truncated: String = tool_calls.chars().take(500).collect();
416 text.push_str(&format!("[{role} tool_calls]: {truncated}\n\n"));
417 }
418 }
419 text
420}
421
#[cfg(test)]
mod tests {
    //! Unit tests for the circuit breaker, transcript building, summary
    //! stripping, truncation, and preserve-count heuristics.
    use super::*;
    use crate::db::Message;

    // Builds a minimal Message with only role/content/tool_calls populated.
    fn make_msg(role: &str, content: Option<&str>, tool_calls: Option<&str>) -> Message {
        Message {
            id: 0,
            session_id: String::new(),
            role: role.parse().unwrap_or(crate::db::Role::User),
            content: content.map(String::from),
            full_content: None,
            tool_calls: tool_calls.map(String::from),
            tool_call_id: None,
            prompt_tokens: None,
            completion_tokens: None,
            cache_read_tokens: None,
            cache_creation_tokens: None,
            thinking_tokens: None,
            thinking_content: None,
            created_at: None,
        }
    }

    // Breaker trips on the third consecutive failure and re-arms on reset.
    // (Only this test touches the global counter, so parallel test threads
    // do not interfere.)
    #[test]
    fn test_circuit_breaker() {
        reset_compact_failures();
        assert!(!is_compact_circuit_broken());

        assert!(!record_compact_failure());
        assert!(!is_compact_circuit_broken());

        assert!(!record_compact_failure());
        assert!(!is_compact_circuit_broken());

        assert!(record_compact_failure());
        assert!(is_compact_circuit_broken());

        reset_compact_failures();
        assert!(!is_compact_circuit_broken());
    }

    #[test]
    fn test_empty_history() {
        assert_eq!(build_conversation_text(&[]), "");
    }

    #[test]
    fn test_basic_conversation() {
        let msgs = vec![
            make_msg("user", Some("hello"), None),
            make_msg("assistant", Some("hi"), None),
        ];
        let text = build_conversation_text(&msgs);
        assert!(text.contains("[user]: hello"));
        assert!(text.contains("[assistant]: hi"));
    }

    // Per-message content is clipped to 2000 chars (plus role framing).
    #[test]
    fn test_truncates_long_content_per_message() {
        let long = "x".repeat(3000);
        let msgs = vec![make_msg("user", Some(&long), None)];
        let text = build_conversation_text(&msgs);
        assert!(text.len() < 2100);
    }

    // There is no cap on the transcript as a whole — only per message.
    #[test]
    fn test_no_total_cap() {
        let content = "y".repeat(500);
        let msgs: Vec<_> = (0..50)
            .map(|_| make_msg("user", Some(&content), None))
            .collect();
        let text = build_conversation_text(&msgs);
        assert!(text.len() > 20_000);
        assert!(!text.contains("truncated"));
    }

    // Clipping counts chars, not bytes, so a multibyte char at the boundary
    // is either kept whole or dropped — never split.
    #[test]
    fn test_multibyte_boundary_safe() {
        let mut content = "a".repeat(1999);
        content.push('\u{1f43b}');
        content.push_str("after");
        let msgs = vec![make_msg("user", Some(&content), None)];
        let text = build_conversation_text(&msgs);
        assert!(text.contains("\u{1f43b}") || !text.contains("after"));
    }

    #[test]
    fn test_tool_calls_included() {
        let msgs = vec![make_msg("assistant", None, Some("{\"name\": \"Read\"}"))];
        let text = build_conversation_text(&msgs);
        assert!(text.contains("tool_calls"));
        assert!(text.contains("Read"));
    }

    // Messages with neither content nor tool calls produce no output.
    #[test]
    fn test_none_content_skipped() {
        let msgs = vec![make_msg("tool", None, None)];
        let text = build_conversation_text(&msgs);
        assert_eq!(text, "");
    }

    #[test]
    fn test_strip_analysis_block() {
        let input = "<analysis>\nthinking here\n</analysis>\n\n<summary>\n1. Primary Request:\n Build a thing\n</summary>";
        let result = strip_analysis_block(input);
        assert!(result.contains("Primary Request"));
        assert!(!result.contains("<analysis>"));
        assert!(!result.contains("thinking here"));
        assert!(!result.contains("<summary>"));
    }

    // Plain text (no tags) passes through unchanged.
    #[test]
    fn test_strip_analysis_no_tags() {
        let input = "Just a plain summary";
        assert_eq!(strip_analysis_block(input), "Just a plain summary");
    }

    #[test]
    fn test_strip_analysis_only_summary_tags() {
        let input = "<summary>\nThe good stuff\n</summary>";
        let result = strip_analysis_block(input);
        assert_eq!(result, "The good stuff");
    }

    // With a tight budget, truncation keeps the newest messages and drops
    // the oldest.
    #[test]
    fn test_truncate_until_fits_drops_oldest() {
        let msgs: Vec<_> = (0..20)
            .map(|i| {
                make_msg(
                    "user",
                    Some(&format!("Message number {i} with some padding text here")),
                    None,
                )
            })
            .collect();

        let result = truncate_until_fits(&msgs, 250);
        assert!(result.is_some(), "should succeed after truncation");
        let text = result.unwrap();
        assert!(text.contains("Message number 19"));
        assert!(!text.contains("Message number 0"));
    }

    // 5 messages <= COMPACT_PRESERVE_COUNT + 1, so truncation refuses.
    #[test]
    fn test_truncate_until_fits_too_few_messages() {
        let msgs: Vec<_> = (0..5)
            .map(|_| make_msg("user", Some(&"x".repeat(10_000)), None))
            .collect();
        let result = truncate_until_fits(&msgs, 10);
        assert!(result.is_none());
    }

    // A generous budget still drops at least one oldest message per attempt,
    // but the newest message survives.
    #[test]
    fn test_truncate_until_fits_already_fits() {
        let msgs: Vec<_> = (0..10)
            .map(|i| make_msg("user", Some(&format!("Short {i}")), None))
            .collect();
        let result = truncate_until_fits(&msgs, 100_000);
        assert!(result.is_some());
        let text = result.unwrap();
        assert!(text.contains("Short 9"));
    }

    // Below PARTIAL_COMPACT_THRESHOLD the fixed minimum applies.
    #[test]
    fn test_compute_preserve_count_short_sessions() {
        assert_eq!(compute_preserve_count(4), 4);
        assert_eq!(compute_preserve_count(8), 4);
        assert_eq!(compute_preserve_count(11), 4);
    }

    // At/above the threshold, roughly half the history is preserved.
    #[test]
    fn test_compute_preserve_count_partial() {
        assert_eq!(compute_preserve_count(12), 6);
        assert_eq!(compute_preserve_count(20), 10);
        assert_eq!(compute_preserve_count(50), 25);
        assert_eq!(compute_preserve_count(100), 50);
    }

    #[test]
    fn test_compute_preserve_count_never_below_minimum() {
        for n in 0..200 {
            assert!(compute_preserve_count(n) >= COMPACT_PRESERVE_COUNT);
        }
    }

    // The transcript is embedded verbatim at the end of the prompt.
    #[test]
    fn test_build_summary_prompt_embeds_conversation() {
        let text = build_summary_prompt("[user]: hello\n\n[assistant]: hi\n\n");
        assert!(
            text.contains("[user]: hello"),
            "prompt should embed the conversation text verbatim"
        );
        assert!(text.contains("[assistant]: hi"));
    }

    #[test]
    fn test_build_summary_prompt_instructs_no_tool_calls() {
        let text = build_summary_prompt("some conversation");
        assert!(
            text.contains("Do NOT call any tools"),
            "prompt must forbid tool calls"
        );
        assert!(text.contains("CRITICAL"));
    }

    #[test]
    fn test_build_summary_prompt_requests_analysis_and_summary_tags() {
        let text = build_summary_prompt("some conversation");
        assert!(
            text.contains("<analysis>"),
            "prompt should ask for <analysis> block"
        );
        assert!(
            text.contains("<summary>"),
            "prompt should ask for <summary> block"
        );
    }

    // Tool-call payloads get a tighter 500-char clip than content.
    #[test]
    fn test_build_conversation_text_tool_calls_truncated_at_500() {
        let long_tc = "T".repeat(600);
        let msgs = vec![make_msg("assistant", None, Some(&long_tc))];
        let text = build_conversation_text(&msgs);
        assert!(
            text.len() <= 550,
            "tool_calls should be capped at 500 chars"
        );
    }

    // A message may contribute both a content section and a tool-call section.
    #[test]
    fn test_build_conversation_text_both_content_and_tool_calls() {
        let msgs = vec![make_msg(
            "assistant",
            Some("I will read the file"),
            Some("{\"name\": \"Read\"}"),
        )];
        let text = build_conversation_text(&msgs);
        assert!(text.contains("I will read the file"));
        assert!(text.contains("tool_calls"));
    }

    // An unclosed <analysis> tag must not delete the response body.
    #[test]
    fn test_strip_analysis_unclosed_tag_passthrough() {
        let input = "<analysis>\nthinking...\n1. Primary Request: build a thing";
        let result = strip_analysis_block(input);
        assert!(
            result.contains("thinking"),
            "unclosed analysis tag should leave text intact"
        );
    }

    // Surrounding blank lines are trimmed from the unwrapped summary.
    #[test]
    fn test_strip_analysis_trims_extra_whitespace() {
        let input = "<analysis>\nthink\n</analysis>\n\n\n\n<summary>\nClean content\n</summary>";
        let result = strip_analysis_block(input);
        assert!(!result.starts_with('\n'));
        assert!(!result.ends_with('\n'));
        assert_eq!(result, "Clean content");
    }

}