use crate::config::ModelSettings;
use crate::db::Database;
use crate::persistence::Persistence;
use crate::providers::{ChatMessage, LlmProvider};
use anyhow::{Result, bail};
use std::sync::Arc;
use std::sync::atomic::{AtomicU32, Ordering};
use tokio::sync::RwLock;
/// Minimum number of trailing messages that compaction leaves in place
/// (`compute_preserve_count` never returns less than this).
pub const COMPACT_PRESERVE_COUNT: usize = 4;
/// Fraction of the history that is summarized once a session has at least
/// `PARTIAL_COMPACT_THRESHOLD` messages; the remainder (rounded up) is preserved.
const PARTIAL_COMPACT_FRACTION: f64 = 0.5;
/// History length at which `compute_preserve_count` switches from the fixed
/// `COMPACT_PRESERVE_COUNT` to the fractional rule.
const PARTIAL_COMPACT_THRESHOLD: usize = 12;
/// Number of consecutive recorded failures at which the circuit breaker reports "broken".
const MAX_CONSECUTIVE_FAILURES: u32 = 3;
/// Process-wide count of consecutive compaction failures. Incremented by
/// `record_compact_failure` and zeroed by `reset_compact_failures`.
static CONSECUTIVE_FAILURES: AtomicU32 = AtomicU32::new(0);
/// Clears the consecutive-failure counter, closing the circuit breaker again.
pub fn reset_compact_failures() {
    let cleared: u32 = 0;
    CONSECUTIVE_FAILURES.store(cleared, Ordering::Relaxed);
}
/// Reports whether the number of consecutive recorded failures has reached
/// `MAX_CONSECUTIVE_FAILURES`.
pub fn is_compact_circuit_broken() -> bool {
    let failures = CONSECUTIVE_FAILURES.load(Ordering::Relaxed);
    failures >= MAX_CONSECUTIVE_FAILURES
}
/// Records one more consecutive compaction failure.
///
/// Returns `true` when the counter, including this failure, has reached
/// `MAX_CONSECUTIVE_FAILURES`.
pub fn record_compact_failure() -> bool {
    // `fetch_add` hands back the value from *before* the increment.
    let failures_before = CONSECUTIVE_FAILURES.fetch_add(1, Ordering::Relaxed);
    let failures_now = failures_before + 1;
    failures_now >= MAX_CONSECUTIVE_FAILURES
}
/// Marks a successful compaction: any run of consecutive failures is over,
/// so the failure counter is reset.
fn record_compact_success() {
    reset_compact_failures()
}
/// Maximum number of drop-and-recheck rounds `truncate_until_fits` performs before giving up.
const MAX_TRUNCATION_RETRIES: usize = 3;
/// Fraction of the still-included messages dropped from the front of the history
/// in each truncation round (rounded up, and never fewer than one message).
const TRUNCATION_DROP_FRACTION: f64 = 0.2;
/// Outcome of a compaction that ran to completion.
#[derive(Debug)]
pub struct CompactResult {
/// Value returned by `Database::compact_session` — presumably the number of
/// messages removed; confirm against the database layer.
pub deleted: usize,
/// Rough size estimate of the stored summary: its byte length divided by 4.
pub summary_tokens: usize,
}
/// Reasons a compaction request is skipped before the LLM is called.
#[derive(Debug)]
pub enum CompactSkip {
/// The database reported pending tool calls for the session.
PendingToolCalls,
/// The history has too few messages to compact; carries the history length.
TooShort(usize),
/// The messages to summarize still exceeded the token budget after truncation.
HistoryTooLarge,
}
/// Compacts `session_id` using the shared, lock-protected provider.
///
/// Acquires a read lock on `provider` and delegates to
/// [`compact_session_with_provider`]. The read guard stays alive until that
/// call completes, so it is held for the duration of the compaction.
///
/// The return value is passed through unchanged: the outer `Result` carries
/// hard errors, the inner one distinguishes a completed compaction from a skip.
pub async fn compact_session(
    db: &Database,
    session_id: &str,
    max_context_tokens: usize,
    model_settings: &crate::config::ModelSettings,
    provider: &Arc<RwLock<Box<dyn LlmProvider>>>,
) -> Result<std::result::Result<CompactResult, CompactSkip>> {
    let guard = provider.read().await;
    // Peel the guard and the `Box` to get at the trait object itself.
    let llm: &dyn LlmProvider = &**guard;
    compact_session_with_provider(db, session_id, max_context_tokens, model_settings, llm).await
}
/// Summarizes the older part of a session's history with `provider` and asks
/// the database to replace it with that summary.
///
/// Steps:
/// 1. Skip if the session has pending tool calls or too few messages.
/// 2. Split the history into a leading slice to summarize and a trailing
///    slice to preserve (see `compute_preserve_count`).
/// 3. Render the leading slice as text; if its estimated token count exceeds
///    `max_context_tokens - 4096`, drop the oldest messages via
///    `truncate_until_fits`.
/// 4. Ask the provider for a summary, strip the `<analysis>` scratchpad, and
///    hand the result to `Database::compact_session`.
///
/// # Returns
/// * `Ok(Ok(CompactResult))` — compaction was performed.
/// * `Ok(Err(CompactSkip))` — nothing was done, for the stated reason.
///
/// # Errors
/// Returns `Err` when loading the history, calling the provider, or writing
/// the compacted session fails, or when the provider's reply contains no
/// usable summary text (before or after tag stripping). In the latter case
/// the database is left untouched.
pub async fn compact_session_with_provider(
    db: &Database,
    session_id: &str,
    max_context_tokens: usize,
    model_settings: &crate::config::ModelSettings,
    provider: &dyn LlmProvider,
) -> Result<std::result::Result<CompactResult, CompactSkip>> {
    // Best effort: a failed lookup is treated as "no pending tool calls".
    if db.has_pending_tool_calls(session_id).await.unwrap_or(false) {
        return Ok(Err(CompactSkip::PendingToolCalls));
    }

    let history = db.load_context(session_id).await?;
    if history.len() < 4 {
        return Ok(Err(CompactSkip::TooShort(history.len())));
    }

    let preserve_count = compute_preserve_count(history.len());
    let compact_count = history.len().saturating_sub(preserve_count);
    if compact_count == 0 {
        return Ok(Err(CompactSkip::TooShort(history.len())));
    }

    // Only the leading `compact_count` messages are summarized.
    let to_compact = &history[..compact_count];
    let conversation_text = build_conversation_text(to_compact);
    tracing::info!(
        "Compacting {compact_count}/{} messages (preserving {preserve_count})",
        history.len(),
    );

    let text_tokens = (conversation_text.len() as f64 / crate::inference_helpers::CHARS_PER_TOKEN)
        as usize
        + crate::inference_helpers::SYSTEM_PROMPT_OVERHEAD;
    // Leave room for the reply, which is capped at 4096 tokens below.
    let available = max_context_tokens.saturating_sub(4096);
    let final_text = if text_tokens <= available {
        conversation_text
    } else {
        match truncate_until_fits(to_compact, available) {
            Some(text) => text,
            None => return Ok(Err(CompactSkip::HistoryTooLarge)),
        }
    };

    let summary_prompt = build_summary_prompt(&final_text);
    let messages = vec![ChatMessage::text("user", &summary_prompt)];
    let compact_settings = ModelSettings {
        model: model_settings.model.clone(),
        max_tokens: Some(4096),
        temperature: Some(0.3),
        thinking_budget: None,
        reasoning_effort: None,
        max_context_tokens: model_settings.max_context_tokens,
    };

    let response = provider.chat(&messages, &[], &compact_settings).await?;
    let summary = match response.content {
        Some(text) if !text.trim().is_empty() => text,
        _ => bail!("LLM returned an empty summary"),
    };

    let summary = strip_analysis_block(&summary);
    // A reply made up solely of an `<analysis>` block, or of an empty
    // `<summary>` element, is non-empty before stripping but empty after it.
    // Storing that would replace real history with a header-only message,
    // so refuse before the database is modified.
    if summary.trim().is_empty() {
        bail!(
            "LLM returned an empty summary (nothing left after stripping <analysis>/<summary> tags)"
        );
    }

    let compact_message = format!("[Compacted conversation summary]\n\n{summary}");
    let deleted = db
        .compact_session(session_id, &compact_message, preserve_count)
        .await?;
    record_compact_success();

    Ok(Ok(CompactResult {
        deleted,
        summary_tokens: summary.len() / 4,
    }))
}
/// Decides how many trailing messages of a `total`-message history are kept
/// out of the summary.
///
/// Histories shorter than `PARTIAL_COMPACT_THRESHOLD` keep exactly
/// `COMPACT_PRESERVE_COUNT` messages. Longer ones keep the share not covered
/// by `PARTIAL_COMPACT_FRACTION`, rounded up, and never fewer than
/// `COMPACT_PRESERVE_COUNT`.
fn compute_preserve_count(total: usize) -> usize {
    if total >= PARTIAL_COMPACT_THRESHOLD {
        let retained = (total as f64 * (1.0 - PARTIAL_COMPACT_FRACTION)).ceil() as usize;
        std::cmp::max(retained, COMPACT_PRESERVE_COUNT)
    } else {
        COMPACT_PRESERVE_COUNT
    }
}
/// Builds the single user message sent to the LLM for summarization: a fixed
/// instruction block followed by the rendered conversation.
///
/// `conversation_text` is appended verbatim after the `---` separator; it is
/// never interpreted as a format string.
fn build_summary_prompt(conversation_text: &str) -> String {
    // Fixed instructions. A trailing `\` joins the next source line and drops
    // its leading whitespace, so the indentation here is not part of the text.
    const INSTRUCTIONS: &str = "CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.\n\
        Tool calls will be REJECTED and will waste your only turn.\n\
        Your entire response must be plain text: an <analysis> block followed by a <summary> block.\n\
        \n\
        Your task is to create a detailed summary of the conversation so far, paying close \n\
        attention to the user's explicit requests and your previous actions.\n\
        This summary should be thorough in capturing technical details, code patterns, and \n\
        architectural decisions that would be essential for continuing development work \n\
        without losing context.\n\
        \n\
        Before providing your final summary, wrap your analysis in <analysis> tags to \n\
        organize your thoughts and ensure you've covered all necessary points. In your analysis:\n\
        \n\
        1. Chronologically analyze each message. For each section thoroughly identify:\n\
        - The user's explicit requests and intents\n\
        - Your approach to addressing them\n\
        - Key decisions, technical concepts and code patterns\n\
        - Specific details: file names, code snippets, function signatures, file edits\n\
        - Errors encountered and how they were fixed\n\
        - Specific user feedback, especially corrections\n\
        2. Double-check for technical accuracy and completeness.\n\
        \n\
        Your summary should include these sections:\n\
        \n\
        1. **Primary Request and Intent**: Capture ALL of the user's explicit requests in detail.\n\
        2. **Key Technical Concepts**: List all important technologies and frameworks discussed.\n\
        3. **Files and Code Sections**: Enumerate specific files examined, modified, or created. \n\
        Include code snippets where applicable and a summary of why each file matters.\n\
        4. **Errors and Fixes**: List all errors and how they were resolved. Note user feedback.\n\
        5. **Problem Solving**: Document problems solved and ongoing troubleshooting.\n\
        6. **All User Messages**: List ALL user messages (not tool results). Critical for \n\
        preserving feedback and changing intent.\n\
        7. **Pending Tasks**: Outline anything unfinished or deferred.\n\
        8. **Current Work**: Describe precisely what was being worked on immediately before \n\
        this summary. Include file names and code snippets.\n\
        9. **Optional Next Step**: Only if directly in line with the user's most recent \n\
        explicit request. Include direct quotes from the conversation to prevent drift.\n\
        \n\
        Format your response as:\n\
        \n\
        <analysis>\n\
        [Your thought process ensuring all points are covered]\n\
        </analysis>\n\
        \n\
        <summary>\n\
        1. Primary Request and Intent:\n\
        [Detailed description]\n\
        ...\n\
        </summary>\n\
        \n\
        REMINDER: Do NOT call any tools. Respond with plain text only.\n\
        \n\
        ---\n\n";

    let mut prompt = String::with_capacity(INSTRUCTIONS.len() + conversation_text.len());
    prompt.push_str(INSTRUCTIONS);
    prompt.push_str(conversation_text);
    prompt
}
/// Cleans up a raw LLM summary reply.
///
/// 1. Removes the first complete `<analysis>…</analysis>` block. If the block
///    is never closed, the text is left untouched.
/// 2. If a complete `<summary>…</summary>` element is present, keeps only its
///    trimmed content.
/// 3. Collapses runs of blank lines into a single blank line and trims the
///    result.
///
/// Closing tags are only looked for *after* their opening tag. A stray
/// closing tag earlier in the text therefore cannot produce an inverted slice
/// range (which would panic) or duplicated output.
pub fn strip_analysis_block(summary: &str) -> String {
    const ANALYSIS_OPEN: &str = "<analysis>";
    const ANALYSIS_CLOSE: &str = "</analysis>";
    const SUMMARY_OPEN: &str = "<summary>";
    const SUMMARY_CLOSE: &str = "</summary>";

    let without_analysis = match summary.find(ANALYSIS_OPEN) {
        Some(start) => {
            let body_start = start + ANALYSIS_OPEN.len();
            match summary[body_start..].find(ANALYSIS_CLOSE) {
                Some(rel_end) => {
                    let after = body_start + rel_end + ANALYSIS_CLOSE.len();
                    format!("{}{}", &summary[..start], &summary[after..])
                }
                // Unclosed block: keep everything.
                None => summary.to_string(),
            }
        }
        None => summary.to_string(),
    };

    let extracted = match without_analysis.find(SUMMARY_OPEN) {
        Some(start) => {
            let content_start = start + SUMMARY_OPEN.len();
            match without_analysis[content_start..].find(SUMMARY_CLOSE) {
                Some(rel_end) => without_analysis[content_start..content_start + rel_end]
                    .trim()
                    .to_string(),
                // Unclosed element: keep the text as it is.
                None => without_analysis,
            }
        }
        None => without_analysis,
    };

    // Collapse consecutive blank (whitespace-only) lines into one.
    let mut result = String::new();
    let mut prev_empty = false;
    for line in extracted.lines() {
        let is_empty = line.trim().is_empty();
        if is_empty && prev_empty {
            continue;
        }
        if !result.is_empty() {
            result.push('\n');
        }
        result.push_str(line);
        prev_empty = is_empty;
    }
    result.trim().to_string()
}
/// Drops messages from the front of `history` until the rendered text is
/// estimated to fit within `available_tokens`.
///
/// Runs up to `MAX_TRUNCATION_RETRIES` rounds. Each round drops a further
/// `TRUNCATION_DROP_FRACTION` of the still-included messages (rounded up, at
/// least one), but always leaves `COMPACT_PRESERVE_COUNT + 1` messages. The
/// first drop happens before the first size check.
///
/// Returns the rendered text of the first round that fits, or `None` when
/// `history` has no more than `COMPACT_PRESERVE_COUNT + 1` messages or no
/// round fits.
fn truncate_until_fits(history: &[crate::db::Message], available_tokens: usize) -> Option<String> {
    let total = history.len();
    let min_keep = COMPACT_PRESERVE_COUNT + 1;
    if total <= min_keep {
        return None;
    }

    // Upper bound on how many leading messages may ever be dropped.
    let max_drop = total - min_keep;
    let mut drop_count = 0usize;

    (1..=MAX_TRUNCATION_RETRIES).find_map(|round| {
        let still_included = total.saturating_sub(drop_count);
        let step = ((still_included as f64 * TRUNCATION_DROP_FRACTION).ceil() as usize).max(1);
        drop_count = (drop_count + step).min(max_drop);

        let text = build_conversation_text(&history[drop_count..]);
        let estimated = (text.len() as f64 / crate::inference_helpers::CHARS_PER_TOKEN) as usize;
        let text_tokens = estimated + crate::inference_helpers::SYSTEM_PROMPT_OVERHEAD;
        tracing::info!(
            "Truncation attempt {}: dropped {drop_count}/{total} messages, \
             ~{text_tokens} tokens (budget: {available_tokens})",
            round,
        );

        (text_tokens <= available_tokens).then_some(text)
    })
}
fn build_conversation_text(history: &[crate::db::Message]) -> String {
let mut text = String::new();
for msg in history {
let role = msg.role.as_str();
if let Some(ref content) = msg.content {
let truncated: String = content.chars().take(2000).collect();
text.push_str(&format!("[{role}]: {truncated}\n\n"));
}
if let Some(ref tool_calls) = msg.tool_calls {
let truncated: String = tool_calls.chars().take(500).collect();
text.push_str(&format!("[{role} tool_calls]: {truncated}\n\n"));
}
}
text
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::db::Message;

    /// Serializes every test in this module that touches the process-wide
    /// failure counter. The test harness runs tests on parallel threads by
    /// default, so without this their reset/record/assert sequences can
    /// interleave and fail spuriously.
    ///
    /// NOTE(review): this only covers tests in this module; tests elsewhere
    /// in the crate that reach the counter would need the same guard.
    static CIRCUIT_TEST_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

    /// Takes the circuit-breaker test lock. A poisoned lock (an earlier test
    /// panicked while holding it) is still usable because the guarded value
    /// is `()`.
    fn lock_circuit_state() -> std::sync::MutexGuard<'static, ()> {
        CIRCUIT_TEST_LOCK
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    /// Builds a `Message` with only role, content and tool calls populated.
    fn make_msg(role: &str, content: Option<&str>, tool_calls: Option<&str>) -> Message {
        Message {
            id: 0,
            session_id: String::new(),
            role: role.parse().unwrap_or(crate::db::Role::User),
            content: content.map(String::from),
            full_content: None,
            tool_calls: tool_calls.map(String::from),
            tool_call_id: None,
            prompt_tokens: None,
            completion_tokens: None,
            cache_read_tokens: None,
            cache_creation_tokens: None,
            thinking_tokens: None,
            created_at: None,
        }
    }

    #[test]
    fn test_circuit_breaker() {
        let _guard = lock_circuit_state();
        reset_compact_failures();
        assert!(!is_compact_circuit_broken());
        assert!(!record_compact_failure());
        assert!(!is_compact_circuit_broken());
        assert!(!record_compact_failure());
        assert!(!is_compact_circuit_broken());
        assert!(record_compact_failure());
        assert!(is_compact_circuit_broken());
        reset_compact_failures();
        assert!(!is_compact_circuit_broken());
    }

    #[test]
    fn test_empty_history() {
        assert_eq!(build_conversation_text(&[]), "");
    }

    #[test]
    fn test_basic_conversation() {
        let msgs = vec![
            make_msg("user", Some("hello"), None),
            make_msg("assistant", Some("hi"), None),
        ];
        let text = build_conversation_text(&msgs);
        assert!(text.contains("[user]: hello"));
        assert!(text.contains("[assistant]: hi"));
    }

    #[test]
    fn test_truncates_long_content_per_message() {
        let long = "x".repeat(3000);
        let msgs = vec![make_msg("user", Some(&long), None)];
        let text = build_conversation_text(&msgs);
        assert!(text.len() < 2100);
    }

    #[test]
    fn test_no_total_cap() {
        let content = "y".repeat(500);
        let msgs: Vec<_> = (0..50)
            .map(|_| make_msg("user", Some(&content), None))
            .collect();
        let text = build_conversation_text(&msgs);
        assert!(text.len() > 20_000);
        assert!(!text.contains("truncated"));
    }

    #[test]
    fn test_multibyte_boundary_safe() {
        let mut content = "a".repeat(1999);
        content.push('\u{1f43b}');
        content.push_str("after");
        let msgs = vec![make_msg("user", Some(&content), None)];
        let text = build_conversation_text(&msgs);
        assert!(text.contains("\u{1f43b}") || !text.contains("after"));
    }

    #[test]
    fn test_tool_calls_included() {
        let msgs = vec![make_msg("assistant", None, Some("{\"name\": \"Read\"}"))];
        let text = build_conversation_text(&msgs);
        assert!(text.contains("tool_calls"));
        assert!(text.contains("Read"));
    }

    #[test]
    fn test_none_content_skipped() {
        let msgs = vec![make_msg("tool", None, None)];
        let text = build_conversation_text(&msgs);
        assert_eq!(text, "");
    }

    #[test]
    fn test_strip_analysis_block() {
        let input = "<analysis>\nthinking here\n</analysis>\n\n<summary>\n1. Primary Request:\n Build a thing\n</summary>";
        let result = strip_analysis_block(input);
        assert!(result.contains("Primary Request"));
        assert!(!result.contains("<analysis>"));
        assert!(!result.contains("thinking here"));
        assert!(!result.contains("<summary>"));
    }

    #[test]
    fn test_strip_analysis_no_tags() {
        let input = "Just a plain summary";
        assert_eq!(strip_analysis_block(input), "Just a plain summary");
    }

    #[test]
    fn test_strip_analysis_only_summary_tags() {
        let input = "<summary>\nThe good stuff\n</summary>";
        let result = strip_analysis_block(input);
        assert_eq!(result, "The good stuff");
    }

    #[test]
    fn test_truncate_until_fits_drops_oldest() {
        let msgs: Vec<_> = (0..20)
            .map(|i| {
                make_msg(
                    "user",
                    Some(&format!("Message number {i} with some padding text here")),
                    None,
                )
            })
            .collect();
        let result = truncate_until_fits(&msgs, 250);
        assert!(result.is_some(), "should succeed after truncation");
        let text = result.unwrap();
        assert!(text.contains("Message number 19"));
        assert!(!text.contains("Message number 0"));
    }

    #[test]
    fn test_truncate_until_fits_too_few_messages() {
        let msgs: Vec<_> = (0..5)
            .map(|_| make_msg("user", Some(&"x".repeat(10_000)), None))
            .collect();
        let result = truncate_until_fits(&msgs, 10);
        assert!(result.is_none());
    }

    #[test]
    fn test_truncate_until_fits_already_fits() {
        let msgs: Vec<_> = (0..10)
            .map(|i| make_msg("user", Some(&format!("Short {i}")), None))
            .collect();
        let result = truncate_until_fits(&msgs, 100_000);
        assert!(result.is_some());
        let text = result.unwrap();
        assert!(text.contains("Short 9"));
    }

    #[test]
    fn test_compute_preserve_count_short_sessions() {
        assert_eq!(compute_preserve_count(4), 4);
        assert_eq!(compute_preserve_count(8), 4);
        assert_eq!(compute_preserve_count(11), 4);
    }

    #[test]
    fn test_compute_preserve_count_partial() {
        assert_eq!(compute_preserve_count(12), 6);
        assert_eq!(compute_preserve_count(20), 10);
        assert_eq!(compute_preserve_count(50), 25);
        assert_eq!(compute_preserve_count(100), 50);
    }

    #[test]
    fn test_compute_preserve_count_never_below_minimum() {
        for n in 0..200 {
            assert!(compute_preserve_count(n) >= COMPACT_PRESERVE_COUNT);
        }
    }

    #[test]
    fn test_build_summary_prompt_embeds_conversation() {
        let text = build_summary_prompt("[user]: hello\n\n[assistant]: hi\n\n");
        assert!(
            text.contains("[user]: hello"),
            "prompt should embed the conversation text verbatim"
        );
        assert!(text.contains("[assistant]: hi"));
    }

    #[test]
    fn test_build_summary_prompt_instructs_no_tool_calls() {
        let text = build_summary_prompt("some conversation");
        assert!(
            text.contains("Do NOT call any tools"),
            "prompt must forbid tool calls"
        );
        assert!(text.contains("CRITICAL"));
    }

    #[test]
    fn test_build_summary_prompt_requests_analysis_and_summary_tags() {
        let text = build_summary_prompt("some conversation");
        assert!(
            text.contains("<analysis>"),
            "prompt should ask for <analysis> block"
        );
        assert!(
            text.contains("<summary>"),
            "prompt should ask for <summary> block"
        );
    }

    #[test]
    fn test_build_conversation_text_tool_calls_truncated_at_500() {
        let long_tc = "T".repeat(600);
        let msgs = vec![make_msg("assistant", None, Some(&long_tc))];
        let text = build_conversation_text(&msgs);
        assert!(
            text.len() <= 550,
            "tool_calls should be capped at 500 chars"
        );
    }

    #[test]
    fn test_build_conversation_text_both_content_and_tool_calls() {
        let msgs = vec![make_msg(
            "assistant",
            Some("I will read the file"),
            Some("{\"name\": \"Read\"}"),
        )];
        let text = build_conversation_text(&msgs);
        assert!(text.contains("I will read the file"));
        assert!(text.contains("tool_calls"));
    }

    #[test]
    fn test_strip_analysis_unclosed_tag_passthrough() {
        let input = "<analysis>\nthinking...\n1. Primary Request: build a thing";
        let result = strip_analysis_block(input);
        assert!(
            result.contains("thinking"),
            "unclosed analysis tag should leave text intact"
        );
    }

    #[test]
    fn test_strip_analysis_trims_extra_whitespace() {
        let input = "<analysis>\nthink\n</analysis>\n\n\n\n<summary>\nClean content\n</summary>";
        let result = strip_analysis_block(input);
        assert!(!result.starts_with('\n'));
        assert!(!result.ends_with('\n'));
        assert_eq!(result, "Clean content");
    }

    #[test]
    fn test_circuit_not_broken_after_reset() {
        let _guard = lock_circuit_state();
        reset_compact_failures();
        assert!(!is_compact_circuit_broken());
    }

    #[test]
    fn test_circuit_broken_after_max_failures() {
        let _guard = lock_circuit_state();
        reset_compact_failures();
        record_compact_failure();
        record_compact_failure();
        let tripped = record_compact_failure();
        assert!(tripped, "third failure should trip the circuit");
        assert!(is_compact_circuit_broken());
        reset_compact_failures();
    }

    #[test]
    fn test_reset_clears_broken_circuit() {
        let _guard = lock_circuit_state();
        reset_compact_failures();
        record_compact_failure();
        record_compact_failure();
        record_compact_failure();
        assert!(is_compact_circuit_broken());
        reset_compact_failures();
        assert!(!is_compact_circuit_broken());
    }

    #[test]
    fn test_record_failure_not_tripped_below_threshold() {
        let _guard = lock_circuit_state();
        reset_compact_failures();
        let first = record_compact_failure();
        let second = record_compact_failure();
        assert!(!first, "first failure should not trip circuit");
        assert!(!second, "second failure should not trip circuit");
        reset_compact_failures();
    }
}