use cersei_provider::Provider;
use cersei_types::*;
/// Fraction of the context window at or above which [`should_compact`] reports
/// that compaction is due.
pub const AUTOCOMPACT_TRIGGER_FRACTION: f64 = 0.90;
/// Number of most recent messages that [`auto_compact_if_needed`] asks
/// [`compact_conversation`] to leave out of the summary.
pub const KEEP_RECENT_MESSAGES: usize = 10;
/// Number of consecutive failures at which [`AutoCompactState::on_failure`]
/// marks auto-compaction as disabled.
pub const MAX_CONSECUTIVE_FAILURES: u32 = 3;
/// Usage fraction (a 0.0-1.0 ratio, despite the `PCT` suffix) at or above which
/// [`calculate_token_warning_state`] returns [`TokenWarningState::Warning`].
pub const WARNING_PCT: f64 = 0.80;
/// Usage fraction (a 0.0-1.0 ratio, despite the `PCT` suffix) at or above which
/// [`calculate_token_warning_state`] returns [`TokenWarningState::Critical`].
pub const CRITICAL_PCT: f64 = 0.95;
/// Bookkeeping for automatic compaction attempts.
///
/// Updated through [`AutoCompactState::on_success`] and
/// [`AutoCompactState::on_failure`]; consulted by [`should_auto_compact`].
#[derive(Debug, Clone, Default)]
pub struct AutoCompactState {
/// Number of compactions recorded through `on_success`.
pub compaction_count: u32,
/// Failures recorded since the last success; reset to 0 by `on_success`.
pub consecutive_failures: u32,
/// Set once `consecutive_failures` reaches `MAX_CONSECUTIVE_FAILURES`.
/// Nothing in this file clears it again.
pub disabled: bool,
}
impl AutoCompactState {
    /// Records a successful compaction: bumps the compaction counter and
    /// clears the consecutive-failure streak. The `disabled` flag is left
    /// untouched.
    pub fn on_success(&mut self) {
        let Self {
            compaction_count,
            consecutive_failures,
            ..
        } = self;
        *compaction_count += 1;
        *consecutive_failures = 0;
    }

    /// Records a failed compaction and latches `disabled` once the streak
    /// reaches `MAX_CONSECUTIVE_FAILURES`.
    pub fn on_failure(&mut self) {
        self.consecutive_failures += 1;
        // `|=` only ever sets the flag; an already-disabled state stays disabled.
        self.disabled |= self.consecutive_failures >= MAX_CONSECUTIVE_FAILURES;
    }
}
/// Severity of context-window usage, as classified by
/// [`calculate_token_warning_state`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenWarningState {
/// Usage is below `WARNING_PCT`, or the context limit is 0.
Ok,
/// Usage is at least `WARNING_PCT` but below `CRITICAL_PCT`.
Warning,
/// Usage is at least `CRITICAL_PCT`.
Critical,
}
/// A run of consecutive messages produced by [`group_messages_for_compact`].
#[derive(Debug, Clone)]
pub struct MessageGroup {
/// The messages in this group, in their original order.
pub messages: Vec<Message>,
/// For the first tool-use block found in the group: its `file_path` input
/// when that is a string, otherwise the tool name. `None` if the group has
/// no tool-use block.
pub topic_hint: Option<String>,
/// Sum over the group's messages of text byte length divided by 4.
pub token_estimate: usize,
}
/// Outcome of [`compact_conversation`].
#[derive(Debug, Clone)]
pub struct CompactResult {
/// Number of messages passed in.
pub messages_before: usize,
/// Message count after compaction: one summary plus the kept recent
/// messages, or `messages_before` when nothing was compacted.
pub messages_after: usize,
/// Estimated tokens of the messages that were summarized (0 when nothing
/// was compacted). The size of the summary itself is not subtracted.
pub tokens_freed_estimate: u64,
/// Summary wrapped by [`format_compact_summary`]; empty when nothing was
/// compacted.
pub summary: String,
}
/// Reason a compaction was started.
///
/// NOTE(review): this enum is not referenced anywhere else in this file; the
/// variant descriptions below are read off the names only — confirm against
/// the callers.
#[derive(Debug, Clone, Copy)]
pub enum CompactTrigger {
/// Presumably: usage crossed the auto-compact threshold.
AutoThreshold,
/// Presumably: compaction was requested explicitly.
Manual,
/// Presumably: the context window was exceeded.
ContextOverflow,
}
/// Rough token estimate for `text`: its UTF-8 byte length divided by four,
/// rounded down.
///
/// Because the byte length is used, multi-byte characters count for more than
/// one byte each.
pub fn estimate_tokens(text: &str) -> u64 {
    const BYTES_PER_TOKEN: u64 = 4;
    let byte_len = text.len() as u64;
    byte_len / BYTES_PER_TOKEN
}
/// Sums [`estimate_tokens`] over the full text of every message in `messages`.
///
/// Returns 0 for an empty slice.
pub fn estimate_messages_tokens(messages: &[Message]) -> u64 {
    let mut total: u64 = 0;
    for message in messages {
        total += estimate_tokens(&message.get_all_text());
    }
    total
}
/// Returns the context-window size, in tokens, assumed for `model`.
///
/// Matching is by case-sensitive substring (prefix for the `o1`/`o3` family)
/// and the first rule that matches wins, so more specific names such as
/// `gpt-4o` are tested before `gpt-4`. Unrecognized models get 200,000.
pub fn context_window_for_model(model: &str) -> u64 {
    let mentions = |needle: &str| model.contains(needle);

    if mentions("gpt-5") || mentions("gemini") {
        return 1_000_000;
    }
    if model.starts_with("o1")
        || model.starts_with("o3")
        || mentions("opus")
        || mentions("sonnet")
        || mentions("haiku")
    {
        return 200_000;
    }
    if mentions("gpt-4o") || mentions("gpt-4-turbo") {
        return 128_000;
    }
    // Must come after the gpt-4o / gpt-4-turbo check, which it would shadow.
    if mentions("gpt-4") {
        return 8_192;
    }
    if mentions("gpt-3.5") {
        return 16_385;
    }
    if mentions("llama") {
        return 8_192;
    }
    200_000
}
/// Classifies context usage as `Ok`, `Warning` or `Critical`.
///
/// The ratio `tokens_used / context_limit` is compared against `CRITICAL_PCT`
/// first and `WARNING_PCT` second, both inclusively. A `context_limit` of 0
/// always yields `Ok`.
pub fn calculate_token_warning_state(tokens_used: u64, context_limit: u64) -> TokenWarningState {
    if context_limit == 0 {
        return TokenWarningState::Ok;
    }

    let usage = tokens_used as f64 / context_limit as f64;
    match usage {
        u if u >= CRITICAL_PCT => TokenWarningState::Critical,
        u if u >= WARNING_PCT => TokenWarningState::Warning,
        _ => TokenWarningState::Ok,
    }
}
/// Returns `true` when `tokens_used / context_limit` is at least
/// `AUTOCOMPACT_TRIGGER_FRACTION`.
///
/// A `context_limit` of 0 never triggers compaction.
pub fn should_compact(tokens_used: u64, context_limit: u64) -> bool {
    match context_limit {
        0 => false,
        limit => {
            let usage = tokens_used as f64 / limit as f64;
            usage >= AUTOCOMPACT_TRIGGER_FRACTION
        }
    }
}
/// Like [`should_compact`], but always `false` once `state.disabled` is set.
pub fn should_auto_compact(tokens_used: u64, context_limit: u64, state: &AutoCompactState) -> bool {
    // Short-circuits: the threshold is not evaluated for a disabled state.
    !state.disabled && should_compact(tokens_used, context_limit)
}
/// Returns `true` when `tokens_used / context_limit` is at least 0.98.
///
/// A `context_limit` of 0 always yields `false`.
pub fn should_context_collapse(tokens_used: u64, context_limit: u64) -> bool {
    const COLLAPSE_FRACTION: f64 = 0.98;

    match context_limit {
        0 => false,
        limit => tokens_used as f64 / limit as f64 >= COLLAPSE_FRACTION,
    }
}
/// Derives a short topic label from the first tool-use block in `messages`.
///
/// Messages and their blocks are scanned in order. For the first
/// `ContentBlock::ToolUse` found, the result is its `file_path` input when that
/// is a string, otherwise the tool name. Returns `None` when there is no
/// tool-use block.
fn extract_topic_hint(messages: &[Message]) -> Option<String> {
    for message in messages {
        for block in message.content_blocks() {
            if let ContentBlock::ToolUse { name, input, .. } = &block {
                let hint = input
                    .get("file_path")
                    .and_then(|value| value.as_str())
                    .map(|path| path.to_string())
                    .unwrap_or_else(|| name.clone());
                return Some(hint);
            }
        }
    }
    None
}
/// Splits `messages` into consecutive groups for compaction.
///
/// A group is closed after each assistant message that contains no tool use.
/// Any trailing messages that were not closed by such a message form a final
/// group. Messages are cloned into the groups; the input is not modified.
/// Returns an empty vector for empty input.
pub fn group_messages_for_compact(messages: &[Message]) -> Vec<MessageGroup> {
    let mut groups: Vec<MessageGroup> = Vec::new();
    let mut current: Vec<Message> = Vec::new();

    for msg in messages {
        current.push(msg.clone());
        if msg.role == Role::Assistant && !msg.has_tool_use() {
            // `take` hands the accumulated messages over and leaves an empty
            // buffer behind for the next group.
            groups.push(seal_message_group(std::mem::take(&mut current)));
        }
    }

    if !current.is_empty() {
        groups.push(seal_message_group(current));
    }
    groups
}

/// Builds a [`MessageGroup`] from `messages`, computing its token estimate
/// (text byte length / 4 per message) and topic hint.
fn seal_message_group(messages: Vec<Message>) -> MessageGroup {
    let token_estimate: usize = messages.iter().map(|m| m.get_all_text().len() / 4).sum();
    let topic_hint = extract_topic_hint(&messages);
    MessageGroup {
        messages,
        topic_hint,
        token_estimate,
    }
}
/// Drops all but the last `keep_n` messages.
///
/// Returns the kept messages together with the estimated token count of the
/// dropped ones. When `messages` already has `keep_n` or fewer entries it is
/// returned unchanged with an estimate of 0.
pub fn snip_compact(mut messages: Vec<Message>, keep_n: usize) -> (Vec<Message>, u64) {
    if messages.len() <= keep_n {
        return (messages, 0);
    }

    // The vector is owned, so move the tail out instead of cloning it.
    // After `split_off`, `messages` holds only the dropped prefix.
    let split_at = messages.len() - keep_n;
    let kept = messages.split_off(split_at);
    let freed = estimate_messages_tokens(&messages);
    (kept, freed)
}
/// Finds the index from which messages can be kept within `token_budget`.
///
/// Messages are accumulated from newest to oldest. The returned index is one
/// past the first message whose addition pushes the running estimate strictly
/// above the budget, so `messages[index..]` is the suffix that fit. Returns 0
/// when everything fits, and `messages.len()` when the newest message alone
/// exceeds the budget.
pub fn calculate_messages_to_keep_index(messages: &[Message], token_budget: u64) -> usize {
    let mut running_total: u64 = 0;
    let overflow_at = messages.iter().rev().position(|message| {
        running_total += estimate_tokens(&message.get_all_text());
        running_total > token_budget
    });

    match overflow_at {
        // `steps_back` counts from the end of the slice.
        Some(steps_back) => messages.len() - steps_back,
        None => 0,
    }
}
/// Removes messages made up entirely of tool results that were already seen.
///
/// Messages are visited newest to oldest. A message is dropped when its
/// content is `MessageContent::Blocks` and every block is a text tool result
/// containing a tab character whose `tool_use_id` was already recorded earlier
/// in this pass. All other messages are kept, in their original order.
///
/// Block checking stops at the first block that does not qualify, so ids of
/// later blocks in that message are not recorded.
///
/// NOTE(review): the dedupe key is `tool_use_id`, not a file path. If ids are
/// unique per tool call this never drops anything — confirm the intended key.
/// NOTE(review): a message with an empty block list is dropped, because `all`
/// on an empty iterator is true — confirm that is intended.
/// NOTE(review): the tab check presumably identifies line-numbered file-read
/// output — confirm.
pub fn collapse_read_tool_results(messages: Vec<Message>) -> Vec<Message> {
    let mut seen_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
    let mut kept_newest_first: Vec<Message> = Vec::new();

    for message in messages.into_iter().rev() {
        let superseded = if let MessageContent::Blocks(blocks) = &message.content {
            blocks.iter().all(|block| match block {
                ContentBlock::ToolResult {
                    tool_use_id,
                    content: ToolResultContent::Text(text),
                    ..
                } if text.contains('\t') => {
                    // `insert` returns false when the id was already present,
                    // which is exactly the "seen before" case.
                    !seen_ids.insert(tool_use_id.clone())
                }
                _ => false,
            })
        } else {
            false
        };

        if !superseded {
            kept_newest_first.push(message);
        }
    }

    kept_newest_first.reverse();
    kept_newest_first
}
/// Builds the instruction text that asks the model to summarize the
/// conversation.
///
/// When `custom_instructions` is `Some`, it is appended after the base prompt
/// under an "Additional context: " label.
pub fn get_compact_prompt(custom_instructions: Option<&str>) -> String {
    const BASE_PROMPT: &str = concat!(
        "Summarize the conversation so far. Focus on:\n",
        "1. Key decisions made and their rationale\n",
        "2. Files that were read, created, or modified (with paths)\n",
        "3. Tool results that are still relevant\n",
        "4. Outstanding tasks or next steps\n",
        "5. Any errors encountered and how they were resolved\n\n",
        "Be concise but preserve all actionable information. ",
        "Use bullet points. Include file paths verbatim.",
    );

    match custom_instructions {
        Some(extra) => format!("{}\n\nAdditional context: {}", BASE_PROMPT, extra),
        None => BASE_PROMPT.to_string(),
    }
}
/// Wraps a raw summary in `<context_summary>` tags with a short preamble.
///
/// Leading and trailing whitespace of `raw` is trimmed before wrapping.
pub fn format_compact_summary(raw: &str) -> String {
    const OPENING: &str = "<context_summary>\n";
    const PREAMBLE: &str = "The following is a summary of the conversation so far:\n\n";
    const CLOSING: &str = "\n</context_summary>";

    let body = raw.trim();
    let mut wrapped =
        String::with_capacity(OPENING.len() + PREAMBLE.len() + body.len() + CLOSING.len());
    wrapped.push_str(OPENING);
    wrapped.push_str(PREAMBLE);
    wrapped.push_str(body);
    wrapped.push_str(CLOSING);
    wrapped
}
/// Summarizes all but the last `keep_recent` messages through `provider`.
///
/// The older messages are rendered as a `Role: text` transcript and sent to
/// `model` in a single request together with the prompt from
/// [`get_compact_prompt`]. The streamed reply is accumulated and wrapped by
/// [`format_compact_summary`].
///
/// When `messages` has `keep_recent` or fewer entries no request is made and
/// the result carries an empty summary and 0 freed tokens.
///
/// The input slice is not modified; the caller applies the summary.
/// `tokens_freed_estimate` covers the summarized messages only and does not
/// subtract the size of the summary.
///
/// # Errors
///
/// Propagates any error from `provider.complete` or from turning the
/// accumulated stream into a response.
pub async fn compact_conversation(
    provider: &dyn Provider,
    messages: &[Message],
    model: &str,
    keep_recent: usize,
    custom_instructions: Option<&str>,
) -> Result<CompactResult> {
    let messages_before = messages.len();
    if keep_recent >= messages_before {
        return Ok(CompactResult {
            messages_before,
            messages_after: messages_before,
            tokens_freed_estimate: 0,
            summary: String::new(),
        });
    }

    let (older, recent) = messages.split_at(messages_before - keep_recent);

    // Render the older messages as "Role: text" paragraphs separated by a
    // blank line.
    let mut transcript = String::new();
    for (position, message) in older.iter().enumerate() {
        if position > 0 {
            transcript.push_str("\n\n");
        }
        let speaker = match message.role {
            Role::User => "User",
            Role::Assistant => "Assistant",
            Role::System => "System",
        };
        transcript.push_str(speaker);
        transcript.push_str(": ");
        transcript.push_str(&message.get_all_text());
    }

    let instructions = get_compact_prompt(custom_instructions);
    let user_prompt = format!(
        "Here is the conversation history to summarize:\n\n{}\n\n{}",
        transcript, instructions
    );

    let request = cersei_provider::CompletionRequest {
        model: model.to_string(),
        messages: vec![Message::user(user_prompt)],
        system: Some("You are a conversation summarizer. Be concise and preserve all actionable information.".into()),
        tools: Vec::new(),
        max_tokens: 4096,
        temperature: Some(0.0),
        stop_sequences: Vec::new(),
        options: cersei_provider::ProviderOptions::default(),
    };

    // Drain the stream completely before building the response.
    let stream = provider.complete(request).await?;
    let mut events = stream.into_receiver();
    let mut accumulator = cersei_provider::StreamAccumulator::new();
    while let Some(event) = events.recv().await {
        accumulator.process_event(event);
    }
    let response = accumulator.into_response()?;

    let summary = format_compact_summary(&response.message.get_all_text());
    Ok(CompactResult {
        messages_before,
        // One summary message replaces everything in `older`.
        messages_after: 1 + recent.len(),
        tokens_freed_estimate: estimate_messages_tokens(older),
        summary,
    })
}
/// Runs [`compact_conversation`] when usage has crossed the auto-compact
/// threshold for `model` and auto-compaction is not disabled.
///
/// Returns `None` when no compaction was attempted or when the attempt
/// failed. The outcome of an attempt is recorded in `state` through
/// `on_success` / `on_failure`; the error itself is discarded.
pub async fn auto_compact_if_needed(
    provider: &dyn Provider,
    messages: &[Message],
    model: &str,
    tokens_used: u64,
    state: &mut AutoCompactState,
) -> Option<CompactResult> {
    let context_limit = context_window_for_model(model);
    if !should_auto_compact(tokens_used, context_limit, state) {
        return None;
    }

    let outcome =
        compact_conversation(provider, messages, model, KEEP_RECENT_MESSAGES, None).await;
    if outcome.is_ok() {
        state.on_success();
    } else {
        state.on_failure();
    }
    outcome.ok()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds `n` messages, alternating user (even index) and assistant (odd
    /// index); the assistant text is deliberately longer.
    fn make_messages(n: usize) -> Vec<Message> {
        let mut out = Vec::with_capacity(n);
        for i in 0..n {
            let message = if i % 2 == 0 {
                Message::user(format!("User message {}", i))
            } else {
                Message::assistant(format!("Assistant response {} with some longer text to simulate real content that takes up tokens in the context window.", i))
            };
            out.push(message);
        }
        out
    }

    #[test]
    fn test_token_warning_ok() {
        // 25% usage.
        let state = calculate_token_warning_state(50_000, 200_000);
        assert_eq!(state, TokenWarningState::Ok);
    }

    #[test]
    fn test_token_warning_warning() {
        // 85% usage.
        let state = calculate_token_warning_state(170_000, 200_000);
        assert_eq!(state, TokenWarningState::Warning);
    }

    #[test]
    fn test_token_warning_critical() {
        // 98% usage.
        let state = calculate_token_warning_state(196_000, 200_000);
        assert_eq!(state, TokenWarningState::Critical);
    }

    #[test]
    fn test_should_compact() {
        // (tokens used out of 200_000, expected decision)
        let cases: [(u64, bool); 4] = [
            (100_000, false),
            (170_000, false),
            (185_000, true),
            (195_000, true),
        ];
        for &(used, expected) in cases.iter() {
            assert_eq!(should_compact(used, 200_000), expected);
        }
    }

    #[test]
    fn test_should_auto_compact_disabled() {
        let disabled_state = AutoCompactState {
            disabled: true,
            ..Default::default()
        };
        let decision = should_auto_compact(195_000, 200_000, &disabled_state);
        assert!(!decision);
    }

    #[test]
    fn test_circuit_breaker() {
        let mut state = AutoCompactState::default();
        for _ in 0..2 {
            state.on_failure();
        }
        assert!(!state.disabled);

        // The third consecutive failure trips the breaker.
        state.on_failure();
        assert!(state.disabled);
    }

    #[test]
    fn test_snip_compact() {
        let (kept, freed) = snip_compact(make_messages(20), 10);
        assert_eq!(kept.len(), 10);
        assert!(freed > 0);
    }

    #[test]
    fn test_snip_compact_already_small() {
        let (kept, freed) = snip_compact(make_messages(5), 10);
        assert_eq!(kept.len(), 5);
        assert_eq!(freed, 0);
    }

    #[test]
    fn test_group_messages() {
        let messages = vec![
            Message::user("Read file A"),
            Message::assistant("Contents of A"),
            Message::user("Now edit B"),
            Message::assistant("Edited B"),
        ];
        let groups = group_messages_for_compact(&messages);
        assert_eq!(groups.len(), 2);
    }

    #[test]
    fn test_estimate_tokens() {
        // 11 bytes / 4, rounded down.
        assert_eq!(estimate_tokens("hello world"), 2);
        assert_eq!(estimate_tokens(""), 0);
        let long_text = "x".repeat(1000);
        assert!(estimate_tokens(&long_text) > 200);
    }

    #[test]
    fn test_context_window_for_model() {
        assert_eq!(context_window_for_model("claude-sonnet-4-6"), 200_000);
        assert_eq!(context_window_for_model("gpt-4o"), 128_000);
        assert_eq!(context_window_for_model("gpt-4"), 8_192);
    }

    #[test]
    fn test_compact_prompt_with_instructions() {
        let prompt = get_compact_prompt(Some("Focus on API changes"));
        assert!(prompt.contains("Focus on API changes"));
        assert!(prompt.contains("Summarize"));
    }

    #[test]
    fn test_format_compact_summary() {
        let wrapped = format_compact_summary("- Did X\n- Did Y");
        assert!(wrapped.contains("<context_summary>"));
        assert!(wrapped.contains("- Did X"));
    }

    #[test]
    fn test_calculate_messages_to_keep_index() {
        let messages = make_messages(20);
        let idx = calculate_messages_to_keep_index(&messages, 100);
        assert!(idx > 0);
        assert!(idx < 20);
    }

    #[test]
    fn test_messages_to_keep_all_fit() {
        let messages = make_messages(3);
        let idx = calculate_messages_to_keep_index(&messages, 100_000);
        // Everything fits in the budget, so the keep index is the start.
        assert_eq!(idx, 0);
    }
}