use crate::config::Config;
use crate::session::chat::get_animation_manager;
use crate::session::chat::session::ChatSession;
use crate::session::estimate_tokens;
use crate::{log_debug, log_info};
use anyhow::Result;
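/// Decides whether conversation compression should run and at what target ratio.
///
/// Returns `(should_compress, target_ratio)`. Gates, in order:
/// 1. The hard cap `max_session_tokens_threshold` force-triggers compression and
///    bypasses every other gate.
/// 2. Pressure levels: a level is matched when its token threshold is exceeded,
///    then escalated by the count of consecutive compressions.
/// 3. Exponential cooldown: after a compression, the context must grow by
///    `min(10% * 2^n, 100%)` before another one is considered.
/// 4. Cache-aware cost/benefit: compress only if it saves money and would bring
///    the context back below the triggering threshold.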
pub async fn should_check_compression(session: &mut ChatSession, config: &Config) -> (bool, f64) {
let current_tokens = session.get_full_context_tokens(config).await;
if config.max_session_tokens_threshold > 0
&& current_tokens >= config.max_session_tokens_threshold
{
let ratio = config
.compression
.pressure_levels
.iter()
.map(|l| l.target_ratio)
.fold(2.0_f64, f64::max);
log_debug!(
"Max session token threshold exceeded ({} >= {}) - FORCE triggering compression with ratio {:.1}x (bypasses all gates)",
current_tokens,
config.max_session_tokens_threshold,
ratio
);
return (true, ratio);
}
if config.compression.pressure_levels.is_empty() {
log_debug!("No pressure levels configured - compression disabled");
return (false, 2.0);
}
log_debug!(
"Compression check: current_tokens={}, thresholds={:?}",
current_tokens,
config
.compression
.pressure_levels
.iter()
.map(|l| l.threshold)
.collect::<Vec<_>>()
);
let matched_level = config
.compression
.pressure_levels
.iter()
.filter(|l| current_tokens >= l.threshold)
.max_by(|a, b| a.threshold.cmp(&b.threshold));
    // The early return above guarantees pressure_levels is non-empty here.
    let num_levels = config.compression.pressure_levels.len();
    let level = match matched_level {
        Some(_) => {
            // Escalate through the configured levels as consecutive
            // compressions accumulate, wrapping around via modulo.
            let n = session.session.info.consecutive_compressions as usize;
            let escalated_idx = n % num_levels;
            &config.compression.pressure_levels[escalated_idx]
        }
        None => {
            log_debug!(
                "No threshold exceeded (current: {}, lowest threshold: {})",
                current_tokens,
                config
                    .compression
                    .pressure_levels
                    .iter()
                    .map(|l| l.threshold)
                    .min()
                    .unwrap_or(0)
            );
return (false, 2.0);
}
};
let adjusted_ratio = calculate_adaptive_compression_ratio(session, level.target_ratio);
log_debug!(
"✓ Threshold exceeded! Context tokens: {} → base compression: {:.1}x → adaptive: {:.1}x (threshold: {})",
current_tokens,
level.target_ratio,
adjusted_ratio,
level.threshold
);
let tokens_after_last = session.session.info.context_tokens_after_last_compression;
if tokens_after_last > 0 {
let n = session.session.info.consecutive_compressions;
let growth_factor = (0.10 * 2.0_f64.powi(n as i32)).min(1.0);
let min_tokens_for_recompression =
(tokens_after_last as f64 * (1.0 + growth_factor)) as usize;
if current_tokens < min_tokens_for_recompression {
let actual_growth_pct =
((current_tokens as f64 / tokens_after_last as f64 - 1.0) * 100.0) as i32;
log_debug!(
"Exponential cooldown active (n={}): need {:.0}% growth, have {}% (current={}, required={}, base={})",
n,
growth_factor * 100.0,
actual_growth_pct,
current_tokens,
min_tokens_for_recompression,
tokens_after_last
);
return (false, 2.0);
}
}
log_debug!(
"Compression cooldown passed: current_tokens={}, tokens_after_last_compression={}, consecutive={}",
current_tokens,
tokens_after_last,
session.session.info.consecutive_compressions
);
let net_benefit =
calculate_compression_net_benefit(session, config, current_tokens, adjusted_ratio).await;
if net_benefit > 0.0 {
let (start_idx, end_idx) = match find_compression_range(
&session.session.messages,
session.first_prompt_idx,
false,
) {
Ok(range) => range,
Err(e) => {
log_debug!("Failed to find compression range: {}", e);
return (false, 2.0);
}
};
if start_idx >= end_idx {
log_debug!(
"Invalid compression range ({} >= {}), setting cooldown to prevent re-analysis loop",
start_idx,
end_idx
);
session.session.info.context_tokens_after_last_compression = current_tokens;
return (false, 2.0);
}
let compressible_tokens = match calculate_range_tokens(session, start_idx + 1, end_idx) {
Ok(tokens) => tokens,
Err(e) => {
log_debug!("Failed to calculate range tokens: {}", e);
return (false, 2.0);
}
};
let estimated_compressed_size = (compressible_tokens as f64 / adjusted_ratio) as u64;
let estimated_after_compression = (current_tokens as u64)
.saturating_sub(compressible_tokens)
.saturating_add(estimated_compressed_size);
if estimated_after_compression >= level.threshold as u64 {
log_debug!(
"Compression won't bring context below threshold: {} → {} (threshold: {}). Compressible: {} → {}. Setting cooldown.",
current_tokens,
estimated_after_compression,
level.threshold,
compressible_tokens,
estimated_compressed_size
);
session.session.info.context_tokens_after_last_compression = current_tokens;
return (false, 2.0);
}
log_debug!(
"Cache-aware analysis: Net benefit ${:.5} → COMPRESS (will reduce {} → {} tokens, below threshold {})",
net_benefit,
current_tokens,
estimated_after_compression,
level.threshold
);
(true, adjusted_ratio)
} else {
log_debug!(
"Cache-aware analysis: Net benefit ${:.5} → SKIP (would lose money)",
net_benefit
);
(false, 2.0)
}
}
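/// Estimates the dollar benefit of compressing now versus continuing as-is.
///
/// Scenario A prices the estimated future turns against the current, growing
/// context; scenario B adds the one-off decision/summary call and prices the
/// same turns against the compressed context. Returns the difference (positive
/// means compression saves money), `-1.0` when pricing is unavailable, or `1.0`
/// for zero-priced models, where compression is purely for context management.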
async fn calculate_compression_net_benefit(
session: &ChatSession,
config: &crate::config::Config,
current_tokens: usize,
compression_ratio: f64,
) -> f64 {
let total_tokens = current_tokens as f64;
let headroom = total_tokens - (total_tokens / compression_ratio);
let estimated_future_turns = estimate_future_turns(session, headroom);
let compressed_tokens = total_tokens / compression_ratio;
let decision_model = &config.compression.decision.model;
let session_model = &session.model;
let decision_pricing = get_model_pricing(decision_model, config);
let session_pricing = get_model_pricing(session_model, config);
let (decision_pricing, session_pricing) = match (decision_pricing, session_pricing) {
(Some(d), Some(s)) => (d, s),
_ => {
log_debug!(
"Cannot get pricing for models: decision='{}', session='{}' - skipping compression",
decision_model,
session_model
);
            return -1.0;
        }
};
let session_is_free = session_pricing.input_price_per_1m == 0.0
&& session_pricing.output_price_per_1m == 0.0
&& session_pricing.cache_write_price_per_1m == 0.0
&& session_pricing.cache_read_price_per_1m == 0.0;
if session_is_free {
log_debug!(
"Session model '{}' has zero pricing - compressing for context management (threshold exceeded)",
session_model
);
        return 1.0;
    }
let total_api_calls = session.session.info.total_api_calls.max(1) as f64;
let avg_new_tokens_per_call =
(session.session.info.output_tokens as f64 / total_api_calls).max(2000.0);
let decision_prompt_tokens = estimate_tokens(
"Analyze the conversation history. Should older exchanges be compressed into a summary to save context space while preserving important information? Consider:\n\
- Are there repetitive or resolved topics that can be summarized?\n\
- Is there important context that must be preserved?\n\
- Would compression help focus on current topics?\n\n\
If YES, also provide a 2-3 sentence summary preserving logical structure (focus on what's needed to continue the conversation):\n\n\
[context chunks placeholder - ~500 tokens average]\n\n\
Respond with:\n\
'YES' followed by the summary on the next line, OR\n\
'NO' if compression is not beneficial."
) as f64;
let same_model = decision_model == session_model;
let decision_max_tokens = config.compression.decision.max_tokens;
let estimated_output_tokens = if decision_max_tokens > 0 {
(compressed_tokens as u64).min(decision_max_tokens as u64)
} else {
compressed_tokens as u64
};
let mut total_cost_no_compress = 0.0;
let mut base_context = total_tokens;
for _ in 0..estimated_future_turns as i32 {
        let context_cost = session_pricing.calculate_cost(
            avg_new_tokens_per_call as u64,
            0,
            base_context as u64,
            0,
        );
total_cost_no_compress += context_cost;
base_context += avg_new_tokens_per_call;
}
let ignore_cost = config.compression.decision.ignore_cost;
    let compression_cost = if same_model {
        decision_pricing.calculate_cost(
            decision_prompt_tokens as u64,
            0,
            (total_tokens - decision_prompt_tokens) as u64,
            estimated_output_tokens,
        )
    } else {
        decision_pricing.calculate_cost(total_tokens as u64, 0, 0, estimated_output_tokens)
    };
let mut total_cost_with_compress = if ignore_cost { 0.0 } else { compression_cost };
let mut base_context_compressed = compressed_tokens;
for call_num in 0..estimated_future_turns as i32 {
let (input_tokens, cache_write, cache_read) = if call_num == 0 {
(
avg_new_tokens_per_call as u64,
base_context_compressed as u64,
0,
)
} else {
(
avg_new_tokens_per_call as u64,
0,
base_context_compressed as u64,
)
};
let context_cost = session_pricing.calculate_cost(input_tokens, cache_write, cache_read, 0);
total_cost_with_compress += context_cost;
base_context_compressed += avg_new_tokens_per_call;
}
let net_benefit = total_cost_no_compress - total_cost_with_compress;
log_debug!(
"Compression analysis (REAL PRICING):\n \
Decision model: {} (input: ${:.2}/1M, output: ${:.2}/1M, cache_write: ${:.2}/1M, cache_read: ${:.2}/1M)\n \
Session model: {} (input: ${:.2}/1M, output: ${:.2}/1M, cache_write: ${:.2}/1M, cache_read: ${:.2}/1M)\n \
Models match: {} (cache reuse: {})\n \
Current: {:.0} tokens (decision prompt: ~{:.0} tokens)\n \
After compression: {:.0} tokens ({:.1}x ratio) - saves {:.0} tokens\n \
Avg new tokens/call: {:.0} (output_tokens={}, api_calls={})\n \
Future calls: {:.0}\n \
SCENARIO A (no compress): ${:.5}\n \
- Per call: cache_read(base) + input({:.0} new tokens)\n \
- Base grows: {:.0} → {:.0} tokens over {} calls\n \
SCENARIO B (compress): ${:.5}\n \
- Compression cost: ${:.5} (using {}, {} uncached, {} cached) {}\n \
- Per call: cache_read/write(base) + input({:.0} new tokens)\n \
- Base grows: {:.0} → {:.0} tokens over {} calls\n \
Net benefit: ${:.5} → {}",
decision_model,
decision_pricing.input_price_per_1m,
decision_pricing.output_price_per_1m,
decision_pricing.cache_write_price_per_1m,
decision_pricing.cache_read_price_per_1m,
session_model,
session_pricing.input_price_per_1m,
session_pricing.output_price_per_1m,
session_pricing.cache_write_price_per_1m,
session_pricing.cache_read_price_per_1m,
if same_model { "YES" } else { "NO" },
if same_model { "YES" } else { "NO" },
total_tokens,
decision_prompt_tokens,
compressed_tokens,
compression_ratio,
total_tokens - compressed_tokens,
avg_new_tokens_per_call,
session.session.info.output_tokens,
session.session.info.total_api_calls,
estimated_future_turns,
total_cost_no_compress,
avg_new_tokens_per_call,
total_tokens,
base_context,
estimated_future_turns as i32,
total_cost_with_compress,
compression_cost,
decision_model,
        if same_model {
            decision_prompt_tokens as u64
        } else {
            total_tokens as u64
        },
        if same_model {
            (total_tokens - decision_prompt_tokens) as u64
        } else {
            0
        },
if ignore_cost { "[IGNORED]" } else { "" },
avg_new_tokens_per_call,
compressed_tokens,
base_context_compressed,
estimated_future_turns as i32,
net_benefit,
if net_benefit > 0.0 {
"COMPRESS ✓"
} else {
"SKIP"
}
);
net_benefit
}
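/// Resolves pricing for a `provider:model` identifier via the provider factory.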
fn get_model_pricing(
model: &str,
_config: &crate::config::Config,
) -> Option<crate::providers::ModelPricing> {
let parts: Vec<&str> = model.split(':').collect();
if parts.len() != 2 {
log_debug!(
"Invalid model format: '{}' (expected 'provider:model')",
model
);
return None;
}
let provider_name = parts[0];
let model_name = parts[1];
let provider = crate::providers::ProviderFactory::create_provider(provider_name).ok()?;
provider.get_model_pricing(model_name)
}
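/// Scales the base ratio by session shape: an active plan or high tool-call
/// density means compressible tool output dominates, so compress harder;
/// chat-heavy sessions are compressed more gently. Sessions with fewer than
/// five API calls keep the base ratio. The result is clamped to [1.5, 4.0].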
fn calculate_adaptive_compression_ratio(session: &ChatSession, base_ratio: f64) -> f64 {
let info = &session.session.info;
let current_api_calls = info.total_api_calls as f64;
if current_api_calls < 5.0 {
return base_ratio;
}
let tool_density = info.tool_calls as f64 / current_api_calls;
let has_plan = crate::mcp::core::plan::core::has_active_plan();
let adjustment = if has_plan {
1.2
} else if tool_density > 2.5 {
1.15
} else if tool_density > 1.0 {
1.0
} else if tool_density > 0.3 {
0.9
} else {
0.8
};
let adaptive_ratio = base_ratio * adjustment;
let final_ratio = adaptive_ratio.clamp(1.5, 4.0);
crate::log_debug!(
"Adaptive compression ratio: base={:.1}, adjustment={:.2}, tool_density={:.2}, has_plan={}, final={:.1}",
base_ratio,
adjustment,
tool_density,
has_plan,
final_ratio
);
final_ratio
}
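/// Estimates how many future API calls to price into the cost/benefit analysis:
/// the headroom freed by compression divided by the measured token growth rate
/// per call, capped by the number of calls made so far (100 on a cold start),
/// corrected by the self-tuning accuracy factor, with a floor of five.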
fn estimate_future_turns(session: &ChatSession, headroom: f64) -> f64 {
let info = &session.session.info;
let api_calls = info.total_api_calls as f64;
let growth_rate = if info.compression_stats.conversation_compressions > 0 {
let calls_since = (info.total_api_calls - info.api_calls_at_last_compression).max(1) as f64;
let output_since = info
.output_tokens
.saturating_sub(info.output_tokens_at_last_compression) as f64;
(output_since / calls_since).max(1.0)
} else {
(info.output_tokens as f64 / api_calls.max(1.0)).max(1.0)
};
let physical_ceiling = headroom / growth_rate;
let estimate = if api_calls > 0.0 {
physical_ceiling.min(api_calls)
} else {
physical_ceiling.min(100.0)
};
let accuracy = calculate_self_tuning_accuracy(info);
let adjusted = (estimate * accuracy).max(5.0);
crate::log_debug!(
"Future calls estimation: api_calls={:.0}, growth_rate={:.0} tok/call ({}), \
headroom={:.0}, physical_ceiling={:.1}, symmetry={:.1}, accuracy={:.2}, final={:.0}",
api_calls,
growth_rate,
if info.compression_stats.conversation_compressions > 0 {
"incremental"
} else {
"lifetime"
},
headroom,
physical_ceiling,
if api_calls > 0.0 {
api_calls
} else {
physical_ceiling.min(100.0)
},
accuracy,
adjusted
);
adjusted
}
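/// Correction factor from the previous compression cycle: the ratio of actual
/// turns to predicted turns, clamped to [0.25, 4.0]. Neutral (1.0) when there
/// is no prior compression data.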
fn calculate_self_tuning_accuracy(info: &crate::session::SessionInfo) -> f64 {
if info.compression_stats.conversation_compressions == 0 {
        return 1.0;
    }
let predicted = info.predicted_turns_at_last_compression;
let actual = (info.total_api_calls as f64 - info.api_calls_at_last_compression as f64).max(0.0);
if predicted <= 0.0 || actual <= 0.0 {
return 1.0;
}
let ratio = actual / predicted;
crate::log_debug!(
"Self-tuning: predicted={:.1}, actual={:.1}, correction={:.2}",
predicted,
actual,
ratio
);
ratio.clamp(0.25, 4.0)
}
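/// Entry point: evaluates the compression gates and, if warranted (or forced),
/// asks the decision model for a summary, rewrites the message history, runs
/// intermediate lesson extraction when learning is enabled, and updates the
/// cooldown counters. Returns `Ok(true)` if compression was applied.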
pub async fn check_and_compress_conversation(
session: &mut ChatSession,
config: &Config,
operation_rx: tokio::sync::watch::Receiver<bool>,
force: bool,
) -> Result<bool> {
let (should_check, computed_ratio) = should_check_compression(session, config).await;
if !force && !should_check {
return Ok(false);
}
let force = force
|| (config.max_session_tokens_threshold > 0 && {
let current_tokens = session.get_full_context_tokens(config).await;
current_tokens >= config.max_session_tokens_threshold
});
let target_ratio = if force {
config
.compression
.pressure_levels
.first()
.map(|l| l.target_ratio)
.unwrap_or(2.0)
} else {
computed_ratio
};
if *operation_rx.borrow() {
return Err(anyhow::anyhow!("Operation cancelled"));
}
let animation_manager = get_animation_manager();
let current_cost = session.session.info.total_cost;
let max_threshold = config.max_session_tokens_threshold;
let current_context_tokens = session.get_full_context_tokens(config).await as u64;
animation_manager
.start_with_params(current_cost, current_context_tokens, max_threshold)
.await;
animation_manager
.set_phase("Compressing conversation…")
.await;
log_debug!("Compression check triggered - asking AI for decision and summary in one call");
let (start_idx, end_idx) =
find_compression_range(&session.session.messages, session.first_prompt_idx, force)?;
if start_idx >= end_idx {
log_debug!("No messages to compress (range invalid)");
animation_manager.clear_phase();
return Ok(false);
}
let all_user_msgs: Vec<&crate::session::Message> = session.session.messages
[start_idx + 1..=end_idx]
.iter()
.filter(|m| m.role == "user" && !m.content.trim().is_empty())
.collect();
let last_user_message = all_user_msgs.last().cloned().cloned();
let user_tasks_msgs: Vec<String> = {
let exclude_last = if all_user_msgs.len() > 1 {
&all_user_msgs[..all_user_msgs.len() - 1]
} else {
&[]
};
exclude_last
.iter()
.rev()
.take(4)
.rev()
.map(|m| {
let content = m.content.trim();
if content.len() > 200 {
format!(
"{}…",
&content[..content
.char_indices()
.take_while(|&(i, _)| i <= 200)
.last()
.map(|(i, _)| i)
.unwrap_or(200)]
)
} else {
content.to_string()
}
})
.collect()
};
let tokens_before = calculate_range_tokens(session, start_idx + 1, end_idx)?;
let messages_to_compress: Vec<crate::session::Message> =
session.session.messages[start_idx + 1..=end_idx].to_vec();
let learning_rx = operation_rx.clone();
let (should_compress, context_summary) = ask_ai_decision_and_summary(
session,
config,
&messages_to_compress,
operation_rx,
force,
target_ratio,
)
.await?;
if !should_compress {
log_debug!("AI decided compression not beneficial at this point");
animation_manager.clear_phase();
return Ok(false);
}
log_info!("AI decided to compress older conversation exchanges");
apply_compression(
session,
start_idx,
end_idx,
&context_summary,
tokens_before,
current_context_tokens,
user_tasks_msgs,
last_user_message,
config.use_long_system_cache,
)
.await?;
if config.learning.enabled {
let user_msg_count = session
.session
.messages
.iter()
.filter(|m| m.role == "user")
.count();
if user_msg_count >= config.learning.min_messages_for_intermediate {
let role = crate::config::get_thread_role().unwrap_or_default();
let project = session
.session
.info
.name
.split('-')
.nth(2)
.unwrap_or("unknown")
.to_string();
match crate::learning::extract::extract_and_store_lessons(
session,
config,
&role,
&project,
learning_rx,
)
.await
{
Ok(n) if n > 0 => log_debug!("Intermediate learning: {} lessons extracted", n),
_ => {}
}
}
}
if force {
session.session.info.consecutive_compressions = 0;
session.session.info.context_tokens_after_last_compression = 0;
log_debug!("Forced compression: cooldown counters reset (fresh session phase)");
} else {
session.session.info.consecutive_compressions += 1;
log_debug!(
"Exponential cooldown: consecutive_compressions now {} (next requires {:.0}% growth)",
session.session.info.consecutive_compressions,
(0.10 * 2.0_f64.powi(session.session.info.consecutive_compressions as i32)).min(1.0)
* 100.0
);
}
animation_manager.clear_phase();
Ok(true)
}
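/// Builds the (system, user) prompt pair for the decision model, rendering the
/// transcript with [RECENT]/[USER]/[ASSISTANT]/[TOOL CALL]/[TOOL RESULT]
/// markers and carrying forward accumulated critical knowledge.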
fn build_compression_prompt(
session: &ChatSession,
messages_to_compress: &[crate::session::Message],
force: bool,
target_ratio: f64,
) -> (String, String) {
let system_content = if force {
"You are a conversation compressor. \
The user has explicitly requested compression. You MUST produce a summary — do NOT refuse. \
Do not start with YES or NO. Just write the summary directly using the format below.\n\n\
## CRITICAL PRIORITIES\n\n\
**Priority 1 — CURRENT TASK**: The user's MOST RECENT task/request is what matters most. \
If the user pivoted to a new topic mid-conversation, the new topic IS the current intent. \
Older completed/abandoned tasks can be compressed to a single line each.\n\n\
**Priority 2 — RECENCY**: Messages marked [RECENT] represent the current state of work. \
Preserve them with the highest fidelity — quote or closely paraphrase. \
Older messages can be compressed aggressively.\n\n\
**Priority 3 — TOOL CALLS are secondary**: Summarize what was done in one line each.\n\n\
## SUMMARY FORMAT\n\n\
**SESSION CONTEXT** (1 sentence):\n\
Brief overview of the session — what brought us here. Keep it short.\n\n\
**CURRENT TASK** (1-2 sentences):\n\
What is the user working on RIGHT NOW? This is the most recent request — highlight it as the primary focus.\n\n\
**PROGRESS** (2-4 sentences):\n\
What was completed for the current task? What is in progress? What was the outcome?\n\n\
**ANALYSIS FINDINGS** (preserve conclusions — this prevents re-doing work):\n\
Capture key findings from code analysis, debugging, or investigation. Include:\n\
- What was discovered (root causes, patterns, behaviors)\n\
- Specific code locations and what was found there\n\
- Conclusions drawn from tool results\n\
This section is CRITICAL — without it, the AI will re-read the same files to rediscover the same things.\n\n\
**RECENT EXCHANGES** (preserve with high fidelity — the most recent [RECENT] messages):\n\
For each recent user/assistant pair: quote or closely paraphrase.\n\n\
**KEY ENTITIES** (preserve exactly — copy values verbatim):\n\
- Files/paths: exact file paths, line numbers, code locations\n\
- Names: identifiers, function names, variable names, config keys\n\
- Errors/issues: problems encountered and their status\n\
- Decisions: choices made with reasoning\n\n\
**NEXT STEPS** (1-2 sentences):\n\
What needs to happen next to continue the current task?\n\n\
**FILE CONTEXT — files to auto-inject after compression (IMPORTANT):**\n\
Files listed in <context> tags will be AUTO-READ from disk and injected verbatim into the compressed summary. \
This is how the session retains real file content across compressions without re-reading. \
Include any file the session is actively working on or needs to continue.\n\
<context>\n\
filepath:startline:endline\n\
</context>\n\
Rules: <context> tags required; one entry per line as filepath:N:N (no spaces); \
paths from project root; line numbers 1–10000; max 5 ranges; prioritize files being edited or analyzed.\n\n\
**CRITICAL KNOWLEDGE — survives all future compressions:**\n\
If there is critical knowledge that MUST survive future compressions \
(e.g., a key architectural decision, a non-obvious constraint, a user preference, \
analysis conclusions, root cause findings), write it in a <knowledge> tag. \
2-3 sentences MAX. Only include if truly critical — not routine progress.\n\
<knowledge>\n\
Your critical insight here (2-3 sentences max).\n\
</knowledge>\n\n\
"
} else {
"You are a conversation compressor. \
Your job is to produce a lossless summary of a conversation transcript so the session can continue \
without losing any important context.\n\n\
## CRITICAL PRIORITIES (read carefully before summarizing)\n\n\
**Priority 1 — CURRENT TASK**: The user's MOST RECENT task/request is what matters most. \
If the user pivoted to a new topic mid-conversation, the new topic IS the current intent. \
Older completed/abandoned tasks can be compressed to a single line each.\n\n\
**Priority 2 — RECENCY**: Messages marked [RECENT] represent the current state of work. \
Preserve them with the highest fidelity — quote or closely paraphrase. \
Older messages without [RECENT] can be compressed aggressively.\n\n\
**Priority 3 — TOOL CALLS are secondary**: Summarize what was done in one line each \
(e.g. 'read file X', 'ran shell command Y, got Z'). Never reproduce full tool output.\n\n\
## WHEN TO ANSWER YES vs NO\n\n\
Answer YES if there are older exchanges that can be compressed without losing information needed \
to continue. Answer NO only if the transcript is already minimal and nothing can be safely reduced.\n\n\
## SUMMARY FORMAT (use when answering YES)\n\n\
**SESSION CONTEXT** (1 sentence):\n\
Brief overview of the session — what brought us here. Keep it short.\n\n\
**CURRENT TASK** (1-2 sentences):\n\
What is the user working on RIGHT NOW? This is the most recent request — highlight it as the primary focus.\n\n\
**PROGRESS** (2-4 sentences):\n\
What was completed for the current task? What is in progress? What was the outcome?\n\n\
**ANALYSIS FINDINGS** (preserve conclusions — this prevents re-doing work):\n\
Capture key findings from code analysis, debugging, or investigation. Include:\n\
- What was discovered (root causes, patterns, behaviors)\n\
- Specific code locations and what was found there\n\
- Conclusions drawn from tool results\n\
This section is CRITICAL — without it, the AI will re-read the same files to rediscover the same things.\n\n\
**RECENT EXCHANGES** (preserve with high fidelity — the most recent [RECENT] messages):\n\
For each recent user/assistant pair: quote or closely paraphrase. Do not compress these.\n\n\
**KEY ENTITIES** (preserve exactly — copy values verbatim):\n\
- Files/paths: exact file paths, line numbers, code locations\n\
- Names: identifiers, function names, variable names, config keys\n\
- Errors/issues: problems encountered and their status\n\
- Decisions: choices made with reasoning\n\n\
**NEXT STEPS** (1-2 sentences):\n\
What needs to happen next to continue the current task?\n\n\
## RESPONSE FORMAT\n\n\
Start with YES or NO on the first line.\n\
If YES, follow immediately with the summary using the sections above:\n\n\
YES\n\
**SESSION CONTEXT**: ...\n\
**CURRENT TASK**: ...\n\
**PROGRESS**: ...\n\
**ANALYSIS FINDINGS**:\n\
- [finding 1]\n\
- [finding 2]\n\
**RECENT EXCHANGES**:\n\
- User: [question] → Assistant: [answer]\n\
**KEY ENTITIES**:\n\
- Files/paths: ...\n\
- Errors/issues: ...\n\
- Decisions: ...\n\
**NEXT STEPS**: ...\n\n\
**FILE CONTEXT — files to auto-inject after compression (IMPORTANT):**\n\
Files listed in <context> tags will be AUTO-READ from disk and injected verbatim into the compressed summary. \
This is how the session retains real file content across compressions without re-reading. \
Include any file the session is actively working on or needs to continue.\n\
<context>\n\
filepath:startline:endline\n\
</context>\n\
Rules: <context> tags required; one entry per line as filepath:N:N (no spaces); \
paths from project root; line numbers 1–10000; max 5 ranges; prioritize files being edited or analyzed.\n\n\
**CRITICAL KNOWLEDGE — survives all future compressions:**\n\
If there is critical knowledge that MUST survive future compressions \
(e.g., a key architectural decision, a non-obvious constraint, a user preference, \
analysis conclusions, root cause findings), write it in a <knowledge> tag. \
2-3 sentences MAX. Only include if truly critical — not routine progress.\n\
<knowledge>\n\
Your critical insight here (2-3 sentences max).\n\
</knowledge>\n\n\
If NO, respond with just: NO"
}
.to_string();
let total_msgs = messages_to_compress.len();
let recent_count = (total_msgs / 4).clamp(4, 8);
let recent_start = total_msgs.saturating_sub(recent_count);
let reduction_pct = ((1.0 - 1.0 / target_ratio) * 100.0) as u32;
let aggressiveness = if target_ratio >= 4.0 {
"very aggressive"
} else if target_ratio >= 2.0 {
"selective"
} else {
"gentle"
};
let mut user_content = format!(
"**COMPRESSION TARGET**: Reduce this transcript to ~{}% of its original size ({:.1}x compression). \
Be {} in what you preserve.\n\n\
**Conversation transcript to compress:**\n\
NOTE: Messages marked [RECENT] are the most recent and most important — preserve them with \
highest fidelity. [USER]/[ASSISTANT] pairs are primary signal; [TOOL CALL]/[TOOL RESULT] are \
secondary context.\n\n",
reduction_pct, target_ratio, aggressiveness,
);
if !session.critical_knowledge.is_empty() {
user_content
.push_str("**CRITICAL KNOWLEDGE (from prior compressions — MUST be preserved):**\n");
for (i, knowledge) in session.critical_knowledge.iter().enumerate() {
user_content.push_str(&format!("{}. {}\n", i + 1, knowledge));
}
user_content.push('\n');
}
let mut file_refs: Vec<String> = Vec::new();
for (idx, msg) in messages_to_compress.iter().enumerate() {
let recent = if idx >= recent_start { "[RECENT] " } else { "" };
match msg.role.as_str() {
"system" => {} "assistant" => {
let assistant_text = if msg
.content
.starts_with("## Conversation Summary [COMPRESSED:")
{
strip_file_context_from_summary(&msg.content)
} else {
msg.content.trim().to_string()
};
if !assistant_text.is_empty() {
user_content.push_str(&format!("{}[ASSISTANT]: {}\n", recent, assistant_text));
}
if let Some(calls) = msg.tool_calls.as_ref().and_then(|v| v.as_array()) {
for call in calls {
let name = call
.get("function")
.and_then(|f| f.get("name"))
.and_then(|n| n.as_str())
.unwrap_or("unknown");
let key_arg = call
.get("function")
.and_then(|f| f.get("arguments"))
.and_then(|a| {
let obj = if let Some(s) = a.as_str() {
serde_json::from_str::<serde_json::Value>(s).ok()
} else {
Some(a.clone())
};
obj.and_then(|o| {
for key in &[
"path", "paths", "query", "command", "pattern", "content",
"task",
] {
if let Some(v) = o.get(key) {
let s = match v {
serde_json::Value::String(s) => s.clone(),
serde_json::Value::Array(arr) => arr
.iter()
.filter_map(|x| x.as_str())
.take(2)
.collect::<Vec<_>>()
.join(", "),
_ => continue,
};
if !s.is_empty() {
let hint = if s.len() > 80 {
let end = s
.char_indices()
.map(|(i, _)| i)
.take_while(|&i| i <= 80)
.last()
.unwrap_or(0);
format!("{}\u{2026}", &s[..end])
} else {
s
};
return Some(hint);
}
}
}
None
})
})
.unwrap_or_default();
if key_arg.is_empty() {
user_content.push_str(&format!("{}[TOOL CALL]: {}\n", recent, name));
} else {
user_content.push_str(&format!(
"{}[TOOL CALL]: {}({})\n",
recent, name, key_arg
));
}
if let Some(args) = call.get("function").and_then(|f| f.get("arguments")) {
super::file_context::extract_file_refs_from_args(
name,
args,
&mut file_refs,
);
}
}
}
}
"tool" => {
let name = msg.name.as_deref().unwrap_or("tool");
let content = msg.content.trim();
let truncated = if content.len() > 1500 {
let head_end = content
.char_indices()
.map(|(i, _)| i)
.take_while(|&i| i <= 600)
.last()
.unwrap_or(0);
let tail_start = content
.char_indices()
.rev()
.map(|(i, _)| i)
.take_while(|&i| content.len() - i <= 900)
.last()
.unwrap_or(content.len());
if head_end < tail_start {
format!(
"{}\u{2026}[truncated]\u{2026}{}",
&content[..head_end],
&content[tail_start..]
)
} else {
content[..head_end].to_string()
}
} else {
content.to_string()
};
user_content.push_str(&format!(
"{}[TOOL RESULT: {}]: {}\n",
recent, name, truncated
));
}
_ => {
if !msg.content.trim().is_empty() {
user_content.push_str(&format!("{}[USER]: {}\n", recent, msg.content.trim()));
}
}
}
}
if !file_refs.is_empty() {
let merged_refs = super::file_context::merge_file_refs(&file_refs);
if !merged_refs.is_empty() {
user_content.push_str("\n**File references (can be re-read on demand):**\n");
for ref_str in merged_refs.iter().take(10) {
user_content.push_str(&format!("- {}\n", ref_str));
}
}
}
(system_content, user_content)
}
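/// Sends the compression prompt to the configured decision model and returns
/// the raw response, adding its cost to the session unless `ignore_cost` is set.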
async fn call_ai_for_decision(
session: &mut ChatSession,
config: &Config,
system_content: String,
user_content: String,
operation_rx: tokio::sync::watch::Receiver<bool>,
) -> Result<String> {
let now = crate::utils::time::now_secs();
let messages = vec![
crate::session::Message {
role: "system".to_string(),
content: system_content,
timestamp: now,
cached: false,
cache_ttl: None,
tool_call_id: None,
name: None,
tool_calls: None,
images: None,
videos: None,
thinking: None,
id: None,
},
crate::session::Message {
role: "user".to_string(),
content: user_content,
timestamp: now,
cached: false,
cache_ttl: None,
tool_call_id: None,
name: None,
tool_calls: None,
images: None,
videos: None,
thinking: None,
id: None,
},
];
let decision_config = &config.compression.decision;
crate::log_debug!(
"Using compression decision model '{}' (max_tokens={}, temp={}, ignore_cost={})",
decision_config.model,
decision_config.max_tokens,
decision_config.temperature,
decision_config.ignore_cost
);
let params = crate::session::ChatCompletionWithValidationParams::new(
&messages,
&decision_config.model,
decision_config.temperature,
decision_config.top_p,
decision_config.top_k,
decision_config.max_tokens,
config,
)
.with_max_retries(decision_config.max_retries)
.with_chat_session(session)
.with_cancellation_token(operation_rx);
let response = crate::session::chat_completion_with_validation(params).await?;
if !decision_config.ignore_cost {
if let Some(cost) = response.exchange.usage.as_ref().and_then(|u| u.cost) {
session.session.info.total_cost += cost;
session.estimated_cost = session.session.info.total_cost;
log_debug!(
"Compression decision cost: ${:.5} (total: ${:.5})",
cost,
session.session.info.total_cost
);
}
} else {
log_debug!("Compression decision cost ignored (ignore_cost=true)");
}
Ok(response.content)
}
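/// Parses the decision model's response. A forced run treats the whole body
/// (minus knowledge tags) as the summary; otherwise the first line carries the
/// YES/NO verdict with the summary on the following lines. Any <knowledge>
/// entries are extracted and stored as a side effect.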
fn parse_ai_response(
session: &mut ChatSession,
config: &Config,
content: &str,
force: bool,
) -> Result<(bool, String)> {
let content = content.trim();
let lines: Vec<&str> = content.lines().collect();
if lines.is_empty() {
if force {
return Err(anyhow::anyhow!(
"AI returned empty summary during forced compression"
));
}
log_debug!("AI compression decision: NO (empty response)");
return Ok((false, String::new()));
}
extract_and_store_knowledge(session, config, content);
if force {
let summary = strip_knowledge_tags(content);
log_debug!("AI forced compression summary ({} chars)", summary.len());
return Ok((true, summary));
}
let first_line = lines[0].trim().to_uppercase();
let decision = first_line.contains("YES");
if decision {
let summary = if lines.len() > 1 {
let raw = lines[1..].join("\n").trim().to_string();
strip_knowledge_tags(&raw)
} else {
String::new()
};
log_debug!(
"AI compression decision: YES with summary ({} chars)",
summary.len()
);
Ok((true, summary))
} else {
log_debug!("AI compression decision: NO");
Ok((false, String::new()))
}
}
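/// Builds the prompt, calls the decision model, and parses the verdict and
/// summary in a single round trip.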
async fn ask_ai_decision_and_summary(
session: &mut ChatSession,
config: &Config,
messages_to_compress: &[crate::session::Message],
operation_rx: tokio::sync::watch::Receiver<bool>,
force: bool,
target_ratio: f64,
) -> Result<(bool, String)> {
let (system_content, user_content) =
build_compression_prompt(session, messages_to_compress, force, target_ratio);
let response_content =
call_ai_for_decision(session, config, system_content, user_content, operation_rx).await?;
parse_ai_response(session, config, &response_content, force)
}
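/// Replaces the compressible message range with the summary entry (plus USER
/// TASKS and the active plan), re-injects the last user message, moves cache
/// checkpoints, and records metrics, cooldown, and self-tuning bookkeeping.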
#[allow(clippy::too_many_arguments)]
async fn apply_compression(
session: &mut ChatSession,
start_idx: usize,
end_idx: usize,
context_summary: &str,
tokens_before: u64,
current_context_tokens: u64,
user_tasks_msgs: Vec<String>,
last_user_message: Option<crate::session::Message>,
use_long_cache: bool,
) -> Result<()> {
let file_contexts = super::file_context::parse_file_contexts(context_summary);
let file_context_content = if !file_contexts.is_empty() {
crate::log_debug!(
"Compression: AI requested {} file context(s) for continuation",
file_contexts.len()
);
for (filepath, start, end) in &file_contexts {
crate::log_debug!(" - {} (lines {}-{})", filepath, start, end);
}
super::file_context::generate_file_context_content(&file_contexts)
} else {
String::new()
};
let compression_id = crate::mcp::core::plan::compression::get_compression_id()
.unwrap_or_else(|| "unknown".to_string());
let base_entry = format_compressed_entry_with_context(
context_summary,
&file_context_content,
compression_id,
);
let compressed_entry = if user_tasks_msgs.is_empty() {
base_entry
} else {
let user_tasks = user_tasks_msgs
.iter()
.enumerate()
.map(|(i, msg)| format!("{}. {}", i + 1, msg))
.collect::<Vec<_>>()
.join("\n");
format!("## USER TASKS\n{}\n\n{}", user_tasks, base_entry)
};
let compressed_entry = match crate::mcp::core::plan::core::get_current_plan_display().await {
Ok(plan_display) => format!(
"{}\n\nCurrent plan we are working on:\n<plan>\n{}\n</plan>",
compressed_entry,
plan_display.trim()
),
Err(_) => compressed_entry,
};
let tokens_after = estimate_tokens(&compressed_entry) as u64;
let (messages_removed, _) = session.remove_messages_in_range(start_idx, end_idx)?;
let supports_caching = crate::session::model_supports_caching(&session.session.info.model);
if supports_caching {
for (i, msg) in session.session.messages.iter_mut().enumerate() {
if i == start_idx {
msg.cached = true;
msg.cache_ttl = if use_long_cache {
Some("1h".to_string())
} else {
None
};
} else if msg.cached && msg.role != "system" {
msg.cached = false;
msg.cache_ttl = None;
}
}
}
    let now = crate::utils::time::now_secs();
let summary_msg = crate::session::Message {
role: "assistant".to_string(),
content: compressed_entry,
timestamp: now,
cached: false,
name: Some("plan_compression".to_string()),
..Default::default()
};
session.session.messages.insert(start_idx + 1, summary_msg);
let user_msg = match last_user_message {
Some(mut msg) => {
msg.cached = supports_caching;
msg
}
None => crate::session::Message {
role: "user".to_string(),
content: "Please continue.".to_string(),
timestamp: now,
cached: supports_caching,
..Default::default()
},
};
session.session.messages.insert(start_idx + 2, user_msg);
log_debug!(
"Re-injected last user message after compressed summary (USER TASKS: {})",
user_tasks_msgs.len()
);
session.first_prompt_idx = Some(start_idx);
let tokens_saved = tokens_before.saturating_sub(tokens_after);
let metrics = crate::mcp::core::plan::compression::CompressionMetrics::new(
messages_removed,
tokens_saved,
tokens_before,
);
crate::session::chat::cost_tracker::CostTracker::display_compression_result(
"Conversation",
&metrics,
);
session.session.info.compression_stats.add_compression(
crate::session::CompressionKind::Conversation,
messages_removed,
tokens_saved,
);
let post_compression_tokens = current_context_tokens.saturating_sub(tokens_saved);
session.session.info.context_tokens_after_last_compression = post_compression_tokens as usize;
let estimated_future_turns = estimate_future_turns(session, tokens_saved as f64);
let api_calls_at_compression = session.session.info.total_api_calls;
session.session.info.predicted_turns_at_last_compression = estimated_future_turns;
session.session.info.api_calls_at_last_compression = api_calls_at_compression;
session.session.info.output_tokens_at_last_compression = session.session.info.output_tokens;
log_debug!(
"Compression cooldown set: post_compression_tokens={}, consecutive={}, requires ≥{:.0}% growth before next compression",
post_compression_tokens,
session.session.info.consecutive_compressions,
(0.10 * 2.0_f64.powi(session.session.info.consecutive_compressions as i32)).min(1.0) * 100.0
);
let _ = crate::session::logger::log_compression_point(
&session.session.info.name,
"conversation",
messages_removed,
tokens_saved,
&session.session.messages,
);
session.session.info.current_non_cached_tokens = 0;
session.session.info.current_total_tokens = 0;
    session.session.info.last_cache_checkpoint_time = crate::utils::time::now_secs();
Ok(())
}
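/// Assembles the final compressed summary entry, prefixing the compression id
/// and appending auto-expanded file context when present.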
fn format_compressed_entry_with_context(
context: &str,
file_context: &str,
compression_id: String,
) -> String {
let mut sections = Vec::new();
if !context.is_empty() {
sections.push(context.to_string());
}
if !file_context.is_empty() {
sections.push(format!(
"**FILE CONTEXT** (auto-expanded):\n{}",
file_context
));
}
format!(
"## Conversation Summary [COMPRESSED: {}]\n\n{}",
compression_id,
sections.join("\n\n"),
)
}
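/// Drops a previously auto-expanded FILE CONTEXT section from an earlier
/// compressed summary so stale file content is not fed into the next
/// compression prompt.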
fn strip_file_context_from_summary(summary: &str) -> String {
const SENTINEL: &str = "\n\n**FILE CONTEXT** (auto-expanded):";
if let Some(pos) = summary.find(SENTINEL) {
summary[..pos].trim().to_string()
} else {
summary.trim().to_string()
}
}
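/// Extracts <knowledge> entries from the response, appends them to the
/// session's critical knowledge, and trims the list to the configured
/// retention limit.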
fn extract_and_store_knowledge(session: &mut ChatSession, config: &Config, content: &str) {
let knowledge_entries = parse_knowledge_tags(content);
if knowledge_entries.is_empty() {
return;
}
let retention_limit = config.compression.knowledge_retention;
for entry in &knowledge_entries {
log_debug!("Extracted critical knowledge: {}", entry);
session.critical_knowledge.push(entry.clone());
let _ = crate::session::logger::log_knowledge_entry(&session.session.info.name, entry);
}
if retention_limit > 0 && session.critical_knowledge.len() > retention_limit {
let drain_count = session.critical_knowledge.len() - retention_limit;
session.critical_knowledge.drain(..drain_count);
log_debug!(
"Trimmed critical knowledge to {} entries (retention limit)",
retention_limit
);
}
log_info!(
"Stored {} new critical knowledge entries ({} total)",
knowledge_entries.len(),
session.critical_knowledge.len()
);
}
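/// Collects the trimmed contents of every well-formed
/// <knowledge>…</knowledge> pair, in order of appearance.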
fn parse_knowledge_tags(content: &str) -> Vec<String> {
let mut entries = Vec::new();
let mut search_from = 0;
while let Some(start) = content[search_from..].find("<knowledge>") {
let abs_start = search_from + start + "<knowledge>".len();
if let Some(end) = content[abs_start..].find("</knowledge>") {
let abs_end = abs_start + end;
let entry = content[abs_start..abs_end].trim().to_string();
if !entry.is_empty() {
entries.push(entry);
}
search_from = abs_end + "</knowledge>".len();
} else {
break;
}
}
entries
}
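/// Removes all <knowledge>…</knowledge> spans, leaving the rest of the text
/// intact.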
fn strip_knowledge_tags(content: &str) -> String {
let mut result = content.to_string();
while let Some(start) = result.find("<knowledge>") {
if let Some(end) = result[start..].find("</knowledge>") {
let abs_end = start + end + "</knowledge>".len();
result = format!("{}{}", &result[..start], &result[abs_end..]);
} else {
break;
}
}
result.trim().to_string()
}
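/// Finds the inclusive `(start_idx, end_idx)` anchor range for compression.
/// Messages strictly after `start_idx` up to `end_idx` are compressible; the
/// system prompt, bootstrap messages, and tool results belonging to the anchor
/// are protected. Returns `(0, 0)` when there is too little conversation to
/// compress.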
fn find_compression_range(
messages: &[crate::session::Message],
first_prompt_idx: Option<usize>,
force: bool,
) -> Result<(usize, usize)> {
let system_idx = messages
.iter()
.position(|m| m.role == "system")
.unwrap_or(0);
let mut start_idx = match first_prompt_idx {
Some(idx) => {
let has_subsequent_user = messages.iter().skip(idx + 1).any(|m| m.role == "user");
if has_subsequent_user
&& idx > 0 && messages.get(idx - 1).is_some_and(|m| m.role == "user")
{
idx - 1
} else {
idx
}
}
None => {
let mut idx = system_idx + 1;
let has_welcome = idx < messages.len()
&& messages[idx].role == "assistant"
&& messages[idx].tool_calls.is_none();
if has_welcome {
idx += 1;
}
if has_welcome
&& idx < messages.len()
&& messages[idx].role == "user"
&& (idx + 1 >= messages.len() || messages[idx + 1].role == "assistant")
{
idx += 1;
}
idx
}
};
if let Some(anchor) = messages.get(start_idx) {
if anchor.role == "assistant" && anchor.tool_calls.is_some() {
let mut next = start_idx + 1;
while next < messages.len() && messages[next].role == "tool" {
next += 1;
}
if next > start_idx + 1 && next < messages.len() {
start_idx = next;
}
}
}
    // saturating_sub guards the (theoretical) empty-message-list case.
    let end_idx = messages.len().saturating_sub(1);
let min_conv = if force { 3 } else { 5 };
let conv_count = messages
.iter()
.skip(start_idx)
.filter(|m| m.role == "user" || m.role == "assistant")
.count();
if conv_count < min_conv {
return Ok((0, 0));
}
if start_idx >= end_idx {
return Ok((0, 0));
}
Ok((start_idx, end_idx))
}
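/// Sums estimated tokens over the inclusive message range `[start_idx, end_idx]`.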
fn calculate_range_tokens(session: &ChatSession, start_idx: usize, end_idx: usize) -> Result<u64> {
let mut total_tokens = 0u64;
if start_idx >= session.session.messages.len() {
return Err(anyhow::anyhow!("Invalid start_index in range"));
}
if end_idx >= session.session.messages.len() {
return Err(anyhow::anyhow!("Invalid end_index in range"));
}
for i in start_idx..=end_idx {
if let Some(message) = session.session.messages.get(i) {
let tokens = crate::session::estimate_message_tokens(message) as u64;
total_tokens += tokens;
}
}
Ok(total_tokens)
}
#[cfg(test)]
mod tests {
use super::{find_compression_range, strip_file_context_from_summary};
use crate::session::Message;
use serde_json::json;
fn msg(role: &str) -> Message {
Message {
role: role.to_string(),
content: String::new(),
..Default::default()
}
}
#[test]
fn extends_range_to_include_tool_results() {
let mut messages = Vec::new();
messages.push(msg("system"));
messages.push(msg("user")); let mut assistant1 = msg("assistant"); assistant1.tool_calls = Some(json!([
{"id": "call_1", "type": "function", "function": {"name": "tool1"}}
]));
messages.push(assistant1);
        let mut tool1 = msg("tool");
        tool1.tool_call_id = Some("call_1".to_string());
messages.push(tool1);
messages.push(msg("user")); messages.push(msg("assistant")); messages.push(msg("user")); messages.push(msg("assistant")); messages.push(msg("user")); messages.push(msg("assistant"));
let (start_idx, end_idx) = find_compression_range(&messages, None, false).unwrap();
assert_eq!(start_idx, 1);
assert_eq!(end_idx, 9, "compress-all: end_idx = last message");
}
#[test]
fn extends_when_ending_on_assistant_with_tools() {
        let mut messages = vec![msg("system"), msg("user"), msg("assistant"), msg("user")];
        let mut assistant_with_tools = msg("assistant");
        assistant_with_tools.tool_calls = Some(json!([
{"id": "call_1", "type": "function", "function": {"name": "tool1"}}
]));
messages.push(assistant_with_tools);
        let mut tool1 = msg("tool");
        tool1.tool_call_id = Some("call_1".to_string());
messages.push(tool1);
messages.push(msg("user")); messages.push(msg("assistant")); messages.push(msg("user")); messages.push(msg("assistant"));
let (start_idx, end_idx) = find_compression_range(&messages, None, false).unwrap();
assert_eq!(start_idx, 1);
assert_eq!(end_idx, 9, "compress-all: end_idx = last message");
}
#[test]
fn handles_multiple_assistants_with_tools() {
let mut messages = Vec::new();
messages.push(msg("system"));
messages.push(msg("user"));
        let mut assistant1 = msg("assistant");
        assistant1.tool_calls = Some(json!([
{"id": "call_1", "type": "function", "function": {"name": "tool1"}}
]));
messages.push(assistant1);
        let mut tool1 = msg("tool");
        tool1.tool_call_id = Some("call_1".to_string());
messages.push(tool1);
        let mut assistant2 = msg("assistant");
        assistant2.tool_calls = Some(json!([
{"id": "call_2", "type": "function", "function": {"name": "tool2"}}
]));
messages.push(assistant2);
        let mut tool2 = msg("tool");
        tool2.tool_call_id = Some("call_2".to_string());
messages.push(tool2);
messages.push(msg("user")); messages.push(msg("assistant")); messages.push(msg("user")); messages.push(msg("assistant")); messages.push(msg("user"));
let (start_idx, end_idx) = find_compression_range(&messages, None, false).unwrap();
assert_eq!(start_idx, 1);
assert_eq!(end_idx, 10, "compress-all: end_idx = last message");
}
#[test]
fn start_boundary_must_not_orphan_initial_tool_sequence() {
let mut messages = Vec::new();
messages.push(msg("system"));
        let mut assistant_with_tools = msg("assistant");
        assistant_with_tools.tool_calls = Some(json!([
{"id": "call_1", "type": "function", "function": {"name": "tool1"}}
]));
messages.push(assistant_with_tools);
        let mut tool1 = msg("tool");
        tool1.tool_call_id = Some("call_1".to_string());
messages.push(tool1);
messages.push(msg("user")); messages.push(msg("assistant")); messages.push(msg("user")); messages.push(msg("assistant")); messages.push(msg("user")); messages.push(msg("assistant")); let (start_idx, end_idx) = find_compression_range(&messages, Some(3), false).unwrap();
assert_eq!(
start_idx, 3,
"start_idx must equal first_prompt_idx (INCLUSIVE boundary)"
);
assert!(
end_idx >= 4,
"range should start compressing only after first_prompt_idx"
);
}
#[test]
fn anchor_with_tool_calls_must_advance_past_tool_results() {
let mut messages = Vec::new();
messages.push(msg("system"));
        let mut assistant = msg("assistant");
        assistant.tool_calls = Some(json!([
{"id": "call_A", "type": "function", "function": {"name": "view_signatures", "arguments": "{}"}},
{"id": "call_B", "type": "function", "function": {"name": "view", "arguments": "{}"}}
]));
messages.push(assistant);
        let mut tool_a = msg("tool");
        tool_a.tool_call_id = Some("call_A".to_string());
tool_a.name = Some("view_signatures".to_string());
messages.push(tool_a);
        let mut tool_b = msg("tool");
        tool_b.tool_call_id = Some("call_B".to_string());
tool_b.name = Some("view".to_string());
messages.push(tool_b);
messages.push(msg("assistant")); messages.push(msg("user")); messages.push(msg("assistant")); messages.push(msg("user")); messages.push(msg("assistant")); messages.push(msg("user")); messages.push(msg("assistant"));
let (start_idx, end_idx) = find_compression_range(&messages, None, false).unwrap();
assert!(
start_idx >= 4,
"start_idx must advance past tool results to avoid orphaning tool_calls. Got start_idx={start_idx}"
);
assert!(
end_idx > start_idx,
"end_idx must be after start_idx for a valid range. Got start={start_idx}, end={end_idx}"
);
for msg in messages.iter().take(end_idx + 1).skip(start_idx + 1) {
if msg.role == "tool" {
if let Some(ref tc_id) = msg.tool_call_id {
assert!(
tc_id != "call_A" && tc_id != "call_B",
"Drain range must not include tool results for anchor's tool_calls. Found {tc_id}"
);
}
}
}
}
#[test]
fn anchor_with_tool_calls_and_first_prompt_idx() {
let mut messages = Vec::new();
messages.push(msg("system")); messages.push(msg("user"));
let mut assistant = msg("assistant"); assistant.tool_calls = Some(json!([
{"id": "call_X", "type": "function", "function": {"name": "shell", "arguments": "{}"}}
]));
messages.push(assistant);
        let mut tool_x = msg("tool");
        tool_x.tool_call_id = Some("call_X".to_string());
tool_x.name = Some("shell".to_string());
messages.push(tool_x);
messages.push(msg("assistant")); messages.push(msg("user")); messages.push(msg("assistant")); messages.push(msg("user")); messages.push(msg("assistant")); messages.push(msg("user")); messages.push(msg("assistant"));
let (start_idx, end_idx) = find_compression_range(&messages, Some(2), false).unwrap();
assert_eq!(
start_idx, 1,
"anchor moves back to user at idx 1 — tool_calls asst at 2 is in drain range"
);
assert!(end_idx > start_idx, "must have valid range");
assert!(end_idx >= 3, "drain must include tool result at idx 3");
}
#[test]
fn bootstrap_preserved_when_first_prompt_idx_is_none_no_instructions() {
let messages = vec![
msg("system"), msg("assistant"), msg("user"), msg("assistant"), msg("user"), msg("assistant"), msg("user"), msg("assistant"), msg("user"), msg("assistant"), ];
let (start_idx, end_idx) = find_compression_range(&messages, None, false).unwrap();
assert!(
start_idx >= 2,
"start_idx must be >= 2 to protect system and welcome. Got {start_idx}"
);
assert!(end_idx > start_idx, "must have valid range");
assert!(
start_idx + 1 > 1,
"drain range must not include welcome message at index 1"
);
}
#[test]
fn bootstrap_preserved_when_first_prompt_idx_is_none_with_instructions() {
let messages = vec![
msg("system"), msg("assistant"), msg("user"), msg("assistant"), msg("user"), msg("assistant"), msg("user"), msg("assistant"), msg("user"), msg("assistant"), ];
let (start_idx, end_idx) = find_compression_range(&messages, None, false).unwrap();
assert!(
start_idx >= 3,
"start_idx must be >= 3 to protect system, welcome, and instructions. Got {start_idx}"
);
assert!(end_idx > start_idx, "must have valid range");
}
#[test]
fn bootstrap_preserved_system_message_never_in_range() {
let mut messages = Vec::new();
messages.push(msg("system")); messages.push(msg("assistant")); for _ in 0..10 {
messages.push(msg("user"));
messages.push(msg("assistant"));
}
let (start_none, _end_none) = find_compression_range(&messages, None, false).unwrap();
assert!(start_none > 0, "system message at 0 must not be start_idx");
let (start_some, end_some) = find_compression_range(&messages, Some(1), false).unwrap();
assert!(start_some >= 1, "start_idx must be >= 1");
assert!(end_some > start_some);
}
#[test]
fn bootstrap_with_tool_calls_in_welcome_response() {
let mut messages = Vec::new();
messages.push(msg("system")); messages.push(msg("assistant")); messages.push(msg("user"));
let mut assistant_tc = msg("assistant"); assistant_tc.tool_calls = Some(serde_json::json!([
{"id": "call_1", "type": "function", "function": {"name": "view", "arguments": "{}"}}
]));
messages.push(assistant_tc);
        let mut tool = msg("tool");
        tool.tool_call_id = Some("call_1".to_string());
messages.push(tool);
messages.push(msg("assistant")); messages.push(msg("user")); messages.push(msg("assistant")); messages.push(msg("user")); messages.push(msg("assistant")); messages.push(msg("user")); messages.push(msg("assistant"));
let (start_idx, end_idx) = find_compression_range(&messages, None, false).unwrap();
assert!(
start_idx >= 5,
"start_idx must advance past bootstrap AND tool results. Got {start_idx}"
);
assert!(
end_idx > start_idx,
"must have valid range. Got start={start_idx}, end={end_idx}"
);
}
#[test]
fn calculate_range_tokens_must_match_removal_range() {
use crate::session::estimate_message_tokens;
let mut messages = Vec::new();
messages.push(msg("system"));
        let mut msg1 = msg("user");
        msg1.content = "x".repeat(100);
        messages.push(msg1);
        let mut msg2 = msg("assistant");
        msg2.content = "y".repeat(200);
        messages.push(msg2);
        let mut msg3 = msg("user");
        msg3.content = "z".repeat(300);
        messages.push(msg3);
        let mut msg4 = msg("assistant");
        msg4.content = "a".repeat(400);
        messages.push(msg4);
        messages.push(msg("user"));
        messages.push(msg("assistant"));
        messages.push(msg("user"));
        messages.push(msg("assistant"));
let (start_idx, end_idx) = find_compression_range(&messages, None, false).unwrap();
assert!(start_idx < end_idx, "Range must be valid");
let expected_tokens: u64 = messages[(start_idx + 1)..=end_idx]
.iter()
.map(|m| estimate_message_tokens(m) as u64)
.sum();
let chunked_tokens: u64 = messages[start_idx..=end_idx]
.iter()
.map(|m| estimate_message_tokens(m) as u64)
.sum();
if expected_tokens != chunked_tokens {
let start_msg_tokens = estimate_message_tokens(&messages[start_idx]) as u64;
assert_eq!(
chunked_tokens - expected_tokens,
start_msg_tokens,
"The difference should be exactly the tokens in start_idx message"
);
}
}
#[test]
fn bug_proof_token_mismatch_causes_zero_savings() {
use crate::session::estimate_message_tokens;
let mut messages = Vec::new();
messages.push(msg("system"));
        let mut large_msg = msg("user");
        large_msg.content = "x".repeat(4000);
        messages.push(large_msg);
        let mut small1 = msg("assistant");
        small1.content = "y".repeat(40);
        messages.push(small1);
        let mut small2 = msg("user");
        small2.content = "z".repeat(40);
        messages.push(small2);
        let mut small3 = msg("assistant");
        small3.content = "a".repeat(40);
        messages.push(small3);
        messages.push(msg("user"));
        messages.push(msg("assistant"));
        messages.push(msg("user"));
        messages.push(msg("assistant"));
let (start_idx, end_idx) = find_compression_range(&messages, None, false).unwrap();
        assert_eq!(start_idx, 1);
        assert_eq!(end_idx, 8);
let tokens_counted_by_function: u64 = messages[(start_idx + 1)..=end_idx]
.iter()
.map(|m| estimate_message_tokens(m) as u64)
.sum();
let tokens_in_chunking: u64 = messages[start_idx..=end_idx]
.iter()
.map(|m| estimate_message_tokens(m) as u64)
.sum();
let large_msg_tokens = estimate_message_tokens(&messages[start_idx]) as u64;
println!("Large message tokens: {}", large_msg_tokens);
println!("Tokens counted by function: {}", tokens_counted_by_function);
println!("Tokens in chunking: {}", tokens_in_chunking);
assert_eq!(
tokens_in_chunking,
tokens_counted_by_function + large_msg_tokens,
"Chunking includes the large message that wasn't counted!"
);
assert!(
large_msg_tokens > tokens_counted_by_function,
"Large message ({}) should have more tokens than all small messages combined ({})",
large_msg_tokens,
tokens_counted_by_function
);
}
#[test]
fn bug_proof_insufficient_compression_triggers_loop() {
let full_context_tokens = 55_000u64;
let threshold = 50_000u64;
        let non_compressible_tokens = 52_000u64;
        let compressible_tokens = 3_000u64;
let compression_ratio = 2.0;
assert_eq!(
full_context_tokens,
non_compressible_tokens + compressible_tokens
);
let compressed_tokens = (compressible_tokens as f64 / compression_ratio) as u64;
let tokens_after_compression = non_compressible_tokens + compressed_tokens;
assert!(
tokens_after_compression > threshold,
"Compression didn't bring context below threshold: {} > {}",
tokens_after_compression,
threshold
);
}
#[test]
fn bug_proof_compression_should_verify_benefit() {
let threshold = 50_000u64;
let system_tokens = 5_000u64;
let tools_tokens = 30_000u64;
let recent_4_messages_tokens = 20_000u64;
let old_compressible_tokens = 2_000u64;
let non_compressible = system_tokens + tools_tokens + recent_4_messages_tokens;
let full_context = non_compressible + old_compressible_tokens;
assert!(full_context > threshold, "Triggers compression");
let best_case_compressed = old_compressible_tokens / 10;
let best_case_result = non_compressible + best_case_compressed;
assert!(
best_case_result > threshold,
"Non-compressible portion alone exceeds threshold: {} > {}",
best_case_result,
threshold
);
}
#[test]
fn test_cooldown_prevents_premature_recompression() {
let tokens_after_compression: usize = 50_000;
let current_tokens_52k: usize = 52_000;
let min_required = (tokens_after_compression as f64 * 1.1) as usize;
assert!(
current_tokens_52k < min_required,
"Cooldown should block at 52k: {} < {} (need 10% growth)",
current_tokens_52k,
min_required
);
let current_tokens_54k: usize = 54_999;
assert!(
current_tokens_54k < min_required,
"Cooldown should still block at 54,999: {} < {}",
current_tokens_54k,
min_required
);
let current_tokens_55k: usize = 55_000;
assert!(
current_tokens_55k >= min_required,
"Cooldown should pass at 55k: {} >= {}",
current_tokens_55k,
min_required
);
let current_tokens_60k: usize = 60_000;
assert!(
current_tokens_60k >= min_required,
"Compression should be allowed at 60k: {} >= {}",
current_tokens_60k,
min_required
);
}
#[test]
fn test_cooldown_default_allows_first_compression() {
        let tokens_after_compression: usize = 0;
        let current_tokens: usize = 60_000;
let cooldown_active = tokens_after_compression > 0
&& current_tokens < (tokens_after_compression as f64 * 1.1) as usize;
assert!(
!cooldown_active,
"First compression should be allowed when watermark is 0"
);
}
#[test]
fn test_cooldown_scales_with_post_compression_size() {
let small_watermark: usize = 20_000;
let small_threshold = (small_watermark as f64 * 1.1) as usize;
assert_eq!(small_threshold, 22_000, "Small: need 22k");
let medium_watermark: usize = 80_000;
let medium_threshold = (medium_watermark as f64 * 1.1) as usize;
assert_eq!(medium_threshold, 88_000, "Medium: need 88k");
let large_watermark: usize = 150_000;
let large_threshold = (large_watermark as f64 * 1.1) as usize;
assert_eq!(large_threshold, 165_000, "Large: need 165k");
let small_headroom = small_threshold - small_watermark;
let large_headroom = large_threshold - large_watermark;
assert!(
large_headroom > small_headroom,
"Larger contexts get more headroom: {} > {}",
large_headroom,
small_headroom
);
}
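// Physical-ceiling estimate: headroom freed by compression
// (current - current / ratio) divided by the per-call growth rate gives
// the number of API calls the session can absorb before refilling.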
#[test]
fn test_estimate_physical_ceiling_is_headroom_over_growth() {
let current_tokens = 100_000.0_f64;
let compression_ratio = 2.5_f64;
let compressed = current_tokens / compression_ratio;
let headroom = current_tokens - compressed;
let growth_rate = 5_000.0_f64;
let ceiling = headroom / growth_rate;
assert_eq!(ceiling, 12.0);
let ceiling_fast = headroom / 10_000.0_f64;
assert!(ceiling_fast < ceiling, "faster growth → lower ceiling");
let compressed_aggressive = current_tokens / 4.0;
let headroom_aggressive = current_tokens - compressed_aggressive;
let ceiling_aggressive = headroom_aggressive / growth_rate;
assert!(
ceiling_aggressive > ceiling,
"more compression → more headroom → higher ceiling"
);
}
#[test]
fn test_estimate_symmetry_is_api_calls_so_far() {
let api_calls = 20.0_f64;
let physical_ceiling = 30.0_f64;
let estimate = physical_ceiling.min(api_calls);
assert_eq!(
estimate, api_calls,
"symmetry wins when smaller than ceiling"
);
let api_calls_large = 50.0_f64;
let estimate2 = physical_ceiling.min(api_calls_large);
assert_eq!(
estimate2, physical_ceiling,
"ceiling wins when smaller than symmetry"
);
}
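// Cold-start guard: with zero API calls the growth rate floors at 1.0,
// which would yield an absurd raw ceiling (60k calls here); the estimate
// is therefore capped at 100 and floored at 5.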
#[test]
fn test_estimate_zero_api_calls_caps_physical_ceiling() {
let current_tokens = 100_000.0_f64;
let compression_ratio = 2.5_f64;
let compressed = current_tokens / compression_ratio;
let headroom = current_tokens - compressed;
let growth_rate = (0.0_f64 / 1.0_f64).max(1.0);
let raw_ceiling = headroom / growth_rate;
assert_eq!(raw_ceiling, 60_000.0);
let estimate = raw_ceiling.min(100.0);
assert_eq!(estimate, 100.0, "cold-start ceiling capped at 100, not 60k");
assert!(estimate >= 5.0, "always at least 5");
}
#[test]
fn test_estimate_growth_rate_from_measured_output() {
let cases = [
(10.0_f64, 50_000.0_f64, 5_000.0_f64),
(1.0, 3_000.0, 3_000.0),
(0.0, 0.0, 1.0),
];
for (api_calls, output_tokens, expected) in cases {
let rate = (output_tokens / api_calls.max(1.0)).max(1.0);
assert_eq!(
rate, expected,
"api_calls={api_calls}, output={output_tokens}"
);
}
}
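// Self-tuning multiplier: the raw actual/predicted ratio, clamped to
// [0.25, 4.0], with no blending toward neutral.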
#[test]
fn test_self_tuning_direct_ratio_no_blending() {
let predicted = 20.0_f64;
let actual = 10.0_f64;
let ratio = (actual / predicted).clamp(0.25, 4.0);
assert_eq!(ratio, 0.5, "underestimated → ratio < 1");
let ratio2 = (30.0_f64 / 10.0_f64).clamp(0.25, 4.0);
assert_eq!(ratio2, 3.0, "overestimated → ratio > 1");
let ratio_extreme_low = (1.0_f64 / 100.0_f64).clamp(0.25, 4.0);
assert_eq!(ratio_extreme_low, 0.25, "extreme low clamped");
let ratio_extreme_high = (100.0_f64 / 1.0_f64).clamp(0.25, 4.0);
assert_eq!(ratio_extreme_high, 4.0, "extreme high clamped");
}
#[test]
fn test_self_tuning_neutral_when_no_prior_compression() {
let compressions = 0_usize;
let result = if compressions == 0 { 1.0_f64 } else { 0.5 };
assert_eq!(result, 1.0, "no prior data → neutral multiplier");
}
#[test]
fn test_estimate_end_to_end_symmetry_wins() {
let api_calls = 10.0_f64;
let output_tokens = 50_000.0_f64;
let current_tokens = 100_000.0_f64;
let compression_ratio = 2.5_f64;
let growth_rate = (output_tokens / api_calls).max(1.0);
let headroom = current_tokens - current_tokens / compression_ratio;
let ceiling = headroom / growth_rate;
let estimate = ceiling.min(api_calls);
assert_eq!(ceiling, 12.0);
assert_eq!(estimate, 10.0, "symmetry (10) wins over ceiling (12)");
assert!(estimate >= 5.0);
}
#[test]
fn test_estimate_end_to_end_ceiling_wins() {
let api_calls = 30.0_f64;
let output_tokens = 300_000.0_f64;
let current_tokens = 100_000.0_f64;
let compression_ratio = 2.5_f64;
let growth_rate = (output_tokens / api_calls).max(1.0);
let headroom = current_tokens - current_tokens / compression_ratio;
let ceiling = headroom / growth_rate;
let estimate = ceiling.min(api_calls);
assert_eq!(ceiling, 6.0);
assert_eq!(estimate, 6.0, "ceiling (6) wins over symmetry (30)");
assert!(estimate >= 5.0);
}
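// Growth rate must be measured incrementally after a compression, not
// over the session lifetime: here the lifetime rate (8.4k/call) is ~4x
// the post-compression rate (2k/call), so a stale lifetime rate would
// shrink the ceiling and trigger re-compression far too soon.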
#[test]
fn test_estimate_incremental_growth_rate_after_compression() {
let total_api_calls: usize = 25;
let total_output_tokens: u64 = 210_000;
let api_calls_at_last_compression: usize = 20;
let output_tokens_at_last_compression: u64 = 200_000;
let calls_since = (total_api_calls - api_calls_at_last_compression).max(1) as f64;
let output_since =
total_output_tokens.saturating_sub(output_tokens_at_last_compression) as f64;
let incremental_rate = (output_since / calls_since).max(1.0);
assert_eq!(
incremental_rate, 2_000.0,
"incremental rate reflects post-compression phase"
);
let lifetime_rate = (total_output_tokens as f64 / total_api_calls as f64).max(1.0);
assert_eq!(
lifetime_rate, 8_400.0,
"lifetime rate is inflated by heavy early phase"
);
let current_tokens = 100_000.0_f64;
let compression_ratio = 2.5_f64;
let headroom = current_tokens - current_tokens / compression_ratio;
let ceiling_incremental = headroom / incremental_rate;
let ceiling_lifetime = headroom / lifetime_rate;
assert!(
ceiling_incremental > ceiling_lifetime,
"incremental ceiling ({ceiling_incremental}) > lifetime ceiling ({ceiling_lifetime}): \
stale lifetime rate would trigger re-compression 4x too soon"
);
assert_eq!(ceiling_incremental, 30.0);
}
#[test]
fn test_estimate_growth_rate_falls_back_to_lifetime_before_first_compression() {
let compressions: usize = 0;
let total_api_calls = 10_usize;
let total_output_tokens: u64 = 50_000;
let api_calls_at_last_compression: usize = 0;
let output_tokens_at_last_compression: u64 = 0;
let growth_rate = if compressions > 0 {
let calls_since = (total_api_calls - api_calls_at_last_compression).max(1) as f64;
let output_since =
total_output_tokens.saturating_sub(output_tokens_at_last_compression) as f64;
(output_since / calls_since).max(1.0)
} else {
(total_output_tokens as f64 / total_api_calls.max(1) as f64).max(1.0)
};
assert_eq!(
growth_rate, 5_000.0,
"lifetime fallback: 50k / 10 calls = 5k/call"
);
}
#[test]
fn test_estimate_incremental_rate_single_call_since_compression() {
let total_api_calls: usize = 21;
let total_output_tokens: u64 = 205_000;
let api_calls_at_last_compression: usize = 20;
let output_tokens_at_last_compression: u64 = 200_000;
let calls_since = (total_api_calls - api_calls_at_last_compression).max(1) as f64;
let output_since =
total_output_tokens.saturating_sub(output_tokens_at_last_compression) as f64;
let rate = (output_since / calls_since).max(1.0);
assert_eq!(
rate, 5_000.0,
"single post-compression call measured correctly"
);
}
#[test]
fn test_estimate_incremental_rate_saturating_sub_prevents_underflow() {
let total_output_tokens: u64 = 1_000;
let output_tokens_at_last_compression: u64 = 5_000;
let output_since = total_output_tokens.saturating_sub(output_tokens_at_last_compression);
assert_eq!(output_since, 0, "saturating_sub: no underflow");
let rate = (output_since as f64 / 1.0_f64).max(1.0);
assert_eq!(rate, 1.0, "floors at 1.0, no panic");
}
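// The tests below pin down find_compression_range invariants: the anchor
// (first_prompt_idx) never moves across cycles, an old summary is drained
// and re-compressed rather than stacked, and the drain range always
// starts one past the anchor.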
#[test]
fn first_prompt_idx_never_changes_after_compression() {
let mut messages = Vec::new();
messages.push(msg("system")); messages.push(msg("user")); for i in 0..8 {
messages.push(msg(if i % 2 == 0 { "assistant" } else { "user" }));
}
let first_prompt_idx = Some(1usize);
let (start1, end1) = find_compression_range(&messages, first_prompt_idx, false).unwrap();
assert_eq!(start1, 1, "start_idx must be first_prompt_idx");
assert!(end1 >= 4);
assert_eq!(
first_prompt_idx,
Some(1),
"first_prompt_idx must not change"
);
let mut after = Vec::new();
after.push(msg("system")); after.push(msg("user")); let mut comp = msg("assistant");
comp.name = Some("plan_compression".to_string());
after.push(comp); for i in 0..8 {
after.push(msg(if i % 2 == 0 { "user" } else { "assistant" }));
}
let (start2, end2) = find_compression_range(&after, first_prompt_idx, false).unwrap();
assert_eq!(
start2, 1,
"second compression also starts at original anchor"
);
assert!(end2 >= 4);
}
#[test]
fn old_compressed_summary_is_recompressed_on_next_cycle() {
let mut messages = Vec::new();
messages.push(msg("system")); messages.push(msg("user")); let mut comp = msg("assistant");
comp.name = Some("plan_compression".to_string());
comp.content = "OLD_SUMMARY_V1".to_string();
messages.push(comp); for i in 0..8 {
messages.push(msg(if i % 2 == 0 { "user" } else { "assistant" }));
}
let (start_idx, end_idx) = find_compression_range(&messages, Some(1), false).unwrap();
assert_eq!(start_idx, 1, "start at permanent anchor");
let drain_range = (start_idx + 1)..=end_idx;
assert!(
drain_range.contains(&2),
"Old compressed summary must be IN the drain range (re-compressed)"
);
let to_compress = &messages[start_idx + 1..=end_idx];
assert!(
to_compress
.iter()
.any(|m| m.content.contains("OLD_SUMMARY_V1")),
"Old summary must be included in messages sent to AI for re-compression"
);
}
#[test]
fn bootstrap_messages_before_start_idx_dont_inflate_compress_count() {
let mut comp = msg("assistant");
comp.name = Some("plan_compression".to_string());
let mut a5 = msg("assistant");
a5.tool_calls =
Some(json!([{"id": "c1", "type": "function", "function": {"name": "plan"}}]));
let mut t6 = msg("tool");
t6.tool_call_id = Some("c1".to_string());
let mut a7 = msg("assistant");
a7.tool_calls =
Some(json!([{"id": "c2", "type": "function", "function": {"name": "shell"}}]));
let mut t8 = msg("tool");
t8.tool_call_id = Some("c2".to_string());
let mut a9 = msg("assistant");
a9.tool_calls =
Some(json!([{"id": "c3", "type": "function", "function": {"name": "plan"}}]));
let mut t10 = msg("tool");
t10.tool_call_id = Some("c3".to_string());
let messages = vec![
msg("system"), msg("assistant"), msg("user"), msg("user"), comp, a5, t6, a7, t8, a9, t10, msg("assistant"), ];
let (start_idx, end_idx) = find_compression_range(&messages, Some(3), false).unwrap();
assert_eq!(start_idx, 3, "start at first_prompt_idx");
assert!(
end_idx >= 4,
"end_idx must cover at least the old summary at index 4, got {end_idx}"
);
assert!(
end_idx > start_idx,
"drain range must be non-empty: start={start_idx}, end={end_idx}"
);
}
#[test]
fn bootstrap_with_many_messages_compresses_all() {
let mut messages = vec![
msg("system"), msg("assistant"), msg("user"), msg("user"), ];
for i in 0..10 {
messages.push(msg(if i % 2 == 0 { "assistant" } else { "user" }));
}
let (start_idx, end_idx) = find_compression_range(&messages, Some(3), false).unwrap();
assert_eq!(start_idx, 2, "anchor must be instructions file at idx 2");
assert_eq!(end_idx, 13, "compress-all: end_idx = last message");
}
#[test]
fn triple_compression_always_one_summary() {
let mut messages = Vec::new();
messages.push(msg("system")); messages.push(msg("user")); let mut comp = msg("assistant");
comp.name = Some("plan_compression".to_string());
comp.content = "SUMMARY_V2".to_string();
messages.push(comp);
for i in 0..8 {
messages.push(msg(if i % 2 == 0 { "user" } else { "assistant" }));
}
let (start_idx, end_idx) = find_compression_range(&messages, Some(1), false).unwrap();
assert_eq!(start_idx, 1);
assert!((start_idx + 1..=end_idx).contains(&2));
}
#[test]
fn anchor_message_never_included_in_drain_range() {
let messages = vec![
msg("system"), msg("user"), msg("assistant"), msg("user"), msg("assistant"), msg("user"), msg("assistant"), msg("user"), msg("assistant"), ];
let (start_idx, end_idx) = find_compression_range(&messages, Some(1), false).unwrap();
let drain_start = start_idx + 1;
let drain_end = end_idx;
assert!(drain_start > start_idx, "Drain must start AFTER anchor");
assert!(drain_end >= drain_start, "Drain range must be valid");
assert!(
!(start_idx >= drain_start && start_idx <= drain_end),
"Anchor must NOT be in drain range"
);
let correct_range = (start_idx + 1)..=end_idx;
assert!(correct_range.contains(&(start_idx + 1)));
assert!(
!correct_range.contains(&start_idx),
"Anchor must NOT be in compression range"
);
}
#[test]
fn compression_preserves_message_count_consistency() {
let mut messages = Vec::new();
messages.push(msg("system")); messages.push(msg("user")); for i in 2..=9 {
messages.push(msg(if i % 2 == 0 { "assistant" } else { "user" }));
}
let before_count = messages.len();
let (start_idx, end_idx) = find_compression_range(&messages, Some(1), false).unwrap();
let messages_to_remove = end_idx - start_idx;
let _expected_after = before_count - messages_to_remove + 1;
assert_eq!(
messages_to_remove,
(end_idx - (start_idx + 1) + 1),
"Removal count must match drain range"
);
assert!(
messages_to_remove < before_count,
"Must remove fewer messages than total"
);
}
#[test]
fn messages_to_compress_excludes_anchor_message() {
let mut messages = Vec::new();
messages.push(msg("system"));
let mut anchor = msg("user"); anchor.content = "ANCHOR_CONTENT_MUST_NOT_BE_SUMMARIZED".to_string();
messages.push(anchor);
messages.push(msg("assistant")); messages.push(msg("user")); messages.push(msg("assistant")); messages.push(msg("user")); messages.push(msg("assistant")); messages.push(msg("user")); messages.push(msg("assistant"));
let (start_idx, end_idx) = find_compression_range(&messages, Some(1), false).unwrap();
assert_eq!(start_idx, 1);
let correct = &messages[start_idx + 1..=end_idx];
let wrong = &messages[start_idx..=end_idx];
assert_eq!(correct.len(), end_idx - start_idx);
assert_eq!(wrong.len(), end_idx - start_idx + 1);
assert!(
!correct.iter().any(|m| m.content.contains("ANCHOR_CONTENT")),
"Anchor must NOT be in messages_to_compress"
);
assert!(
wrong.iter().any(|m| m.content.contains("ANCHOR_CONTENT")),
"Old bug: anchor WAS in messages_to_compress"
);
}
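// Token accounting must use the same bounds as the drain: summing over
// start_idx + 1..=end_idx versus start_idx..=end_idx must differ by
// exactly the anchor message's tokens.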
#[test]
fn calculate_range_tokens_matches_actual_removal() {
use crate::session::estimate_message_tokens;
let mut messages = Vec::new();
messages.push(msg("system"));
let mut anchor = msg("user");
anchor.content = "x".repeat(1000);
messages.push(anchor);
for i in 0..4 {
let mut m = msg(if i % 2 == 0 { "assistant" } else { "user" });
m.content = format!("Message {}", i);
messages.push(m);
}
for i in 0..4 {
messages.push(msg(if i % 2 == 0 { "user" } else { "assistant" }));
}
let (start_idx, end_idx) = find_compression_range(&messages, Some(1), false).unwrap();
let mut tokens_removed = 0u64;
for m in &messages[start_idx + 1..=end_idx] {
tokens_removed += estimate_message_tokens(m) as u64;
}
let mut tokens_with_anchor = 0u64;
for m in &messages[start_idx..=end_idx] {
tokens_with_anchor += estimate_message_tokens(m) as u64;
}
let anchor_tokens = estimate_message_tokens(&messages[start_idx]) as u64;
assert_eq!(
tokens_with_anchor - tokens_removed,
anchor_tokens,
"Difference must be exactly the anchor message tokens"
);
}
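// Summaries can carry an auto-expanded FILE CONTEXT section; it must be
// stripped before the summary is fed back in for re-compression so raw
// file bytes are not summarized again, while the narrative text survives.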
#[test]
fn test_file_context_stripped_from_recompression_input() {
let summary_with_context = "## Conversation Summary [COMPRESSED: abc]\n\
Some important history here.\n\n\
**FILE CONTEXT** (auto-expanded):\n\
<content path=\"src/main.rs\">\nfn main() {}\n</content>";
let stripped = strip_file_context_from_summary(summary_with_context);
assert!(
!stripped.contains("FILE CONTEXT"),
"FILE CONTEXT sentinel must be stripped"
);
assert!(
!stripped.contains("fn main()"),
"File bytes must not appear in stripped output"
);
assert!(
stripped.contains("Some important history here."),
"Summary text before sentinel must be preserved"
);
}
#[test]
fn test_file_context_stripped_when_no_sentinel() {
let plain = "## Conversation Summary [COMPRESSED: abc]\nJust a summary.";
let stripped = strip_file_context_from_summary(plain);
assert_eq!(stripped, plain.trim());
}
#[test]
fn test_multiple_compression_cycles_anchor_never_moves() {
let first_prompt_idx = Some(1usize);
let mut messages: Vec<Message> = Vec::new();
messages.push(msg("system")); messages.push(msg("user")); for i in 0..10 {
messages.push(msg(if i % 2 == 0 { "assistant" } else { "user" }));
}
let (s1, e1) = find_compression_range(&messages, first_prompt_idx, false).unwrap();
assert_eq!(s1, 1, "Cycle 1: start must be anchor (1)");
assert!(e1 > s1, "Cycle 1: end must be after anchor");
assert!(
e1 < messages.len(),
"Cycle 1: end must leave RECENT messages"
);
let drained: Vec<Message> = messages.drain(s1 + 1..=e1).collect();
assert!(!drained.is_empty(), "Cycle 1: must drain something");
let mut summary1 = msg("assistant");
summary1.content = "## Conversation Summary [COMPRESSED: c1]\nCycle 1 summary.".to_string();
messages.insert(s1 + 1, summary1);
for i in 0..10 {
messages.push(msg(if i % 2 == 0 { "user" } else { "assistant" }));
}
let (s2, e2) = find_compression_range(&messages, first_prompt_idx, false).unwrap();
assert_eq!(s2, 1, "Cycle 2: start must still be anchor (1)");
assert!(e2 > s2);
let drained2: Vec<Message> = messages.drain(s2 + 1..=e2).collect();
assert!(!drained2.is_empty(), "Cycle 2: must drain something");
let mut summary2 = msg("assistant");
summary2.content = "## Conversation Summary [COMPRESSED: c2]\nCycle 2 summary.".to_string();
messages.insert(s2 + 1, summary2);
for i in 0..10 {
messages.push(msg(if i % 2 == 0 { "user" } else { "assistant" }));
}
let (s3, e3) = find_compression_range(&messages, first_prompt_idx, false).unwrap();
assert_eq!(s3, 1, "Cycle 3: start must still be anchor (1)");
assert!(e3 > s3);
assert_eq!(s1, s2, "Anchor must not drift between cycles");
assert_eq!(s2, s3, "Anchor must not drift between cycles");
}
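// Compress-all mode: in these setups find_compression_range compresses
// everything after the anchor, so end_idx lands on the very last message.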
#[test]
fn compress_all_includes_last_message() {
let mut messages: Vec<Message> = Vec::new();
messages.push(msg("system")); messages.push(msg("user")); for i in 0..20 {
messages.push(msg(if i % 2 == 0 { "assistant" } else { "user" }));
} messages.push(msg("user"));
let (start_idx, end_idx) = find_compression_range(&messages, Some(1), false).unwrap();
assert_eq!(start_idx, 1);
assert_eq!(end_idx, 22, "compress-all: end_idx must be last message");
}
#[test]
fn compress_all_with_tool_loop_after_user_prompt() {
let messages = vec![
msg("system"), msg("assistant"), msg("user"), msg("user"), msg("assistant"), msg("user"), msg("assistant"), msg("tool"), msg("tool"), msg("assistant"), msg("tool"), msg("assistant"), msg("assistant"), msg("tool"), msg("assistant"), ];
let (start_idx, end_idx) = find_compression_range(&messages, Some(3), false).unwrap();
assert_eq!(start_idx, 2, "anchor at instructions");
assert_eq!(end_idx, 14, "compress-all: end_idx = last message");
}
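// RECENT window sizing: total_messages / 4, clamped to the range [4, 8].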
#[test]
fn test_recent_window_capped_at_8_for_large_session() {
let total_msgs: usize = 100;
let recent_count = (total_msgs / 4).clamp(4, 8);
assert_eq!(
recent_count, 8,
"RECENT window must be capped at 8 for large sessions"
);
let small = 12usize;
let recent_small = (small / 4).clamp(4, 8);
assert_eq!(recent_small, 4, "RECENT window must be at least 4");
let medium = 32usize;
let recent_medium = (medium / 4).clamp(4, 8);
assert_eq!(recent_medium, 8, "RECENT window must be 8 at 32 messages");
}
#[test]
fn compress_all_with_tool_cycles() {
let messages = vec![
msg("system"), msg("user"), msg("assistant"), msg("user"), msg("assistant"), msg("user"), msg("assistant"), msg("user"), msg("assistant"), ];
let (start_idx, end_idx) = find_compression_range(&messages, Some(1), false).unwrap();
assert_eq!(start_idx, 1);
assert_eq!(end_idx, 8, "compress-all: end_idx = last message");
let recent_users: Vec<Message> = messages[start_idx + 1..=end_idx]
.iter()
.rev()
.filter(|m| m.role == "user")
.take(2)
.cloned()
.collect::<Vec<_>>()
.into_iter()
.rev()
.collect();
let mut after = messages.clone();
after.drain(start_idx + 1..=end_idx);
let mut summary = msg("assistant");
summary.content = "## Conversation Summary [COMPRESSED: test]".to_string();
after.insert(start_idx + 1, summary);
for (i, user_msg) in recent_users.iter().enumerate() {
after.insert(start_idx + 2 + i, user_msg.clone());
}
assert_eq!(after.len(), 5);
assert_eq!(after[0].role, "system");
assert_eq!(after[1].role, "user");
assert_eq!(after[2].role, "assistant");
assert_eq!(after[3].role, "user");
assert_eq!(after[4].role, "user");
}
#[test]
fn tool_loop_only_one_user_message_still_compresses() {
let mut messages = Vec::new();
messages.push(msg("system")); messages.push(msg("user"));
for i in 0..10 {
let mut asst = msg("assistant");
asst.tool_calls = Some(json!([
{"id": format!("call_{i}"), "type": "function", "function": {"name": "view", "arguments": "{}"}}
]));
messages.push(asst);
let mut tool = msg("tool");
tool.tool_call_id = Some(format!("call_{i}"));
messages.push(tool);
}
messages.push(msg("assistant"));
let (start_idx, end_idx) = find_compression_range(&messages, Some(1), false).unwrap();
assert!(
start_idx < end_idx,
"Tool-loop session must produce valid compression range, got ({start_idx}, {end_idx})"
);
assert_eq!(start_idx, 1, "start_idx must be first_prompt_idx = 1");
assert_eq!(
end_idx,
messages.len() - 1,
"compress-all: end_idx must be last message"
);
}
#[test]
fn test_triple_compression_only_one_summary_in_drain() {
let first_prompt_idx = Some(1usize);
let mut messages: Vec<Message> = Vec::new();
messages.push(msg("system")); messages.push(msg("user")); for i in 0..10 {
messages.push(msg(if i % 2 == 0 { "assistant" } else { "user" }));
}
for cycle in 1..=3usize {
for i in 0..8 {
messages.push(msg(if i % 2 == 0 { "user" } else { "assistant" }));
}
let (s, e) = find_compression_range(&messages, first_prompt_idx, false).unwrap();
let summaries_in_drain = messages[s + 1..=e]
.iter()
.filter(|m| {
m.content
.starts_with("## Conversation Summary [COMPRESSED:")
})
.count();
if cycle > 1 {
assert_eq!(
summaries_in_drain, 1,
"Cycle {}: drain range must contain exactly 1 prior summary, found {}",
cycle, summaries_in_drain
);
}
let _drained: Vec<Message> = messages.drain(s + 1..=e).collect();
let mut summary = msg("assistant");
summary.content =
format!("## Conversation Summary [COMPRESSED: c{cycle}]\nCycle {cycle} summary.");
messages.insert(s + 1, summary);
}
}
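// Regression proof for the invalid-range cooldown: when
// find_compression_range returns (0, 0), the watermark must be reset to
// the current token count so an identical next check is blocked, instead
// of re-analyzing forever against a stale 19,442-token watermark.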
#[test]
fn bug_proof_invalid_range_must_set_cooldown() {
let messages = vec![
msg("system"), msg("user"), msg("assistant"), msg("user"), msg("assistant"), ];
let (start_idx, end_idx) = find_compression_range(&messages, Some(1), false).unwrap();
assert_eq!(
(start_idx, end_idx),
(0, 0),
"Must return (0,0) when not enough messages to compress"
);
let current_tokens: usize = 61_028;
let mut context_tokens_after_last_compression: usize = 19_442;
if start_idx >= end_idx {
context_tokens_after_last_compression = current_tokens;
}
let min_tokens_for_recompression =
(context_tokens_after_last_compression as f64 * 1.1) as usize;
assert!(
current_tokens < min_tokens_for_recompression,
"After setting cooldown to current_tokens={}, next check at same token count must be blocked (need {} for recompression)",
current_tokens,
min_tokens_for_recompression
);
let old_watermark: usize = 19_442;
let old_min = (old_watermark as f64 * 1.1) as usize;
assert!(
current_tokens >= old_min,
"Without fix, old watermark {} allows recompression at {} (min: {}) — the bug!",
old_watermark,
current_tokens,
old_min
);
}
#[test]
fn bug_proof_invalid_range_cooldown_allows_growth() {
let current_tokens: usize = 61_028;
let context_tokens_after_last_compression = current_tokens;
let grown_tokens: usize = 67_200;
let min_required = (context_tokens_after_last_compression as f64 * 1.1) as usize;
assert!(
grown_tokens >= min_required,
"After 10%+ growth ({} → {}), compression should be allowed (min: {})",
current_tokens,
grown_tokens,
min_required
);
}
#[test]
fn knowledge_log_entry_uses_content_key() {
let entry = serde_json::json!({
"type": "KNOWLEDGE_ENTRY",
"timestamp": 0u64,
"content": "test knowledge"
});
assert!(
entry.get("content").is_some(),
"KNOWLEDGE_ENTRY must use 'content' key (not 'knowledge')"
);
assert!(
entry.get("knowledge").is_none(),
"'knowledge' key must not be present — persistence reads 'content'"
);
assert_eq!(entry["content"].as_str().unwrap(), "test knowledge");
}
}