use crate::config::Config;
use crate::session::chat::session::ChatSession;
use crate::session::estimate_tokens;
use crate::{log_debug, log_info};
use anyhow::Result;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
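
/// Decides whether conversation compression should be attempted right now.
/// Returns `(should_compress, target_ratio)`; the `2.0` ratio is a fallback
/// used whenever compression is disabled, unconfigured, or not cost-effective.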
pub async fn should_check_compression(session: &mut ChatSession, config: &Config) -> (bool, f64) {
if !config.compression.adaptive_threshold {
log_debug!("Adaptive compression disabled (adaptive_threshold=false)");
return (false, 2.0);
}
if config.compression.pressure_levels.is_empty() {
log_debug!("No pressure levels configured - compression disabled");
return (false, 2.0);
}
let current_tokens = session.get_full_context_tokens(config).await;
log_debug!(
"Compression check: current_tokens={}, thresholds={:?}",
current_tokens,
config
.compression
.pressure_levels
.iter()
.map(|l| l.threshold)
.collect::<Vec<_>>()
);
let matching_level = config
.compression
.pressure_levels
.iter()
        .rev()
        .find(|level| current_tokens >= level.threshold);
match matching_level {
Some(level) => {
log_debug!(
"✓ Threshold exceeded! Context tokens: {} → target compression: {:.1}x (threshold: {})",
current_tokens,
level.target_ratio,
level.threshold
);
let net_benefit = calculate_compression_net_benefit(
session,
config,
current_tokens,
level.target_ratio,
)
.await;
if net_benefit > 0.0 {
log_debug!(
"Cache-aware analysis: Net benefit ${:.5} → COMPRESS",
net_benefit
);
(true, level.target_ratio)
} else {
log_debug!(
"Cache-aware analysis: Net benefit ${:.5} → SKIP (would lose money)",
net_benefit
);
(false, 2.0)
}
}
None => {
log_debug!(
"No threshold exceeded (current: {}, lowest threshold: {})",
current_tokens,
config
.compression
.pressure_levels
.first()
.map(|l| l.threshold)
.unwrap_or(0)
);
(false, 2.0)
}
}
}
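
/// Estimates the dollar difference between compressing now and continuing
/// with the full context, using real pricing for both the decision model and
/// the session model. Returns `-1.0` when pricing lookup fails; a positive
/// value means compression saves money.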
async fn calculate_compression_net_benefit(
    session: &ChatSession,
    config: &Config,
    current_tokens: usize,
    compression_ratio: f64,
) -> f64 {
let total_tokens = current_tokens as f64;
let estimated_future_turns = estimate_future_turns(session);
let compressed_tokens = total_tokens / compression_ratio;
let decision_model = &config.compression.decision.model;
let session_model = &session.model;
let decision_pricing = get_model_pricing(decision_model, config);
let session_pricing = get_model_pricing(session_model, config);
let (decision_pricing, session_pricing) = match (decision_pricing, session_pricing) {
(Some(d), Some(s)) => (d, s),
_ => {
log_debug!(
"Cannot get pricing for models: decision='{}', session='{}' - skipping compression",
decision_model,
session_model
);
            return -1.0;
        }
    };
let total_api_calls = session.session.info.total_api_calls.max(1) as f64;
let avg_new_tokens_per_call =
(session.session.info.output_tokens as f64 / total_api_calls).max(2000.0);
    // Token estimate for the decision prompt; this approximates the prompt
    // that ask_ai_decision_and_summary actually builds.
    let decision_prompt_tokens = estimate_tokens(
"Analyze the conversation history. Should older exchanges be compressed into a summary to save context space while preserving important information? Consider:\n\
- Are there repetitive or resolved topics that can be summarized?\n\
- Is there important context that must be preserved?\n\
- Would compression help focus on current topics?\n\n\
If YES, also provide a 2-3 sentence summary preserving logical structure (focus on what's needed to continue the conversation):\n\n\
[context chunks placeholder - ~500 tokens average]\n\n\
Respond with:\n\
'YES' followed by the summary on the next line, OR\n\
'NO' if compression is not beneficial."
) as f64;
let same_model = decision_model == session_model;
let decision_max_tokens = config.compression.decision.max_tokens;
let estimated_output_tokens = if decision_max_tokens > 0 {
(compressed_tokens as u64).min(decision_max_tokens as u64)
} else {
compressed_tokens as u64
};
let mut total_cost_no_compress = 0.0;
let mut accumulated_context = total_tokens;
    for _ in 0..estimated_future_turns as i32 {
        // Scenario A: every future call reads the accumulated context from cache.
        let context_cost = session_pricing.calculate_cost(0, 0, accumulated_context as u64, 0);
        total_cost_no_compress += context_cost;
        accumulated_context += avg_new_tokens_per_call;
    }
    let compression_cost = if same_model {
        // Same model: only the decision prompt is uncached; the existing
        // context is read back from cache.
        decision_pricing.calculate_cost(
            decision_prompt_tokens as u64,
            0,
            (total_tokens - decision_prompt_tokens) as u64,
            estimated_output_tokens,
        )
    } else {
        // Different model: no cache reuse, so the full context is billed as
        // uncached input.
        decision_pricing.calculate_cost(total_tokens as u64, 0, 0, estimated_output_tokens)
    };
let mut total_cost_with_compress = compression_cost;
let mut accumulated_context_compressed = compressed_tokens;
    for i in 0..estimated_future_turns as i32 {
        let context_cost = if i == 0 {
            // Scenario B: compression invalidates the cache, so the first call
            // pays for the compressed context as uncached input...
            session_pricing.calculate_cost(accumulated_context_compressed as u64, 0, 0, 0)
        } else {
            // ...while subsequent calls read it back from cache.
            session_pricing.calculate_cost(0, 0, accumulated_context_compressed as u64, 0)
        };
total_cost_with_compress += context_cost;
accumulated_context_compressed += avg_new_tokens_per_call;
}
let net_benefit = total_cost_no_compress - total_cost_with_compress;
log_debug!(
"Compression analysis (REAL PRICING):\n \
Decision model: {} (input: ${:.2}/1M, output: ${:.2}/1M, cache_write: ${:.2}/1M, cache_read: ${:.2}/1M)\n \
Session model: {} (input: ${:.2}/1M, output: ${:.2}/1M, cache_write: ${:.2}/1M, cache_read: ${:.2}/1M)\n \
Models match: {} (cache reuse: {})\n \
Current: {:.0} tokens (decision prompt: ~{:.0} tokens)\n \
After compression: {:.0} tokens ({:.1}x ratio) - saves {:.0} tokens\n \
Avg new tokens/call: {:.0} (output_tokens={}, api_calls={})\n \
Future calls: {:.0}\n \
SCENARIO A (no compress): ${:.5}\n \
- Pays for growing context: {:.0} → {:.0} tokens over {} calls (all cached)\n \
SCENARIO B (compress): ${:.5}\n \
- Compression cost: ${:.5} (using {}, {} uncached, {} cached)\n \
- Pays for growing context: {:.0} → {:.0} tokens over {} calls\n \
Net benefit: ${:.5} → {}",
decision_model,
decision_pricing.input_price_per_1m,
decision_pricing.output_price_per_1m,
decision_pricing.cache_write_price_per_1m,
decision_pricing.cache_read_price_per_1m,
session_model,
session_pricing.input_price_per_1m,
session_pricing.output_price_per_1m,
session_pricing.cache_write_price_per_1m,
session_pricing.cache_read_price_per_1m,
if same_model { "YES" } else { "NO" },
if same_model { "YES" } else { "NO" },
total_tokens,
decision_prompt_tokens,
compressed_tokens,
compression_ratio,
total_tokens - compressed_tokens,
avg_new_tokens_per_call,
session.session.info.output_tokens,
session.session.info.total_api_calls,
estimated_future_turns,
total_cost_no_compress,
total_tokens,
total_tokens + (avg_new_tokens_per_call * (estimated_future_turns - 1.0)),
estimated_future_turns as i32,
total_cost_with_compress,
compression_cost,
decision_model,
if same_model { decision_prompt_tokens as u64 } else { total_tokens as u64 },
if same_model { (total_tokens - decision_prompt_tokens) as u64 } else { 0 },
compressed_tokens,
compressed_tokens + (avg_new_tokens_per_call * (estimated_future_turns - 1.0)),
estimated_future_turns as i32,
net_benefit,
if net_benefit > 0.0 {
"COMPRESS ✓"
} else {
"SKIP"
}
);
net_benefit
}
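
/// Resolves pricing for a `provider:model` identifier via the provider factory.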
fn get_model_pricing(model: &str, _config: &Config) -> Option<crate::providers::ModelPricing> {
let parts: Vec<&str> = model.split(':').collect();
if parts.len() != 2 {
log_debug!(
"Invalid model format: '{}' (expected 'provider:model')",
model
);
return None;
}
let provider_name = parts[0];
let model_name = parts[1];
let provider = crate::providers::ProviderFactory::create_provider(provider_name).ok()?;
provider.get_model_pricing(model_name)
}
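
/// Heuristic projection of how many more API calls this session will make,
/// derived from call velocity, session age, tool-call density, and whether
/// an active plan exists. The result is clamped to at least 5 calls.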
fn estimate_future_turns(session: &ChatSession) -> f64 {
let current_api_calls = session.session.info.total_api_calls as f64;
    if current_api_calls < 5.0 {
        // Too little history to extrapolate; assume a fresh session with a
        // default of 10 remaining calls.
        return 10.0;
    }
let session_start = session.session.info.created_at;
let current_time = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_secs();
    // Guard against clock skew (saturating_sub) and very young sessions (min 60s).
    let session_duration_secs = current_time.saturating_sub(session_start).max(60);
    let session_duration_mins = session_duration_secs as f64 / 60.0;
let call_velocity = current_api_calls / session_duration_mins;
    let continuation_factor = if session_duration_mins < 10.0 {
        0.8
    } else if session_duration_mins < 30.0 {
        0.6
    } else {
        0.4
    };
let estimated_remaining_mins = session_duration_mins * continuation_factor;
    let velocity_decay = if call_velocity > 2.0 {
        0.6
    } else if call_velocity > 1.0 {
        0.75
    } else {
        0.85
    };
let estimated_remaining = call_velocity * estimated_remaining_mins * velocity_decay;
let tool_density = session.session.info.tool_calls as f64 / current_api_calls.max(1.0);
let has_plan = crate::mcp::dev::plan::core::has_active_plan();
    let max_estimate = if has_plan || tool_density > 3.0 {
        (current_api_calls * 3.0).min(200.0)
    } else {
        (current_api_calls * 2.0).min(100.0)
    };
let final_estimate = estimated_remaining.clamp(5.0, max_estimate);
crate::log_debug!(
"Future calls estimation: current_calls={:.0}, velocity={:.2} calls/min, \
session_duration={:.1}min, continuation_factor={:.2}, \
estimated_remaining_mins={:.1}, velocity_decay={:.2}, \
tool_density={:.2}, has_plan={}, \
raw_estimate={:.1}, final_estimate={:.0} (bounds: 5.0-{:.0})",
current_api_calls,
call_velocity,
session_duration_mins,
continuation_factor,
estimated_remaining_mins,
velocity_decay,
tool_density,
has_plan,
estimated_remaining,
final_estimate,
max_estimate
);
final_estimate
}
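
/// Top-level compression pass: checks thresholds and cost-benefit, asks the
/// decision model for a verdict plus summary in one call, and rewrites the
/// session in place. Returns `Ok(true)` only when compression was applied.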
pub async fn check_and_compress_conversation(
session: &mut ChatSession,
config: &Config,
) -> Result<bool> {
let (should_check, target_ratio) = should_check_compression(session, config).await;
if !should_check {
return Ok(false);
}
let animation_cancel = Arc::new(AtomicBool::new(false));
let animation_cancel_clone = animation_cancel.clone();
let current_cost = session.session.info.total_cost;
let max_threshold = config.max_session_tokens_threshold;
let current_context_tokens = session.get_full_context_tokens(config).await as u64;
let animation_task = tokio::spawn(async move {
let _ = crate::session::chat::animation::show_smart_animation(
animation_cancel_clone,
current_cost,
current_context_tokens,
max_threshold,
)
.await;
});
    // Give the animation task a brief head start before the heavy work begins.
    tokio::time::sleep(tokio::time::Duration::from_millis(50)).await;
log_debug!("Compression check triggered - asking AI for decision and summary in one call");
let (start_idx, end_idx) = find_compression_range(&session.session.messages)?;
if start_idx >= end_idx {
log_debug!("No messages to compress (range invalid)");
animation_cancel.store(true, Ordering::SeqCst);
let _ = animation_task.await;
return Ok(false);
}
let tokens_before = calculate_range_tokens(session, start_idx, end_idx)?;
let messages_to_compress = &session.session.messages[start_idx..=end_idx];
let chunks = super::semantic_chunking::chunk_messages(messages_to_compress);
let target_tokens = (tokens_before as f64 / target_ratio) as usize;
let selected = super::semantic_chunking::select_chunks_within_budget(&chunks, target_tokens);
let (critical_text, reference_text, context_chunks) = group_chunks_by_type(&selected);
let preserved_text = if !critical_text.is_empty() && !reference_text.is_empty() {
format!("{}\n{}", critical_text, reference_text)
} else if !critical_text.is_empty() {
critical_text
} else {
reference_text
};
let (should_compress, context_summary) =
ask_ai_decision_and_summary(session, config, &context_chunks).await?;
if !should_compress {
log_debug!("AI decided compression not beneficial at this point");
animation_cancel.store(true, Ordering::SeqCst);
let _ = animation_task.await;
return Ok(false);
}
log_info!("AI decided to compress older conversation exchanges");
apply_compression(
session,
start_idx,
end_idx,
&preserved_text,
&context_summary,
tokens_before,
)?;
animation_cancel.store(true, Ordering::SeqCst);
let _ = animation_task.await;
Ok(true)
}
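
/// Sends a single decision-plus-summary prompt to the configured decision
/// model. A response starting with `YES` carries the summary (and optionally
/// a `<context>` block of file ranges); `NO` leaves the session untouched.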
async fn ask_ai_decision_and_summary(
session: &mut ChatSession,
config: &Config,
context_chunks: &[&super::semantic_chunking::SemanticChunk],
) -> Result<(bool, String)> {
let mut decision_prompt = String::from(
"Analyze the conversation history. Should older exchanges be compressed into a summary to save context space while preserving important information?\n\n\
Consider:\n\
- Are there repetitive or resolved topics that can be summarized?\n\
- Is there important context that must be preserved?\n\
- Would compression help focus on current topics?\n\n"
);
if !context_chunks.is_empty() {
decision_prompt.push_str(
"If YES, provide:\n\
1. A 2-3 sentence summary preserving logical structure\n\
2. CRITICAL file contexts needed to continue work (if any)\n\n\
**Context chunks to analyze:**\n\n",
);
for chunk in context_chunks {
let relation_hint = match chunk.discourse_relation {
super::semantic_chunking::DiscourseRelation::Cause => "[REASONING]",
super::semantic_chunking::DiscourseRelation::Contrast => "[ALTERNATIVE]",
super::semantic_chunking::DiscourseRelation::Sequence => "[STEP]",
super::semantic_chunking::DiscourseRelation::Background => "[CONTEXT]",
super::semantic_chunking::DiscourseRelation::Elaboration => "[DETAIL]",
super::semantic_chunking::DiscourseRelation::None => "",
};
if relation_hint.is_empty() {
decision_prompt.push_str(&format!("- {}\n", chunk.content.trim()));
} else {
decision_prompt.push_str(&format!("{} {}\n", relation_hint, chunk.content.trim()));
}
}
decision_prompt.push_str(
"\n\n**Response format:**\n\
YES\n\
[Your 2-3 sentence summary here]\n\n\
**OPTIONAL: If specific file contexts are needed to continue work, include them:**\n\
<context>\n\
filename:startline:endline\n\
filename:startline:endline\n\
</context>\n\n\
**Format requirements for file contexts:**\n\
- Use <context> tags around file references\n\
- Each line: filepath:number:number (no spaces)\n\
- Use paths from project root (src/main.rs not ./src/main.rs)\n\
- Line numbers must be positive, start ≤ end ≤ 10000\n\
- Maximum 5 file ranges\n\
- Only include files CRITICAL for continuing the work\n\n\
OR respond with 'NO' if compression is not beneficial.",
);
} else {
decision_prompt.push_str("Respond with ONLY 'YES' to compress or 'NO' to keep as-is.");
}
let mut messages = session.session.messages.clone();
messages.push(crate::session::Message {
role: "user".to_string(),
content: decision_prompt,
timestamp: std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_secs(),
cached: false,
tool_call_id: None,
name: None,
tool_calls: None,
images: None,
thinking: None,
id: None,
});
let decision_config = &config.compression.decision;
crate::log_debug!(
"Using compression decision model '{}' (max_tokens={}, temp={}, ignore_cost={})",
decision_config.model,
decision_config.max_tokens,
decision_config.temperature,
decision_config.ignore_cost
);
let params = crate::session::ChatCompletionWithValidationParams::new(
&messages,
&decision_config.model,
decision_config.temperature,
decision_config.top_p,
decision_config.top_k,
decision_config.max_tokens,
config,
)
.with_max_retries(decision_config.max_retries)
.with_chat_session(session);
let response = crate::session::chat_completion_with_validation(params).await?;
let usage = response.exchange.usage;
let ignore_cost = decision_config.ignore_cost;
if !ignore_cost {
if let Some(ref u) = usage {
if let Some(cost) = u.cost {
session.session.info.total_cost += cost;
session.estimated_cost = session.session.info.total_cost;
log_debug!(
"Compression decision cost: ${:.5} (total: ${:.5})",
cost,
session.session.info.total_cost
);
}
}
} else {
log_debug!("Compression decision cost ignored (ignore_cost=true)");
}
let content = response.content.trim();
let lines: Vec<&str> = content.lines().collect();
if lines.is_empty() {
log_debug!("AI compression decision: NO (empty response)");
return Ok((false, String::new()));
}
let first_line = lines[0].trim().to_uppercase();
let decision = first_line.contains("YES");
if decision {
let summary = if lines.len() > 1 {
lines[1..].join("\n").trim().to_string()
} else {
String::new()
};
log_debug!(
"AI compression decision: YES with summary ({} chars)",
summary.len()
);
Ok((true, summary))
} else {
log_debug!("AI compression decision: NO");
Ok((false, String::new()))
}
}
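
/// Replaces the selected message range with one compressed summary entry,
/// expanding any AI-requested file contexts, recording compression metrics,
/// and resetting the cache checkpoint counters.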
fn apply_compression(
session: &mut ChatSession,
start_idx: usize,
end_idx: usize,
preserved_text: &str,
context_summary: &str,
tokens_before: u64,
) -> Result<()> {
let file_contexts = super::continuation::file_context::parse_file_contexts(context_summary);
let file_context_content = if !file_contexts.is_empty() {
crate::log_debug!(
"Compression: AI requested {} file context(s) for continuation",
file_contexts.len()
);
for (filepath, start, end) in &file_contexts {
crate::log_debug!(" - {} (lines {}-{})", filepath, start, end);
}
super::continuation::file_context::generate_file_context_content(&file_contexts)
} else {
String::new()
};
let compression_id = crate::mcp::dev::plan::compression::get_compression_id()
.unwrap_or_else(|| "unknown".to_string());
let compressed_entry = format_compressed_entry_with_context(
preserved_text,
context_summary,
&file_context_content,
compression_id,
);
let tokens_after = estimate_tokens(&compressed_entry) as u64;
let (messages_removed, had_cached) = session.remove_messages_in_range(start_idx, end_idx)?;
session.insert_compressed_knowledge(start_idx, compressed_entry, had_cached)?;
let tokens_saved = tokens_before.saturating_sub(tokens_after);
let metrics = crate::mcp::dev::plan::compression::CompressionMetrics::new(
messages_removed,
tokens_saved,
tokens_before,
);
crate::session::chat::cost_tracker::CostTracker::display_compression_result(
"Conversation",
&metrics,
);
session
.session
.info
.compression_stats
.add_conversation_compression(messages_removed, tokens_saved);
let _ = crate::session::logger::log_compression_point(
&session.session.info.name,
"conversation",
messages_removed,
tokens_saved,
);
session.session.info.current_non_cached_tokens = 0;
session.session.info.current_total_tokens = 0;
session.session.info.last_cache_checkpoint_time = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_secs();
Ok(())
}
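
/// Joins chunk contents with `\n- ` separators; the leading `- ` for the
/// first item is supplied by the caller.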
fn format_chunks_verbatim(chunks: &[&super::semantic_chunking::SemanticChunk]) -> String {
chunks
.iter()
.map(|c| c.content.trim())
.filter(|s| !s.is_empty())
.collect::<Vec<_>>()
.join("\n- ")
}
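
/// Splits the selected chunks into verbatim critical text, verbatim reference
/// text, and the context chunks that are handed to the AI for summarization.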
fn group_chunks_by_type(
selected: &[super::semantic_chunking::SemanticChunk],
) -> (
String,
String,
Vec<&super::semantic_chunking::SemanticChunk>,
) {
let critical: Vec<_> = selected
.iter()
.filter(|c| matches!(c.chunk_type, super::semantic_chunking::ChunkType::Critical))
.collect();
let reference: Vec<_> = selected
.iter()
.filter(|c| matches!(c.chunk_type, super::semantic_chunking::ChunkType::Reference))
.collect();
let context: Vec<_> = selected
.iter()
.filter(|c| matches!(c.chunk_type, super::semantic_chunking::ChunkType::Context))
.collect();
let critical_text = format_chunks_verbatim(&critical);
let reference_text = format_chunks_verbatim(&reference);
(critical_text, reference_text, context)
}
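
/// Renders the final compressed-knowledge entry as markdown, tagged with the
/// compression ID.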
fn format_compressed_entry_with_context(
preserved: &str,
context: &str,
file_context: &str,
compression_id: String,
) -> String {
let mut sections = Vec::new();
if !preserved.is_empty() {
sections.push(format!(
"**CRITICAL** (preserved verbatim):\n- {}",
preserved
));
}
if !context.is_empty() {
sections.push(format!("**CONTEXT**: {}", context));
}
if !file_context.is_empty() {
sections.push(format!(
"**FILE CONTEXT** (auto-expanded):\n{}",
file_context
));
}
format!(
"## Conversation Summary [COMPRESSED: {}]\n\n{}\n\n\
**Compression Info**:\n\
- ID: `{}`\n\
- Type: Semantic compression with file context\n\
---\n\
*Compressed using importance-based semantic chunking with automatic file context expansion.*",
compression_id,
sections.join("\n\n"),
compression_id
)
}
#[allow(dead_code)]
fn format_compressed_entry(preserved: &str, context: &str, compression_id: String) -> String {
format_compressed_entry_with_context(preserved, context, "", compression_id)
}
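
/// Picks the message range to compress: everything after the system prompt up
/// to (but not including) the last four user/assistant messages. Tool results
/// sitting between compressed and preserved messages fall inside the range
/// because the end index is taken relative to the first preserved message.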
fn find_compression_range(messages: &[crate::session::Message]) -> Result<(usize, usize)> {
let system_idx = messages
.iter()
.position(|m| m.role == "system")
.unwrap_or(0);
let conversation_indices: Vec<usize> = messages
.iter()
.enumerate()
.filter(|(_, m)| m.role == "user" || m.role == "assistant")
.map(|(idx, _)| idx)
.collect();
    if conversation_indices.len() <= 4 {
        // Nothing to compress: keep at least four conversation messages intact.
        return Ok((0, 0));
    }
let preserve_count = 4;
let compress_count = conversation_indices.len() - preserve_count;
let start_idx = system_idx + 1;
let end_idx = conversation_indices[compress_count] - 1;
Ok((start_idx, end_idx))
}
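
/// Sums estimated tokens over the inclusive message range, mirroring the
/// slice that will be removed during compression.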
fn calculate_range_tokens(session: &ChatSession, start_idx: usize, end_idx: usize) -> Result<u64> {
let mut total_tokens = 0u64;
if start_idx >= session.session.messages.len() {
return Err(anyhow::anyhow!("Invalid start_index in range"));
}
if end_idx >= session.session.messages.len() {
return Err(anyhow::anyhow!("Invalid end_index in range"));
}
    // Inclusive range: count every message that will be removed, including start_idx.
    for i in start_idx..=end_idx {
if let Some(message) = session.session.messages.get(i) {
let tokens = crate::session::estimate_message_tokens(message) as u64;
total_tokens += tokens;
}
}
Ok(total_tokens)
}
#[cfg(test)]
mod tests {
use super::find_compression_range;
use crate::session::Message;
use serde_json::json;
fn msg(role: &str) -> Message {
Message {
role: role.to_string(),
content: String::new(),
..Default::default()
}
}
#[test]
#[allow(clippy::vec_init_then_push)]
fn extends_range_to_include_tool_results() {
let mut messages = Vec::new();
messages.push(msg("system"));
messages.push(msg("user")); let mut assistant1 = msg("assistant"); assistant1.tool_calls = Some(json!([
{"id": "call_1", "type": "function", "function": {"name": "tool1"}}
]));
messages.push(assistant1);
let mut tool1 = msg("tool"); tool1.tool_call_id = Some("call_1".to_string());
messages.push(tool1);
messages.push(msg("user")); messages.push(msg("assistant")); messages.push(msg("user")); messages.push(msg("assistant")); messages.push(msg("user")); messages.push(msg("assistant"));
let (start_idx, end_idx) = find_compression_range(&messages).unwrap();
assert_eq!(start_idx, 1);
assert_eq!(
end_idx, 5,
"Must include all messages before first preserved conversation message"
);
}
#[test]
#[allow(clippy::vec_init_then_push)]
fn extends_when_ending_on_assistant_with_tools() {
let mut messages = Vec::new();
messages.push(msg("system"));
messages.push(msg("user")); messages.push(msg("assistant"));
messages.push(msg("user")); let mut assistant_with_tools = msg("assistant"); assistant_with_tools.tool_calls = Some(json!([
{"id": "call_1", "type": "function", "function": {"name": "tool1"}}
]));
messages.push(assistant_with_tools);
let mut tool1 = msg("tool"); tool1.tool_call_id = Some("call_1".to_string());
messages.push(tool1);
messages.push(msg("user")); messages.push(msg("assistant")); messages.push(msg("user")); messages.push(msg("assistant"));
let (start_idx, end_idx) = find_compression_range(&messages).unwrap();
assert_eq!(start_idx, 1);
assert_eq!(
end_idx, 5,
"Must include all messages (including tool results) before first preserved conversation message"
);
}
#[test]
#[allow(clippy::vec_init_then_push)]
fn handles_multiple_assistants_with_tools() {
let mut messages = Vec::new();
messages.push(msg("system"));
messages.push(msg("user"));
let mut assistant1 = msg("assistant"); assistant1.tool_calls = Some(json!([
{"id": "call_1", "type": "function", "function": {"name": "tool1"}}
]));
messages.push(assistant1);
let mut tool1 = msg("tool"); tool1.tool_call_id = Some("call_1".to_string());
messages.push(tool1);
let mut assistant2 = msg("assistant"); assistant2.tool_calls = Some(json!([
{"id": "call_2", "type": "function", "function": {"name": "tool2"}}
]));
messages.push(assistant2);
let mut tool2 = msg("tool"); tool2.tool_call_id = Some("call_2".to_string());
messages.push(tool2);
messages.push(msg("user")); messages.push(msg("assistant")); messages.push(msg("user")); messages.push(msg("assistant")); messages.push(msg("user"));
let (start_idx, end_idx) = find_compression_range(&messages).unwrap();
assert_eq!(start_idx, 1);
assert_eq!(
end_idx, 6,
"Must include all messages including tool results before first preserved"
);
}
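
    #[test]
    fn returns_empty_range_for_short_conversations() {
        // A minimal sketch of the early-return path: with four or fewer
        // user/assistant messages there is nothing to compress and the
        // function reports (0, 0).
        let messages = vec![
            msg("system"),
            msg("user"),
            msg("assistant"),
            msg("user"),
            msg("assistant"),
        ];
        let (start_idx, end_idx) = find_compression_range(&messages).unwrap();
        assert_eq!((start_idx, end_idx), (0, 0));
    }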
}