use crate::ports::SummarizationPort;
use crate::types::{ChatMessage, Conversation, MessageRole};
use crate::budget::BudgetAllocation;
use crate::error::TokenOptError;
use crate::estimator::TokenEstimator;
use crate::history::summarizer::ExtractiveSummarizer;
/// Identifies which compaction stage produced a [`CompactionResult`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompactionStrategy {
/// The budget did not require compaction; the conversation was left untouched.
None,
/// Merging near-duplicate adjacent messages was enough to fit the history budget.
Deduplication,
/// Tool-chain collapsing and/or whitespace collapsing was enough to fit the budget.
Lossless,
/// Relevance-based pruning with an extractive summary was applied. Also reported
/// when every stage has run and no later stage produced the final result.
Extractive,
/// Older messages were paraphrased through the summarization port to fit the budget.
Paraphrasing,
/// The summarization port generated a summary of the pruned messages as a last resort.
LlmFallback,
}
/// Outcome of a single `HistoryCompactor::compact` call.
#[derive(Debug, Clone)]
pub struct CompactionResult {
/// Number of messages reported as removed from the conversation history.
pub messages_removed: usize,
/// Estimated tokens before compaction minus estimated tokens afterwards
/// (computed with saturating subtraction, so never negative).
pub tokens_saved: u32,
/// Summary flag set by the compactor; always `false` for the stages that
/// never touch the conversation summary.
pub summary_generated: bool,
/// The stage that produced this result.
pub strategy: CompactionStrategy,
}
/// Compacts a conversation's message history so that it fits a token budget.
#[derive(Debug)]
pub struct HistoryCompactor {
/// Token cap handed to the summarizers and used to truncate the stored summary.
max_summary_tokens: u32,
}
impl HistoryCompactor {
#[must_use]
pub const fn new(max_summary_tokens: u32) -> Self {
Self { max_summary_tokens }
}
pub async fn compact(
&self,
conversation: &mut Conversation,
budget: &BudgetAllocation,
inference: Option<&dyn SummarizationPort>,
) -> Result<CompactionResult, TokenOptError> {
let initial_tokens = TokenEstimator::estimate_messages(&conversation.messages);
if !budget.requires_compaction {
return Ok(CompactionResult {
messages_removed: 0,
tokens_saved: 0,
summary_generated: false,
strategy: CompactionStrategy::None,
});
}
{
let dedup_result =
crate::history::dedup::deduplicate_adjacent(&conversation.messages, 0.7);
if dedup_result.merged_count > 0 {
conversation.messages = dedup_result.messages;
let after_dedup = TokenEstimator::estimate_messages(&conversation.messages);
if after_dedup <= budget.history {
return Ok(CompactionResult {
messages_removed: dedup_result.merged_count,
tokens_saved: initial_tokens.saturating_sub(after_dedup),
summary_generated: false,
strategy: CompactionStrategy::Deduplication,
});
}
}
}
{
let collapse_result =
crate::tools::chain_collapser::collapse_tool_chains(&conversation.messages);
if collapse_result.collapsed_count > 0 {
conversation.messages = collapse_result.messages;
let after_collapse = TokenEstimator::estimate_messages(&conversation.messages);
if after_collapse <= budget.history {
return Ok(CompactionResult {
messages_removed: collapse_result.collapsed_count,
tokens_saved: initial_tokens.saturating_sub(after_collapse),
summary_generated: false,
strategy: CompactionStrategy::Lossless,
});
}
}
}
for msg in &mut conversation.messages {
let trimmed = collapse_whitespace(&msg.content);
if trimmed.len() < msg.content.len() {
msg.content = trimmed;
}
}
let after_lossless = TokenEstimator::estimate_messages(&conversation.messages);
if after_lossless <= budget.history {
return Ok(CompactionResult {
messages_removed: 0,
tokens_saved: initial_tokens.saturating_sub(after_lossless),
summary_generated: false,
strategy: CompactionStrategy::Lossless,
});
}
let query: String = conversation
.messages
.iter()
.rev()
.find(|m| m.role == MessageRole::User)
.map_or_else(String::new, |m| m.content.clone());
let (pruned_messages, messages_removed) =
prune_by_relevance(conversation, budget.history, &query);
if !pruned_messages.is_empty() {
let summary =
ExtractiveSummarizer::summarize(&pruned_messages, self.max_summary_tokens);
if !summary.is_empty() {
let new_summary = if let Some(existing) = &conversation.summary {
format!("{existing} | {summary}")
} else {
summary
};
let compacted = crate::history::summary_compactor::compact_summary(
&new_summary,
self.max_summary_tokens,
inference,
)
.await;
conversation.summary = Some(truncate_summary(&compacted, self.max_summary_tokens));
}
}
let after_extractive = TokenEstimator::estimate_messages(&conversation.messages);
if after_extractive <= budget.history {
return Ok(CompactionResult {
messages_removed,
tokens_saved: initial_tokens.saturating_sub(after_extractive),
summary_generated: conversation.summary.is_some(),
strategy: CompactionStrategy::Extractive,
});
}
if let Some(port) = inference {
let pressure = f64::from(after_extractive) / f64::from(budget.history).max(1.0);
let current_turn = conversation.messages.len() / 2;
let (para_count, para_saved) = crate::history::paraphraser::paraphrase_old_messages(
&mut conversation.messages,
pressure,
current_turn,
port,
)
.await;
if para_count > 0 {
let after_para = TokenEstimator::estimate_messages(&conversation.messages);
if after_para <= budget.history {
return Ok(CompactionResult {
messages_removed,
tokens_saved: initial_tokens.saturating_sub(after_para).max(para_saved),
summary_generated: conversation.summary.is_some(),
strategy: CompactionStrategy::Paraphrasing,
});
}
}
}
if let Some(port) = inference {
if let Ok(result) = self.llm_summarize(port, &pruned_messages).await {
let new_summary = if let Some(existing) = &conversation.summary {
format!("{existing} | {result}")
} else {
result
};
let compacted = crate::history::summary_compactor::compact_summary(
&new_summary,
self.max_summary_tokens,
Some(port),
)
.await;
conversation.summary = Some(truncate_summary(&compacted, self.max_summary_tokens));
let after_llm = TokenEstimator::estimate_messages(&conversation.messages);
return Ok(CompactionResult {
messages_removed,
tokens_saved: initial_tokens.saturating_sub(after_llm),
summary_generated: true,
strategy: CompactionStrategy::LlmFallback,
});
}
}
let final_tokens = TokenEstimator::estimate_messages(&conversation.messages);
Ok(CompactionResult {
messages_removed,
tokens_saved: initial_tokens.saturating_sub(final_tokens),
summary_generated: conversation.summary.is_some(),
strategy: CompactionStrategy::Extractive,
})
}
async fn llm_summarize(
&self,
inference: &dyn SummarizationPort,
messages: &[ChatMessage],
) -> Result<String, TokenOptError> {
let content: String = messages
.iter()
.map(|m| format!("{}: {}", role_label(m.role), m.content))
.collect::<Vec<_>>()
.join("\n");
let prompt = format!(
"Summarize this conversation excerpt in 2-3 sentences. \
Preserve key facts, decisions, and context. Be concise.\n\n{content}"
);
inference
.summarize(
"You are a precise summarizer. Output only the summary, nothing else.",
&prompt,
)
.await
}
}
/// Replaces every run of whitespace in `text` with a single ASCII space and
/// strips leading and trailing whitespace.
fn collapse_whitespace(text: &str) -> String {
    // `split_whitespace` skips empty pieces, so runs of whitespace and
    // whitespace at either end disappear; joining restores single separators.
    let words: Vec<&str> = text.split_whitespace().collect();
    words.join(" ")
}
fn prune_by_relevance(
conversation: &mut Conversation,
history_budget: u32,
query: &str,
) -> (Vec<ChatMessage>, usize) {
const PRESERVE_RECENT_TURNS: usize = 5;
let preserve_messages = PRESERVE_RECENT_TURNS * 2;
let messages = std::mem::take(&mut conversation.messages);
let (system_msgs, non_system_msgs): (Vec<_>, Vec<_>) = messages
.into_iter()
.partition(|m| m.role == MessageRole::System);
let total = non_system_msgs.len();
if total <= preserve_messages {
conversation.messages = system_msgs;
conversation.messages.extend(non_system_msgs);
return (Vec::new(), 0);
}
let split = total.saturating_sub(preserve_messages);
let older = &non_system_msgs[..split];
let recent = &non_system_msgs[split..];
let scores = crate::history::relevance::score_messages(query, older);
let recent_tokens = TokenEstimator::estimate_messages(recent);
let system_tokens = TokenEstimator::estimate_messages(&system_msgs);
let remaining_budget = history_budget.saturating_sub(recent_tokens + system_tokens);
let mut kept_older_indices: Vec<usize> = Vec::new();
let mut used_tokens = 0u32;
for &(idx, _score) in &scores {
let msg_tokens = TokenEstimator::estimate_tokens(&older[idx].content);
if used_tokens + msg_tokens <= remaining_budget {
kept_older_indices.push(idx);
used_tokens += msg_tokens;
}
}
kept_older_indices.sort_unstable();
let kept_set: std::collections::HashSet<usize> = kept_older_indices.iter().copied().collect();
let mut pruned: Vec<ChatMessage> = older
.iter()
.enumerate()
.filter(|(i, _)| !kept_set.contains(i))
.map(|(_, m)| m.clone())
.collect();
let messages_removed = pruned.len();
conversation.messages = system_msgs;
for idx in &kept_older_indices {
conversation.messages.push(older[*idx].clone());
}
conversation.messages.extend(recent.iter().cloned());
let kept_tokens = TokenEstimator::estimate_messages(&conversation.messages);
if kept_tokens > history_budget && conversation.messages.len() > 2 {
let all_msgs = std::mem::take(&mut conversation.messages);
let (sys, non_sys): (Vec<_>, Vec<_>) = all_msgs
.into_iter()
.partition(|m| m.role == MessageRole::System);
let keep = non_sys.len().min(2);
let extra_pruned = non_sys.len().saturating_sub(keep);
let (extra_removed, kept): (Vec<_>, Vec<_>) = non_sys
.into_iter()
.enumerate()
.partition(|(i, _)| *i < extra_pruned);
pruned.extend(extra_removed.into_iter().map(|(_, m)| m));
conversation.messages = sys;
conversation
.messages
.extend(kept.into_iter().map(|(_, m)| m));
return (pruned, messages_removed + extra_pruned);
}
(pruned, messages_removed)
}
/// Returns `summary` unchanged when its estimated token count is within
/// `max_tokens`; otherwise cuts it to a character budget and appends `"..."`.
///
/// The character budget is `max_tokens * 4` with room reserved for the ellipsis.
/// NOTE(review): the 4-characters-per-token ratio is hard-coded here; confirm it
/// matches what `TokenEstimator` assumes.
fn truncate_summary(summary: &str, max_tokens: u32) -> String {
    const CHARS_PER_TOKEN: usize = 4;
    const ELLIPSIS: &str = "...";

    if TokenEstimator::estimate_tokens(summary) <= max_tokens {
        return summary.to_owned();
    }

    let char_budget = (max_tokens as usize) * CHARS_PER_TOKEN;
    let keep_chars = char_budget.saturating_sub(ELLIPSIS.len());
    // Count by `char`, not bytes, so a multi-byte character is never split.
    let mut shortened: String = summary.chars().take(keep_chars).collect();
    shortened.push_str(ELLIPSIS);
    shortened
}
/// Maps a message role to the capitalized speaker label used when rendering
/// messages into the summarization prompt.
const fn role_label(role: MessageRole) -> &'static str {
    match role {
        MessageRole::System => "System",
        MessageRole::User => "User",
        MessageRole::Assistant => "Assistant",
        MessageRole::Tool => "Tool",
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Counts the system-role messages currently held by `conv`.
    fn system_message_count(conv: &Conversation) -> usize {
        conv.messages
            .iter()
            .filter(|m| m.role == MessageRole::System)
            .count()
    }

    #[test]
    fn collapse_whitespace_removes_extra_spaces() {
        let collapsed = collapse_whitespace(" hello world ");
        assert_eq!(collapsed, "hello world");
    }

    #[test]
    fn collapse_whitespace_handles_newlines() {
        let collapsed = collapse_whitespace("hello\n\n world");
        assert_eq!(collapsed, "hello world");
    }

    #[test]
    fn truncate_summary_within_budget_unchanged() {
        let text = "Short summary.";
        let untouched = truncate_summary(text, 100);
        assert_eq!(untouched, text);
    }

    #[test]
    fn truncate_summary_over_budget() {
        let oversized = "A".repeat(2000);
        let result = truncate_summary(&oversized, 10);
        assert!(result.len() < 50);
        assert!(result.ends_with("..."));
    }

    #[test]
    fn prune_preserves_system_messages() {
        let mut conv = Conversation::with_system_prompt("System prompt");
        for i in 0..20 {
            conv.add_user_message(format!("Question {i}"));
            conv.add_assistant_message(format!("Answer {i}"));
        }
        let system_count_before = system_message_count(&conv);

        // Only the effect on `conv` matters here; the returned tuple is ignored.
        let _ = prune_by_relevance(&mut conv, 500, "Question");

        let system_count_after = system_message_count(&conv);
        assert_eq!(system_count_before, system_count_after);
    }

    #[test]
    fn prune_keeps_recent_messages() {
        let mut conv = Conversation::new();
        for i in 0..20 {
            conv.add_user_message(format!("Question {i} with some extra padding text"));
            conv.add_assistant_message(format!("Answer {i} with a longer response body here"));
        }

        let (pruned, removed) = prune_by_relevance(&mut conv, 100, "Question 19");

        assert!(removed > 0);
        assert_eq!(pruned.len(), removed);
        // The newest message must survive even under a very tight budget.
        let newest = conv.messages.last().unwrap();
        assert!(newest.content.starts_with("Answer 19"));
    }
}