use crate::llm::{Completion, LLMClientDyn, TokenUsage};
use eyre::Result;
use rig::completion::Message;
use std::sync::Arc;
use tracing::warn;
/// Output-token cap passed as `max_tokens` on every summarization request issued by
/// `summarize_history`.
const COMPACTION_MAX_OUTPUT_TOKENS: u64 = 8_192;
/// Number of times `summarize_history` asks the model for a well-formed `<summary>` block
/// before giving up. The value is also quoted to the model in the compaction prompt.
const COMPACTION_MAX_ATTEMPTS: usize = 2;
/// Shrinks `history` so the conversation can continue with a smaller context.
///
/// The model is first asked (through `summarize_history`) to produce a `<summary>` of the
/// conversation. On success `history` is replaced by exactly two user messages — the summary
/// and a "continue" instruction — and `prompt` is set to that same "continue" message.
///
/// If summarization fails for any reason the error is logged with `warn!` and the function
/// falls back to hard truncation: roughly the newer half of `history` (at least two messages)
/// is kept, the cut is moved forward to the first user message inside that tail (or to the very
/// last message when the tail holds no user message), and `prompt` becomes a clone of the last
/// remaining message.
///
/// # Parameters
/// - `client`, `model`, `system_prompt`: forwarded to the summarization request.
/// - `history`: conversation so far; rewritten in place.
/// - `prompt`: pending prompt; overwritten in both the summary and the truncation path.
/// - `turn`: turn number, embedded in the summary header and in the compaction prompt.
/// - `usage`: token usage that the compaction prompt reports to the model.
///
/// # Returns
/// - `Ok(None)` when `history` is empty (nothing is touched) or when the truncation fallback ran.
/// - `Ok(Some(usage))` carrying the usage reported by the successful summarization response.
///
/// As written, no path in this function yields `Err`: summarization errors are converted into
/// the truncation fallback.
pub async fn compact_history(
    client: Arc<dyn LLMClientDyn>,
    model: &str,
    system_prompt: &str,
    history: &mut Vec<Message>,
    prompt: &mut Message,
    turn: usize,
    usage: TokenUsage,
) -> Result<Option<TokenUsage>> {
    if history.is_empty() {
        return Ok(None);
    }

    // The summarizer takes ownership of its input, and the original messages are still needed
    // for the fallback path, hence the clone.
    match summarize_history(client, model, system_prompt, history.clone(), turn, usage).await {
        Ok((response, summary)) => {
            let continue_msg = Message::user("Continue from where you left off.".to_string());
            *history = vec![
                Message::user(format!(
                    "compacted conversation summary before turn {turn}:\n\n{summary}"
                )),
                continue_msg.clone(),
            ];
            *prompt = continue_msg;
            Ok(Some(response.usage))
        }
        Err(err) => {
            warn!("compaction summarization failed ({err}), falling back to hard truncation");

            // `history` is non-empty here (guarded above), so `len - 1` cannot underflow.
            let last_index = history.len() - 1;
            let keep = (history.len() / 2).max(2);
            let tail_start = history.len().saturating_sub(keep);

            // Start the kept window on a user message when one exists in the tail; otherwise
            // keep only the final message.
            let cut = history[tail_start..]
                .iter()
                .position(|msg| matches!(msg, Message::User { .. }))
                .map_or(last_index, |offset| tail_start + offset);

            // `cut <= last_index`, so at least one message always survives the truncation.
            history.drain(..cut);

            *prompt = history
                .last()
                .cloned()
                .expect("history is non-empty: truncation always keeps the last message");
            Ok(None)
        }
    }
}
/// Asks the model to summarize `thread` and returns the raw response together with the text
/// found inside its `<summary>` block.
///
/// Up to `COMPACTION_MAX_ATTEMPTS` requests are sent. Each one uses `system_prompt` as the
/// preamble, a copy of `thread` as history, no tools, and an output cap of
/// `COMPACTION_MAX_OUTPUT_TOKENS`; the prompt is rebuilt per attempt so it can state the
/// attempt number.
///
/// # Errors
/// - Any error returned by `client.completion` is propagated immediately; the remaining
///   attempts are not used in that case.
/// - If no attempt yields a non-empty `<summary>` block, an error containing the text of the
///   last response is returned.
async fn summarize_history(
    client: Arc<dyn LLMClientDyn>,
    model: &str,
    system_prompt: &str,
    thread: Vec<Message>,
    turn: usize,
    usage: TokenUsage,
) -> Result<(crate::llm::CompletionResponse, String)> {
    // Text of the most recent response that lacked a usable summary, kept for the error report.
    let mut rejected_output = None;

    for attempt in 1..=COMPACTION_MAX_ATTEMPTS {
        let request = Completion {
            model: model.to_string(),
            prompt: Message::user(build_compaction_prompt(turn, usage, attempt)),
            preamble: Some(system_prompt.to_string()),
            history: thread.clone(),
            tools: Vec::new(),
            tool_choice: None,
            max_tokens: Some(COMPACTION_MAX_OUTPUT_TOKENS),
            additional_params: None,
        };
        let response = client.completion(request).await?;

        let raw = response.text();
        match extract_tag(&raw, "summary") {
            Some(summary) => return Ok((response, summary)),
            None => rejected_output = Some(raw),
        }
    }

    Err(eyre::eyre!(
        "compaction output missing <summary> block after {COMPACTION_MAX_ATTEMPTS} attempts: {}",
        rejected_output.unwrap_or_default()
    ))
}
/// Builds the user prompt that instructs the model to summarize the session.
///
/// The prompt prescribes a fixed markdown outline wrapped in `<summary>` tags and ends with
/// three interpolated facts: the upcoming `turn`, the token counts from `usage`
/// (input / output / total), and which `attempt` out of `COMPACTION_MAX_ATTEMPTS` this is.
fn build_compaction_prompt(turn: usize, usage: TokenUsage, attempt: usize) -> String {
    let input_tokens = usage.input_tokens;
    let output_tokens = usage.output_tokens;
    let total_tokens = usage.total_tokens;

    // Every line of the literal ends in a `\` continuation, which swallows the newline and the
    // next line's leading whitespace; the only line breaks in the result are the explicit `\n`s.
    format!(
        "You are an expert software engineer summarizing a code exploration and review session \
        to free up context window space.\n\n\
        Your goal is to capture the full context needed to continue the exploration/review \
        seamlessly, including what was just being done.\n\
        Focus strictly on information vital for code analysis. Omit conversational filler, raw \
        search/tool outputs, and large blocks of code.\n\
        When constructing the summary, you MUST use the following exact markdown structure \
        inside the tags:\n\
        <summary>\n\
        ## Review Goal\n\
        [1-2 sentences on the core objective of this code exploration/review. What are we \
        looking for?]\n\
        ## Key Findings & Discoveries\n\
        - [List major architectural insights, design patterns discovered, or critical \
        issues/bugs identified so far.]\n\
        - [Keep these concise but highly technical.]\n\
        ## Codebase Map (Relevant Files)\n\
        - [List the critical files and directories that have been identified as relevant \
        to the goal.]\n\
        - [Briefly note why they are relevant (e.g., `src/auth/token.rs`: handles JWT \
        validation and is where the bug likely resides).]\n\
        ## Explored Territory\n\
        - [Briefly list what areas, files, or concepts have already been thoroughly \
        investigated so we do not repeat work.]\n\
        ## Last Action & Immediate Context\n\
        - [Describe the most recent tool calls and their results. What was the agent trying \
        to find or verify? What did it just learn?]\n\
        ## Open Questions & Next Steps\n\
        - [List any unresolved anomalies, pending review items, constraints to remember, \
        or specific files that still need to be examined.]\n\
        </summary>\n\
        This compaction is happening before turn {turn}. Usage since the previous compaction: \
        {input_tokens} input, {output_tokens} output, {total_tokens} total tokens.\n\
        CRITICAL: This is attempt {attempt} of {COMPACTION_MAX_ATTEMPTS}. Return EXACTLY ONE \
        tagged block starting with <summary> and ending with </summary>. Do not include any \
        text, pleasantries, or explanations outside of these tags. If a prior attempt failed, \
        ensure you output ONLY the XML tags this time."
    )
}
/// Returns the trimmed text between the first `<tag>` and the first `</tag>` that follows it.
///
/// Yields `None` when the opening tag is absent, when no closing tag appears after the opening
/// tag, or when the enclosed text is empty once surrounding whitespace is removed.
fn extract_tag(text: &str, tag: &str) -> Option<String> {
    let open = format!("<{tag}>");
    let close = format!("</{tag}>");

    // `split_once` cuts at the first occurrence, so the closing tag is only searched for in the
    // text that follows the opening tag.
    let (_, after_open) = text.split_once(open.as_str())?;
    let (inner, _) = after_open.split_once(close.as_str())?;

    let inner = inner.trim();
    (!inner.is_empty()).then(|| inner.to_owned())
}