use async_trait::async_trait;
use genai::chat::{ChatMessage, ChatOptions, ChatRequest};
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
use thiserror::Error;
use tirea_contract::runtime::inference::ContextCompactionMode;
use tirea_contract::runtime::tool_call::{SuspendedCall, ToolCallState, ToolResult};
use tirea_contract::thread::{Message, Role, Thread};
use tirea_state::State;
use crate::engine::token_estimator::estimate_message_tokens;
use crate::runtime::loop_runner::LlmExecutor;
use super::state::ContextState;
use super::transform::is_pending_approval_notice;
use super::{SUMMARY_MESSAGE_CLOSE, SUMMARY_MESSAGE_OPEN};
const SUMMARY_SYSTEM_PROMPT: &str = "You maintain a durable conversation summary for an agent runtime. Produce a concise but lossless working summary for future turns. Preserve user goals, constraints, preferences, decisions, completed work, important tool findings, file paths, identifiers, and unresolved follow-ups. Output plain text only; do not mention the summarization process.";
const SUMMARY_RESPONSE_MAX_TOKENS: u32 = 1024;
pub(super) const MIN_COMPACTION_GAIN_TOKENS: usize = 1024;
pub(super) const DEFAULT_ARTIFACT_COMPACT_THRESHOLD_TOKENS: usize = 2048;
const ARTIFACT_PREVIEW_MAX_CHARS: usize = 1600;
const ARTIFACT_PREVIEW_MAX_LINES: usize = 24;
#[derive(Debug, Clone, PartialEq, Eq)]
pub(super) struct CompactionPlan {
pub(super) start_index: usize,
pub(super) boundary_index: usize,
pub(super) boundary_message_id: String,
pub(super) covered_token_count: usize,
pub(super) covered_message_ids: Vec<String>,
pub(super) covered_tool_call_ids: Vec<String>,
}
#[derive(Debug, Clone)]
pub(super) struct SummaryPayload {
pub(super) previous_summary: Option<String>,
pub(super) transcript: String,
}
#[derive(Debug, Error)]
pub(super) enum ContextError {
#[error("summary model returned no text")]
EmptySummary,
#[error("failed to execute summary request: {0}")]
SummaryExecution(#[from] genai::Error),
}
#[async_trait]
pub(super) trait ContextSummarizer: Send + Sync {
async fn summarize(&self, payload: SummaryPayload) -> Result<String, ContextError>;
}
#[derive(Clone)]
pub(super) struct LlmContextSummarizer {
model: String,
executor: Arc<dyn LlmExecutor>,
chat_options: Option<ChatOptions>,
}
impl LlmContextSummarizer {
pub(super) fn new(
model: String,
executor: Arc<dyn LlmExecutor>,
chat_options: Option<ChatOptions>,
) -> Self {
Self {
model,
executor,
chat_options,
}
}
}
#[async_trait]
impl ContextSummarizer for LlmContextSummarizer {
async fn summarize(&self, payload: SummaryPayload) -> Result<String, ContextError> {
let prompt = render_summary_prompt(&payload);
let request = ChatRequest::new(vec![
ChatMessage::system(SUMMARY_SYSTEM_PROMPT),
ChatMessage::user(prompt),
]);
let mut options = self.chat_options.clone().unwrap_or_default();
options = options
.with_capture_usage(true)
.with_max_tokens(SUMMARY_RESPONSE_MAX_TOKENS);
let response = self
.executor
.exec_chat_response(&self.model, request, Some(&options))
.await?;
let summary = response
.first_text()
.map(str::trim)
.filter(|text| !text.is_empty())
.ok_or(ContextError::EmptySummary)?;
Ok(summary.to_string())
}
}
fn render_summary_prompt(payload: &SummaryPayload) -> String {
match payload.previous_summary.as_deref() {
Some(previous) if !previous.trim().is_empty() => format!(
"Update the cumulative summary with the new conversation span. Preserve all still-relevant facts from the existing summary, then merge in the new span.\n\n<existing-summary>\n{}\n</existing-summary>\n\n<new-conversation>\n{}\n</new-conversation>",
previous.trim(),
payload.transcript.trim(),
),
_ => format!(
"Summarize the following conversation span for continued execution.\n\n<conversation>\n{}\n</conversation>",
payload.transcript.trim(),
),
}
}
pub(super) fn estimate_arc_messages_tokens(messages: &[Arc<Message>]) -> usize {
messages
.iter()
.map(|message| estimate_message_tokens(message))
.sum()
}
pub(super) fn unsummarized_start_index(
messages: &[Arc<Message>],
state: &ContextState,
) -> Option<usize> {
let Some(boundary) = state.latest_boundary() else {
return Some(0);
};
Some(
messages
.iter()
.position(|message| {
message.id.as_deref() == Some(boundary.covers_through_message_id.as_str())
})
.map(|idx| idx + 1)
.unwrap_or(0),
)
}
fn protected_tail_start(
messages: &[Arc<Message>],
start_index: usize,
raw_suffix_messages: usize,
) -> usize {
if start_index >= messages.len() {
return messages.len();
}
let min_tail_start = messages.len().saturating_sub(raw_suffix_messages);
let last_user_index = messages
.iter()
.enumerate()
.skip(start_index)
.filter(|(_, message)| message.role == Role::User)
.map(|(idx, _)| idx)
.next_back();
let candidate = last_user_index.map_or(min_tail_start, |idx| idx.min(min_tail_start));
candidate.max(start_index)
}
fn compaction_search_end(
messages: &[Arc<Message>],
start_index: usize,
mode: ContextCompactionMode,
raw_suffix_messages: usize,
) -> usize {
match mode {
ContextCompactionMode::KeepRecentRawSuffix => {
protected_tail_start(messages, start_index, raw_suffix_messages)
}
ContextCompactionMode::CompactToSafeFrontier => messages.len(),
}
}
pub(super) fn find_compaction_plan(
messages: &[Arc<Message>],
state: &ContextState,
tool_states: &HashMap<String, ToolCallState>,
suspended_calls: &HashMap<String, SuspendedCall>,
mode: ContextCompactionMode,
raw_suffix_messages: usize,
) -> Option<CompactionPlan> {
let start_index = unsummarized_start_index(messages, state)?;
let search_end = compaction_search_end(messages, start_index, mode, raw_suffix_messages);
if search_end <= start_index {
return None;
}
let mut open_calls = HashSet::<String>::new();
let mut best_boundary = None;
for (idx, message) in messages
.iter()
.enumerate()
.skip(start_index)
.take(search_end - start_index)
{
if let Some(calls) = &message.tool_calls {
for call in calls {
open_calls.insert(call.id.clone());
}
}
if message.role == Role::Tool {
if let Some(call_id) = message.tool_call_id.as_deref() {
if open_calls.contains(call_id)
&& is_terminal_tool_result(call_id, message, tool_states, suspended_calls)
{
open_calls.remove(call_id);
}
}
}
let next_is_tool = messages
.get(idx + 1)
.is_some_and(|next| next.role == Role::Tool);
if !open_calls.is_empty() || next_is_tool {
continue;
}
if let Some(boundary_message_id) = message.id.clone() {
best_boundary = Some((idx, boundary_message_id));
}
}
let (boundary_index, boundary_message_id) = best_boundary?;
let covered_slice = &messages[start_index..=boundary_index];
let covered_token_count = estimate_arc_messages_tokens(covered_slice);
let covered_message_ids = covered_slice
.iter()
.filter_map(|message| message.id.clone())
.collect();
let covered_tool_call_ids = covered_slice
.iter()
.filter_map(|message| message.tool_call_id.clone())
.collect();
Some(CompactionPlan {
start_index,
boundary_index,
boundary_message_id,
covered_token_count,
covered_message_ids,
covered_tool_call_ids,
})
}
fn is_terminal_tool_result(
call_id: &str,
message: &Message,
tool_states: &HashMap<String, ToolCallState>,
suspended_calls: &HashMap<String, SuspendedCall>,
) -> bool {
if is_pending_approval_notice(message) || suspended_calls.contains_key(call_id) {
return false;
}
tool_states
.get(call_id)
.map(|state| state.status.is_terminal())
.unwrap_or(true)
}
pub(super) fn render_messages_for_summary(messages: &[Message]) -> String {
messages
.iter()
.map(render_message_for_summary)
.collect::<Vec<_>>()
.join("\n\n")
}
fn render_message_for_summary(message: &Message) -> String {
match message.role {
Role::System => format!("[system]\n{}", message.content.trim()),
Role::User => format!("[user]\n{}", message.content.trim()),
Role::Assistant => {
let mut body = String::new();
if !message.content.trim().is_empty() {
body.push_str(message.content.trim());
}
if let Some(tool_calls) = &message.tool_calls {
if !body.is_empty() {
body.push_str("\n\n");
}
body.push_str("tool_calls:\n");
for call in tool_calls {
body.push_str("- ");
body.push_str(&call.name);
body.push(' ');
body.push_str(&call.arguments.to_string());
body.push('\n');
}
body.truncate(body.trim_end_matches('\n').len());
}
format!("[assistant]\n{}", body.trim())
}
Role::Tool => format!(
"[tool:{}]\n{}",
message.tool_call_id.as_deref().unwrap_or("unknown"),
message.content.trim()
),
}
}
pub(super) fn now_ms() -> u64 {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.map(|duration| duration.as_millis() as u64)
.unwrap_or(0)
}
fn truncate_preview(text: &str, max_chars: usize, max_lines: usize) -> String {
let mut out = String::new();
for (line_idx, line) in text.lines().enumerate() {
if line_idx >= max_lines {
break;
}
if !out.is_empty() {
out.push('\n');
}
out.push_str(line);
if out.chars().count() >= max_chars {
break;
}
}
let mut chars = out.chars();
let mut truncated = String::new();
for _ in 0..max_chars {
let Some(ch) = chars.next() else {
return out.trim().to_string();
};
truncated.push(ch);
}
if chars.next().is_some() || text.lines().count() > max_lines {
truncated.push_str("\n…");
}
truncated.trim().to_string()
}
pub(super) fn build_artifact_preview(result: &ToolResult) -> String {
let mut sections = Vec::new();
sections.push(format!("tool: {}", result.tool_name));
sections.push(format!("status: {:?}", result.status).to_lowercase());
if let Some(message) = result
.message
.as_deref()
.map(str::trim)
.filter(|text| !text.is_empty())
{
sections.push(format!("message: {message}"));
}
if !result.data.is_null() {
let data_text =
serde_json::to_string_pretty(&result.data).unwrap_or_else(|_| result.data.to_string());
let preview = truncate_preview(
&data_text,
ARTIFACT_PREVIEW_MAX_CHARS,
ARTIFACT_PREVIEW_MAX_LINES,
);
if !preview.is_empty() {
sections.push(format!("data preview:\n{preview}"));
}
}
sections.join("\n")
}
pub(crate) fn trim_thread_to_latest_boundary(thread: &mut Thread) {
let state = match thread.rebuild_state() {
Ok(s) => s,
Err(e) => {
tracing::warn!(error = %e, "skipping lazy trim: state rebuild failed");
return;
}
};
let ctx_value = match state.get(<ContextState as State>::PATH) {
Some(v) => v,
None => return,
};
let cm_state: ContextState = match serde_json::from_value(ctx_value.clone()) {
Ok(s) => s,
Err(_) => return,
};
let Some(boundary) = cm_state.latest_boundary() else {
return;
};
let Some(idx) = thread
.messages
.iter()
.position(|m| m.id.as_deref() == Some(boundary.covers_through_message_id.as_str()))
else {
return;
};
let summary = Message::internal_system(format!(
"{SUMMARY_MESSAGE_OPEN}\n{}\n{SUMMARY_MESSAGE_CLOSE}",
boundary.summary
));
thread.messages.drain(..=idx);
thread.messages.insert(0, Arc::new(summary));
}