use tiktoken_rs::{bpe_for_model, cl100k_base};
use super::transcript::StoredMessage;
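/// Strips any `provider::` prefix from a model identifier so the bare model
/// name can be used for tokenizer lookup.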
fn model_tokenizer_name(model: &str) -> &str {
model
.rsplit_once("::")
.map(|(_, name)| name)
.unwrap_or(model)
}
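/// Counts tokens with the tokenizer for `model`, falling back to
/// `cl100k_base` and, if no tokenizer loads, to a whitespace split.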
pub(crate) fn count_tokens(model: &str, text: &str) -> usize {
let model_name = model_tokenizer_name(model);
if let Ok(bpe) = bpe_for_model(model_name) {
return bpe.encode_with_special_tokens(text).len();
}
cl100k_base()
.ok()
.map(|bpe| bpe.encode_with_special_tokens(text).len())
.unwrap_or_else(|| text.split_whitespace().count())
}
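/// Returns the first `max_chars` characters of `text`.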
fn take_chars(text: &str, max_chars: usize) -> String {
text.chars().take(max_chars).collect()
}
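/// Returns the last `max_chars` characters of `text`.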
fn take_last_chars(text: &str, max_chars: usize) -> String {
let mut chars = text.chars().rev().take(max_chars).collect::<Vec<_>>();
chars.reverse();
chars.into_iter().collect()
}
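/// Returns `text` unchanged when it fits within `max_tokens`; otherwise keeps
/// the head and tail under a character budget and prefixes a `[label]` note
/// describing what was dropped.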
pub(super) fn compact_text(text: &str, model: &str, max_tokens: usize, label: &str) -> String {
if count_tokens(model, text) <= max_tokens {
return text.to_string();
}
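    // Rough budget: assume ~3 characters per token, with a 512-character floor.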
let target_chars = max_tokens.saturating_mul(3).max(512);
let half = target_chars / 2;
let head = take_chars(text, half);
let tail = take_last_chars(text, half);
format!(
"[{label}] original ~{} tokens, {} bytes. Preserved head/tail.\n\n--- head ---\n{}\n\n--- tail ---\n{}",
count_tokens(model, text),
text.len(),
head.trim_end(),
tail.trim_start()
)
}
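/// Short role label used in summary headings and transcript rendering.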
fn message_label(message: &StoredMessage) -> &'static str {
match message {
StoredMessage::User { .. } => "user",
StoredMessage::Summary { .. } => "summary",
StoredMessage::Assistant { .. } => "assistant",
StoredMessage::AssistantToolCalls { .. } => "assistant_tool_calls",
StoredMessage::Tool { .. } => "tool",
}
}
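/// Flattens a stored message to plain text; tool calls render as one
/// `name arguments` line per call.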
pub(super) fn message_content_text(message: &StoredMessage) -> String {
match message {
StoredMessage::User { content }
| StoredMessage::Summary { content }
| StoredMessage::Assistant { content, .. } => content.clone(),
StoredMessage::AssistantToolCalls { tool_calls, .. } => tool_calls
.iter()
.map(|call| format!("{} {}", call.fn_name, call.fn_arguments))
.collect::<Vec<_>>()
.join("\n"),
StoredMessage::Tool { content, .. } => content.clone(),
}
}
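/// Fallback summary built without a model call: each message gets a labelled,
/// truncated excerpt, and the whole output is capped at `max_tokens`.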
pub(super) fn deterministic_summary(
messages: &[StoredMessage],
model: &str,
max_tokens: usize,
) -> String {
let mut out = String::from(
"This summary was produced deterministically to fit the context budget. Prefer exact recent messages that follow over this summary.\n\n",
);
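    // Split the token budget evenly across messages, clamped to a sensible per-message range.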
let per_message = (max_tokens / messages.len().max(1)).clamp(128, 1024);
for (idx, message) in messages.iter().enumerate() {
let text = message_content_text(message);
out.push_str(&format!(
"## {} {} (~{} tokens)\n",
idx + 1,
message_label(message),
count_tokens(model, &text)
));
match message {
StoredMessage::AssistantToolCalls { tool_calls, .. } => {
for call in tool_calls {
out.push_str(&format!(
"- tool call `{}` args: {}\n",
call.fn_name, call.fn_arguments
));
}
}
StoredMessage::Tool { call_id, .. } => {
out.push_str(&format!("call_id: `{call_id}`\n"));
out.push_str(&compact_text(
&text,
model,
per_message,
"old tool output summarized",
));
out.push('\n');
}
_ => {
out.push_str(&compact_text(
&text,
model,
per_message,
"old message summarized",
));
out.push('\n');
}
}
out.push('\n');
}
compact_text(&out, model, max_tokens, "deterministic transcript summary")
}
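/// Renders messages as indexed `<message>` blocks for the summarization
/// prompt, truncating each message and then the transcript as a whole.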
fn transcript_for_summary(messages: &[StoredMessage], model: &str, max_tokens: usize) -> String {
let mut out = String::new();
let per_message = (max_tokens / messages.len().max(1)).clamp(256, 2048);
for (idx, message) in messages.iter().enumerate() {
let text = message_content_text(message);
out.push_str(&format!(
"\n<message index=\"{}\" role=\"{}\">\n{}\n</message>\n",
idx + 1,
message_label(message),
compact_text(
&text,
model,
per_message,
"message pre-truncated for summarization"
)
));
}
compact_text(
&out,
model,
max_tokens,
"transcript pre-truncated for summarization",
)
}
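/// Returns the assistant's reasoning content, if present and non-blank.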
pub(super) fn msg_reasoning_content(message: &StoredMessage) -> Option<String> {
match message {
StoredMessage::Assistant {
reasoning_content, ..
}
| StoredMessage::AssistantToolCalls {
reasoning_content, ..
} => reasoning_content
.as_ref()
.filter(|reasoning| !reasoning.trim().is_empty())
.cloned(),
_ => None,
}
}
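/// Returns true if the contiguous run of `Tool` messages at the start of
/// `messages` contains a response for `call_id`.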
pub(super) fn has_following_tool_response(messages: &[StoredMessage], call_id: &str) -> bool {
for message in messages {
match message {
StoredMessage::Tool { call_id: id, .. } if id == call_id => return true,
StoredMessage::Tool { .. } => continue,
_ => return false,
}
}
false
}
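/// Builds the summarization prompt for compacting the transcript, embedding
/// the existing summary (if any) and a transcript pre-truncated to roughly
/// 48k tokens.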
pub(super) fn compaction_prompt(
existing_summary: Option<&str>,
messages: &[StoredMessage],
model: &str,
) -> String {
let prior = existing_summary.unwrap_or("");
let transcript = transcript_for_summary(messages, model, 48_000);
format!(
r#"You are compacting a coding-agent transcript so future requests stay under a context limit.
Preserve facts needed to continue work:
- user goals, constraints, preferences, and explicit instructions
- exact filenames, commands, APIs, errors, test results, and config/env names
- decisions made and rationale when important
- design constraints, invariants, and rejected abstractions when they affect next actions
- tool results that affect next actions
- changes already made
- active todos/current plan/open questions
Prefer preserving human input over assistant prose. Drop filler, repeated logs, and irrelevant verbose output. Do not invent facts.
Return concise markdown with sections:
## User intent
## Constraints
## Repo facts
## Changes made
## Commands/results
## Current plan
## Open issues
Existing summary, if any:
{prior}
Transcript to compact:
{transcript}
"#
)
}
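// A minimal sketch of tests for the pure string helpers in this module; the
// model names are placeholder fixtures, and the assertions hold whichever
// tokenizer fallback `count_tokens` ends up using.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn strips_provider_prefix() {
        assert_eq!(model_tokenizer_name("provider::gpt-4"), "gpt-4");
        assert_eq!(model_tokenizer_name("gpt-4"), "gpt-4");
    }

    #[test]
    fn head_and_tail_helpers_keep_expected_chars() {
        assert_eq!(take_chars("abcdef", 3), "abc");
        assert_eq!(take_last_chars("abcdef", 3), "def");
    }

    #[test]
    fn short_text_passes_through_compaction() {
        // Two whitespace-separated words stay under the budget even with the
        // whitespace-count fallback, so compact_text returns the input as-is.
        assert_eq!(compact_text("hello world", "gpt-4", 100, "label"), "hello world");
    }
}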