use crate::llm::{Message, MessageContent, ToolResultContent};
use regex::Regex;
use std::sync::LazyLock;
use tiktoken_rs::{bpe_for_model, cl100k_base_singleton};
/// Returns the part of `model` that follows its last `::` separator.
///
/// When `model` contains no `::`, the whole string is returned unchanged.
fn model_tokenizer_name(model: &str) -> &str {
    match model.rsplit_once("::") {
        Some((_prefix, bare_name)) => bare_name,
        None => model,
    }
}
/// Counts the tokens that `text` encodes to for the given `model`.
///
/// The model identifier is first reduced with `model_tokenizer_name`. If
/// `bpe_for_model` returns an error for that name, the `cl100k_base` singleton
/// is used instead. Encoding is done with `encode_with_special_tokens`.
pub(crate) fn count_tokens(model: &str, text: &str) -> usize {
    let tokenizer = match bpe_for_model(model_tokenizer_name(model)) {
        Ok(bpe) => bpe,
        Err(_) => cl100k_base_singleton(),
    };
    tokenizer.encode_with_special_tokens(text).len()
}
/// Selects how much of the compression pipeline `compress` applies.
///
/// The derived `Ord` follows declaration order and `compress` gates its stages
/// with `>=` comparisons, so reordering the variants changes behavior.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) enum CompressionMode {
/// Replaced by `resolve_mode` with a concrete mode chosen from the input length.
Auto,
/// Ranks below `Normal`, so only the unconditional ANSI stripping runs.
Light,
/// Also collapses blank-line runs, folds repeated lines and shortens paths/hashes.
Normal,
/// Additionally truncates to a head and tail excerpt when the result exceeds the byte budget.
Aggressive,
}
/// Runs the compression pipeline over `text` and reports how much was removed.
///
/// `mode` is first passed through `resolve_mode`. ANSI escapes are always
/// stripped. At `Normal` and above, blank-line runs are collapsed, repeated
/// lines are folded and noisy patterns are shortened. At `Aggressive`, the
/// result is cut down to a head and tail excerpt if it is still longer than
/// `max_bytes`; `max_bytes` is not consulted in any other mode.
///
/// Returns the processed text together with `1 - new_len / original_len`
/// (byte lengths). Empty input yields `(String::new(), 0.0)`. The ratio is
/// negative whenever the output ends up longer than the input.
pub(crate) fn compress(text: &str, max_bytes: usize, mode: CompressionMode) -> (String, f64) {
    let original_len = text.len();
    if original_len == 0 {
        return (String::new(), 0.0);
    }

    let mode = resolve_mode(mode, original_len);
    let mut output = strip_ansi(text);

    if mode >= CompressionMode::Normal {
        let collapsed = collapse_blank_lines(&output);
        let deduped = dedup_repeated_lines(&collapsed);
        output = compact_noisy_patterns(&deduped);
    }

    if mode >= CompressionMode::Aggressive && output.len() > max_bytes {
        output = preserve_head_tail(&output, max_bytes);
    }

    // `original_len` is non-zero here thanks to the early return above.
    let ratio = 1.0 - output.len() as f64 / original_len as f64;
    (output, ratio)
}
/// Turns `CompressionMode::Auto` into a concrete mode based on `len`.
///
/// Lengths below 2048 map to `Light`, lengths below 16384 to `Normal`, and
/// anything larger to `Aggressive`. Any other `mode` is returned unchanged.
fn resolve_mode(mode: CompressionMode, len: usize) -> CompressionMode {
    match (mode, len) {
        (CompressionMode::Auto, 0..=2047) => CompressionMode::Light,
        (CompressionMode::Auto, 2048..=16383) => CompressionMode::Normal,
        (CompressionMode::Auto, _) => CompressionMode::Aggressive,
        (explicit, _) => explicit,
    }
}
/// Removes ANSI escape sequences from `text` via `strip_ansi_escapes::strip`.
///
/// The stripped bytes are converted back to a `String` lossily, so any byte
/// sequence that is not valid UTF-8 becomes U+FFFD.
fn strip_ansi(text: &str) -> String {
    let stripped = strip_ansi_escapes::strip(text);
    String::from_utf8_lossy(&stripped).into_owned()
}
/// Replaces every run of three or more consecutive `\n` characters with exactly two.
///
/// Only literal newline runs are matched; lines that contain spaces or other
/// whitespace are not treated as blank.
fn collapse_blank_lines(text: &str) -> String {
    static NEWLINE_RUN: LazyLock<Regex> = LazyLock::new(|| Regex::new("\n{3,}").unwrap());
    NEWLINE_RUN.replace_all(text, "\n\n").into_owned()
}
/// Folds runs of identical consecutive lines into one line plus a count marker.
///
/// Runs of one or two lines are emitted as-is. A run of three or more becomes
/// the first line followed by `[… N more identical lines …]`, where `N` is the
/// run length minus one. Whitespace-only lines never form a run. Input with
/// fewer than three lines is returned unchanged; otherwise the output is
/// re-joined with `\n`, so a trailing newline is not kept.
fn dedup_repeated_lines(text: &str) -> String {
    let lines: Vec<&str> = text.lines().collect();
    if lines.len() < 3 {
        return text.to_owned();
    }

    let mut folded: Vec<String> = Vec::with_capacity(lines.len());
    let mut start = 0;
    while start < lines.len() {
        let first = lines[start];

        // Advance `end` past every following line identical to `first`;
        // blank lines are deliberately kept as runs of length one.
        let mut end = start + 1;
        if !first.trim().is_empty() {
            while end < lines.len() && lines[end] == first {
                end += 1;
            }
        }

        match end - start {
            1 => folded.push(first.to_owned()),
            2 => {
                folded.push(first.to_owned());
                folded.push(first.to_owned());
            }
            run_len => {
                folded.push(first.to_owned());
                folded.push(format!("[… {} more identical lines …]", run_len - 1));
            }
        }
        start = end;
    }
    folded.join("\n")
}
fn compact_noisy_patterns(text: &str) -> String {
static PATH_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"(?:/[^\s:{}\[\]]+){4,}").unwrap());
let text = PATH_RE.replace_all(text, |caps: ®ex::Captures| {
let path = &caps[0];
if path.len() <= 60 {
return path.to_string();
}
let parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
if parts.len() <= 3 {
return path.to_string();
}
format!("/{}/…/{}", parts[0], parts[parts.len() - 1])
});
static HASH_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\b([0-9a-fA-F]{7})[0-9a-fA-F]{33,}\b").unwrap());
let text = HASH_RE.replace_all(&text, "$1..");
text.to_string()
}
/// Reduces `text` to a labelled head excerpt and tail excerpt.
///
/// The effective budget is `max_bytes` raised to at least 512 bytes; each
/// excerpt is requested from `partition_error_aware` with half of that budget.
/// Text that already fits the effective budget is returned unchanged.
fn preserve_head_tail(text: &str, max_bytes: usize) -> String {
    let target = max_bytes.max(512);
    // Compare against the effective budget, not the raw `max_bytes`: with
    // `max_bytes < text.len() <= target` both excerpts would span nearly the
    // whole input, producing duplicated output larger than the original.
    if text.len() <= target {
        return text.to_string();
    }
    let half = target / 2;
    let (head, tail) = partition_error_aware(text, half);
    format!(
        "… head …\n{}\n\n… tail …\n{}",
        head.trim_end(),
        tail.trim_start(),
    )
}
fn partition_error_aware(text: &str, half: usize) -> (String, String) {
static ERROR_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"(?i)(\b(?:error|FAILED|panicked at|fatal:|FAIL|ABORTED)\b)|(^error\[)")
.unwrap()
});
let lines: Vec<&str> = text.lines().collect();
if lines.len() <= 2 {
let head: String = text.chars().take(half).collect();
let tail: String = text
.chars()
.rev()
.take(half)
.collect::<Vec<_>>()
.into_iter()
.rev()
.collect();
return (head, tail);
}
let mut error_lines: Vec<&str> = Vec::new();
let mut head_lines: Vec<&str> = Vec::new();
let mut tail_lines: Vec<&str> = Vec::new();
let mut head_bytes = 0usize;
let mut tail_bytes = 0usize;
for &line in &lines {
if ERROR_RE.is_match(line) {
error_lines.push(line);
}
}
let error_budget = half / 3;
for &line in &error_lines {
if head_bytes >= error_budget {
break;
}
head_lines.push(line);
head_bytes += line.len() + 1;
}
for &line in &lines {
if head_bytes >= half {
break;
}
if ERROR_RE.is_match(line) {
continue; }
head_lines.push(line);
head_bytes += line.len() + 1;
}
for &line in lines.iter().rev() {
if tail_bytes >= half {
break;
}
tail_lines.push(line);
tail_bytes += line.len() + 1;
}
tail_lines.reverse();
(head_lines.join("\n"), tail_lines.join("\n"))
}
/// Compacts `text` to roughly `max_bytes` and prefixes a header describing the reduction.
///
/// Text that already fits within `max_bytes` is returned unchanged, without a
/// header. Otherwise `compress` is run in `Aggressive` mode; if it removes less
/// than 5% of the bytes, the original text is cut with `preserve_head_tail`
/// instead. The header carries `label`, the byte counts before and after, and
/// the percentage saved.
pub(super) fn compact_text(text: &str, max_bytes: usize, label: &str) -> String {
    let original_len = text.len();
    if original_len <= max_bytes {
        return text.to_owned();
    }

    let body = match compress(text, max_bytes, CompressionMode::Aggressive) {
        // Pipeline barely helped: fall back to plain head/tail on the raw text.
        (_, ratio) if ratio < 0.05 => preserve_head_tail(text, max_bytes),
        (compressed, _) => compressed,
    };

    let compacted_len = body.len();
    let saved_percent = 100.0 * (1.0 - compacted_len as f64 / original_len as f64);
    format!(
        "[{label} — compacted {}→{} bytes ({:.0}%); re-run tool for full output]\n{body}",
        original_len, compacted_len, saved_percent,
    )
}
/// Flattens a `Message` into plain text.
///
/// A system message yields a copy of its content. For user and assistant
/// messages every content part is rendered with `message_content_part_text`
/// and the pieces are joined with `\n`.
pub(super) fn message_content_text(message: &Message) -> String {
    let parts = match message {
        Message::System { content } => return content.clone(),
        Message::User { content } | Message::Assistant { content, .. } => content,
    };
    let rendered: Vec<String> = parts.iter().map(message_content_part_text).collect();
    rendered.join("\n")
}
/// Renders a single `MessageContent` part as plain text.
///
/// * `Text` yields a copy of its text.
/// * `ToolCall` yields the tool name and its arguments separated by a space.
/// * `ToolResult` yields its items rendered with `tool_result_content_text`,
///   joined with `\n`.
/// * `Reasoning` and `Opaque` are rendered with `value_to_text`.
fn message_content_part_text(content: &MessageContent) -> String {
    match content {
        MessageContent::Text { text } => text.clone(),
        MessageContent::ToolCall {
            name, arguments, ..
        } => format!("{name} {arguments}"),
        MessageContent::ToolResult { content, .. } => {
            let pieces: Vec<String> = content.iter().map(tool_result_content_text).collect();
            pieces.join("\n")
        }
        MessageContent::Reasoning { value } | MessageContent::Opaque { value } => {
            value_to_text(value)
        }
    }
}
/// Renders one tool-result item as plain text.
///
/// `Text` items yield a copy of their text; `Opaque` items are rendered with
/// `value_to_text`.
fn tool_result_content_text(content: &ToolResultContent) -> String {
    match content {
        ToolResultContent::Opaque { value } => value_to_text(value),
        ToolResultContent::Text { text } => text.clone(),
    }
}
/// Extracts display text from a JSON value.
///
/// Order of preference: the value itself when it is a JSON string, then a
/// string stored under its `"text"` key, and finally the value serialized as
/// JSON (an empty string if serialization fails).
fn value_to_text(value: &serde_json::Value) -> String {
    let plain = value
        .as_str()
        .or_else(|| value.get("text").and_then(serde_json::Value::as_str));
    match plain {
        Some(text) => text.to_owned(),
        None => serde_json::to_string(value).unwrap_or_default(),
    }
}
// Unit tests for the compression helpers defined in this file.
#[cfg(test)]
mod tests {
use super::*;
// ANSI color escapes are removed while the visible text is kept.
#[test]
fn strip_ansi_removes_color_codes() {
let input = "\x1b[32mgreen\x1b[0m text";
assert_eq!(strip_ansi(input), "green text");
}
// Both newline runs (four and three long) shrink to two newlines each,
// leaving "a\n\nb\n\nc", which contains four newline characters.
#[test]
fn collapse_blank_lines_squashes_gaps() {
let input = "a\n\n\n\nb\n\n\nc";
let result = collapse_blank_lines(input);
assert_eq!(result.matches('\n').count(), 4); }
// A run of four identical "error" lines is folded into one line plus a
// count marker; the lines around the run are kept.
#[test]
fn dedup_repeated_lines_collapses_runs() {
let input = "error\nwarning\nerror\nerror\nerror\nerror\nfatal";
let result = dedup_repeated_lines(input);
assert!(
result.contains("more identical lines"),
"expected count; got: {result}"
);
assert!(result.contains("fatal"));
}
// A path longer than 60 bytes is abbreviated with an ellipsis, and a hex
// string of more than 40 digits is cut to its first seven digits plus "..".
#[test]
fn compact_noisy_patterns_shortens_paths_and_hashes() {
let input = "at /home/user/projects/rust/oy-cli/src/agent/subdir/another/compaction.rs:42\ncommit a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c";
let result = compact_noisy_patterns(input);
assert!(result.contains("…"));
assert!(!result.contains("a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c"));
assert!(result.contains("a1b2c3d.."));
}
// Input with nothing to strip, collapse, fold or shorten comes back
// unchanged, so the reported ratio is exactly 0.0.
#[test]
fn compress_under_budget_returns_unchanged() {
let input = "short text";
let (result, ratio) = compress(input, 1024, CompressionMode::Normal);
assert_eq!(result, input);
assert_eq!(ratio, 0.0);
}
// A single 5000-byte line over a 512-byte budget is cut to head and tail.
// The bound of 1024 leaves room for the head/tail marker lines.
#[test]
fn compress_over_budget_preserves_head_tail() {
let input = "x".repeat(5000);
let (result, ratio) = compress(&input, 512, CompressionMode::Aggressive);
assert!(result.len() <= 1024); assert!(ratio > 0.5);
}
// One error line is inserted in the middle of 200 info lines and a fatal
// line is appended at the end; both must appear in the compacted output.
#[test]
fn error_lines_survive_compaction() {
let mut lines: Vec<String> = (0..200).map(|i| format!("info: line {i}")).collect();
lines.insert(50, "error: something broke".into());
lines.push("fatal: unrecoverable".into());
let input = lines.join("\n");
let (result, _) = compress(&input, 512, CompressionMode::Aggressive);
assert!(
result.contains("error: something broke"),
"error line was dropped"
);
assert!(
result.contains("fatal: unrecoverable"),
"fatal line was dropped"
);
}
}