use super::ExperimentalStats;
use crate::provider::{ContentPart, Message};
/// Number of most-recent messages that `snippet_stale_tool_outputs` never
/// modifies; only messages older than this trailing window are considered.
pub const RECENCY_WINDOW: usize = 8;
/// Tool results shorter than this many bytes are left untouched.
pub const SNIPPET_THRESHOLD_BYTES: usize = 4096;
/// Upper bound, in bytes, on the prefix kept from a snipped tool result
/// (the cut is moved down to the nearest UTF-8 char boundary).
pub const HEAD_BYTES: usize = 1024;
/// Upper bound, in bytes, on the suffix kept from a snipped tool result
/// (the cut is moved up to the nearest UTF-8 char boundary).
pub const TAIL_BYTES: usize = 1024;
/// Replaces the middle of large tool results in older messages with a short
/// elision marker, keeping only a head and a tail of the original text.
///
/// The last `RECENCY_WINDOW` messages are never touched. Among the older
/// messages, a tool result is rewritten only when it is at least
/// `SNIPPET_THRESHOLD_BYTES` long, does not already contain an
/// `[...elided` marker, and does not start with `[DEDUP]`.
///
/// Returns stats with one `snippet_hits` increment per rewritten tool result
/// and the byte reduction accumulated in `total_bytes_saved`.
pub fn snippet_stale_tool_outputs(messages: &mut [Message]) -> ExperimentalStats {
    let mut stats = ExperimentalStats::default();

    // Nothing is stale unless the history is strictly longer than the window.
    let Some(cutoff) = messages
        .len()
        .checked_sub(RECENCY_WINDOW)
        .filter(|&n| n > 0)
    else {
        return stats;
    };

    let stale_parts = messages[..cutoff]
        .iter_mut()
        .flat_map(|msg| msg.content.iter_mut());

    for part in stale_parts {
        let body = match part {
            ContentPart::ToolResult { content, .. } => content,
            _ => continue,
        };

        let full_len = body.len();
        if full_len < SNIPPET_THRESHOLD_BYTES {
            continue;
        }
        // Skip outputs that an earlier pass already compacted.
        let already_compacted = body.contains("[...elided") || body.starts_with("[DEDUP]");
        if already_compacted {
            continue;
        }

        // Snap both cut points to char boundaries so the slices below cannot
        // split a multi-byte character.
        let keep_head = floor_char_boundary(body, HEAD_BYTES);
        let keep_tail_from = ceil_char_boundary(body, full_len - TAIL_BYTES);
        let marker = format!(
            "\n[...elided {} bytes...]\n",
            keep_tail_from - keep_head
        );

        let mut compact = String::with_capacity(HEAD_BYTES + TAIL_BYTES + 64);
        compact.push_str(&body[..keep_head]);
        compact.push_str(&marker);
        compact.push_str(&body[keep_tail_from..]);

        stats.total_bytes_saved += full_len.saturating_sub(compact.len());
        stats.snippet_hits += 1;
        *body = compact;
    }

    stats
}
/// Returns the largest UTF-8 char boundary in `s` that is `<= idx`.
///
/// `idx` is clamped to `s.len()` first, so out-of-range indices yield
/// `s.len()`.
fn floor_char_boundary(s: &str, idx: usize) -> usize {
    let start = idx.min(s.len());
    // Offset 0 is always a boundary, so the search always finds something.
    (0..=start)
        .rev()
        .find(|&pos| s.is_char_boundary(pos))
        .unwrap_or(0)
}
/// Returns the smallest UTF-8 char boundary in `s` that is `>= idx`.
///
/// `idx` is clamped to `s.len()` first, so out-of-range indices yield
/// `s.len()`.
fn ceil_char_boundary(s: &str, idx: usize) -> usize {
    let end = s.len();
    let start = idx.min(end);
    // `end` is always a boundary, so the search always finds something.
    (start..=end)
        .find(|&pos| s.is_char_boundary(pos))
        .unwrap_or(end)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a tool-role message holding a single tool result with `content`.
    fn tool_msg(content: &str) -> Message {
        Message {
            role: crate::provider::Role::Tool,
            content: vec![ContentPart::ToolResult {
                tool_call_id: "x".into(),
                content: content.into(),
            }],
        }
    }

    /// Builds a user-role message holding a single text part.
    fn user_msg(text: &str) -> Message {
        Message {
            role: crate::provider::Role::User,
            content: vec![ContentPart::Text { text: text.into() }],
        }
    }

    /// Returns the text of the first content part, which must be a tool result.
    fn tool_content(msg: &Message) -> &str {
        match &msg.content[0] {
            ContentPart::ToolResult { content, .. } => content.as_str(),
            _ => panic!("expected the first content part to be a tool result"),
        }
    }

    #[test]
    fn short_histories_are_untouched() {
        let mut msgs = vec![tool_msg(&"x".repeat(10_000))];
        let stats = snippet_stale_tool_outputs(&mut msgs);
        assert_eq!(stats.snippet_hits, 0);
        assert_eq!(tool_content(&msgs[0]).len(), 10_000);
    }

    #[test]
    fn recent_tool_outputs_are_protected() {
        // One oversized output just outside the recency window and one inside
        // it: only the stale one may be rewritten.
        let big = "x".repeat(10_000);
        let mut msgs = vec![tool_msg(&big)];
        msgs.extend((0..RECENCY_WINDOW - 1).map(|i| user_msg(&format!("m{i}"))));
        msgs.push(tool_msg(&big));

        let stats = snippet_stale_tool_outputs(&mut msgs);

        assert_eq!(stats.snippet_hits, 1);
        assert!(tool_content(&msgs[0]).contains("[...elided"));
        let newest = msgs.last().expect("history is non-empty");
        assert_eq!(tool_content(newest), big);
    }

    #[test]
    fn utf8_boundaries_are_respected() {
        // 1 + 16000 + 2 bytes. The raw cut points (byte 1024 and byte
        // 16003 - 1024 = 14979) both land inside a 4-byte char, so slicing
        // without boundary adjustment would panic.
        let text = format!("a{}zz", "🦀".repeat(4000));
        let mut msgs = vec![tool_msg(&text)];
        msgs.extend((0..RECENCY_WINDOW + 2).map(|i| user_msg(&format!("m{i}"))));

        let stats = snippet_stale_tool_outputs(&mut msgs);

        assert_eq!(stats.snippet_hits, 1);
        let content = tool_content(&msgs[0]);
        // Head is floored to byte 1021 and tail is ceiled to byte 14981.
        assert!(content.contains("\n[...elided 13960 bytes...]\n"));
        assert!(content.starts_with("a🦀"));
        assert!(content.ends_with("🦀zz"));
    }

    #[test]
    fn already_snipped_outputs_are_not_resnipped() {
        let mut msgs = vec![tool_msg(&format!(
            "{}\n[...elided 99 bytes...]\n{}",
            "H".repeat(2000),
            "T".repeat(2000)
        ))];
        msgs.extend((0..RECENCY_WINDOW + 2).map(|i| user_msg(&format!("m{i}"))));
        let before_len = tool_content(&msgs[0]).len();

        let stats = snippet_stale_tool_outputs(&mut msgs);

        assert_eq!(stats.snippet_hits, 0);
        assert_eq!(tool_content(&msgs[0]).len(), before_len);
    }
}