use super::ExperimentalStats;
use crate::provider::{ContentPart, Message, Role};
/// Number of messages at the end of the slice that [`prune_low_entropy`] never modifies.
///
/// A slice with this many messages or fewer is left entirely untouched.
pub const KEEP_LAST_MESSAGES: usize = 6;
/// Shrinks the text of older assistant messages in place and reports the savings.
///
/// The last [`KEEP_LAST_MESSAGES`] entries of `messages` are never touched, and neither are
/// messages whose role is not [`Role::Assistant`]. For every remaining
/// [`ContentPart::Text`] part the text is run through `prune_text`; the result replaces the
/// original only when it is strictly shorter in bytes.
///
/// # Returns
///
/// An [`ExperimentalStats`] whose `total_bytes_saved` has been increased by the bytes removed
/// and whose `snippet_hits` has been increased by one for each text part that was replaced.
/// All other fields keep their `Default` values.
pub fn prune_low_entropy(messages: &mut [Message]) -> ExperimentalStats {
    let mut stats = ExperimentalStats::default();

    // `None` means the history is shorter than the protected tail; `Some(0)` means it is
    // exactly that long. Either way nothing is eligible.
    let Some(cutoff) = messages.len().checked_sub(KEEP_LAST_MESSAGES) else {
        return stats;
    };

    let old_assistant_turns = messages[..cutoff]
        .iter_mut()
        .filter(|turn| turn.role == Role::Assistant);

    for turn in old_assistant_turns {
        for part in turn.content.iter_mut() {
            if let ContentPart::Text { text } = part {
                let compacted = prune_text(text);
                // Only swap the text in when pruning actually made it smaller.
                if compacted.len() < text.len() {
                    stats.total_bytes_saved += text.len() - compacted.len();
                    stats.snippet_hits += 1;
                    *text = compacted;
                }
            }
        }
    }

    stats
}
/// Compacts `input` line by line while copying fenced code verbatim.
///
/// A line whose first non-whitespace characters are three backticks toggles "inside code";
/// that line and every line while inside code are copied unchanged.
///
/// Outside code:
/// * a *noise* line is one that, after trailing whitespace is removed, is empty or consists
///   solely of the characters `.`, `…`, `-`, `_`, `*`, `=`. A run of consecutive noise lines
///   is replaced by a single empty line;
/// * every other line has trailing whitespace removed and runs of spaces collapsed via
///   `collapse_spaces`.
///
/// The output never contains a line terminator the input did not have: an unterminated final
/// line stays unterminated, and an unterminated final noise line contributes nothing.
fn prune_text(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    let mut in_code = false;
    let mut in_noise_run = false;

    for line in input.split_inclusive('\n') {
        // Only the last segment can lack a terminator; remember which case this is so the
        // same terminator (or none) is written back.
        let (body, newline) = match line.strip_suffix('\n') {
            Some(body) => (body, "\n"),
            None => (line, ""),
        };

        if body.trim_start().starts_with("```") {
            in_code = !in_code;
            out.push_str(line);
            in_noise_run = false;
            continue;
        }
        if in_code {
            out.push_str(line);
            continue;
        }

        let trimmed = body.trim_end();
        // `all` is vacuously true for an empty string, so blank and whitespace-only lines
        // (which are empty once trimmed) are classified as noise by the same check.
        let is_noise = trimmed
            .chars()
            .all(|c| matches!(c, '.' | '…' | '-' | '_' | '*' | '='));
        if is_noise {
            // Keep one separator per run, and only if this line really ended with one.
            if !in_noise_run {
                out.push_str(newline);
            }
            in_noise_run = true;
            continue;
        }

        in_noise_run = false;
        out.push_str(&collapse_spaces(trimmed));
        out.push_str(newline);
    }

    out
}
/// Returns `s` with every run of consecutive U+0020 spaces reduced to a single space.
///
/// Only the space character is affected; tabs and other whitespace pass through unchanged,
/// and a leading or trailing run is reduced to one space rather than removed.
fn collapse_spaces(s: &str) -> String {
    let mut squeezed = String::with_capacity(s.len());
    for ch in s.chars() {
        // A space is redundant exactly when the character just written was also a space.
        let repeats_space = ch == ' ' && squeezed.ends_with(' ');
        if !repeats_space {
            squeezed.push(ch);
        }
    }
    squeezed
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a message with the given role holding one text part.
    fn message(role: Role, body: &str) -> Message {
        Message {
            role,
            content: vec![ContentPart::Text { text: body.into() }],
        }
    }

    /// Assistant message with a single text part.
    fn asst(t: &str) -> Message {
        message(Role::Assistant, t)
    }

    /// User message with a single text part.
    fn user(t: &str) -> Message {
        message(Role::User, t)
    }

    /// Puts `first` at the front of a history and appends `KEEP_LAST_MESSAGES + 1` follow-ups
    /// built by `reply` from `"{tag}{index}"`, so that `first` falls outside the protected tail.
    fn aged(first: Message, reply: fn(&str) -> Message, tag: &str) -> Vec<Message> {
        let mut msgs = vec![first];
        msgs.extend((0..=KEEP_LAST_MESSAGES).map(|i| reply(&format!("{tag}{i}"))));
        msgs
    }

    /// Text of the first content part of the first message; panics if it is not text.
    fn first_text(msgs: &[Message]) -> &str {
        let ContentPart::Text { text } = &msgs[0].content[0] else {
            panic!();
        };
        text
    }

    #[test]
    fn recent_messages_are_untouched() {
        let noisy = "a b\n\n\n\nc";
        let mut msgs = vec![asst(noisy)];

        let stats = prune_low_entropy(&mut msgs);

        assert_eq!(stats.total_bytes_saved, 0);
    }

    #[test]
    fn code_fences_are_preserved() {
        let content = "text with spaces\n```\nfn foo() {}\n```\ntail";
        let mut msgs = aged(asst(content), user, "q");

        prune_low_entropy(&mut msgs);

        let text = first_text(&msgs);
        assert!(text.contains("text with spaces"));
        assert!(text.contains("fn foo() {}"));
    }

    #[test]
    fn user_text_never_pruned() {
        let noisy = "a b\n\n\n\nc";
        let mut msgs = aged(user(noisy), asst, "r");

        let stats = prune_low_entropy(&mut msgs);

        assert_eq!(stats.total_bytes_saved, 0);
        assert_eq!(first_text(&msgs), noisy);
    }

    #[test]
    fn ellipsis_only_lines_removed() {
        let content = "real content\n...\n...\nmore content\n";
        let mut msgs = aged(asst(content), user, "q");

        prune_low_entropy(&mut msgs);

        let text = first_text(&msgs);
        assert!(!text.contains("..."));
        assert!(text.contains("real content"));
        assert!(text.contains("more content"));
    }
}