use once_cell::sync::Lazy;
use regex::Regex;
/// Matches a run of one or more whitespace characters (`\s+`).
///
/// Built on first access via `Lazy`; the `unwrap` would only panic if the
/// hard-coded pattern failed to compile.
static WHITESPACE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\s+").unwrap());
/// Matches a single character from a fixed set of arrows, bullets, diamonds
/// and check/cross marks.
///
/// Built on first access via `Lazy`; the `unwrap` would only panic if the
/// hard-coded pattern failed to compile.
static DECORATIVE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"[▶→←▼▲•·◆◇◊✓✗✔✘‣⁃◦]").unwrap());
/// Normalizes a piece of text into a compact single-line form.
///
/// Processing happens in three steps:
/// 1. every character matched by `DECORATIVE_RE` is deleted,
/// 2. each run matched by `WHITESPACE_RE` is replaced by one ASCII space,
/// 3. leading and trailing whitespace is trimmed.
///
/// Returns the result as a newly allocated `String`.
pub fn compress_text(text: &str) -> String {
    // Delete the glyphs first so whitespace on both sides of a removed glyph
    // merges into a single run before it is collapsed.
    let stripped = DECORATIVE_RE.replace_all(text, "");
    let squeezed = WHITESPACE_RE.replace_all(&*stripped, " ");
    String::from(squeezed.trim())
}
pub fn compress_block(text: &str) -> String {
let mut lines: Vec<String> = Vec::new();
let mut prev_blank = false;
for raw in text.lines() {
let line = compress_text(raw);
let blank = line.is_empty();
if blank && prev_blank {
continue;
}
lines.push(line);
prev_blank = blank;
}
lines.join("\n").trim().to_string()
}
/// Returns a rough token count for `text`.
///
/// The estimate is the UTF-8 byte length of `text` divided by four, rounded
/// down, so any input shorter than four bytes yields `0`.
pub fn estimate_tokens(text: &str) -> usize {
    // Heuristic ratio used by this estimate: four bytes per token.
    const BYTES_PER_TOKEN: usize = 4;
    let byte_len = text.len();
    byte_len / BYTES_PER_TOKEN
}
/// Cuts `text` down to a byte budget of `max_tokens * 4` bytes.
///
/// If `text` already fits within the budget it is returned unchanged (as a
/// copy). Otherwise the text is cut at the largest char boundary that does
/// not exceed the budget and the marker `"\n…[truncated]"` is appended.
///
/// Note that the marker is appended on top of the budget, so a truncated
/// result can be longer than `max_tokens * 4` bytes. The multiplication
/// saturates, so very large `max_tokens` values cannot overflow.
pub fn truncate_to_tokens(text: &str, max_tokens: usize) -> String {
    let byte_budget = max_tokens.saturating_mul(4);
    if byte_budget >= text.len() {
        return text.to_owned();
    }
    // Step back from the budget to the nearest char boundary so the slice
    // never splits a multi-byte character; offset 0 is always a boundary.
    let cut = (0..=byte_budget)
        .rev()
        .find(|&idx| text.is_char_boundary(idx))
        .unwrap_or(0);
    let mut out = String::from(&text[..cut]);
    out.push_str("\n…[truncated]");
    out
}