webfetch_core/
compress.rs1use once_cell::sync::Lazy;
2use regex::Regex;
3
/// Matches a run of one or more whitespace characters (`\s+`).
///
/// Built on first access via `Lazy`; the `unwrap` can only fail if the
/// hard-coded pattern itself is invalid.
static WHITESPACE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\s+").unwrap());
/// Matches a single character from a fixed set of arrow, bullet, and
/// check-mark style glyphs listed in the character class below.
///
/// Built on first access via `Lazy`; the `unwrap` can only fail if the
/// hard-coded pattern itself is invalid.
static DECORATIVE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"[▶→←▼▲•·◆◇◊✓✗✔✘‣⁃◦]").unwrap());
6
7pub fn compress_text(text: &str) -> String {
14 let clean = DECORATIVE_RE.replace_all(text, "");
15 let collapsed = WHITESPACE_RE.replace_all(&clean, " ");
16 collapsed.trim().to_string()
17}
18
19pub fn compress_block(text: &str) -> String {
22 let mut lines: Vec<String> = Vec::new();
23 let mut prev_blank = false;
24 for raw in text.lines() {
25 let line = compress_text(raw);
26 let blank = line.is_empty();
27 if blank && prev_blank {
28 continue;
29 }
30 lines.push(line);
31 prev_blank = blank;
32 }
33 lines.join("\n").trim().to_string()
34}
35
/// Estimates a token count for `text` as one token per four bytes.
///
/// The figure is the UTF-8 byte length of `text` divided by four, rounded
/// down, so inputs shorter than four bytes yield `0`.
pub fn estimate_tokens(text: &str) -> usize {
    const BYTES_PER_TOKEN: usize = 4;

    let byte_len = text.len();
    byte_len / BYTES_PER_TOKEN
}
41
/// Cuts `text` down to a byte budget of `max_tokens * 4`.
///
/// When the byte length of `text` is within the budget it is returned
/// unchanged. Otherwise the text is cut at the closest UTF-8 character
/// boundary at or below the budget and a `"\n…[truncated]"` marker is
/// appended. The marker is added on top of the budget, so a truncated result
/// can be longer than `max_tokens * 4` bytes.
///
/// The multiplication saturates, so very large `max_tokens` values behave as
/// "no limit" instead of overflowing.
pub fn truncate_to_tokens(text: &str, max_tokens: usize) -> String {
    const MARKER: &str = "\n…[truncated]";

    let budget = max_tokens.saturating_mul(4);
    if budget >= text.len() {
        return text.to_owned();
    }

    // Search downward from the budget for an offset that does not split a
    // multi-byte character; offset 0 is always a boundary, so this succeeds.
    let cut = (0..=budget)
        .rev()
        .find(|&idx| text.is_char_boundary(idx))
        .unwrap_or(0);

    let mut out = String::with_capacity(cut + MARKER.len());
    out.push_str(&text[..cut]);
    out.push_str(MARKER);
    out
}