lean_ctx/proxy/
compress.rs1use crate::core::tokens::count_tokens;
2use crate::core::web::distill;
3
/// Size cap handed to `distill::squeeze_prose` when condensing prose-like tool output.
///
/// NOTE(review): the unit (bytes, chars or tokens) is defined by `squeeze_prose`,
/// which is not visible from this file — confirm before tuning.
const RESEARCH_PROSE_CAP: usize = 24_000;
7
8pub fn compress_tool_result(content: &str, tool_name: Option<&str>) -> String {
20 if content.trim().is_empty() || content.len() < 200 {
21 return content.to_string();
22 }
23
24 if is_cited_research_output(content) {
25 return content.to_string();
26 }
27
28 if extract_command_hint(content).is_none() && looks_like_prose(content) {
29 if let Some(out) = squeeze_research_prose(content) {
30 return out;
31 }
32 }
33
34 let cmd = infer_command(content, tool_name);
35 crate::shell::compress::engine::compress_if_beneficial(&cmd, content)
36}
37
/// Reports whether `content` carries both citation markers: the text `"· Retrieved: "`
/// anywhere, and `"Source: "` immediately after a newline.
///
/// `compress_tool_result` returns such content verbatim.
fn is_cited_research_output(content: &str) -> bool {
    const CITATION_MARKERS: [&str; 2] = ["· Retrieved: ", "\nSource: "];
    CITATION_MARKERS
        .iter()
        .all(|marker| content.contains(*marker))
}
43
/// Characters whose density is used as a signal that text is code or shell output.
const CODE_SYMBOLS: &str = "{}<>;=|\\$`";

/// Heuristically decides whether `content` reads like natural-language prose.
///
/// Only the first 4000 characters are inspected. The sample is considered prose when
/// all of the following hold:
/// * it has at least 600 characters;
/// * at least 60% of its characters are alphabetic;
/// * at least 12% are ASCII spaces;
/// * at most 6% are one of `CODE_SYMBOLS`;
/// * it contains at least four `.`, `!` or `?` characters;
/// * its non-blank lines average at least 40 characters.
fn looks_like_prose(content: &str) -> bool {
    const SAMPLE_CHARS: usize = 4000;

    // Borrow the first SAMPLE_CHARS characters; the cut always lands on a char boundary.
    let sample = match content.char_indices().nth(SAMPLE_CHARS) {
        Some((cut, _)) => &content[..cut],
        None => content,
    };

    // Gather every per-character statistic in a single pass.
    let mut total = 0usize;
    let mut letters = 0usize;
    let mut blanks = 0usize;
    let mut code_marks = 0usize;
    let mut sentence_ends = 0usize;
    for ch in sample.chars() {
        total += 1;
        if ch.is_alphabetic() {
            letters += 1;
        }
        if ch == ' ' {
            blanks += 1;
        }
        if CODE_SYMBOLS.contains(ch) {
            code_marks += 1;
        }
        if matches!(ch, '.' | '!' | '?') {
            sentence_ends += 1;
        }
    }

    if total < 600 {
        return false;
    }

    let total_f = total as f32;
    let share = |count: usize| count as f32 / total_f;
    let prose_like_mix = share(letters) >= 0.6 && share(blanks) >= 0.12 && share(code_marks) <= 0.06;
    if !prose_like_mix || sentence_ends < 4 {
        return false;
    }

    // Prose tends to have long lines; logs and listings have short ones.
    let (line_count, char_sum) = sample
        .lines()
        .filter(|line| !line.trim().is_empty())
        .fold((0usize, 0usize), |(n, sum), line| {
            (n + 1, sum + line.chars().count())
        });
    if line_count == 0 {
        return false;
    }
    char_sum as f32 / line_count as f32 >= 40.0
}
75
76fn squeeze_research_prose(content: &str) -> Option<String> {
79 let before = count_tokens(content);
80 let squeezed = distill::squeeze_prose(content, RESEARCH_PROSE_CAP);
81 if squeezed.trim().is_empty() {
82 return None;
83 }
84 let after = count_tokens(&squeezed);
85 if after + 2 >= before {
86 return None;
87 }
88 Some(crate::core::protocol::append_savings_with_info(
89 &squeezed,
90 before,
91 after,
92 Some("research"),
93 None,
94 ))
95}
96
97fn infer_command(content: &str, tool_name: Option<&str>) -> String {
98 if let Some(cmd) = extract_command_hint(content) {
99 return cmd;
100 }
101
102 if let Some(name) = tool_name {
103 let nl = name.to_lowercase();
104 if nl.contains("bash") || nl.contains("shell") || nl.contains("terminal") {
105 return "shell".to_string();
106 }
107 if nl.contains("search") || nl.contains("grep") || nl.contains("find") {
108 return "grep".to_string();
109 }
110 }
111
112 String::new()
113}
114
/// Looks for a shell-prompt line (`$ cmd` or `% cmd`) within the first three lines of
/// `content` and returns the command that follows the prompt.
///
/// Each candidate line is trimmed before matching, so indented prompts are recognised.
/// The extracted command is also stripped of leading whitespace, so extra spacing
/// after the prompt marker (e.g. `"$  cargo build"`) does not leak into the command.
///
/// Returns `None` when none of the first three lines starts with a prompt marker.
fn extract_command_hint(content: &str) -> Option<String> {
    content.lines().take(3).find_map(|line| {
        let line = line.trim();
        line.strip_prefix("$ ")
            .or_else(|| line.strip_prefix("% "))
            // The line is already right-trimmed; drop any extra spacing after the prompt.
            .map(|cmd| cmd.trim_start().to_string())
    })
}
127
#[cfg(test)]
mod tests {
    use super::*;

    /// Content below the minimum size is handed back untouched.
    #[test]
    fn short_content_unchanged() {
        let tiny = "hello world";
        let result = compress_tool_result(tiny, None);
        assert_eq!(result, tiny);
    }

    /// Empty and whitespace-only content is handed back untouched.
    #[test]
    fn empty_content_unchanged() {
        for blank in ["", " "] {
            assert_eq!(compress_tool_result(blank, None), blank);
        }
    }

    /// A `$ ` prompt on the first line yields the command; plain text yields nothing.
    #[test]
    fn command_hint_extraction() {
        let hinted = extract_command_hint("$ cargo build\nCompiling foo");
        assert_eq!(hinted, Some("cargo build".to_string()));

        let plain = extract_command_hint("no prefix here");
        assert_eq!(plain, None);
    }

    /// Without a prompt line, the tool name decides the inferred command.
    #[test]
    fn tool_name_inference() {
        let expectations = [
            ("bash_execute", "shell"),
            ("search_files", "grep"),
            ("unknown_tool", ""),
        ];
        for (tool, expected) in expectations {
            assert_eq!(infer_command("some text", Some(tool)), expected);
        }
    }

    /// Output carrying the citation footer must come back byte-for-byte identical.
    #[test]
    fn cited_research_output_is_preserved_verbatim() {
        let filler = "Extra body line that would otherwise be touched. ".repeat(20);
        let cited = format!(
            "Rust is a language.\n\n---\nSource: Rust — https://x.com/a\n\
             Site: x.com · Retrieved: 2026-06-06T00:00:00Z\n{}",
            filler
        );
        let result = compress_tool_result(&cited, Some("ctx_url_read"));
        assert_eq!(result, cited);
    }

    /// Repeated paragraphs collapse to a single occurrence while the text is kept.
    #[test]
    fn prose_is_squeezed_and_deduped() {
        let para = "Rust is a multi-paradigm systems programming language that \
                    emphasizes performance, type safety, and fearless concurrency, \
                    achieving memory safety without a garbage collector at runtime.";
        let mut input = vec![para; 8].join("\n\n");
        input.push('\n');
        assert!(input.len() > 600);

        let result = compress_tool_result(&input, Some("web_fetch"));
        assert_eq!(result.matches("fearless concurrency").count(), 1);
        assert!(result.contains("performance, type safety"));
    }

    /// Source code must not be mistaken for prose.
    #[test]
    fn code_output_is_not_treated_as_prose() {
        let snippet = "fn main() {\n let x = vec![1, 2, 3];\n \
                       for i in &x { println!(\"{}\", i); }\n}\n";
        let code = snippet.repeat(20);
        assert!(!looks_like_prose(&code));
    }

    /// Build logs must not be mistaken for prose.
    #[test]
    fn shell_log_is_not_treated_as_prose() {
        let entry = "$ cargo build\n Compiling foo v0.1.0\n Finished dev\n";
        let log = entry.repeat(20);
        assert!(!looks_like_prose(&log));
    }
}