lean_ctx/tools/
ctx_compress_memory.rs1use std::path::Path;
2
3use crate::core::tokens::count_tokens;
4
5pub fn handle(path: &str) -> String {
6 let content = match std::fs::read_to_string(path) {
7 Ok(c) => c,
8 Err(e) => return format!("ERROR: Cannot read {path}: {e}"),
9 };
10
11 let original_tokens = count_tokens(&content);
12
13 let backup_path = build_backup_path(path);
14 if !Path::new(&backup_path).exists() {
15 if let Err(e) = std::fs::write(&backup_path, &content) {
16 return format!("ERROR: Cannot create backup {backup_path}: {e}");
17 }
18 }
19
20 let compressed = compress_memory_file(&content);
21 let compressed_tokens = count_tokens(&compressed);
22
23 if let Err(e) = std::fs::write(path, &compressed) {
24 return format!("ERROR: Cannot write compressed file: {e}");
25 }
26
27 let saved = original_tokens.saturating_sub(compressed_tokens);
28 let pct = if original_tokens > 0 {
29 (saved as f64 / original_tokens as f64 * 100.0).round() as usize
30 } else {
31 0
32 };
33
34 format!(
35 "Compressed {}: {} → {} tokens ({saved} saved, {pct}%)\n\
36 Backup: {backup_path}",
37 Path::new(path)
38 .file_name()
39 .and_then(|f| f.to_str())
40 .unwrap_or(path),
41 original_tokens,
42 compressed_tokens,
43 )
44}
45
/// Derives the backup location for `path`: a file named `<stem>.original.md`
/// in the same directory.
///
/// `<stem>` is the file stem of `path`; the literal `file` is used when the
/// path has no stem or the stem is not valid UTF-8. When the path has no
/// parent, `.` is used as the directory.
fn build_backup_path(path: &str) -> String {
    let source = Path::new(path);

    let dir = match source.parent() {
        Some(dir) => dir,
        None => Path::new("."),
    };
    let stem = match source.file_stem().and_then(|s| s.to_str()) {
        Some(stem) => stem,
        None => "file",
    };

    let backup = dir.join(format!("{stem}.original.md"));
    backup.to_string_lossy().to_string()
}
55
56fn compress_memory_file(content: &str) -> String {
57 let mut output = Vec::new();
58 let mut in_code_block = false;
59 let mut code_fence = String::new();
60
61 for line in content.lines() {
62 let trimmed = line.trim();
63
64 if !in_code_block && is_code_fence_start(trimmed) {
65 in_code_block = true;
66 code_fence = trimmed
67 .chars()
68 .take_while(|c| *c == '`' || *c == '~')
69 .collect();
70 output.push(line.to_string());
71 continue;
72 }
73
74 if in_code_block {
75 output.push(line.to_string());
76 if trimmed.starts_with(&code_fence) && trimmed.len() <= code_fence.len() + 1 {
77 in_code_block = false;
78 code_fence.clear();
79 }
80 continue;
81 }
82
83 if is_protected_line(trimmed) {
84 output.push(line.to_string());
85 continue;
86 }
87
88 if trimmed.is_empty() {
89 if output.last().map(|l| l.trim().is_empty()).unwrap_or(false) {
90 continue;
91 }
92 output.push(String::new());
93 continue;
94 }
95
96 let compressed = compress_prose_line(line);
97 if !compressed.trim().is_empty() {
98 output.push(compressed);
99 }
100 }
101
102 output.join("\n")
103}
104
/// Returns `true` when `line` begins with three backticks or three tildes.
///
/// No trimming is done here; leading whitespace makes the check fail.
fn is_code_fence_start(line: &str) -> bool {
    const FENCE_MARKERS: [&str; 2] = ["```", "~~~"];
    FENCE_MARKERS.iter().any(|marker| line.starts_with(*marker))
}
108
109fn is_protected_line(line: &str) -> bool {
110 if line.starts_with('#') {
111 return true;
112 }
113 if line.starts_with("- ") || line.starts_with("* ") || line.starts_with("> ") {
114 return true;
115 }
116 if line.starts_with('|') {
117 return true;
118 }
119 if contains_url_or_path(line) && line.split_whitespace().count() <= 3 {
120 return true;
121 }
122 false
123}
124
/// Returns `true` when `line` contains an `http://`, `https://` or `ftp://`
/// scheme, or when it contains both `/` and `.` and no space character.
fn contains_url_or_path(line: &str) -> bool {
    const URL_SCHEMES: [&str; 3] = ["http://", "https://", "ftp://"];

    if URL_SCHEMES.iter().any(|scheme| line.contains(*scheme)) {
        return true;
    }

    // Path heuristic: a single space-free token with a slash and a dot.
    let has = |c: char| line.contains(c);
    has('/') && has('.') && !has(' ')
}
131
132fn compress_prose_line(line: &str) -> String {
133 let leading_ws: String = line.chars().take_while(|c| c.is_whitespace()).collect();
134 let trimmed = line.trim();
135
136 let mut words: Vec<&str> = trimmed.split_whitespace().collect();
137
138 words.retain(|w| !is_filler_word(w));
139
140 let mut result: Vec<String> = Vec::new();
141 let mut i = 0;
142 while i < words.len() {
143 if let Some((replacement, skip)) = try_shorten_phrase(&words, i) {
144 result.push(replacement.to_string());
145 i += skip;
146 } else {
147 result.push(words[i].to_string());
148 i += 1;
149 }
150 }
151
152 format!("{}{}", leading_ws, result.join(" "))
153}
154
/// Returns `true` when `word` is one of the filler words removed from prose.
///
/// The comparison is case-insensitive and ignores ASCII punctuation at
/// either end of the word, so `"Really,"` counts as `really`.
fn is_filler_word(word: &str) -> bool {
    const FILLER_WORDS: [&str; 8] = [
        "just",
        "really",
        "basically",
        "actually",
        "simply",
        "please",
        "very",
        "quite",
    ];

    let lowered = word.to_lowercase();
    let core = lowered.trim_matches(|c: char| c.is_ascii_punctuation());
    FILLER_WORDS.contains(&core)
}
163
/// Looks for a known wordy phrase starting at `words[pos]`.
///
/// Returns `Some((replacement, n))` when the `n` words starting at `pos`
/// match, case-insensitively, one of the phrases in the table below; the
/// caller should emit `replacement` and skip `n` words. Returns `None`
/// otherwise, including when `pos` is past the end of `words`.
///
/// Three-word phrases are listed before two-word ones so that the longest
/// match wins. Words are compared whole, so a word with attached punctuation
/// does not match.
///
/// There is intentionally no two-word rule for "in order": it could only fire
/// when the next word is not "to", and replacing it with "to" there would
/// garble the sentence ("in order of size" would become "to of size").
fn try_shorten_phrase(words: &[&str], pos: usize) -> Option<(&'static str, usize)> {
    // (phrase, replacement); order defines precedence.
    const PHRASES: &[(&[&str], &str)] = &[
        (&["in", "order", "to"], "to"),
        (&["as", "well", "as"], "and"),
        (&["due", "to", "the"], "because"),
        (&["make", "sure", "to"], "ensure"),
        (&["a", "lot", "of"], "many"),
        (&["on", "top", "of"], "besides"),
        (&["make", "sure"], "ensure"),
        (&["a", "lot"], "many"),
        (&["as", "well"], "also"),
        (&["prior", "to"], "before"),
        (&["due", "to"], "because"),
    ];
    const MAX_PHRASE_WORDS: usize = 3;

    // Lower-case the candidate words once instead of once per table entry.
    let window: Vec<String> = words
        .iter()
        .skip(pos)
        .take(MAX_PHRASE_WORDS)
        .map(|w| w.to_lowercase())
        .collect();

    PHRASES.iter().find_map(|(phrase, replacement)| {
        let matched = window.len() >= phrase.len()
            && window
                .iter()
                .zip(phrase.iter())
                .all(|(have, want)| have.as_str() == *want);
        if matched {
            Some((*replacement, phrase.len()))
        } else {
            None
        }
    })
}
202
#[cfg(test)]
mod tests {
    //! Unit tests for the memory-file compressor. Where the expected output
    //! is fully determined, tests compare against the exact string rather
    //! than checking for substrings.

    use super::*;

    #[test]
    fn preserves_code_blocks() {
        let input = "Some text just really here.\n\n```rust\nfn main() {\n println!(\"hello\");\n}\n```\n\nMore text.";
        let result = compress_memory_file(input);
        assert!(result.contains("fn main()"));
        assert!(result.contains("println!"));
        // The whole fenced block, fences included, survives byte for byte.
        assert!(result.contains("```rust\nfn main() {\n println!(\"hello\");\n}\n```"));
        // Prose before the block is still compressed, and prose after the
        // closing fence is handled as prose again.
        assert!(result.starts_with("Some text here.\n\n```rust"));
        assert!(result.ends_with("```\n\nMore text."));
    }

    #[test]
    fn preserves_headings() {
        let input = "# Main Heading\n\nJust some filler text here.\n\n## Sub Heading";
        let result = compress_memory_file(input);
        assert!(result.contains("# Main Heading"));
        assert!(result.contains("## Sub Heading"));
        assert_eq!(
            result,
            "# Main Heading\n\nsome filler text here.\n\n## Sub Heading"
        );
    }

    #[test]
    fn preserves_urls() {
        let input = "Visit https://example.com for details.\nJust some really basic text.";
        let result = compress_memory_file(input);
        assert!(result.contains("https://example.com"));
        assert_eq!(
            result,
            "Visit https://example.com for details.\nsome basic text."
        );
    }

    #[test]
    fn removes_filler_words() {
        let input = "You should just really basically make sure to check this.";
        let result = compress_prose_line(input);
        assert!(!result.contains("just"));
        assert!(!result.contains("really"));
        assert!(!result.contains("basically"));
        assert!(result.contains("ensure"));
        assert_eq!(result, "You should ensure check this.");
    }

    #[test]
    fn shortens_phrases() {
        let input = "In order to fix this, make sure to check the config.";
        let result = compress_prose_line(input);
        assert!(!result.contains("In order to"));
        assert!(!result.contains("make sure"));
        // Exact output: a bare `contains("to")` would pass even if nothing
        // had been shortened, because the input itself contains "to".
        assert_eq!(result, "to fix this, ensure check the config.");
    }

    #[test]
    fn collapses_blank_lines() {
        let input = "Line 1\n\n\n\nLine 2\n\n\nLine 3";
        let result = compress_memory_file(input);
        assert!(!result.contains("\n\n\n"));
        assert_eq!(result, "Line 1\n\nLine 2\n\nLine 3");
    }

    #[test]
    fn preserves_tables() {
        let input = "| Col A | Col B |\n|-------|-------|\n| val1 | val2 |";
        let result = compress_memory_file(input);
        assert!(result.contains("| Col A | Col B |"));
        // Table rows are protected, so the whole table is unchanged.
        assert_eq!(result, input);
    }

    #[test]
    fn backup_path_computed_correctly() {
        assert_eq!(
            Path::new(&build_backup_path("/home/user/.cursorrules")),
            Path::new("/home/user")
                .join(".cursorrules.original.md")
                .as_path()
        );
        assert_eq!(
            Path::new(&build_backup_path("/project/CLAUDE.md")),
            Path::new("/project").join("CLAUDE.original.md").as_path()
        );
    }
}