lean_ctx/core/
compressor.rs1use similar::{ChangeTag, TextDiff};
2
3pub fn aggressive_compress(content: &str, ext: Option<&str>) -> String {
4 let mut result: Vec<String> = Vec::new();
5 let is_python = matches!(ext, Some("py"));
6 let is_html = matches!(ext, Some("html" | "htm" | "xml" | "svg"));
7 let is_sql = matches!(ext, Some("sql"));
8 let is_shell = matches!(ext, Some("sh" | "bash" | "zsh" | "fish"));
9
10 let mut in_block_comment = false;
11
12 for line in content.lines() {
13 let trimmed = line.trim();
14
15 if trimmed.is_empty() {
16 continue;
17 }
18
19 if in_block_comment {
20 if trimmed.contains("*/") || (is_html && trimmed.contains("-->")) {
21 in_block_comment = false;
22 }
23 continue;
24 }
25
26 if trimmed.starts_with("/*") || (is_html && trimmed.starts_with("<!--")) {
27 if !(trimmed.contains("*/") || trimmed.contains("-->")) {
28 in_block_comment = true;
29 }
30 continue;
31 }
32
33 if trimmed.starts_with("//") && !trimmed.starts_with("///") {
34 continue;
35 }
36 if trimmed.starts_with('*') || trimmed.starts_with("*/") {
37 continue;
38 }
39 if is_python && trimmed.starts_with('#') {
40 continue;
41 }
42 if is_sql && trimmed.starts_with("--") {
43 continue;
44 }
45 if is_shell && trimmed.starts_with('#') && !trimmed.starts_with("#!") {
46 continue;
47 }
48 if !is_python && trimmed.starts_with('#') && trimmed.contains('[') {
49 continue;
50 }
51
52 if trimmed == "}" || trimmed == "};" || trimmed == ");" || trimmed == "});" {
53 if let Some(last) = result.last() {
54 let last_trimmed = last.trim();
55 if matches!(last_trimmed, "}" | "};" | ");" | "});") {
56 if let Some(last_mut) = result.last_mut() {
57 last_mut.push_str(trimmed);
58 }
59 continue;
60 }
61 }
62 result.push(trimmed.to_string());
63 continue;
64 }
65
66 let normalized = normalize_indentation(line);
67 result.push(normalized);
68 }
69
70 result.join("\n")
71}
72
/// Performs a light, mostly whitespace-preserving cleanup of `content`.
///
/// * Runs of blank (whitespace-only) lines are collapsed to a single empty line.
/// * In a run of consecutive lines that consist solely of a closing delimiter
///   (`}`, `};`, `);`, `});`, `)`), only the first two are kept, and those are
///   emitted without their indentation. A blank line ends the run.
/// * Every other line is copied through unchanged.
///
/// Returns the kept lines joined with `\n` (no trailing newline).
pub fn lightweight_cleanup(content: &str) -> String {
    let mut kept: Vec<String> = Vec::new();
    let mut blank_run = 0u32;
    let mut closer_run = 0u32;

    for raw in content.lines() {
        let stripped = raw.trim();

        if stripped.is_empty() {
            // A blank line interrupts a run of closers.
            closer_run = 0;
            blank_run += 1;
            if blank_run < 2 {
                kept.push(String::new());
            }
        } else if matches!(stripped, "}" | "};" | ");" | "});" | ")") {
            blank_run = 0;
            closer_run += 1;
            if closer_run < 3 {
                kept.push(stripped.to_owned());
            }
        } else {
            blank_run = 0;
            closer_run = 0;
            kept.push(raw.to_owned());
        }
    }

    kept.join("\n")
}
107
108pub fn safeguard_ratio(original: &str, compressed: &str) -> String {
111 let orig_tokens = super::tokens::count_tokens(original);
112 let comp_tokens = super::tokens::count_tokens(compressed);
113
114 if orig_tokens == 0 {
115 return compressed.to_string();
116 }
117
118 let ratio = comp_tokens as f64 / orig_tokens as f64;
119 if ratio < 0.15 || comp_tokens > orig_tokens {
120 original.to_string()
121 } else {
122 compressed.to_string()
123 }
124}
125
/// Rewrites the leading whitespace of `line` as a shorter run of spaces.
///
/// The width is measured as the byte length of the leading whitespace. If the
/// line starts with a tab, that many spaces are emitted; otherwise the width
/// is halved (rounding down). Everything after the leading whitespace is kept
/// verbatim.
fn normalize_indentation(line: &str) -> String {
    let body = line.trim_start();
    let indent_bytes = line.len() - body.len();
    let width = if line.starts_with('\t') {
        indent_bytes
    } else {
        indent_bytes / 2
    };

    let mut out = String::with_capacity(width + body.len());
    out.extend(std::iter::repeat(' ').take(width));
    out.push_str(body);
    out
}
133
134pub fn diff_content(old_content: &str, new_content: &str) -> String {
135 if old_content == new_content {
136 return "(no changes)".to_string();
137 }
138
139 let diff = TextDiff::from_lines(old_content, new_content);
140 let mut changes = Vec::new();
141 let mut additions = 0usize;
142 let mut deletions = 0usize;
143
144 for change in diff.iter_all_changes() {
145 let line_no = change.new_index().or(change.old_index()).map(|i| i + 1);
146 let text = change.value().trim_end_matches('\n');
147 match change.tag() {
148 ChangeTag::Insert => {
149 additions += 1;
150 if let Some(n) = line_no {
151 changes.push(format!("+{n}: {text}"));
152 }
153 }
154 ChangeTag::Delete => {
155 deletions += 1;
156 if let Some(n) = line_no {
157 changes.push(format!("-{n}: {text}"));
158 }
159 }
160 ChangeTag::Equal => {}
161 }
162 }
163
164 if changes.is_empty() {
165 return "(no changes)".to_string();
166 }
167
168 changes.push(format!("\ndiff +{additions}/-{deletions} lines"));
169 changes.join("\n")
170}
171
/// Unit tests for the compression and diff helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    /// An inserted line is reported with its 1-based position in the new text.
    #[test]
    fn test_diff_insertion() {
        let old = "line1\nline2\nline3";
        let new = "line1\nline2\nnew_line\nline3";
        let result = diff_content(old, new);
        // Check the whole entry: a bare "+" is always present in the summary
        // line, so it would not prove that the insertion itself was reported.
        assert!(result.contains("+3: new_line"), "should show additions");
        assert!(result.contains("diff +1/-0 lines"));
    }

    /// A deleted line is reported with its 1-based position in the old text.
    #[test]
    fn test_diff_deletion() {
        let old = "line1\nline2\nline3";
        let new = "line1\nline3";
        let result = diff_content(old, new);
        // Check the whole entry: a bare "-" is always present in the summary
        // line, so it would not prove that the deletion itself was reported.
        assert!(result.contains("-2: line2"), "should show deletions");
        assert!(result.contains("diff +0/-1 lines"));
    }

    /// Identical inputs short-circuit to the "(no changes)" marker.
    #[test]
    fn test_diff_no_changes() {
        let content = "same\ncontent";
        assert_eq!(diff_content(content, content), "(no changes)");
    }

    /// Long runs of closing-brace lines are cut down.
    #[test]
    fn test_lightweight_cleanup_collapses_braces() {
        let input = "fn main() {\n inner()\n}\n}\n}\n}\n}\nfn next() {}";
        let result = lightweight_cleanup(input);
        // Two kept closers plus the `}` inside `fn next() {}`.
        assert!(
            result.matches('}').count() <= 3,
            "should collapse consecutive closing braces"
        );
        assert!(result.contains("fn next()"));
    }

    /// Runs of blank lines are collapsed to a single blank line.
    #[test]
    fn test_lightweight_cleanup_blank_lines() {
        let input = "line1\n\n\n\n\nline2";
        let result = lightweight_cleanup(input);
        let blank_runs = result.split("line1").nth(1).unwrap();
        let blanks = blank_runs.matches('\n').count();
        assert!(blanks <= 2, "should collapse multiple blank lines");
    }

    /// Over-aggressive compression falls back to the original text.
    #[test]
    fn test_safeguard_ratio_prevents_over_compression() {
        let original = "a ".repeat(100);
        let too_compressed = "a";
        let result = safeguard_ratio(&original, too_compressed);
        assert_eq!(result, original, "should return original when ratio < 0.15");
    }

    /// `//` line comments are removed while code is kept.
    #[test]
    fn test_aggressive_strips_comments() {
        let code = "fn main() {\n // a comment\n let x = 1;\n}";
        let result = aggressive_compress(code, Some("rs"));
        assert!(!result.contains("// a comment"));
        assert!(result.contains("let x = 1"));
    }

    /// `#` comments are removed for Python sources.
    #[test]
    fn test_aggressive_python_comments() {
        let code = "def main():\n # comment\n x = 1";
        let result = aggressive_compress(code, Some("py"));
        assert!(!result.contains("# comment"));
        assert!(result.contains("x = 1"));
    }

    /// `///` doc comments survive aggressive compression.
    #[test]
    fn test_aggressive_preserves_doc_comments() {
        let code = "/// Doc comment\nfn main() {}";
        let result = aggressive_compress(code, Some("rs"));
        assert!(result.contains("/// Doc comment"));
    }

    /// Multi-line `/* ... */` comments are removed in full.
    #[test]
    fn test_aggressive_block_comment() {
        let code = "/* start\n * middle\n */ end\nfn main() {}";
        let result = aggressive_compress(code, Some("rs"));
        assert!(!result.contains("start"));
        assert!(!result.contains("middle"));
        assert!(result.contains("fn main()"));
    }
}