Skip to main content

lean_ctx/core/
compressor.rs

1use similar::{ChangeTag, TextDiff};
2
3pub fn aggressive_compress(content: &str, ext: Option<&str>) -> String {
4    let mut result: Vec<String> = Vec::new();
5    let is_python = matches!(ext, Some("py"));
6    let is_html = matches!(ext, Some("html" | "htm" | "xml" | "svg"));
7    let is_sql = matches!(ext, Some("sql"));
8    let is_shell = matches!(ext, Some("sh" | "bash" | "zsh" | "fish"));
9
10    let mut in_block_comment = false;
11
12    for line in content.lines() {
13        let trimmed = line.trim();
14
15        if trimmed.is_empty() {
16            continue;
17        }
18
19        if in_block_comment {
20            if trimmed.contains("*/") || (is_html && trimmed.contains("-->")) {
21                in_block_comment = false;
22            }
23            continue;
24        }
25
26        if trimmed.starts_with("/*") || (is_html && trimmed.starts_with("<!--")) {
27            if !(trimmed.contains("*/") || trimmed.contains("-->")) {
28                in_block_comment = true;
29            }
30            continue;
31        }
32
33        if trimmed.starts_with("//") && !trimmed.starts_with("///") {
34            continue;
35        }
36        if trimmed.starts_with('*') || trimmed.starts_with("*/") {
37            continue;
38        }
39        if is_python && trimmed.starts_with('#') {
40            continue;
41        }
42        if is_sql && trimmed.starts_with("--") {
43            continue;
44        }
45        if is_shell && trimmed.starts_with('#') && !trimmed.starts_with("#!") {
46            continue;
47        }
48        if !is_python && trimmed.starts_with('#') && trimmed.contains('[') {
49            continue;
50        }
51
52        if trimmed == "}" || trimmed == "};" || trimmed == ");" || trimmed == "});" {
53            if let Some(last) = result.last() {
54                let last_trimmed = last.trim();
55                if matches!(last_trimmed, "}" | "};" | ");" | "});") {
56                    if let Some(last_mut) = result.last_mut() {
57                        last_mut.push_str(trimmed);
58                    }
59                    continue;
60                }
61            }
62            result.push(trimmed.to_string());
63            continue;
64        }
65
66        let normalized = normalize_indentation(line);
67        result.push(normalized);
68    }
69
70    result.join("\n")
71}
72
/// Gentle post-processing pass over already-compressed text.
///
/// * Whitespace-only lines are turned into empty lines, and each run of them
///   is reduced to a single empty line.
/// * Lines consisting solely of a closing delimiter (`}`, `};`, `);`, `});`,
///   `)`) are emitted without indentation; within an uninterrupted run of such
///   lines only the first two are kept.
/// * Every other line is passed through unchanged.
///
/// Returns the kept lines joined with `\n` (no trailing newline).
pub fn lightweight_cleanup(content: &str) -> String {
    let mut kept: Vec<String> = Vec::new();
    let mut blank_run = 0u32;
    let mut closer_run = 0u32;

    for raw in content.lines() {
        match raw.trim() {
            "" => {
                // A blank line interrupts a run of closers.
                closer_run = 0;
                blank_run += 1;
                if blank_run <= 1 {
                    kept.push(String::new());
                }
            }
            closer @ ("}" | "};" | ");" | "});" | ")") => {
                blank_run = 0;
                closer_run += 1;
                if closer_run <= 2 {
                    kept.push(closer.to_owned());
                }
            }
            _ => {
                blank_run = 0;
                closer_run = 0;
                kept.push(raw.to_owned());
            }
        }
    }

    kept.join("\n")
}
107
108/// Safeguard: ensures compression ratio stays within safe bounds.
109/// Returns the compressed content if ratio is in [0.15, 1.0], otherwise the original.
110pub fn safeguard_ratio(original: &str, compressed: &str) -> String {
111    let orig_tokens = super::tokens::count_tokens(original);
112    let comp_tokens = super::tokens::count_tokens(compressed);
113
114    if orig_tokens == 0 {
115        return compressed.to_string();
116    }
117
118    let ratio = comp_tokens as f64 / orig_tokens as f64;
119    if ratio < 0.15 || comp_tokens > orig_tokens {
120        original.to_string()
121    } else {
122        compressed.to_string()
123    }
124}
125
/// Shrinks a line's leading whitespace and returns the re-indented line.
///
/// The indentation width is the number of leading whitespace *characters*
/// (as recognised by `str::trim_start`). When the line begins with a tab the
/// width is kept as-is (each whitespace character becomes one space);
/// otherwise it is halved, rounding down. The rest of the line, including
/// trailing whitespace, is left untouched.
fn normalize_indentation(line: &str) -> String {
    let content = line.trim_start();
    // Count characters rather than bytes so multi-byte whitespace
    // (e.g. U+3000) does not inflate the emitted indentation.
    let leading = line[..line.len() - content.len()].chars().count();
    let width = if line.starts_with('\t') {
        leading
    } else {
        leading / 2
    };
    format!("{}{}", " ".repeat(width), content)
}
133
134pub fn diff_content(old_content: &str, new_content: &str) -> String {
135    if old_content == new_content {
136        return "(no changes)".to_string();
137    }
138
139    let diff = TextDiff::from_lines(old_content, new_content);
140    let mut changes = Vec::new();
141    let mut additions = 0usize;
142    let mut deletions = 0usize;
143
144    for change in diff.iter_all_changes() {
145        let line_no = change.new_index().or(change.old_index()).map(|i| i + 1);
146        let text = change.value().trim_end_matches('\n');
147        match change.tag() {
148            ChangeTag::Insert => {
149                additions += 1;
150                if let Some(n) = line_no {
151                    changes.push(format!("+{n}: {text}"));
152                }
153            }
154            ChangeTag::Delete => {
155                deletions += 1;
156                if let Some(n) = line_no {
157                    changes.push(format!("-{n}: {text}"));
158                }
159            }
160            ChangeTag::Equal => {}
161        }
162    }
163
164    if changes.is_empty() {
165        return "(no changes)".to_string();
166    }
167
168    changes.push(format!("\ndiff +{additions}/-{deletions} lines"));
169    changes.join("\n")
170}
171
#[cfg(test)]
mod tests {
    use super::*;

    /// An inserted line is reported with its 1-based position in the new text.
    #[test]
    fn test_diff_insertion() {
        let old = "line1\nline2\nline3";
        let new = "line1\nline2\nnew_line\nline3";
        let result = diff_content(old, new);
        // Match the whole entry: a bare "+" also occurs in the summary line,
        // so checking for it alone could never fail.
        assert!(
            result.contains("+3: new_line"),
            "should show the added line, got: {result}"
        );
        assert!(
            result.contains("diff +1/-0 lines"),
            "summary should count one addition, got: {result}"
        );
    }

    /// A deleted line is reported with its 1-based position in the old text.
    #[test]
    fn test_diff_deletion() {
        let old = "line1\nline2\nline3";
        let new = "line1\nline3";
        let result = diff_content(old, new);
        // Match the whole entry: a bare "-" also occurs in the summary line.
        assert!(
            result.contains("-2: line2"),
            "should show the removed line, got: {result}"
        );
        assert!(
            result.contains("diff +0/-1 lines"),
            "summary should count one deletion, got: {result}"
        );
    }

    /// Identical inputs short-circuit to the "(no changes)" marker.
    #[test]
    fn test_diff_no_changes() {
        let content = "same\ncontent";
        assert_eq!(diff_content(content, content), "(no changes)");
    }

    /// Only the first two closers of a consecutive run survive.
    #[test]
    fn test_lightweight_cleanup_collapses_braces() {
        let input = "fn main() {\n    inner()\n}\n}\n}\n}\n}\nfn next() {}";
        let result = lightweight_cleanup(input);
        assert_eq!(
            result, "fn main() {\n    inner()\n}\n}\nfn next() {}",
            "should collapse consecutive closing braces"
        );
    }

    /// A run of blank lines is reduced to a single empty line.
    #[test]
    fn test_lightweight_cleanup_blank_lines() {
        let input = "line1\n\n\n\n\nline2";
        let result = lightweight_cleanup(input);
        assert_eq!(
            result, "line1\n\nline2",
            "should collapse multiple blank lines"
        );
    }

    /// A result far below the 0.15 token ratio falls back to the original.
    #[test]
    fn test_safeguard_ratio_prevents_over_compression() {
        let original = "a ".repeat(100);
        let too_compressed = "a";
        let result = safeguard_ratio(&original, too_compressed);
        assert_eq!(result, original, "should return original when ratio < 0.15");
    }

    /// `//` line comments are removed while code is kept.
    #[test]
    fn test_aggressive_strips_comments() {
        let code = "fn main() {\n    // a comment\n    let x = 1;\n}";
        let result = aggressive_compress(code, Some("rs"));
        assert!(!result.contains("// a comment"));
        assert!(result.contains("let x = 1"));
    }

    /// `#` comments are removed for Python input.
    #[test]
    fn test_aggressive_python_comments() {
        let code = "def main():\n    # comment\n    x = 1";
        let result = aggressive_compress(code, Some("py"));
        assert!(!result.contains("# comment"));
        assert!(result.contains("x = 1"));
    }

    /// `///` doc comments are not treated as ordinary line comments.
    #[test]
    fn test_aggressive_preserves_doc_comments() {
        let code = "/// Doc comment\nfn main() {}";
        let result = aggressive_compress(code, Some("rs"));
        assert!(result.contains("/// Doc comment"));
    }

    /// A multi-line `/* ... */` comment is removed in its entirety.
    #[test]
    fn test_aggressive_block_comment() {
        let code = "/* start\n * middle\n */ end\nfn main() {}";
        let result = aggressive_compress(code, Some("rs"));
        assert!(!result.contains("start"));
        assert!(!result.contains("middle"));
        assert!(result.contains("fn main()"));
    }
}