lean_ctx/core/terse/
engine.rs1use super::counter;
7use super::dictionaries::{self, DictLevel};
8use super::quality::{self, QualityConfig, QualityReport};
9use super::scoring;
10use crate::core::config::CompressionLevel;
11
/// Score cutoff used by `compress_at_level` for `CompressionLevel::Lite`
/// (and `Off`): a line whose combined score is below this value is dropped
/// unless it carries a structural marker.
const LOW_SCORE_THRESHOLD: f32 = 2.5;

/// Score cutoff used by `compress_at_level` for `CompressionLevel::Standard`.
const STANDARD_SCORE_THRESHOLD: f32 = 3.0;
/// Score cutoff used by `compress_at_level` for `CompressionLevel::Max`.
const MAX_SCORE_THRESHOLD: f32 = 3.5;
17
18#[derive(Debug)]
20pub struct EngineResult {
21 pub output: String,
22 pub tokens_before: u32,
23 pub tokens_after: u32,
24 pub quality: QualityReport,
25 pub lines_removed: usize,
26 pub lines_total: usize,
27}
28
/// Inputs with fewer lines than this are returned unchanged by `compress`.
const MIN_LINES_FOR_COMPRESSION: usize = 5;
30
31pub fn compress(text: &str, level: &CompressionLevel) -> EngineResult {
33 let tokens_before = counter::count(text);
34 let lines_total = text.lines().count();
35
36 if !level.is_active() || text.is_empty() || lines_total < MIN_LINES_FOR_COMPRESSION {
37 return EngineResult {
38 output: text.to_string(),
39 tokens_before,
40 tokens_after: tokens_before,
41 quality: quality::check(
42 text,
43 text,
44 tokens_before,
45 tokens_before,
46 &QualityConfig::default(),
47 ),
48 lines_removed: 0,
49 lines_total,
50 };
51 }
52
53 let result = compress_at_level(text, tokens_before, level);
54
55 if result.quality.passed {
56 return result;
57 }
58
59 if *level == CompressionLevel::Max {
60 let fallback = compress_at_level(text, tokens_before, &CompressionLevel::Standard);
61 if fallback.quality.passed {
62 return fallback;
63 }
64 }
65
66 EngineResult {
67 output: text.to_string(),
68 tokens_before,
69 tokens_after: tokens_before,
70 quality: result.quality,
71 lines_removed: 0,
72 lines_total: text.lines().count(),
73 }
74}
75
76fn compress_at_level(text: &str, tokens_before: u32, level: &CompressionLevel) -> EngineResult {
77 let scores = scoring::score_lines(text);
78 let lines: Vec<&str> = text.lines().collect();
79 let lines_total = lines.len();
80
81 let threshold = match level {
82 CompressionLevel::Max => MAX_SCORE_THRESHOLD,
83 CompressionLevel::Standard => STANDARD_SCORE_THRESHOLD,
84 CompressionLevel::Lite | CompressionLevel::Off => LOW_SCORE_THRESHOLD,
85 };
86
87 let mut kept_lines = Vec::new();
88 let mut lines_removed = 0;
89
90 for (score, line) in scores.iter().zip(lines.iter()) {
91 let trimmed = line.trim();
92
93 if trimmed.is_empty() {
94 lines_removed += 1;
95 continue;
96 }
97
98 if is_pure_decoration(trimmed) {
99 lines_removed += 1;
100 continue;
101 }
102
103 if is_filler_line(trimmed) && !score.has_structural_marker {
104 lines_removed += 1;
105 continue;
106 }
107
108 if score.combined < threshold && !score.has_structural_marker {
109 lines_removed += 1;
110 continue;
111 }
112
113 kept_lines.push(*line);
114 }
115
116 let filtered = kept_lines.join("\n");
117
118 let quality_config = match level {
119 CompressionLevel::Max => QualityConfig {
120 min_identifier_preservation: 0.80,
121 ..QualityConfig::default()
122 },
123 _ => QualityConfig::default(),
124 };
125
126 let filtered_tokens = counter::count(&filtered);
127 let quality_report = quality::check(
128 text,
129 &filtered,
130 tokens_before,
131 filtered_tokens,
132 &quality_config,
133 );
134
135 if !quality_report.passed {
136 return EngineResult {
137 output: text.to_string(),
138 tokens_before,
139 tokens_after: tokens_before,
140 quality: quality_report,
141 lines_removed: 0,
142 lines_total,
143 };
144 }
145
146 let dict_level = match level {
147 CompressionLevel::Max | CompressionLevel::Standard => DictLevel::Full,
148 CompressionLevel::Lite | CompressionLevel::Off => DictLevel::General,
149 };
150 let compressed = dictionaries::apply_dictionaries(&filtered, dict_level);
151 let tokens_after = counter::count(&compressed);
152
153 EngineResult {
154 output: compressed,
155 tokens_before,
156 tokens_after,
157 quality: quality_report,
158 lines_removed,
159 lines_total,
160 }
161}
162
/// Returns `true` when `line` is boilerplate tool output that can be dropped.
///
/// A line counts as filler when, after trimming, it is a lone `|`, or when
/// its lowercased form contains any of the known hint/advert fragments
/// emitted by git, cargo/rustc, npm or docker (see `FILLER_PATTERNS`).
///
/// Matching is a case-insensitive substring test on the line as given, so
/// fragments may appear anywhere in the line.
fn is_filler_line(line: &str) -> bool {
    // All fragments are lowercase; they are matched against the lowercased line.
    const FILLER_PATTERNS: &[&str] = &[
        "use \"git add",
        "use \"git restore",
        "(use \"git",
        "run with `rust_backtrace",
        "for more information about this error",
        "try `rustc --explain",
        "run `npm fund`",
        "run `npm audit`",
        "to address all issues",
        "sending build context",
        "using cache",
        "packages are looking for funding",
        "no changes added to commit",
        "help: ",
        "= note: ",
        "---> running in",
    ];

    // A lone gutter pipe carries no information. A trimmed string can never
    // end in whitespace, so comparing against "|" alone is sufficient.
    if line.trim() == "|" {
        return true;
    }

    let lower = line.to_lowercase();
    FILLER_PATTERNS.iter().any(|pattern| lower.contains(*pattern))
}
191
/// Returns `true` when `line` is visual decoration with no content.
///
/// Surrounding whitespace is ignored for every check. A line is decoration
/// when, after trimming, it is:
/// - empty, or made only of `|` and whitespace;
/// - a comment-style prefix (`//`, `#`, `--`) followed by nothing or by a
///   banner (see `is_banner_chars`);
/// - itself a banner.
///
/// Trimmed lines shorter than three bytes that are not pipe gutters are
/// never treated as decoration.
fn is_pure_decoration(line: &str) -> bool {
    let trimmed = line.trim();

    // `all` is vacuously true for an empty string, so this also covers
    // blank lines.
    if trimmed.chars().all(|c| c == '|' || c.is_whitespace()) {
        return true;
    }

    if trimmed.len() < 3 {
        return false;
    }

    let has_comment_prefix =
        trimmed.starts_with("//") || trimmed.starts_with('#') || trimmed.starts_with("--");
    if has_comment_prefix {
        // Strip the comment leader(s) and look at what is left.
        let content = trimmed
            .trim_start_matches('/')
            .trim_start_matches('#')
            .trim_start_matches('-')
            .trim();
        return content.is_empty() || is_banner_chars(content);
    }

    is_banner_chars(trimmed)
}

/// Returns `true` when `line` looks like a horizontal rule or box border.
///
/// The line must have at least four characters, start with one of the
/// recognised rule/box-drawing characters, and more than 60% of its
/// characters must be that same first character.
fn is_banner_chars(line: &str) -> bool {
    let total = line.chars().count();
    if total < 4 {
        return false;
    }

    let first = match line.chars().next() {
        Some(c) => c,
        None => return false,
    };

    let is_rule_char = matches!(
        first,
        '=' | '-' | '*' | '─' | '━' | '▀' | '▄' | '╔' | '╚' | '║' | '░' | '█' | '═'
    );
    if !is_rule_char {
        return false;
    }

    let repeats = line.chars().filter(|&c| c == first).count();
    repeats as f64 / total as f64 > 0.6
}
234
#[cfg(test)]
mod tests {
    use super::*;

    /// With compression off the input must come back byte-for-byte.
    #[test]
    fn compress_off_returns_original() {
        let input = "hello world\n\nsome blank lines\n\n";
        let outcome = compress(input, &CompressionLevel::Off);
        assert_eq!(outcome.output, input);
        assert_eq!(outcome.lines_removed, 0);
    }

    /// Lite compression must not leave consecutive newlines behind.
    #[test]
    fn compress_lite_removes_blank_lines() {
        let input = "line one\n\n\nline two\n\n";
        let outcome = compress(input, &CompressionLevel::Lite);
        let has_blank_line = outcome.output.contains("\n\n");
        assert!(!has_blank_line, "blank lines should be removed");
    }

    /// File paths in the input must survive compression.
    #[test]
    fn compress_preserves_paths() {
        let input = "error in src/main.rs at line 42\n\nsome blank\n\n";
        let outcome = compress(input, &CompressionLevel::Standard);
        let keeps_path = outcome.output.contains("src/main.rs");
        assert!(keeps_path, "path must be preserved");
    }

    /// Rules and commented rules are decoration; real messages are not.
    #[test]
    fn decoration_detection() {
        for banner in [
            "════════════════════",
            "--------------------",
            "// ================",
        ] {
            assert!(is_pure_decoration(banner));
        }
        assert!(!is_pure_decoration("error: mismatched types"));
    }

    /// Token counting must report a non-zero count for non-empty input.
    #[test]
    fn compress_returns_token_counts() {
        let input = "Hello world from the compression engine test";
        let outcome = compress(input, &CompressionLevel::Lite);
        assert!(outcome.tokens_before > 0);
    }
}