Skip to main content

lean_ctx/core/terse/
engine.rs

//! Layer 1: Deterministic output compression.
//!
//! Replaces the legacy `compress_terse`/`compress_ultra` with a scoring-based
//! approach that preserves information-dense lines and removes low-value content.
5
6use super::counter;
7use super::dictionaries::{self, DictLevel};
8use super::quality::{self, QualityConfig, QualityReport};
9use super::scoring;
10use crate::core::config::CompressionLevel;
11
/// Score cutoff applied at the `Lite` (and `Off`) level: lines scoring below
/// this are treated as low-information and may be removed.
const LOW_SCORE_THRESHOLD: f32 = 2.5;

/// Score cutoff applied at the `Standard` level.
const STANDARD_SCORE_THRESHOLD: f32 = 3.0;
/// Score cutoff applied at the `Max` level (the most aggressive filter).
const MAX_SCORE_THRESHOLD: f32 = 3.5;
17
18/// Result of Layer 1 compression.
19#[derive(Debug)]
20pub struct EngineResult {
21    pub output: String,
22    pub tokens_before: u32,
23    pub tokens_after: u32,
24    pub quality: QualityReport,
25    pub lines_removed: usize,
26    pub lines_total: usize,
27}
28
/// Inputs with fewer lines than this are returned unchanged by `compress`.
const MIN_LINES_FOR_COMPRESSION: usize = 5;
30
31/// Runs Layer 1 deterministic compression on the input text.
32pub fn compress(text: &str, level: &CompressionLevel) -> EngineResult {
33    let tokens_before = counter::count(text);
34    let lines_total = text.lines().count();
35
36    if !level.is_active() || text.is_empty() || lines_total < MIN_LINES_FOR_COMPRESSION {
37        return EngineResult {
38            output: text.to_string(),
39            tokens_before,
40            tokens_after: tokens_before,
41            quality: quality::check(
42                text,
43                text,
44                tokens_before,
45                tokens_before,
46                &QualityConfig::default(),
47            ),
48            lines_removed: 0,
49            lines_total,
50        };
51    }
52
53    let result = compress_at_level(text, tokens_before, level);
54
55    if result.quality.passed {
56        return result;
57    }
58
59    if *level == CompressionLevel::Max {
60        let fallback = compress_at_level(text, tokens_before, &CompressionLevel::Standard);
61        if fallback.quality.passed {
62            return fallback;
63        }
64    }
65
66    EngineResult {
67        output: text.to_string(),
68        tokens_before,
69        tokens_after: tokens_before,
70        quality: result.quality,
71        lines_removed: 0,
72        lines_total: text.lines().count(),
73    }
74}
75
76fn compress_at_level(text: &str, tokens_before: u32, level: &CompressionLevel) -> EngineResult {
77    let scores = scoring::score_lines(text);
78    let lines: Vec<&str> = text.lines().collect();
79    let lines_total = lines.len();
80
81    let threshold = match level {
82        CompressionLevel::Max => MAX_SCORE_THRESHOLD,
83        CompressionLevel::Standard => STANDARD_SCORE_THRESHOLD,
84        CompressionLevel::Lite | CompressionLevel::Off => LOW_SCORE_THRESHOLD,
85    };
86
87    let mut kept_lines = Vec::new();
88    let mut lines_removed = 0;
89
90    for (score, line) in scores.iter().zip(lines.iter()) {
91        let trimmed = line.trim();
92
93        if trimmed.is_empty() {
94            lines_removed += 1;
95            continue;
96        }
97
98        if is_pure_decoration(trimmed) {
99            lines_removed += 1;
100            continue;
101        }
102
103        if is_filler_line(trimmed) && !score.has_structural_marker {
104            lines_removed += 1;
105            continue;
106        }
107
108        if score.combined < threshold && !score.has_structural_marker {
109            lines_removed += 1;
110            continue;
111        }
112
113        kept_lines.push(*line);
114    }
115
116    let filtered = kept_lines.join("\n");
117
118    let quality_config = match level {
119        CompressionLevel::Max => QualityConfig {
120            min_identifier_preservation: 0.80,
121            ..QualityConfig::default()
122        },
123        _ => QualityConfig::default(),
124    };
125
126    let filtered_tokens = counter::count(&filtered);
127    let quality_report = quality::check(
128        text,
129        &filtered,
130        tokens_before,
131        filtered_tokens,
132        &quality_config,
133    );
134
135    if !quality_report.passed {
136        return EngineResult {
137            output: text.to_string(),
138            tokens_before,
139            tokens_after: tokens_before,
140            quality: quality_report,
141            lines_removed: 0,
142            lines_total,
143        };
144    }
145
146    let dict_level = match level {
147        CompressionLevel::Max | CompressionLevel::Standard => DictLevel::Full,
148        CompressionLevel::Lite | CompressionLevel::Off => DictLevel::General,
149    };
150    let compressed = dictionaries::apply_dictionaries(&filtered, dict_level);
151    let tokens_after = counter::count(&compressed);
152
153    EngineResult {
154        output: compressed,
155        tokens_before,
156        tokens_after,
157        quality: quality_report,
158        lines_removed,
159        lines_total,
160    }
161}
162
/// Returns `true` when `line` is boilerplate that can be dropped.
///
/// A line is filler when, after trimming, it is a lone `|`, or when its
/// lowercased form contains one of the known tool-hint substrings below
/// (git, rustc/cargo, npm and docker style messages).
fn is_filler_line(line: &str) -> bool {
    const FILLER_PATTERNS: &[&str] = &[
        "use \"git add",
        "use \"git restore",
        "(use \"git",
        "run with `rust_backtrace",
        "for more information about this error",
        "try `rustc --explain",
        "run `npm fund`",
        "run `npm audit`",
        "to address all issues",
        "sending build context",
        "using cache",
        "packages are looking for funding",
        "no changes added to commit",
        "help: ",
        "= note: ",
        "---> running in",
    ];

    // A trimmed string can never end in whitespace, so comparing against "|"
    // alone covers every padded variant of a lone pipe.
    if line.trim() == "|" {
        return true;
    }

    // Match against the untrimmed line on purpose: some patterns end in a
    // space ("help: ", "= note: "), and trimming first could remove it.
    let lower = line.to_lowercase();
    FILLER_PATTERNS.iter().any(|&pattern| lower.contains(pattern))
}
191
192fn is_pure_decoration(line: &str) -> bool {
193    let trimmed = line.trim();
194
195    if trimmed.is_empty() {
196        return true;
197    }
198
199    if trimmed.chars().all(|c| c == '|' || c.is_whitespace()) {
200        return true;
201    }
202
203    if line.len() < 3 {
204        return false;
205    }
206
207    if line.starts_with("//") || line.starts_with('#') || line.starts_with("--") {
208        let content = line
209            .trim_start_matches('/')
210            .trim_start_matches('#')
211            .trim_start_matches('-')
212            .trim();
213        return content.is_empty() || is_banner_chars(content);
214    }
215
216    is_banner_chars(line)
217}
218
/// Returns `true` when `line` looks like a horizontal rule or box-drawing banner.
///
/// The line must be at least four characters long, start with one of the
/// recognised banner glyphs, and have that same glyph make up more than 60%
/// of its characters.
fn is_banner_chars(line: &str) -> bool {
    const BANNER_GLYPHS: &[char] = &[
        '=', '-', '*', '─', '━', '▀', '▄', '╔', '╚', '║', '░', '█', '═',
    ];

    let total = line.chars().count();
    if total < 4 {
        return false;
    }

    match line.chars().next() {
        Some(lead) if BANNER_GLYPHS.contains(&lead) => {
            let repeats = line.chars().filter(|&c| c == lead).count();
            repeats as f64 / total as f64 > 0.6
        }
        _ => false,
    }
}
234
#[cfg(test)]
mod tests {
    use super::*;

    /// At the `Off` level the input must come back byte-for-byte.
    #[test]
    fn compress_off_returns_original() {
        let input = "hello world\n\nsome blank lines\n\n";
        let outcome = compress(input, &CompressionLevel::Off);
        assert_eq!(outcome.output, input);
        assert_eq!(outcome.lines_removed, 0);
    }

    /// Five-line input (three of them blank) at `Lite`: no blank line may survive.
    #[test]
    fn compress_lite_removes_blank_lines() {
        let input = "line one\n\n\nline two\n\n";
        let outcome = compress(input, &CompressionLevel::Lite);
        let has_blank_line = outcome.output.contains("\n\n");
        assert!(
            !has_blank_line,
            "blank lines should be removed"
        );
    }

    /// A file path in the input must still be present in the output.
    ///
    /// NOTE(review): this input has only four lines, which is below
    /// `MIN_LINES_FOR_COMPRESSION`, so it exercises the pass-through path.
    #[test]
    fn compress_preserves_paths() {
        let input = "error in src/main.rs at line 42\n\nsome blank\n\n";
        let outcome = compress(input, &CompressionLevel::Standard);
        let path_kept = outcome.output.contains("src/main.rs");
        assert!(
            path_kept,
            "path must be preserved"
        );
    }

    /// Banner-style lines are decoration; a real diagnostic is not.
    #[test]
    fn decoration_detection() {
        let box_rule = "════════════════════";
        let dash_rule = "--------------------";
        let comment_rule = "// ================";
        let diagnostic = "error: mismatched types";

        assert!(is_pure_decoration(box_rule));
        assert!(is_pure_decoration(dash_rule));
        assert!(is_pure_decoration(comment_rule));
        assert!(!is_pure_decoration(diagnostic));
    }

    /// Token counting runs even for input that is too short to compress
    /// (a single line here).
    #[test]
    fn compress_returns_token_counts() {
        let input = "Hello world from the compression engine test";
        let outcome = compress(input, &CompressionLevel::Lite);
        assert!(outcome.tokens_before > 0);
    }
}