Skip to main content

lean_ctx/core/terse/
quality.rs

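//! Quality gate for terse compression.
//!
//! Ensures compression does not destroy critical information:
//! - File paths must be preserved (by default, all of them)
//! - Code identifiers (>= 6 chars by default) must be preserved (by default, at least 90%)
//! - Token savings are reported as a percentage; the configured minimum (default 3%)
//!   is not part of the pass/fail verdict computed by `check`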
7
8use std::collections::HashSet;
9
/// Outcome of a single quality-gate evaluation.
///
/// Carries the overall verdict together with the raw counts it was derived
/// from, so callers can log or display why a compression was rejected.
#[derive(Clone, Debug)]
pub struct QualityReport {
    /// Overall verdict of the gate.
    pub passed: bool,
    /// Token savings as a percentage of the original token count.
    pub savings_pct: f32,
    /// Whether enough of the original paths survived compression.
    pub paths_preserved: bool,
    /// Whether enough of the original identifiers survived compression.
    pub identifiers_preserved: bool,
    /// Number of distinct paths detected in the original text.
    pub paths_total: usize,
    /// Number of those paths that were also detected in the compressed text.
    pub paths_found: usize,
    /// Number of distinct identifiers detected in the original text.
    pub identifiers_total: usize,
    /// Number of those identifiers that were also detected in the compressed text.
    pub identifiers_found: usize,
}
21
/// Thresholds consulted by the quality gate.
///
/// Derives `Debug`, `Clone` and `PartialEq` so a configuration can be logged,
/// copied and compared, matching the derives on `QualityReport`.
#[derive(Debug, Clone, PartialEq)]
pub struct QualityConfig {
    /// Minimum token savings, in percent.
    ///
    /// NOTE(review): `check` reports the measured savings but does not fold
    /// this threshold into its verdict; presumably the caller enforces it —
    /// confirm.
    pub min_savings_pct: f32,
    /// Minimum fraction (`0.0..=1.0`) of original paths that must still be
    /// present after compression.
    pub min_path_preservation: f32,
    /// Minimum fraction (`0.0..=1.0`) of original identifiers that must still
    /// be present after compression.
    pub min_identifier_preservation: f32,
    /// Words shorter than this are not treated as identifiers.
    pub min_identifier_len: usize,
}

impl Default for QualityConfig {
    /// Returns the stock thresholds: 3% savings, every path, 90% of
    /// identifiers, and identifiers of length 6 or more.
    fn default() -> Self {
        Self {
            min_savings_pct: 3.0,
            min_path_preservation: 1.0,
            min_identifier_preservation: 0.90,
            min_identifier_len: 6,
        }
    }
}
39
40/// Checks whether a compression result meets quality thresholds.
41pub fn check(
42    original: &str,
43    compressed: &str,
44    tokens_before: u32,
45    tokens_after: u32,
46    config: &QualityConfig,
47) -> QualityReport {
48    let savings_pct = if tokens_before > 0 && tokens_before >= tokens_after {
49        ((tokens_before - tokens_after) as f32 / tokens_before as f32) * 100.0
50    } else {
51        0.0
52    };
53
54    let orig_paths = extract_paths(original);
55    let comp_paths = extract_paths(compressed);
56    let paths_found = orig_paths
57        .iter()
58        .filter(|p| comp_paths.contains(*p))
59        .count();
60    let paths_preserved = orig_paths.is_empty()
61        || (paths_found as f32 / orig_paths.len() as f32) >= config.min_path_preservation;
62
63    let orig_idents = extract_identifiers(original, config.min_identifier_len);
64    let comp_words: HashSet<String> = compressed
65        .split(|c: char| !c.is_alphanumeric() && c != '_')
66        .filter(|w| w.len() >= config.min_identifier_len)
67        .map(str::to_lowercase)
68        .collect();
69    let idents_found = orig_idents
70        .iter()
71        .filter(|id| comp_words.contains(&id.to_lowercase()))
72        .count();
73    let identifiers_preserved = orig_idents.is_empty()
74        || (idents_found as f32 / orig_idents.len() as f32) >= config.min_identifier_preservation;
75
76    let passed = paths_preserved && identifiers_preserved;
77
78    QualityReport {
79        passed,
80        savings_pct,
81        paths_preserved,
82        identifiers_preserved,
83        paths_total: orig_paths.len(),
84        paths_found,
85        identifiers_total: orig_idents.len(),
86        identifiers_found: idents_found,
87    }
88}
89
/// Collects every whitespace-delimited token of `text` that looks like a file path.
///
/// Punctuation that commonly wraps a path in diagnostics and prose (quotes,
/// backticks, brackets, parentheses, `,`, `;`, `:`) is stripped from both ends
/// of each token first, so `src/main.rs:`, `(src/main.rs)` and `src/main.rs`
/// all yield the same entry. Punctuation inside a token (for example the colon
/// in `src/main.rs:42`) is left alone.
fn extract_paths(text: &str) -> HashSet<String> {
    text.split_whitespace()
        .map(|token| token.trim_matches(is_path_wrapper))
        .filter(|candidate| looks_like_path(candidate))
        .map(str::to_string)
        .collect()
}

/// Returns `true` for punctuation that may surround a path but is not part of it.
fn is_path_wrapper(c: char) -> bool {
    matches!(
        c,
        '\'' | '"' | '`' | ',' | ';' | ':' | '(' | ')' | '[' | ']'
    )
}

/// Heuristic path test.
///
/// A token of at least three bytes is considered a path when it contains a
/// `/` or `\` separator, or when it ends in a 1-6 character ASCII-alphanumeric
/// extension and contains at most two dots overall.
fn looks_like_path(s: &str) -> bool {
    if s.len() < 3 {
        return false;
    }
    if s.contains(|c: char| matches!(c, '/' | '\\')) {
        return true;
    }
    // `rsplit_once` splits at the last dot, so `ext` is everything after it.
    let has_extension = s.rsplit_once('.').is_some_and(|(_, ext)| {
        (1..=6).contains(&ext.len()) && ext.chars().all(|c| c.is_ascii_alphanumeric())
    });
    // The dot limit keeps dotted version strings such as `1.2.3.4` out.
    has_extension && s.matches('.').count() <= 2
}
112
/// Upper bound on the number of distinct identifiers collected from one text.
const MAX_IDENTIFIERS: usize = 200;

/// Collects distinct identifier-like words from `text`.
///
/// Words are maximal runs of alphanumeric characters and underscores. A word
/// is kept when it has at least `min_len` characters and contains at least one
/// alphabetic character (so pure numbers are skipped). Length is measured in
/// characters rather than bytes, so multi-byte letters do not inflate it.
/// Collection stops once `MAX_IDENTIFIERS` distinct words have been gathered.
fn extract_identifiers(text: &str, min_len: usize) -> HashSet<String> {
    let mut idents = HashSet::new();
    for word in text.split(|c: char| !(c.is_alphanumeric() || c == '_')) {
        let long_enough = word.chars().count() >= min_len;
        if long_enough && word.chars().any(char::is_alphabetic) {
            idents.insert(word.to_string());
            if idents.len() >= MAX_IDENTIFIERS {
                break;
            }
        }
    }
    idents
}
127
#[cfg(test)]
mod tests {
    use super::*;

    /// Separator- or extension-bearing tokens are paths; bare or tiny words are not.
    #[test]
    fn path_detection() {
        for accepted in ["src/main.rs", "config.toml"] {
            assert!(looks_like_path(accepted));
        }
        for rejected in ["hello", "a"] {
            assert!(!looks_like_path(rejected));
        }
    }

    /// A path embedded in a sentence is picked out of the surrounding words.
    #[test]
    fn extract_paths_from_text() {
        let found = extract_paths("error in src/lib.rs at line 42");
        assert!(found.contains("src/lib.rs"));
    }

    /// Words below the minimum length are not reported as identifiers.
    #[test]
    fn extract_identifiers_min_len() {
        let source = "fn configure_premium_features(home: Path)";
        let found = extract_identifiers(source, 6);
        assert!(found.contains("configure_premium_features"));
        assert!(!found.contains("home"));
    }

    /// The path survives when the compressed text keeps it verbatim.
    #[test]
    fn quality_passes_with_good_compression() {
        let cfg = QualityConfig::default();
        let before =
            "src/main.rs: error[E0308]: mismatched types\nlong description here that is verbose";
        let after = "src/main.rs: err[E0308]: mismatched types";
        let outcome = check(before, after, 100, 60, &cfg);
        assert!(outcome.paths_preserved);
    }

    /// Low savings alone do not fail the gate, but are still reported.
    #[test]
    fn quality_passes_when_identifiers_preserved() {
        let cfg = QualityConfig::default();
        let outcome = check("hello", "hello", 100, 98, &cfg);
        assert!(
            outcome.passed,
            "should pass when paths and identifiers are preserved"
        );
        assert!(
            outcome.savings_pct < 3.0,
            "savings should still be tracked as low"
        );
    }

    /// Dropping a path from the compressed text is detected.
    #[test]
    fn quality_fails_missing_path() {
        let cfg = QualityConfig::default();
        let outcome = check("error in src/config.rs", "error occurred", 100, 50, &cfg);
        assert!(!outcome.paths_preserved);
    }
}