Skip to main content

lean_ctx/core/terse/
quality.rs

//! Quality gate for terse compression.
//!
//! Ensures compression does not destroy critical information:
//! - File paths must be preserved
//! - Code identifiers (>= 6 chars by default) must be preserved
//! - Token savings are reported against a configurable minimum (default 3%); `check` does not gate `passed` on it
7
8use std::collections::HashSet;
9
/// Outcome of a single quality-gate evaluation, as produced by `check`.
#[derive(Clone, Debug)]
pub struct QualityReport {
    /// Overall verdict: true when both paths and identifiers were preserved
    /// to the configured degree.
    pub passed: bool,
    /// Percentage of tokens removed by compression (0.0 when nothing was saved).
    pub savings_pct: f32,
    /// Whether enough of the original's paths survive in the compressed text.
    pub paths_preserved: bool,
    /// Whether enough of the original's identifiers survive in the compressed text.
    pub identifiers_preserved: bool,
    /// Number of distinct paths detected in the original text.
    pub paths_total: usize,
    /// Number of those paths that were also detected in the compressed text.
    pub paths_found: usize,
    /// Number of distinct identifiers detected in the original text.
    pub identifiers_total: usize,
    /// Number of those identifiers that still occur in the compressed text.
    pub identifiers_found: usize,
}
21
/// Thresholds consulted by the quality gate.
///
/// Derives `Debug`, `Clone` and `PartialEq` so a configuration can be logged,
/// copied and compared, matching the derives on `QualityReport`.
#[derive(Debug, Clone, PartialEq)]
pub struct QualityConfig {
    /// Minimum token savings, in percent. `check` reports the measured
    /// savings but does not fold this threshold into its verdict.
    pub min_savings_pct: f32,
    /// Minimum fraction (0.0..=1.0) of original paths that must survive.
    pub min_path_preservation: f32,
    /// Minimum fraction (0.0..=1.0) of original identifiers that must survive.
    pub min_identifier_preservation: f32,
    /// Shortest word length that is treated as an identifier.
    pub min_identifier_len: usize,
}
28
29impl Default for QualityConfig {
30    fn default() -> Self {
31        Self {
32            min_savings_pct: 3.0,
33            min_path_preservation: 1.0,
34            min_identifier_preservation: 0.90,
35            min_identifier_len: 6,
36        }
37    }
38}
39
40/// Checks whether a compression result meets quality thresholds.
41pub fn check(
42    original: &str,
43    compressed: &str,
44    tokens_before: u32,
45    tokens_after: u32,
46    config: &QualityConfig,
47) -> QualityReport {
48    let savings_pct = if tokens_before > 0 && tokens_before >= tokens_after {
49        ((tokens_before - tokens_after) as f32 / tokens_before as f32) * 100.0
50    } else {
51        0.0
52    };
53
54    let orig_paths = extract_paths(original);
55    let comp_paths = extract_paths(compressed);
56    let paths_found = orig_paths
57        .iter()
58        .filter(|p| comp_paths.contains(*p))
59        .count();
60    let paths_preserved = orig_paths.is_empty()
61        || (paths_found as f32 / orig_paths.len() as f32) >= config.min_path_preservation;
62
63    let orig_idents = extract_identifiers(original, config.min_identifier_len);
64    let comp_text_lower = compressed.to_lowercase();
65    let idents_found = orig_idents
66        .iter()
67        .filter(|id| comp_text_lower.contains(&id.to_lowercase()))
68        .count();
69    let identifiers_preserved = orig_idents.is_empty()
70        || (idents_found as f32 / orig_idents.len() as f32) >= config.min_identifier_preservation;
71
72    let passed = paths_preserved && identifiers_preserved;
73
74    QualityReport {
75        passed,
76        savings_pct,
77        paths_preserved,
78        identifiers_preserved,
79        paths_total: orig_paths.len(),
80        paths_found,
81        identifiers_total: orig_idents.len(),
82        identifiers_found: idents_found,
83    }
84}
85
86fn extract_paths(text: &str) -> HashSet<String> {
87    let mut paths = HashSet::new();
88    for word in text.split_whitespace() {
89        let cleaned = word.trim_matches(|c: char| c == '\'' || c == '"' || c == ',' || c == ';');
90        if looks_like_path(cleaned) {
91            paths.insert(cleaned.to_string());
92        }
93    }
94    paths
95}
96
/// Heuristic path test for a single token.
///
/// A token of at least three bytes counts as a path when it contains a
/// directory separator (`/` or `\`), or when it ends in a short ASCII
/// alphanumeric extension (1 to 6 characters) and holds at most two dots.
fn looks_like_path(s: &str) -> bool {
    if s.len() < 3 {
        return false;
    }
    // A separator alone is enough.
    if s.contains('/') || s.contains('\\') {
        return true;
    }
    // Otherwise require a plausible extension after the last dot.
    let Some((_, ext)) = s.rsplit_once('.') else {
        return false;
    };
    let plausible_ext =
        !ext.is_empty() && ext.len() <= 6 && ext.bytes().all(|b| b.is_ascii_alphanumeric());
    plausible_ext && s.matches('.').count() <= 2
}
108
/// Collects the distinct identifier-like words of `text`.
///
/// Words are maximal runs of alphanumeric characters and underscores. A word
/// is kept when it is at least `min_len` characters long and contains at
/// least one alphabetic character (so pure numbers are ignored).
///
/// Length is measured in characters, not UTF-8 bytes: the split is
/// Unicode-aware, so a byte-length comparison would let short non-ASCII
/// words (e.g. a five-letter word with two 2-byte letters) slip past a
/// six-character minimum.
fn extract_identifiers(text: &str, min_len: usize) -> HashSet<String> {
    text.split(|c: char| !c.is_alphanumeric() && c != '_')
        .filter(|word| {
            word.chars().count() >= min_len && word.chars().any(char::is_alphabetic)
        })
        .map(String::from)
        .collect()
}
118
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the gate with the default thresholds.
    fn run_default(original: &str, compressed: &str, before: u32, after: u32) -> QualityReport {
        check(original, compressed, before, after, &QualityConfig::default())
    }

    #[test]
    fn path_detection() {
        for accepted in ["src/main.rs", "config.toml"] {
            assert!(looks_like_path(accepted));
        }
        for rejected in ["hello", "a"] {
            assert!(!looks_like_path(rejected));
        }
    }

    #[test]
    fn extract_paths_from_text() {
        let found = extract_paths("error in src/lib.rs at line 42");
        assert!(found.contains("src/lib.rs"));
    }

    #[test]
    fn extract_identifiers_min_len() {
        let found = extract_identifiers("fn configure_premium_features(home: Path)", 6);
        assert!(found.contains("configure_premium_features"));
        assert!(!found.contains("home"));
    }

    #[test]
    fn quality_passes_with_good_compression() {
        // Only path preservation is asserted here; the identifier ratio and
        // the overall verdict are not checked by this test.
        let verbose =
            "src/main.rs: error[E0308]: mismatched types\nlong description here that is verbose";
        let terse = "src/main.rs: err[E0308]: mismatched types";
        let outcome = run_default(verbose, terse, 100, 60);
        assert!(outcome.paths_preserved);
    }

    #[test]
    fn quality_passes_when_identifiers_preserved() {
        // 2% savings is below the default floor, yet the gate still passes.
        let outcome = run_default("hello", "hello", 100, 98);
        assert!(
            outcome.passed,
            "should pass when paths and identifiers are preserved"
        );
        assert!(
            outcome.savings_pct < 3.0,
            "savings should still be tracked as low"
        );
    }

    #[test]
    fn quality_fails_missing_path() {
        let outcome = run_default("error in src/config.rs", "error occurred", 100, 50);
        assert!(!outcome.paths_preserved);
    }
}