lean_ctx/core/terse/
quality.rs1use std::collections::HashSet;
9
/// Outcome of a single [`check`] run comparing a compressed text with its original.
///
/// Derives `PartialEq` so reports can be compared directly (for example in tests).
#[derive(Debug, Clone, PartialEq)]
pub struct QualityReport {
    /// `true` when both `paths_preserved` and `identifiers_preserved` are `true`.
    /// The savings percentage does not influence this flag.
    pub passed: bool,
    /// Percentage of tokens removed, computed from the token counts given to `check`.
    /// It is `0.0` when the original token count is zero or smaller than the
    /// compressed token count.
    pub savings_pct: f32,
    /// `true` when the original contained no paths, or the found/total ratio
    /// reached `QualityConfig::min_path_preservation`.
    pub paths_preserved: bool,
    /// `true` when the original contained no identifiers, or the found/total ratio
    /// reached `QualityConfig::min_identifier_preservation`.
    pub identifiers_preserved: bool,
    /// Number of distinct paths extracted from the original text.
    pub paths_total: usize,
    /// Number of those paths that were also extracted from the compressed text.
    pub paths_found: usize,
    /// Number of distinct identifiers extracted from the original text
    /// (extraction stops after `MAX_IDENTIFIERS` distinct entries).
    pub identifiers_total: usize,
    /// Number of those identifiers present (case-insensitively) in the compressed text.
    pub identifiers_found: usize,
}
21
/// Thresholds used by [`check`] to decide whether a compression kept enough information.
///
/// Derives `Debug`, `Clone` and `PartialEq` so configurations can be logged,
/// duplicated and compared.
#[derive(Debug, Clone, PartialEq)]
pub struct QualityConfig {
    /// Minimum token savings, in percent. `check` reports the savings but does not
    /// compare them against this value; callers may apply it themselves.
    pub min_savings_pct: f32,
    /// Minimum fraction (`0.0..=1.0`) of the original's paths that must reappear
    /// in the compressed text.
    pub min_path_preservation: f32,
    /// Minimum fraction (`0.0..=1.0`) of the original's identifiers that must
    /// reappear in the compressed text.
    pub min_identifier_preservation: f32,
    /// Minimum length, in bytes, for a token to be treated as an identifier.
    pub min_identifier_len: usize,
}
28
29impl Default for QualityConfig {
30 fn default() -> Self {
31 Self {
32 min_savings_pct: 3.0,
33 min_path_preservation: 1.0,
34 min_identifier_preservation: 0.90,
35 min_identifier_len: 6,
36 }
37 }
38}
39
40pub fn check(
42 original: &str,
43 compressed: &str,
44 tokens_before: u32,
45 tokens_after: u32,
46 config: &QualityConfig,
47) -> QualityReport {
48 let savings_pct = if tokens_before > 0 && tokens_before >= tokens_after {
49 ((tokens_before - tokens_after) as f32 / tokens_before as f32) * 100.0
50 } else {
51 0.0
52 };
53
54 let orig_paths = extract_paths(original);
55 let comp_paths = extract_paths(compressed);
56 let paths_found = orig_paths
57 .iter()
58 .filter(|p| comp_paths.contains(*p))
59 .count();
60 let paths_preserved = orig_paths.is_empty()
61 || (paths_found as f32 / orig_paths.len() as f32) >= config.min_path_preservation;
62
63 let orig_idents = extract_identifiers(original, config.min_identifier_len);
64 let comp_words: HashSet<String> = compressed
65 .split(|c: char| !c.is_alphanumeric() && c != '_')
66 .filter(|w| w.len() >= config.min_identifier_len)
67 .map(str::to_lowercase)
68 .collect();
69 let idents_found = orig_idents
70 .iter()
71 .filter(|id| comp_words.contains(&id.to_lowercase()))
72 .count();
73 let identifiers_preserved = orig_idents.is_empty()
74 || (idents_found as f32 / orig_idents.len() as f32) >= config.min_identifier_preservation;
75
76 let passed = paths_preserved && identifiers_preserved;
77
78 QualityReport {
79 passed,
80 savings_pct,
81 paths_preserved,
82 identifiers_preserved,
83 paths_total: orig_paths.len(),
84 paths_found,
85 identifiers_total: orig_idents.len(),
86 identifiers_found: idents_found,
87 }
88}
89
90fn extract_paths(text: &str) -> HashSet<String> {
91 let mut paths = HashSet::new();
92 for word in text.split_whitespace() {
93 let cleaned = word.trim_matches(|c: char| c == '\'' || c == '"' || c == ',' || c == ';');
94 if looks_like_path(cleaned) {
95 paths.insert(cleaned.to_string());
96 }
97 }
98 paths
99}
100
/// Heuristically decides whether `s` is a file path.
///
/// Strings shorter than three bytes are rejected. Anything containing `/` or `\`
/// is accepted. Otherwise the text after the last `.` must be a non-empty run of
/// at most six ASCII alphanumeric characters, and the string may contain at most
/// two dots in total.
fn looks_like_path(s: &str) -> bool {
    if s.len() < 3 {
        return false;
    }
    if s.chars().any(|c| matches!(c, '/' | '\\')) {
        return true;
    }

    // No separator: fall back to the "name.ext" shape.
    let Some((_, ext)) = s.rsplit_once('.') else {
        return false;
    };
    let plausible_ext =
        !ext.is_empty() && ext.len() <= 6 && ext.bytes().all(|b| b.is_ascii_alphanumeric());

    plausible_ext && s.matches('.').count() <= 2
}
112
/// Upper bound on how many distinct identifiers `extract_identifiers` collects.
const MAX_IDENTIFIERS: usize = 200;

/// Collects distinct identifier-like tokens from `text`.
///
/// The text is split on every character that is neither alphanumeric nor `_`.
/// A token is kept when it is at least `min_len` bytes long and contains at least
/// one alphabetic character. Collection stops once `MAX_IDENTIFIERS` distinct
/// tokens have been gathered, so only the earliest ones in the text are returned.
fn extract_identifiers(text: &str, min_len: usize) -> HashSet<String> {
    let is_delimiter = |c: char| !(c.is_alphanumeric() || c == '_');
    let mut candidates = text
        .split(is_delimiter)
        .filter(|token| token.len() >= min_len && token.chars().any(char::is_alphabetic));

    let mut found = HashSet::new();
    // Duplicates do not grow the set, so the cap is checked on the set size.
    while found.len() < MAX_IDENTIFIERS {
        match candidates.next() {
            Some(token) => {
                found.insert(token.to_string());
            }
            None => break,
        }
    }
    found
}
127
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `check` with `QualityConfig::default()`.
    fn check_with_defaults(
        original: &str,
        compressed: &str,
        tokens_before: u32,
        tokens_after: u32,
    ) -> QualityReport {
        let config = QualityConfig::default();
        check(original, compressed, tokens_before, tokens_after, &config)
    }

    #[test]
    fn path_detection() {
        for accepted in ["src/main.rs", "config.toml"] {
            assert!(looks_like_path(accepted));
        }
        for rejected in ["hello", "a"] {
            assert!(!looks_like_path(rejected));
        }
    }

    #[test]
    fn extract_paths_from_text() {
        let found = extract_paths("error in src/lib.rs at line 42");
        assert!(found.contains("src/lib.rs"));
    }

    #[test]
    fn extract_identifiers_min_len() {
        let source = "fn configure_premium_features(home: Path)";
        let found = extract_identifiers(source, 6);
        assert!(found.contains("configure_premium_features"));
        // "home" is shorter than the six-byte minimum.
        assert!(!found.contains("home"));
    }

    #[test]
    fn quality_passes_with_good_compression() {
        let report = check_with_defaults(
            "src/main.rs: error[E0308]: mismatched types\nlong description here that is verbose",
            "src/main.rs: err[E0308]: mismatched types",
            100,
            60,
        );
        assert!(report.paths_preserved);
    }

    #[test]
    fn quality_passes_when_identifiers_preserved() {
        // Only 2% savings: the report still passes because savings do not gate it.
        let report = check_with_defaults("hello", "hello", 100, 98);
        assert!(
            report.passed,
            "should pass when paths and identifiers are preserved"
        );
        assert!(
            report.savings_pct < 3.0,
            "savings should still be tracked as low"
        );
    }

    #[test]
    fn quality_fails_missing_path() {
        let report = check_with_defaults("error in src/config.rs", "error occurred", 100, 50);
        assert!(!report.paths_preserved);
    }
}