1use crate::core::preservation;
2use crate::core::tokens::count_tokens;
3
/// Baseline composite score a compression must reach. `score` lowers this by up to
/// 0.05 as the share of surviving non-blank lines drops towards zero.
const QUALITY_THRESHOLD: f64 = 0.95;
/// Minimum information density (see `information_density`) a compression must have
/// to pass, in addition to the composite check.
const MIN_DENSITY: f64 = 0.15;
6
/// Result of a quality check on a compressed piece of source, as produced by `score`.
#[derive(Debug, Clone)]
pub struct QualityScore {
    /// Structural preservation: `overall()` of `preservation::measure`.
    pub ast_score: f64,
    /// Share of the original's identifiers that survive in the compressed text.
    pub identifier_score: f64,
    /// Ratio of non-blank lines kept, capped at 1.0.
    pub line_score: f64,
    /// Information density of the compressed text (see `information_density`).
    pub density: f64,
    /// Weighted blend of `ast_score`, `identifier_score` and `line_score`.
    pub composite: f64,
    /// Whether the compression cleared both the composite and the density gate.
    pub passed: bool,
}

impl QualityScore {
    /// Renders the score as a single compact line.
    ///
    /// Every value is shown as a rounded percentage. The line ends with `✓` when
    /// the score passed and with `✗ BELOW THRESHOLD` otherwise.
    pub fn format_compact(&self) -> String {
        // Only the trailing verdict differs between the two outcomes, so the
        // numeric part is formatted once.
        let verdict = if self.passed {
            "✓"
        } else {
            "✗ BELOW THRESHOLD"
        };

        format!(
            "Q:{:.0}% (ast:{:.0} id:{:.0} ln:{:.0} ρ:{:.0}) {}",
            self.composite * 100.0,
            self.ast_score * 100.0,
            self.identifier_score * 100.0,
            self.line_score * 100.0,
            self.density * 100.0,
            verdict,
        )
    }
}
40
41pub fn score(original: &str, compressed: &str, ext: &str) -> QualityScore {
42 let pres = preservation::measure(original, compressed, ext);
43 let ast_score = pres.overall();
44
45 let identifier_score = measure_identifier_preservation(original, compressed);
46 let line_score = measure_line_preservation(original, compressed);
47 let density = information_density(original, compressed, ext);
48
49 let composite = ast_score * 0.5 + identifier_score * 0.3 + line_score * 0.2;
50
51 let compression_ratio = measure_line_preservation(original, compressed);
52 let adaptive_threshold = QUALITY_THRESHOLD - 0.05 * (1.0 - compression_ratio);
53 let passed = composite >= adaptive_threshold && density >= MIN_DENSITY;
54
55 QualityScore {
56 ast_score,
57 identifier_score,
58 line_score,
59 density,
60 composite,
61 passed,
62 }
63}
64
65pub fn information_density(original: &str, compressed: &str, ext: &str) -> f64 {
68 let output_tokens = count_tokens(compressed);
69 if output_tokens == 0 {
70 return 1.0;
71 }
72
73 let pres = preservation::measure(original, compressed, ext);
74 let semantic_items = pres.functions_preserved + pres.exports_preserved + pres.imports_preserved;
75 let ident_re = regex::Regex::new(r"\b[a-zA-Z_][a-zA-Z0-9_]{3,}\b").unwrap();
76 let unique_idents: std::collections::HashSet<&str> =
77 ident_re.find_iter(compressed).map(|m| m.as_str()).collect();
78 let semantic_token_estimate = semantic_items + unique_idents.len();
79
80 (semantic_token_estimate as f64 / output_tokens as f64).min(1.0)
81}
82
83pub fn guard(original: &str, compressed: &str, ext: &str) -> (String, QualityScore) {
85 let q = score(original, compressed, ext);
86 if q.passed {
87 (compressed.to_string(), q)
88 } else {
89 (original.to_string(), q)
90 }
91}
92
93fn measure_identifier_preservation(original: &str, compressed: &str) -> f64 {
94 let ident_re = regex::Regex::new(r"\b[a-zA-Z_][a-zA-Z0-9_]{3,}\b").unwrap();
95
96 let original_idents: std::collections::HashSet<&str> =
97 ident_re.find_iter(original).map(|m| m.as_str()).collect();
98
99 if original_idents.is_empty() {
100 return 1.0;
101 }
102
103 let preserved = original_idents
104 .iter()
105 .filter(|id| compressed.contains(*id))
106 .count();
107
108 preserved as f64 / original_idents.len() as f64
109}
110
/// Ratio of non-blank lines in `compressed` to non-blank lines in `original`,
/// capped at 1.0.
///
/// Lines consisting only of whitespace are not counted. Returns 1.0 when
/// `original` has no non-blank line at all.
fn measure_line_preservation(original: &str, compressed: &str) -> f64 {
    let non_blank = |text: &str| text.lines().filter(|line| !line.trim().is_empty()).count();

    match non_blank(original) {
        0 => 1.0,
        total => (non_blank(compressed) as f64 / total as f64).min(1.0),
    }
}
122
// Unit tests for the quality scoring and the `guard` fallback.
#[cfg(test)]
mod tests {
    use super::*;

    // Scoring a source against itself must give a near-perfect composite and pass.
    #[test]
    fn test_perfect_score_identity() {
        let code = "fn main() {\n println!(\"hello\");\n}\n";
        let q = score(code, code, "rs");
        assert!(q.composite >= 0.99);
        assert!(q.passed);
    }

    // A compression that fails the gate makes `guard` hand back the original text.
    #[test]
    fn test_score_below_threshold_returns_original() {
        let original = "fn validate_token() {\n let result = check();\n return result;\n}\n";
        let bad_compressed = "removed everything";
        let (output, q) = guard(original, bad_compressed, "rs");
        assert!(!q.passed);
        assert_eq!(output, original);
    }

    // Joining all lines into one keeps structure and identifiers intact.
    // NOTE(review): despite the name, `q.passed` is not asserted. The fixture keeps
    // 1 of 4 non-blank lines (line_score 0.25), so the composite is at most
    // 0.5 + 0.3 + 0.2 * 0.25 = 0.85 against a threshold of 0.9125 — assuming
    // `ast_score` never exceeds 1.0, this input cannot pass. Confirm the intent.
    #[test]
    fn test_good_compression_passes() {
        let original = "fn validate_token() {\n let result = check();\n return result;\n}\n";
        let compressed = "fn validate_token() { let result = check(); return result; }";
        let q = score(original, compressed, "rs");
        assert!(q.ast_score >= 0.9);
        assert!(q.identifier_score >= 0.9);
    }

    // The compact rendering carries the "Q:" prefix and the pass marker.
    #[test]
    fn test_score_format_compact() {
        let code = "fn main() {}\n";
        let q = score(code, code, "rs");
        let formatted = q.format_compact();
        assert!(formatted.contains("Q:"));
        assert!(formatted.contains("✓"));
    }

    // Empty input on both sides is treated as a perfect, passing compression.
    #[test]
    fn test_empty_content_scores_perfect() {
        let q = score("", "", "rs");
        assert!(q.passed);
        assert!(q.composite >= 0.99);
    }

    // Identifier preservation on a Rust struct + impl collapsed onto two lines.
    #[test]
    fn test_rust_file_with_structs() {
        let original = "pub struct Config {\n pub name: String,\n pub value: usize,\n}\n\nimpl Config {\n pub fn new() -> Self {\n Self { name: String::new(), value: 0 }\n }\n}\n";
        let compressed = "pub struct Config { pub name: String, pub value: usize }\nimpl Config { pub fn new() -> Self { Self { name: String::new(), value: 0 } } }";
        let q = score(original, compressed, "rs");
        assert!(q.identifier_score >= 0.9);
    }

    // Identifier preservation on TypeScript exports collapsed onto two lines.
    #[test]
    fn test_typescript_file() {
        let original = "export function fetchData(url: string): Promise<Response> {\n return fetch(url);\n}\n\nexport const API_URL = 'https://api.example.com';\n";
        let compressed = "export function fetchData(url: string): Promise<Response> { return fetch(url); }\nexport const API_URL = 'https://api.example.com';";
        let q = score(original, compressed, "ts");
        assert!(q.identifier_score >= 0.9);
    }

    // The compressed Python drops the type annotations, so `bool` is lost; the
    // threshold is looser (0.8) than in the other language tests to allow for that.
    #[test]
    fn test_python_file() {
        let original = "def validate_credentials(username: str, password: str) -> bool:\n user = find_user(username)\n return verify_hash(user.password_hash, password)\n";
        let compressed = "def validate_credentials(username, password): user = find_user(username); return verify_hash(user.password_hash, password)";
        let q = score(original, compressed, "py");
        assert!(q.identifier_score >= 0.8);
    }

    // Real code measured against itself must exceed the 0.15 density floor.
    #[test]
    fn test_density_high_for_meaningful_compression() {
        let original = "pub fn calculate_total(items: Vec<Item>) -> f64 {\n items.iter().map(|i| i.price * i.quantity as f64).sum()\n}\n";
        let d = information_density(original, original, "rs");
        assert!(d > 0.15, "identity should have high density: {d}");
    }

    // Every word in the garbage is three characters long, below the four-character
    // minimum of the identifier pattern, so it contributes no identifiers at all.
    #[test]
    fn test_density_low_for_garbage() {
        let original = "pub fn calculate_total(items: Vec<Item>) -> f64 {\n items.iter().map(|i| i.price).sum()\n}\n";
        let garbage = "xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx";
        let d = information_density(original, garbage, "rs");
        assert!(d < 0.5, "garbage output should have low density: {d}");
    }

    // `score` must populate the `density` field.
    #[test]
    fn test_density_in_quality_score() {
        let code = "fn main() {\n println!(\"hello\");\n}\n";
        let q = score(code, code, "rs");
        assert!(q.density > 0.0, "density should be computed");
    }

    // NOTE(review): the name refers to the adaptive threshold, but only the density
    // floor is asserted; neither `q.passed` nor `q.composite` is checked, so the
    // threshold logic is not exercised by this test.
    #[test]
    fn test_adaptive_threshold_looser_for_heavy_compression() {
        let original = "fn validate_token() {\n let result = check();\n return result;\n}\nfn other() {\n let x = 1;\n}\n";
        let compressed = "fn validate_token() { let result = check(); return result; }";
        let q = score(original, compressed, "rs");
        assert!(
            q.density >= MIN_DENSITY,
            "compressed code should maintain minimum density"
        );
    }
}