1#![allow(dead_code)]
2use crate::core::preservation;
3use crate::core::tokens::count_tokens;
4
/// Baseline composite score a compression must reach to pass. `score` relaxes
/// this by up to 0.05 as the share of retained non-blank lines shrinks.
const QUALITY_THRESHOLD: f64 = 0.95;
/// Minimum information density a compression must have to pass.
const MIN_DENSITY: f64 = 0.15;
7
/// Quality metrics describing how well a compressed text preserves its original.
///
/// Values are produced by `score`; `format_compact` renders them for display.
#[derive(Debug, Clone, PartialEq)]
pub struct QualityScore {
    /// Overall preservation score reported by `preservation::measure`.
    pub ast_score: f64,
    /// Fraction of the original's identifiers that survive in the compressed text.
    pub identifier_score: f64,
    /// Ratio of non-blank compressed lines to non-blank original lines, capped at 1.0.
    pub line_score: f64,
    /// Estimated semantic items per output token, capped at 1.0.
    pub density: f64,
    /// Weighted blend of the AST (50%), identifier (30%) and line (20%) scores.
    pub composite: f64,
    /// Whether the compression cleared both the composite threshold and the density floor.
    pub passed: bool,
}

impl QualityScore {
    /// Renders the score as a one-line summary.
    ///
    /// Every metric is shown as a whole-number percentage. The line ends with
    /// `✓` when the score passed and `✗ BELOW THRESHOLD` otherwise.
    pub fn format_compact(&self) -> String {
        // Only the trailing verdict differs between the two outcomes, so the
        // metric layout is written once to keep both renderings in sync.
        let verdict = if self.passed {
            "✓"
        } else {
            "✗ BELOW THRESHOLD"
        };

        format!(
            "Q:{:.0}% (ast:{:.0} id:{:.0} ln:{:.0} ρ:{:.0}) {}",
            self.composite * 100.0,
            self.ast_score * 100.0,
            self.identifier_score * 100.0,
            self.line_score * 100.0,
            self.density * 100.0,
            verdict,
        )
    }
}
41
42pub fn score(original: &str, compressed: &str, ext: &str) -> QualityScore {
43 let pres = preservation::measure(original, compressed, ext);
44 let ast_score = pres.overall();
45
46 let identifier_score = measure_identifier_preservation(original, compressed);
47 let line_score = measure_line_preservation(original, compressed);
48 let density = information_density(original, compressed, ext);
49
50 let composite = ast_score * 0.5 + identifier_score * 0.3 + line_score * 0.2;
51
52 let compression_ratio = measure_line_preservation(original, compressed);
53 let adaptive_threshold = QUALITY_THRESHOLD - 0.05 * (1.0 - compression_ratio);
54 let passed = composite >= adaptive_threshold && density >= MIN_DENSITY;
55
56 QualityScore {
57 ast_score,
58 identifier_score,
59 line_score,
60 density,
61 composite,
62 passed,
63 }
64}
65
66pub fn information_density(original: &str, compressed: &str, ext: &str) -> f64 {
69 let output_tokens = count_tokens(compressed);
70 if output_tokens == 0 {
71 return 1.0;
72 }
73
74 let pres = preservation::measure(original, compressed, ext);
75 let semantic_items = pres.functions_preserved + pres.exports_preserved + pres.imports_preserved;
76 let ident_re = regex::Regex::new(r"\b[a-zA-Z_][a-zA-Z0-9_]{3,}\b").unwrap();
77 let unique_idents: std::collections::HashSet<&str> =
78 ident_re.find_iter(compressed).map(|m| m.as_str()).collect();
79 let semantic_token_estimate = semantic_items + unique_idents.len();
80
81 (semantic_token_estimate as f64 / output_tokens as f64).min(1.0)
82}
83
84pub fn guard(original: &str, compressed: &str, ext: &str) -> (String, QualityScore) {
86 let q = score(original, compressed, ext);
87 if q.passed {
88 (compressed.to_string(), q)
89 } else {
90 (original.to_string(), q)
91 }
92}
93
94fn measure_identifier_preservation(original: &str, compressed: &str) -> f64 {
95 let ident_re = regex::Regex::new(r"\b[a-zA-Z_][a-zA-Z0-9_]{3,}\b").unwrap();
96
97 let original_idents: std::collections::HashSet<&str> =
98 ident_re.find_iter(original).map(|m| m.as_str()).collect();
99
100 if original_idents.is_empty() {
101 return 1.0;
102 }
103
104 let preserved = original_idents
105 .iter()
106 .filter(|id| compressed.contains(*id))
107 .count();
108
109 preserved as f64 / original_idents.len() as f64
110}
111
/// Returns the ratio of non-blank lines in `compressed` to non-blank lines in
/// `original`, capped at 1.0.
///
/// A line counts as blank when it is empty after trimming whitespace. When
/// `original` has no non-blank lines the result is 1.0.
fn measure_line_preservation(original: &str, compressed: &str) -> f64 {
    let non_blank = |text: &str| text.lines().filter(|line| !line.trim().is_empty()).count();

    match non_blank(original) {
        0 => 1.0,
        total => (non_blank(compressed) as f64 / total as f64).min(1.0),
    }
}
123
// Unit tests for the quality scoring and guard functions.
#[cfg(test)]
mod tests {
    use super::*;

    // Scoring a text against itself should yield a near-perfect, passing score.
    #[test]
    fn test_perfect_score_identity() {
        let code = "fn main() {\n println!(\"hello\");\n}\n";
        let q = score(code, code, "rs");
        assert!(q.composite >= 0.99);
        assert!(q.passed);
    }

    // When the compressed text fails the quality check, `guard` must hand back
    // the original text unchanged.
    #[test]
    fn test_score_below_threshold_returns_original() {
        let original = "fn validate_token() {\n let result = check();\n return result;\n}\n";
        let bad_compressed = "removed everything";
        let (output, q) = guard(original, bad_compressed, "rs");
        assert!(!q.passed);
        assert_eq!(output, original);
    }

    // Collapsing the function onto one line keeps every identifier, so both
    // the AST and identifier scores should stay high.
    #[test]
    fn test_good_compression_passes() {
        let original = "fn validate_token() {\n let result = check();\n return result;\n}\n";
        let compressed = "fn validate_token() { let result = check(); return result; }";
        let q = score(original, compressed, "rs");
        assert!(q.ast_score >= 0.9);
        assert!(q.identifier_score >= 0.9);
    }

    // The compact rendering starts with the "Q:" label and, for a passing
    // score, carries the check mark.
    #[test]
    fn test_score_format_compact() {
        let code = "fn main() {}\n";
        let q = score(code, code, "rs");
        let formatted = q.format_compact();
        assert!(formatted.contains("Q:"));
        assert!(formatted.contains("✓"));
    }

    // Empty input on both sides is treated as a perfect, passing compression.
    #[test]
    fn test_empty_content_scores_perfect() {
        let q = score("", "", "rs");
        assert!(q.passed);
        assert!(q.composite >= 0.99);
    }

    // Rust struct and impl collapsed onto fewer lines: identifiers must survive.
    #[test]
    fn test_rust_file_with_structs() {
        let original = "pub struct Config {\n pub name: String,\n pub value: usize,\n}\n\nimpl Config {\n pub fn new() -> Self {\n Self { name: String::new(), value: 0 }\n }\n}\n";
        let compressed = "pub struct Config { pub name: String, pub value: usize }\nimpl Config { pub fn new() -> Self { Self { name: String::new(), value: 0 } } }";
        let q = score(original, compressed, "rs");
        assert!(q.identifier_score >= 0.9);
    }

    // TypeScript source collapsed onto fewer lines: identifiers must survive.
    #[test]
    fn test_typescript_file() {
        let original = "export function fetchData(url: string): Promise<Response> {\n return fetch(url);\n}\n\nexport const API_URL = 'https://api.example.com';\n";
        let compressed = "export function fetchData(url: string): Promise<Response> { return fetch(url); }\nexport const API_URL = 'https://api.example.com';";
        let q = score(original, compressed, "ts");
        assert!(q.identifier_score >= 0.9);
    }

    // The compressed Python drops the type annotations, which is presumably
    // why this test uses the looser 0.8 bound.
    #[test]
    fn test_python_file() {
        let original = "def validate_credentials(username: str, password: str) -> bool:\n user = find_user(username)\n return verify_hash(user.password_hash, password)\n";
        let compressed = "def validate_credentials(username, password): user = find_user(username); return verify_hash(user.password_hash, password)";
        let q = score(original, compressed, "py");
        assert!(q.identifier_score >= 0.8);
    }

    // Real code measured against itself should clear the 0.15 density floor.
    #[test]
    fn test_density_high_for_meaningful_compression() {
        let original = "pub fn calculate_total(items: Vec<Item>) -> f64 {\n items.iter().map(|i| i.price * i.quantity as f64).sum()\n}\n";
        let d = information_density(original, original, "rs");
        assert!(d > 0.15, "identity should have high density: {d}");
    }

    // Repeated three-letter filler is too short to match the identifier
    // pattern, so the density should come out low.
    #[test]
    fn test_density_low_for_garbage() {
        let original = "pub fn calculate_total(items: Vec<Item>) -> f64 {\n items.iter().map(|i| i.price).sum()\n}\n";
        let garbage = "xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx";
        let d = information_density(original, garbage, "rs");
        assert!(d < 0.5, "garbage output should have low density: {d}");
    }

    // `score` must populate the density field.
    #[test]
    fn test_density_in_quality_score() {
        let code = "fn main() {\n println!(\"hello\");\n}\n";
        let q = score(code, code, "rs");
        assert!(q.density > 0.0, "density should be computed");
    }

    // NOTE(review): despite its name, this test only asserts the density floor;
    // the adaptive threshold itself is never checked here.
    #[test]
    fn test_adaptive_threshold_looser_for_heavy_compression() {
        let original = "fn validate_token() {\n let result = check();\n return result;\n}\nfn other() {\n let x = 1;\n}\n";
        let compressed = "fn validate_token() { let result = check(); return result; }";
        let q = score(original, compressed, "rs");
        assert!(
            q.density >= MIN_DENSITY,
            "compressed code should maintain minimum density"
        );
    }
}