1use std::path::Path;
2
3use crate::core::compressor;
4use crate::core::entropy;
5use crate::core::quality;
6use crate::core::signatures;
7use crate::core::symbol_map::{self, SymbolMap};
8use crate::core::tokens::count_tokens;
9use crate::tools::CrpMode;
10
11pub fn handle(path: &str, crp_mode: CrpMode) -> String {
12 let content = match std::fs::read_to_string(path) {
13 Ok(c) => c,
14 Err(e) => return format!("ERROR: {e}"),
15 };
16
17 let line_count = content.lines().count();
18 let short = crate::core::protocol::shorten_path(path);
19 let ext = Path::new(path)
20 .extension()
21 .and_then(|e| e.to_str())
22 .unwrap_or("");
23
24 let raw_tokens = count_tokens(&content);
25
26 let aggressive = compressor::aggressive_compress(&content, Some(ext));
27 let aggressive_tokens = count_tokens(&aggressive);
28
29 let sigs = signatures::extract_signatures(&content, ext);
30 let sig_compact: String = sigs
31 .iter()
32 .map(super::super::core::signatures::Signature::to_compact)
33 .collect::<Vec<_>>()
34 .join("\n");
35 let sig_tokens = count_tokens(&sig_compact);
36
37 let sig_tdd: String = sigs
38 .iter()
39 .map(super::super::core::signatures::Signature::to_tdd)
40 .collect::<Vec<_>>()
41 .join("\n");
42 let sig_tdd_tokens = count_tokens(&sig_tdd);
43 let sig_tdd_ascii = crate::core::tokenizer_translation_driver::translate_text(
44 &sig_tdd,
45 crate::core::tokenizer_translation_driver::TranslationRulesetV1::Ascii,
46 );
47 let sig_tdd_ascii_tokens = count_tokens(&sig_tdd_ascii);
48
49 let entropy_result = entropy::entropy_compress(&content);
50 let entropy_tokens = entropy_result.compressed_tokens;
51
52 let cache_hit = format!("F? cached 2t {line_count}L");
53 let cache_tokens = count_tokens(&cache_hit);
54
55 let mut sym = SymbolMap::new();
56 let idents = symbol_map::extract_identifiers(&content, ext);
57 for ident in &idents {
58 sym.register(ident);
59 }
60 let tdd_full = sym.apply(&content);
61 let tdd_table = sym.format_table();
62 let tdd_full_tokens = count_tokens(&tdd_full) + count_tokens(&tdd_table);
63
64 let tdd_agg = sym.apply(&aggressive);
65 let tdd_agg_tokens = count_tokens(&tdd_agg) + count_tokens(&tdd_table);
66
67 let mut rows = Vec::new();
68 rows.push(format!("Benchmark: {short} ({line_count}L)\n"));
69
70 let q_aggressive = quality_cell(&content, &aggressive, ext);
71 let q_sig_compact = quality_cell(&content, &sig_compact, ext);
72 let q_sig_tdd = quality_cell(&content, &sig_tdd, ext);
73 let q_sig_tdd_ascii = quality_cell(&content, &sig_tdd_ascii, ext);
74 let q_entropy = quality_cell(&content, &entropy_result.output, ext);
75
76 if crp_mode.is_tdd() {
77 rows.push(format!(
78 "{:<28} {:>6} {:>8} {:>7}",
79 "Strategy", "Tokens", "Savings", "Quality"
80 ));
81 rows.push("─".repeat(57));
82 rows.push(format_row("raw", raw_tokens, raw_tokens, "—"));
83 rows.push(format_row(
84 "aggressive",
85 aggressive_tokens,
86 raw_tokens,
87 &q_aggressive,
88 ));
89 rows.push(format_row(
90 "signatures (compact)",
91 sig_tokens,
92 raw_tokens,
93 &q_sig_compact,
94 ));
95 rows.push(format_row(
96 "signatures (tdd)",
97 sig_tdd_tokens,
98 raw_tokens,
99 &q_sig_tdd,
100 ));
101 rows.push(format_row(
102 "signatures (tdd, ascii)",
103 sig_tdd_ascii_tokens,
104 raw_tokens,
105 &q_sig_tdd_ascii,
106 ));
107 rows.push(format_row(
108 "entropy",
109 entropy_tokens,
110 raw_tokens,
111 &q_entropy,
112 ));
113 rows.push(format_row(
114 "full + §MAP (tdd)",
115 tdd_full_tokens,
116 raw_tokens,
117 "—",
118 ));
119 rows.push(format_row(
120 "aggressive + §MAP (tdd)",
121 tdd_agg_tokens,
122 raw_tokens,
123 "—",
124 ));
125 rows.push(format_row("cache hit", cache_tokens, raw_tokens, "—"));
126 rows.push("─".repeat(57));
127
128 let strategies = [
129 ("aggressive", aggressive_tokens),
130 ("signatures (compact)", sig_tokens),
131 ("signatures (tdd)", sig_tdd_tokens),
132 ("signatures (tdd, ascii)", sig_tdd_ascii_tokens),
133 ("entropy", entropy_tokens),
134 ("full + §MAP", tdd_full_tokens),
135 ("aggressive + §MAP", tdd_agg_tokens),
136 ("cache hit", cache_tokens),
137 ];
138 if let Some(best) = strategies.iter().min_by_key(|(_, t)| *t) {
139 let saved = raw_tokens.saturating_sub(best.1);
140 let pct = if raw_tokens > 0 {
141 (saved as f64 / raw_tokens as f64 * 100.0).round() as usize
142 } else {
143 0
144 };
145 rows.push(format!(
146 "Best: \"{}\" saves {} tokens ({}%)",
147 best.0, saved, pct
148 ));
149 }
150
151 let tdd_extra = sig_tokens.saturating_sub(sig_tdd_tokens);
152 let tdd_pct = if sig_tokens > 0 {
153 (tdd_extra as f64 / sig_tokens as f64 * 100.0).round() as usize
154 } else {
155 0
156 };
157 rows.push(format!(
158 "TDD bonus (signatures): {tdd_extra} extra tokens saved ({tdd_pct}%)"
159 ));
160
161 let ascii_extra = sig_tokens.saturating_sub(sig_tdd_ascii_tokens);
162 let ascii_pct = if sig_tokens > 0 {
163 (ascii_extra as f64 / sig_tokens as f64 * 100.0).round() as usize
164 } else {
165 0
166 };
167 rows.push(format!(
168 "ASCII ruleset bonus (signatures): {ascii_extra} extra tokens saved ({ascii_pct}%)"
169 ));
170 } else {
171 rows.push(format!(
172 "{:<24} {:>6} {:>8} {:>7}",
173 "Strategy", "Tokens", "Savings", "Quality"
174 ));
175 rows.push("─".repeat(53));
176 rows.push(format_row("raw", raw_tokens, raw_tokens, "—"));
177 rows.push(format_row(
178 "aggressive",
179 aggressive_tokens,
180 raw_tokens,
181 &q_aggressive,
182 ));
183 rows.push(format_row(
184 "signatures (compact)",
185 sig_tokens,
186 raw_tokens,
187 &q_sig_compact,
188 ));
189 rows.push(format_row(
190 "entropy",
191 entropy_tokens,
192 raw_tokens,
193 &q_entropy,
194 ));
195 rows.push(format_row("cache hit", cache_tokens, raw_tokens, "—"));
196 rows.push("─".repeat(53));
197
198 let strategies = [
199 ("aggressive", aggressive_tokens),
200 ("signatures", sig_tokens),
201 ("entropy", entropy_tokens),
202 ("cache hit", cache_tokens),
203 ];
204 if let Some(best) = strategies.iter().min_by_key(|(_, t)| *t) {
205 let saved = raw_tokens.saturating_sub(best.1);
206 let pct = if raw_tokens > 0 {
207 (saved as f64 / raw_tokens as f64 * 100.0).round() as usize
208 } else {
209 0
210 };
211 rows.push(format!(
212 "Best: \"{}\" saves {} tokens ({}%)",
213 best.0, saved, pct
214 ));
215 }
216 }
217
218 rows.join("\n")
219}
220
/// Renders one table line for a strategy.
///
/// The name is left-aligned in 28 columns and the token count right-aligned in 6. When `tokens`
/// is strictly below `baseline` the savings are shown as `-<saved> (<pct>%)`, with the percentage
/// rounded to the nearest whole number; otherwise a `—` placeholder is printed. `quality` is
/// right-aligned in 7 columns at the end of the line.
fn format_row(name: &str, tokens: usize, baseline: usize, quality: &str) -> String {
    match baseline.checked_sub(tokens) {
        // Strictly fewer tokens than the baseline: report absolute and relative savings.
        Some(saved) if saved > 0 => {
            let pct = (saved as f64 / baseline as f64 * 100.0).round() as usize;
            format!("{name:<28} {tokens:>6} -{saved} ({pct}%) {quality:>7}")
        }
        // Equal to or above the baseline (this includes a zero baseline): nothing was saved.
        _ => format!("{name:<28} {tokens:>6} — {quality:>7}"),
    }
}
230
231fn quality_cell(original: &str, compressed: &str, ext: &str) -> String {
232 let q = quality::score(original, compressed, ext);
233 let pct = (q.composite * 100.0).round() as u32;
234 let pass = if q.passed { "✓" } else { "✗" };
235 format!("{pct:>3}%{pass}")
236}