Skip to main content

lean_ctx/tools/
ctx_benchmark.rs

1use std::path::Path;
2
3use crate::core::compressor;
4use crate::core::entropy;
5use crate::core::quality;
6use crate::core::signatures;
7use crate::core::symbol_map::{self, SymbolMap};
8use crate::core::tokens::count_tokens;
9use crate::tools::CrpMode;
10
11pub fn handle(path: &str, crp_mode: CrpMode) -> String {
12    let content = match std::fs::read_to_string(path) {
13        Ok(c) => c,
14        Err(e) => return format!("ERROR: {e}"),
15    };
16
17    let line_count = content.lines().count();
18    let short = crate::core::protocol::shorten_path(path);
19    let ext = Path::new(path)
20        .extension()
21        .and_then(|e| e.to_str())
22        .unwrap_or("");
23
24    let raw_tokens = count_tokens(&content);
25
26    let aggressive = compressor::aggressive_compress(&content, Some(ext));
27    let aggressive_tokens = count_tokens(&aggressive);
28
29    let sigs = signatures::extract_signatures(&content, ext);
30    let sig_compact: String = sigs
31        .iter()
32        .map(super::super::core::signatures::Signature::to_compact)
33        .collect::<Vec<_>>()
34        .join("\n");
35    let sig_tokens = count_tokens(&sig_compact);
36
37    let sig_tdd: String = sigs
38        .iter()
39        .map(super::super::core::signatures::Signature::to_tdd)
40        .collect::<Vec<_>>()
41        .join("\n");
42    let sig_tdd_tokens = count_tokens(&sig_tdd);
43    let sig_tdd_ascii = crate::core::tokenizer_translation_driver::translate_text(
44        &sig_tdd,
45        crate::core::tokenizer_translation_driver::TranslationRulesetV1::Ascii,
46    );
47    let sig_tdd_ascii_tokens = count_tokens(&sig_tdd_ascii);
48
49    let entropy_result = entropy::entropy_compress(&content);
50    let entropy_tokens = entropy_result.compressed_tokens;
51
52    let cache_hit = format!("F? cached 2t {line_count}L");
53    let cache_tokens = count_tokens(&cache_hit);
54
55    let mut sym = SymbolMap::new();
56    let idents = symbol_map::extract_identifiers(&content, ext);
57    for ident in &idents {
58        sym.register(ident);
59    }
60    let tdd_full = sym.apply(&content);
61    let tdd_table = sym.format_table();
62    let tdd_full_tokens = count_tokens(&tdd_full) + count_tokens(&tdd_table);
63
64    let tdd_agg = sym.apply(&aggressive);
65    let tdd_agg_tokens = count_tokens(&tdd_agg) + count_tokens(&tdd_table);
66
67    let mut rows = Vec::new();
68    rows.push(format!("Benchmark: {short} ({line_count}L)\n"));
69
70    let q_aggressive = quality_cell(&content, &aggressive, ext);
71    let q_sig_compact = quality_cell(&content, &sig_compact, ext);
72    let q_sig_tdd = quality_cell(&content, &sig_tdd, ext);
73    let q_sig_tdd_ascii = quality_cell(&content, &sig_tdd_ascii, ext);
74    let q_entropy = quality_cell(&content, &entropy_result.output, ext);
75
76    if crp_mode.is_tdd() {
77        rows.push(format!(
78            "{:<28} {:>6}  {:>8}  {:>7}",
79            "Strategy", "Tokens", "Savings", "Quality"
80        ));
81        rows.push("─".repeat(57));
82        rows.push(format_row("raw", raw_tokens, raw_tokens, "—"));
83        rows.push(format_row(
84            "aggressive",
85            aggressive_tokens,
86            raw_tokens,
87            &q_aggressive,
88        ));
89        rows.push(format_row(
90            "signatures (compact)",
91            sig_tokens,
92            raw_tokens,
93            &q_sig_compact,
94        ));
95        rows.push(format_row(
96            "signatures (tdd)",
97            sig_tdd_tokens,
98            raw_tokens,
99            &q_sig_tdd,
100        ));
101        rows.push(format_row(
102            "signatures (tdd, ascii)",
103            sig_tdd_ascii_tokens,
104            raw_tokens,
105            &q_sig_tdd_ascii,
106        ));
107        rows.push(format_row(
108            "entropy",
109            entropy_tokens,
110            raw_tokens,
111            &q_entropy,
112        ));
113        rows.push(format_row(
114            "full + §MAP (tdd)",
115            tdd_full_tokens,
116            raw_tokens,
117            "—",
118        ));
119        rows.push(format_row(
120            "aggressive + §MAP (tdd)",
121            tdd_agg_tokens,
122            raw_tokens,
123            "—",
124        ));
125        rows.push(format_row("cache hit", cache_tokens, raw_tokens, "—"));
126        rows.push("─".repeat(57));
127
128        let strategies = [
129            ("aggressive", aggressive_tokens),
130            ("signatures (compact)", sig_tokens),
131            ("signatures (tdd)", sig_tdd_tokens),
132            ("signatures (tdd, ascii)", sig_tdd_ascii_tokens),
133            ("entropy", entropy_tokens),
134            ("full + §MAP", tdd_full_tokens),
135            ("aggressive + §MAP", tdd_agg_tokens),
136            ("cache hit", cache_tokens),
137        ];
138        if let Some(best) = strategies.iter().min_by_key(|(_, t)| *t) {
139            let saved = raw_tokens.saturating_sub(best.1);
140            let pct = if raw_tokens > 0 {
141                (saved as f64 / raw_tokens as f64 * 100.0).round() as usize
142            } else {
143                0
144            };
145            rows.push(format!(
146                "Best: \"{}\" saves {} tokens ({}%)",
147                best.0, saved, pct
148            ));
149        }
150
151        let tdd_extra = sig_tokens.saturating_sub(sig_tdd_tokens);
152        let tdd_pct = if sig_tokens > 0 {
153            (tdd_extra as f64 / sig_tokens as f64 * 100.0).round() as usize
154        } else {
155            0
156        };
157        rows.push(format!(
158            "TDD bonus (signatures): {tdd_extra} extra tokens saved ({tdd_pct}%)"
159        ));
160
161        let ascii_extra = sig_tokens.saturating_sub(sig_tdd_ascii_tokens);
162        let ascii_pct = if sig_tokens > 0 {
163            (ascii_extra as f64 / sig_tokens as f64 * 100.0).round() as usize
164        } else {
165            0
166        };
167        rows.push(format!(
168            "ASCII ruleset bonus (signatures): {ascii_extra} extra tokens saved ({ascii_pct}%)"
169        ));
170    } else {
171        rows.push(format!(
172            "{:<24} {:>6}  {:>8}  {:>7}",
173            "Strategy", "Tokens", "Savings", "Quality"
174        ));
175        rows.push("─".repeat(53));
176        rows.push(format_row("raw", raw_tokens, raw_tokens, "—"));
177        rows.push(format_row(
178            "aggressive",
179            aggressive_tokens,
180            raw_tokens,
181            &q_aggressive,
182        ));
183        rows.push(format_row(
184            "signatures (compact)",
185            sig_tokens,
186            raw_tokens,
187            &q_sig_compact,
188        ));
189        rows.push(format_row(
190            "entropy",
191            entropy_tokens,
192            raw_tokens,
193            &q_entropy,
194        ));
195        rows.push(format_row("cache hit", cache_tokens, raw_tokens, "—"));
196        rows.push("─".repeat(53));
197
198        let strategies = [
199            ("aggressive", aggressive_tokens),
200            ("signatures", sig_tokens),
201            ("entropy", entropy_tokens),
202            ("cache hit", cache_tokens),
203        ];
204        if let Some(best) = strategies.iter().min_by_key(|(_, t)| *t) {
205            let saved = raw_tokens.saturating_sub(best.1);
206            let pct = if raw_tokens > 0 {
207                (saved as f64 / raw_tokens as f64 * 100.0).round() as usize
208            } else {
209                0
210            };
211            rows.push(format!(
212                "Best: \"{}\" saves {} tokens ({}%)",
213                best.0, saved, pct
214            ));
215        }
216    }
217
218    rows.join("\n")
219}
220
/// Formats one row of the benchmark table.
///
/// Columns: `name` left-aligned to 28, `tokens` right-aligned to 6, the savings
/// cell right-aligned to at least 8, and `quality` right-aligned to 7 — the same
/// layout as the `"{:<28} {:>6}  {:>8}  {:>7}"` header.
///
/// The savings cell is `—` when `tokens` is not below `baseline`; otherwise it is
/// `-<saved> (<pct>%)`, where `pct` is the saving rounded to a whole percent of
/// `baseline`. Cells longer than 8 characters are emitted in full.
fn format_row(name: &str, tokens: usize, baseline: usize, quality: &str) -> String {
    // `tokens >= baseline` also covers `baseline == 0`, so the division below
    // never sees a zero denominator.
    let savings = if tokens >= baseline {
        String::from("—")
    } else {
        let saved = baseline - tokens;
        let pct = (saved as f64 / baseline as f64 * 100.0).round() as usize;
        format!("-{saved} ({pct}%)")
    };
    // Pad the savings cell so the quality column lines up under its heading.
    format!("{name:<28} {tokens:>6}  {savings:>8}  {quality:>7}")
}
230
231fn quality_cell(original: &str, compressed: &str, ext: &str) -> String {
232    let q = quality::score(original, compressed, ext);
233    let pct = (q.composite * 100.0).round() as u32;
234    let pass = if q.passed { "✓" } else { "✗" };
235    format!("{pct:>3}%{pass}")
236}