lean_ctx/tools/
ctx_benchmark.rs1use std::path::Path;
2
3use crate::core::compressor;
4use crate::core::entropy;
5use crate::core::signatures;
6use crate::core::symbol_map::{self, SymbolMap};
7use crate::core::tokens::count_tokens;
8use crate::tools::CrpMode;
9
10pub fn handle(path: &str, crp_mode: CrpMode) -> String {
11 let content = match std::fs::read_to_string(path) {
12 Ok(c) => c,
13 Err(e) => return format!("ERROR: {e}"),
14 };
15
16 let line_count = content.lines().count();
17 let short = crate::core::protocol::shorten_path(path);
18 let ext = Path::new(path)
19 .extension()
20 .and_then(|e| e.to_str())
21 .unwrap_or("");
22
23 let raw_tokens = count_tokens(&content);
24
25 let aggressive = compressor::aggressive_compress(&content, Some(ext));
26 let aggressive_tokens = count_tokens(&aggressive);
27
28 let sigs = signatures::extract_signatures(&content, ext);
29 let sig_compact: String = sigs
30 .iter()
31 .map(|s| s.to_compact())
32 .collect::<Vec<_>>()
33 .join("\n");
34 let sig_tokens = count_tokens(&sig_compact);
35
36 let sig_tdd: String = sigs
37 .iter()
38 .map(|s| s.to_tdd())
39 .collect::<Vec<_>>()
40 .join("\n");
41 let sig_tdd_tokens = count_tokens(&sig_tdd);
42
43 let entropy_result = entropy::entropy_compress(&content);
44 let entropy_tokens = entropy_result.compressed_tokens;
45
46 let cache_hit = format!("F? cached 2t {line_count}L");
47 let cache_tokens = count_tokens(&cache_hit);
48
49 let mut sym = SymbolMap::new();
50 let idents = symbol_map::extract_identifiers(&content, ext);
51 for ident in &idents {
52 sym.register(ident);
53 }
54 let tdd_full = sym.apply(&content);
55 let tdd_table = sym.format_table();
56 let tdd_full_tokens = count_tokens(&tdd_full) + count_tokens(&tdd_table);
57
58 let tdd_agg = sym.apply(&aggressive);
59 let tdd_agg_tokens = count_tokens(&tdd_agg) + count_tokens(&tdd_table);
60
61 let mut rows = Vec::new();
62 rows.push(format!("Benchmark: {short} ({line_count}L)\n"));
63
64 if crp_mode.is_tdd() {
65 rows.push(format!(
66 "{:<28} {:>6} {:>8}",
67 "Strategy", "Tokens", "Savings"
68 ));
69 rows.push("─".repeat(46));
70 rows.push(format_row("raw", raw_tokens, raw_tokens));
71 rows.push(format_row("aggressive", aggressive_tokens, raw_tokens));
72 rows.push(format_row("signatures (compact)", sig_tokens, raw_tokens));
73 rows.push(format_row("signatures (tdd)", sig_tdd_tokens, raw_tokens));
74 rows.push(format_row("entropy", entropy_tokens, raw_tokens));
75 rows.push(format_row("full + §MAP (tdd)", tdd_full_tokens, raw_tokens));
76 rows.push(format_row(
77 "aggressive + §MAP (tdd)",
78 tdd_agg_tokens,
79 raw_tokens,
80 ));
81 rows.push(format_row("cache hit", cache_tokens, raw_tokens));
82 rows.push("─".repeat(46));
83
84 let strategies = [
85 ("aggressive", aggressive_tokens),
86 ("signatures (compact)", sig_tokens),
87 ("signatures (tdd)", sig_tdd_tokens),
88 ("entropy", entropy_tokens),
89 ("full + §MAP", tdd_full_tokens),
90 ("aggressive + §MAP", tdd_agg_tokens),
91 ("cache hit", cache_tokens),
92 ];
93 let best = strategies.iter().min_by_key(|(_, t)| *t).unwrap();
94 let saved = raw_tokens.saturating_sub(best.1);
95 let pct = if raw_tokens > 0 {
96 (saved as f64 / raw_tokens as f64 * 100.0).round() as usize
97 } else {
98 0
99 };
100 rows.push(format!(
101 "Best: \"{}\" saves {} tokens ({}%)",
102 best.0, saved, pct
103 ));
104
105 let tdd_extra = sig_tokens.saturating_sub(sig_tdd_tokens);
106 let tdd_pct = if sig_tokens > 0 {
107 (tdd_extra as f64 / sig_tokens as f64 * 100.0).round() as usize
108 } else {
109 0
110 };
111 rows.push(format!(
112 "TDD bonus (signatures): {} extra tokens saved ({}%)",
113 tdd_extra, tdd_pct
114 ));
115 } else {
116 rows.push(format!(
117 "{:<24} {:>6} {:>8}",
118 "Strategy", "Tokens", "Savings"
119 ));
120 rows.push("─".repeat(42));
121 rows.push(format_row("raw", raw_tokens, raw_tokens));
122 rows.push(format_row("aggressive", aggressive_tokens, raw_tokens));
123 rows.push(format_row("signatures (compact)", sig_tokens, raw_tokens));
124 rows.push(format_row("entropy", entropy_tokens, raw_tokens));
125 rows.push(format_row("cache hit", cache_tokens, raw_tokens));
126 rows.push("─".repeat(42));
127
128 let strategies = [
129 ("aggressive", aggressive_tokens),
130 ("signatures", sig_tokens),
131 ("entropy", entropy_tokens),
132 ("cache hit", cache_tokens),
133 ];
134 let best = strategies.iter().min_by_key(|(_, t)| *t).unwrap();
135 let saved = raw_tokens.saturating_sub(best.1);
136 let pct = if raw_tokens > 0 {
137 (saved as f64 / raw_tokens as f64 * 100.0).round() as usize
138 } else {
139 0
140 };
141 rows.push(format!(
142 "Best: \"{}\" saves {} tokens ({}%)",
143 best.0, saved, pct
144 ));
145 }
146
147 rows.join("\n")
148}
149
/// Renders a single benchmark table row.
///
/// `name` is left-aligned in a 28-column field and `tokens` is right-aligned
/// in a 6-column field. The trailing cell shows the saving relative to
/// `baseline` as `-<saved> (<pct>%)`, with the percentage rounded to the
/// nearest integer, or an em dash when `tokens` is not strictly below
/// `baseline`.
fn format_row(name: &str, tokens: usize, baseline: usize) -> String {
    // `checked_sub` is `None` when tokens exceed the baseline; a zero
    // difference is likewise "no saving". A positive difference implies
    // `baseline > 0`, so the division below is always well defined.
    let savings = match baseline.checked_sub(tokens) {
        Some(delta) if delta > 0 => {
            let percent = (delta as f64 / baseline as f64 * 100.0).round() as usize;
            format!("-{delta} ({percent}%)")
        }
        _ => String::from("—"),
    };
    format!("{name:<28} {tokens:>6} {savings}")
}
158}