Skip to main content

lean_ctx/tools/
ctx_benchmark.rs

1use std::path::Path;
2
3use crate::core::compressor;
4use crate::core::entropy;
5use crate::core::signatures;
6use crate::core::symbol_map::{self, SymbolMap};
7use crate::core::tokens::count_tokens;
8use crate::tools::CrpMode;
9
10pub fn handle(path: &str, crp_mode: CrpMode) -> String {
11    let content = match std::fs::read_to_string(path) {
12        Ok(c) => c,
13        Err(e) => return format!("ERROR: {e}"),
14    };
15
16    let line_count = content.lines().count();
17    let short = crate::core::protocol::shorten_path(path);
18    let ext = Path::new(path)
19        .extension()
20        .and_then(|e| e.to_str())
21        .unwrap_or("");
22
23    let raw_tokens = count_tokens(&content);
24
25    let aggressive = compressor::aggressive_compress(&content, Some(ext));
26    let aggressive_tokens = count_tokens(&aggressive);
27
28    let sigs = signatures::extract_signatures(&content, ext);
29    let sig_compact: String = sigs
30        .iter()
31        .map(|s| s.to_compact())
32        .collect::<Vec<_>>()
33        .join("\n");
34    let sig_tokens = count_tokens(&sig_compact);
35
36    let sig_tdd: String = sigs
37        .iter()
38        .map(|s| s.to_tdd())
39        .collect::<Vec<_>>()
40        .join("\n");
41    let sig_tdd_tokens = count_tokens(&sig_tdd);
42
43    let entropy_result = entropy::entropy_compress(&content);
44    let entropy_tokens = entropy_result.compressed_tokens;
45
46    let cache_hit = format!("F? cached 2t {line_count}L");
47    let cache_tokens = count_tokens(&cache_hit);
48
49    let mut sym = SymbolMap::new();
50    let idents = symbol_map::extract_identifiers(&content, ext);
51    for ident in &idents {
52        sym.register(ident);
53    }
54    let tdd_full = sym.apply(&content);
55    let tdd_table = sym.format_table();
56    let tdd_full_tokens = count_tokens(&tdd_full) + count_tokens(&tdd_table);
57
58    let tdd_agg = sym.apply(&aggressive);
59    let tdd_agg_tokens = count_tokens(&tdd_agg) + count_tokens(&tdd_table);
60
61    let mut rows = Vec::new();
62    rows.push(format!("Benchmark: {short} ({line_count}L)\n"));
63
64    if crp_mode.is_tdd() {
65        rows.push(format!(
66            "{:<28} {:>6}  {:>8}",
67            "Strategy", "Tokens", "Savings"
68        ));
69        rows.push("─".repeat(46));
70        rows.push(format_row("raw", raw_tokens, raw_tokens));
71        rows.push(format_row("aggressive", aggressive_tokens, raw_tokens));
72        rows.push(format_row("signatures (compact)", sig_tokens, raw_tokens));
73        rows.push(format_row("signatures (tdd)", sig_tdd_tokens, raw_tokens));
74        rows.push(format_row("entropy", entropy_tokens, raw_tokens));
75        rows.push(format_row("full + §MAP (tdd)", tdd_full_tokens, raw_tokens));
76        rows.push(format_row(
77            "aggressive + §MAP (tdd)",
78            tdd_agg_tokens,
79            raw_tokens,
80        ));
81        rows.push(format_row("cache hit", cache_tokens, raw_tokens));
82        rows.push("─".repeat(46));
83
84        let strategies = [
85            ("aggressive", aggressive_tokens),
86            ("signatures (compact)", sig_tokens),
87            ("signatures (tdd)", sig_tdd_tokens),
88            ("entropy", entropy_tokens),
89            ("full + §MAP", tdd_full_tokens),
90            ("aggressive + §MAP", tdd_agg_tokens),
91            ("cache hit", cache_tokens),
92        ];
93        if let Some(best) = strategies.iter().min_by_key(|(_, t)| *t) {
94            let saved = raw_tokens.saturating_sub(best.1);
95            let pct = if raw_tokens > 0 {
96                (saved as f64 / raw_tokens as f64 * 100.0).round() as usize
97            } else {
98                0
99            };
100            rows.push(format!(
101                "Best: \"{}\" saves {} tokens ({}%)",
102                best.0, saved, pct
103            ));
104        }
105
106        let tdd_extra = sig_tokens.saturating_sub(sig_tdd_tokens);
107        let tdd_pct = if sig_tokens > 0 {
108            (tdd_extra as f64 / sig_tokens as f64 * 100.0).round() as usize
109        } else {
110            0
111        };
112        rows.push(format!(
113            "TDD bonus (signatures): {} extra tokens saved ({}%)",
114            tdd_extra, tdd_pct
115        ));
116    } else {
117        rows.push(format!(
118            "{:<24} {:>6}  {:>8}",
119            "Strategy", "Tokens", "Savings"
120        ));
121        rows.push("─".repeat(42));
122        rows.push(format_row("raw", raw_tokens, raw_tokens));
123        rows.push(format_row("aggressive", aggressive_tokens, raw_tokens));
124        rows.push(format_row("signatures (compact)", sig_tokens, raw_tokens));
125        rows.push(format_row("entropy", entropy_tokens, raw_tokens));
126        rows.push(format_row("cache hit", cache_tokens, raw_tokens));
127        rows.push("─".repeat(42));
128
129        let strategies = [
130            ("aggressive", aggressive_tokens),
131            ("signatures", sig_tokens),
132            ("entropy", entropy_tokens),
133            ("cache hit", cache_tokens),
134        ];
135        if let Some(best) = strategies.iter().min_by_key(|(_, t)| *t) {
136            let saved = raw_tokens.saturating_sub(best.1);
137            let pct = if raw_tokens > 0 {
138                (saved as f64 / raw_tokens as f64 * 100.0).round() as usize
139            } else {
140                0
141            };
142            rows.push(format!(
143                "Best: \"{}\" saves {} tokens ({}%)",
144                best.0, saved, pct
145            ));
146        }
147    }
148
149    rows.join("\n")
150}
151
/// Renders one table row: the strategy `name` left-aligned in 28 columns,
/// `tokens` right-aligned in 6 columns, then the savings against `baseline`.
///
/// The savings cell is `-<saved> (<pct>%)` when `tokens` is strictly below
/// `baseline`, and an em dash otherwise.
fn format_row(name: &str, tokens: usize, baseline: usize) -> String {
    match baseline.checked_sub(tokens) {
        // Strictly fewer tokens than the baseline: report the reduction.
        Some(delta) if delta > 0 => {
            let percent = (delta as f64 / baseline as f64 * 100.0).round() as usize;
            format!("{name:<28} {tokens:>6}  -{delta} ({percent}%)")
        }
        // Equal to or larger than the baseline: nothing was saved.
        _ => format!("{name:<28} {tokens:>6}  —"),
    }
}