abstraction_tokens/
abstraction_tokens.rs1use agentic_eval::tokens::Model;
15
16fn main() {
17 let cl = Model::OpenAiGpt4;
18 let o2 = Model::OpenAiGpt4o;
19 println!("=== Abstraction as the post-floor token lever (real BPE) ===");
20 println!(
21 "tokenizer: {}\n",
22 if cl.is_exact() { "REAL tiktoken (exact)" } else { "HEURISTIC — rerun with --features real-tokens" }
23 );
24
25 let cases: &[(&str, &str, &str)] = &[
27 (
28 "sum a list",
29 "f sum(xs)\n var t = 0\n for x in xs\n t = t + x\n t",
30 "f sum(xs)\n fold(xs, 0, +)",
31 ),
32 (
33 "word frequencies",
34 "f wc(ws)\n var m = {}\n for w in ws\n m[w] = m[w] + 1\n m",
35 "f wc(ws)\n freq(ws)",
36 ),
37 (
38 "evens, doubled",
39 "f f(xs)\n var out = []\n for x in xs\n if x % 2 == 0\n out.push(x * 2)\n out",
40 "f f(xs)\n xs | filter even | map double",
41 ),
42 (
43 "max of a list",
44 "f max(xs)\n var m = xs[0]\n for x in xs\n if x > m\n m = x\n m",
45 "f max(xs)\n reduce(xs, max)",
46 ),
47 ];
48
49 println!("{:<18} {:>9} {:>9} {:>7}", "intent", "handrolled", "vocab", "saved");
50 let (mut h_cl, mut v_cl, mut h_o, mut v_o) = (0, 0, 0, 0);
51 for (name, hand, vocab) in cases {
52 let (h, v) = (cl.count(hand), cl.count(vocab));
53 println!("{name:<18} {h:>9} {v:>9} {:>6}%", 100 - 100 * v / h);
54 h_cl += h; v_cl += v;
55 h_o += o2.count(hand); v_o += o2.count(vocab);
56 }
57 println!("\nTOTAL cl100k {h_cl} → {v_cl} ({}% saved) o200k {h_o} → {v_o} ({}% saved)",
58 100 - 100 * v_cl / h_cl, 100 - 100 * v_o / h_o);
59
60 println!("\nFINDING");
61 println!(" At the surface floor, abstraction is the only per-call token lever left, and it is");
62 println!(" POSITIVE-SUM: a single-token, total, capability-typed primitive (a) cuts payload");
63 println!(" tokens (above), (b) RAISES reliability (no hand-rolled off-by-one / empty-list bug),");
64 println!(" and (c) preserves safety (the primitive's effect rides its type to the boundary).");
65 println!(" Encoding tricks (binary, dense UTF-8) and layout were all token-neutral-or-worse —");
66 println!(" vocabulary is the one that pays. The discipline: name primitives as single BPE tokens,");
67 println!(" make them total, and choose them by the empirical frequency of SWE intents.");
68}