Skip to main content

abstraction_tokens/
abstraction_tokens.rs

1//! Second-pass lever: once the *surface* is at the payload floor (inference,
2//! `;`-removal — landed, MechGen #1 on swe_token_benchmark), the only remaining
3//! per-call token lever is RAISING THE ABSTRACTION — a high-level primitive
4//! expresses an SWE intent in fewer *payload* tokens than hand-rolling it.
5//!
6//! Measured with the real cl100k/o200k BPE: hand-rolled (compiles today) vs the
7//! same intent via a standard-vocabulary primitive. The combinators (map/filter/
8//! fold/reduce/sum/freq/sort/…) are now REGISTERED in MechGen (resolve + type,
9//! `--check`ed) and audited single-token (`vocabulary_audit`); precise totality
10//! typing is a staged backend follow-on. The point is the token delta they buy.
11//!
12//!   cargo run -p agentic-eval --example abstraction_tokens --features real-tokens
13
14use agentic_eval::tokens::Model;
15
16fn main() {
17    let cl = Model::OpenAiGpt4;
18    let o2 = Model::OpenAiGpt4o;
19    println!("=== Abstraction as the post-floor token lever (real BPE) ===");
20    println!(
21        "tokenizer: {}\n",
22        if cl.is_exact() { "REAL tiktoken (exact)" } else { "HEURISTIC — rerun with --features real-tokens" }
23    );
24
25    // (intent, hand-rolled [compiles today], with-vocabulary [proposed primitive])
26    let cases: &[(&str, &str, &str)] = &[
27        (
28            "sum a list",
29            "f sum(xs)\n  var t = 0\n  for x in xs\n    t = t + x\n  t",
30            "f sum(xs)\n  fold(xs, 0, +)",
31        ),
32        (
33            "word frequencies",
34            "f wc(ws)\n  var m = {}\n  for w in ws\n    m[w] = m[w] + 1\n  m",
35            "f wc(ws)\n  freq(ws)",
36        ),
37        (
38            "evens, doubled",
39            "f f(xs)\n  var out = []\n  for x in xs\n    if x % 2 == 0\n      out.push(x * 2)\n  out",
40            "f f(xs)\n  xs | filter even | map double",
41        ),
42        (
43            "max of a list",
44            "f max(xs)\n  var m = xs[0]\n  for x in xs\n    if x > m\n      m = x\n  m",
45            "f max(xs)\n  reduce(xs, max)",
46        ),
47    ];
48
49    println!("{:<18} {:>9} {:>9} {:>7}", "intent", "handrolled", "vocab", "saved");
50    let (mut h_cl, mut v_cl, mut h_o, mut v_o) = (0, 0, 0, 0);
51    for (name, hand, vocab) in cases {
52        let (h, v) = (cl.count(hand), cl.count(vocab));
53        println!("{name:<18} {h:>9} {v:>9} {:>6}%", 100 - 100 * v / h);
54        h_cl += h; v_cl += v;
55        h_o += o2.count(hand); v_o += o2.count(vocab);
56    }
57    println!("\nTOTAL  cl100k {h_cl} → {v_cl} ({}% saved)   o200k {h_o} → {v_o} ({}% saved)",
58        100 - 100 * v_cl / h_cl, 100 - 100 * v_o / h_o);
59
60    println!("\nFINDING");
61    println!("  At the surface floor, abstraction is the only per-call token lever left, and it is");
62    println!("  POSITIVE-SUM: a single-token, total, capability-typed primitive (a) cuts payload");
63    println!("  tokens (above), (b) RAISES reliability (no hand-rolled off-by-one / empty-list bug),");
64    println!("  and (c) preserves safety (the primitive's effect rides its type to the boundary).");
65    println!("  Encoding tricks (binary, dense UTF-8) and layout were all token-neutral-or-worse —");
66    println!("  vocabulary is the one that pays. The discipline: name primitives as single BPE tokens,");
67    println!("  make them total, and choose them by the empirical frequency of SWE intents.");
68}