Skip to main content

Model

agentic_eval::tokens

Enum Model

pub enum Model {
    OpenAiGpt4,
    OpenAiGpt4o,
    AnthropicClaude,
    Heuristic,
}

Expand description

A popular agentic AI system, identified by its tokenizer family.

Variants§

OpenAiGpt4

OpenAI GPT-4 / GPT-3.5-turbo family — cl100k_base BPE.

OpenAiGpt4o

OpenAI GPT-4o / o-series family — o200k_base BPE.

AnthropicClaude

Anthropic Claude. Approximation: Anthropic publishes no offline tokenizer crate, so this uses the shared heuristic_tokens estimate (the same as Model::Heuristic) and must be read as an estimate, not an exact count. Model::is_exact returns false for it.

Heuristic

A tokenizer-agnostic labeled heuristic (no model-specific BPE).

Implementations§

impl Model

pub fn name(self) -> &'static str

A human-readable label for the model/tokenizer (e.g. for report output).

Examples found in repository ?

examples/evaluate.rs (line 37)

12fn main() {
13    println!("agentic-eval — four-axis evaluation of programs for agentic AI\n");
14
15    // Two encodings of "read a file and keep the large entries".
16    let legible = Program::new(
17        "legible",
18        r#"ls("./src") | where(fn(f) => f.size > 1000) | map(fn(f) => f.name)"#,
19    )
20    .with_standing_context("ls/where/map are standard, high-probability names")
21    .with_output("name\nfoo.rs\nbar.rs");
22    let cipher = Program::new("cipher", r#"l./src|w~.size>1k|m~.name"#)
23        .with_standing_context("single-letter+sigil cheatsheet line; ".repeat(120))
24        .with_output("name\nfoo.rs\nbar.rs")
25        .with_retries(8); // terse cipher is mis-emitted more often
26
27    // ── 1. Token efficiency ──────────────────────────────────────────────
28    println!("[1] Token efficiency (amortized over 30 turns):");
29    for model in [
30        Model::OpenAiGpt4,
31        Model::OpenAiGpt4o,
32        Model::AnthropicClaude,
33    ] {
34        let cmp = compare(&legible, &cipher, model, 30);
35        println!(
36            "  {:<28} legible={:>6}  cipher={:>6}  → {} wins ({:.2}x){}",
37            model.name(),
38            cmp.a_total,
39            cmp.b_total,
40            if cmp.winner_is_a { "legible" } else { "cipher" },
41            cmp.ratio,
42            if model.is_exact() { "" } else { " [est]" },
43        );
44    }
45
46    // ── 2. Determinism ───────────────────────────────────────────────────
47    // A canonical renderer (byte-stable) vs. one that embeds a timestamp.
48    let canonical = assess_determinism(5, || "name\nfoo.rs\nbar.rs".to_string());
49    let mut t = 0u64;
50    let noisy = assess_determinism(5, || {
51        t += 1;
52        format!("name\nfoo.rs\nbar.rs  # at {t}")
53    });
54    println!("\n[2] Determinism:");
55    println!(
56        "  canonical output : deterministic={} ({} distinct / {} runs)",
57        canonical.deterministic, canonical.distinct, canonical.runs
58    );
59    println!(
60        "  timestamped output: deterministic={} ({} distinct / {} runs)",
61        noisy.deterministic, noisy.distinct, noisy.runs
62    );
63
64    // ── 3. Reliability ───────────────────────────────────────────────────
65    // The legible form parses on all 6 sample invocations; the cipher mis-parses
66    // twice but at least returns a structured error once.
67    let samples = [0, 1, 2, 3, 4, 5];
68    let legible_rel = assess_reliability(&samples, |_| Outcome::ok());
69    let cipher_rel = assess_reliability(&samples, |&i| match i {
70        4 => Outcome::structured_failure(),
71        5 => Outcome::opaque_failure(),
72        _ => Outcome::ok(),
73    });
74    println!("\n[3] Reliability:");
75    println!(
76        "  legible: pass {:.0}%  actionable {:.0}%",
77        legible_rel.pass_rate * 100.0,
78        legible_rel.actionable_rate * 100.0
79    );
80    println!(
81        "  cipher : pass {:.0}%  actionable {:.0}%",
82        cipher_rel.pass_rate * 100.0,
83        cipher_rel.actionable_rate * 100.0
84    );
85
86    // ── 4. Safety ────────────────────────────────────────────────────────
87    // The task reads + lists (ReadLocal); a "rm the small files" variant adds a
88    // Destructive effect. Score the gating under the agent policy.
89    let read_only = assess_safety(&[Effect::ReadLocal, Effect::WriteLocal], Mode::Agent);
90    let destructive = assess_safety(
91        &[Effect::ReadLocal, Effect::Destructive, Effect::Exec],
92        Mode::Agent,
93    );
94    println!("\n[4] Safety (agent policy):");
95    println!(
96        "  read+write task : grade {} (bounded={}, {} approval-gated)",
97        read_only.grade, read_only.bounded, read_only.approval_gated
98    );
99    println!(
100        "  rm+exec task    : grade {} (bounded={}, {} approval-gated, {} denied)",
101        destructive.grade, destructive.bounded, destructive.approval_gated, destructive.denied
102    );
103
104    println!("\nSummary: the legible form is competitive-or-cheaper on tokens once standing");
105    println!("context counts, more deterministic and reliable to parse, and the agent policy");
106    println!("bounds the blast radius of even the destructive variant.");
107}

pub fn all() -> [Model; 4]

Every model this build can count tokens for (exact or approximate).

pub fn from_name(name: &str) -> Option<Model>

Parse a model from a short identifier (case-insensitive), for CLI/config use. Accepts common aliases: gpt4/gpt-4/cl100k; gpt4o/gpt-4o/o200k; claude/anthropic; heuristic/heur. Returns None otherwise.

pub fn is_exact(self) -> bool

Whether this model’s count is exact (a real BPE) in this build, vs. an estimate. OpenAI families are exact only with --features real-tokens.

Examples found in repository ?

examples/tokens_of.rs (line 16)

11fn main() {
12    let cl = Model::OpenAiGpt4;
13    let o2 = Model::OpenAiGpt4o;
14    println!(
15        "tokenizer exact: cl100k={} o200k={}",
16        cl.is_exact(),
17        o2.is_exact()
18    );
19    println!("{:>7} {:>7}   file", "cl100k", "o200k");
20    for path in std::env::args().skip(1) {
21        match fs::read_to_string(&path) {
22            Ok(s) => println!("{:>7} {:>7}   {}", cl.count(&s), o2.count(&s), path),
23            Err(e) => println!("    ERR     ERR   {path}: {e}"),
24        }
25    }
26}

More examples

Hide additional examples

examples/vocabulary_audit.rs (line 24)

18fn main() {
19    let cl = Model::OpenAiGpt4;
20    let o2 = Model::OpenAiGpt4o;
21    println!("=== Standard-vocabulary tokenizer audit (§8b) ===");
22    println!(
23        "tokenizer: {}   names: {}\n",
24        if cl.is_exact() { "REAL tiktoken (exact)" } else { "HEURISTIC — rerun with --features real-tokens" },
25        VOCAB.len()
26    );
27
28    // Agents emit a name with a leading space; BPE is space-aware.
29    let mut single = 0usize;
30    let mut offenders: Vec<(&str, usize, usize)> = Vec::new();
31    for &name in VOCAB {
32        let ctx = format!(" {name}");
33        let (c, o) = (cl.count(&ctx), o2.count(&ctx));
34        if c <= 1 && o <= 1 {
35            single += 1;
36        } else {
37            offenders.push((name, c, o));
38        }
39    }
40
41    println!("SINGLE BPE TOKEN (both tokenizers): {single}/{}", VOCAB.len());
42    if offenders.is_empty() {
43        println!("  ✓ every vocabulary name is a single token — the §8b discipline holds.");
44    } else {
45        println!("\nOFFENDERS (rename or drop — a multi-token name negates the saving):");
46        for (n, c, o) in &offenders {
47            println!("  {n:<12} cl100k {c}  o200k {o}");
48        }
49    }
50
51    println!("\nWHY IT MATTERS");
52    println!("  The vocabulary's win is naming an intent in ~1 token. A 2-token name (e.g.");
53    println!("  `frequencies` = 'frequ'+'encies') halves that. Picking `freq`/`map`/`fold` over");
54    println!("  `frequencies`/`transform`/`accumulate` is tokenizer co-design, audited here.");
55}

examples/keyword_audit.rs (line 31)

25fn main() {
26    let cl = Model::OpenAiGpt4;
27    let o2 = Model::OpenAiGpt4o;
28    println!("=== MechGen keyword tokenizer audit (migration step 4) ===");
29    println!(
30        "tokenizer: {}   keywords: {}\n",
31        if cl.is_exact() { "REAL tiktoken (exact)" } else { "HEURISTIC — rerun with --features real-tokens" },
32        KEYWORDS.len()
33    );
34
35    // A keyword usually appears with a leading space in code; BPE is space-aware,
36    // so " return" can differ from "return". Audit the in-context form (leading
37    // space) — that is what an agent actually emits.
38    let mut offenders: Vec<(&str, usize, usize)> = Vec::new();
39    let mut single = 0usize;
40    for &kw in KEYWORDS {
41        let ctx = format!(" {kw}");
42        let c = cl.count(&ctx);
43        let o = o2.count(&ctx);
44        if c <= 1 && o <= 1 {
45            single += 1;
46        } else {
47            offenders.push((kw, c, o));
48        }
49    }
50
51    println!("SINGLE BPE TOKEN (both tokenizers): {single}/{}", KEYWORDS.len());
52    println!("\nOFFENDERS (>1 token in cl100k or o200k):");
53    if offenders.is_empty() {
54        println!("  (none)");
55    } else {
56        offenders.sort_by(|a, b| b.1.cmp(&a.1));
57        for (kw, c, o) in &offenders {
58            println!("  {kw:<20} cl100k {c}  o200k {o}");
59        }
60    }
61
62    println!("\nVERDICT");
63    println!(
64        "  {}/{} keywords are already single-token (the agent-mode single/double-char forms",
65        single, KEYWORDS.len()
66    );
67    println!("  f/m/v/u/… and common words if/for/match/… cost exactly one token).");
68    if !offenders.is_empty() {
69        println!("  The {} offenders are compound/rare words (snake_case splits on `_`); each should", offenders.len());
70        println!("  get a single-token agent-mode alias. They are specialized (swarm combinators,");
71        println!("  grammar extension) — rare in practice, so the realized token cost is small, but");
72        println!("  the surface is not yet uniformly single-token. This is the concrete step-4 work-list.");
73    }
74}

examples/abstraction_tokens.rs (line 22)

16fn main() {
17    let cl = Model::OpenAiGpt4;
18    let o2 = Model::OpenAiGpt4o;
19    println!("=== Abstraction as the post-floor token lever (real BPE) ===");
20    println!(
21        "tokenizer: {}\n",
22        if cl.is_exact() { "REAL tiktoken (exact)" } else { "HEURISTIC — rerun with --features real-tokens" }
23    );
24
25    // (intent, hand-rolled [compiles today], with-vocabulary [proposed primitive])
26    let cases: &[(&str, &str, &str)] = &[
27        (
28            "sum a list",
29            "f sum(xs)\n  var t = 0\n  for x in xs\n    t = t + x\n  t",
30            "f sum(xs)\n  fold(xs, 0, +)",
31        ),
32        (
33            "word frequencies",
34            "f wc(ws)\n  var m = {}\n  for w in ws\n    m[w] = m[w] + 1\n  m",
35            "f wc(ws)\n  freq(ws)",
36        ),
37        (
38            "evens, doubled",
39            "f f(xs)\n  var out = []\n  for x in xs\n    if x % 2 == 0\n      out.push(x * 2)\n  out",
40            "f f(xs)\n  xs | filter even | map double",
41        ),
42        (
43            "max of a list",
44            "f max(xs)\n  var m = xs[0]\n  for x in xs\n    if x > m\n      m = x\n  m",
45            "f max(xs)\n  reduce(xs, max)",
46        ),
47    ];
48
49    println!("{:<18} {:>9} {:>9} {:>7}", "intent", "handrolled", "vocab", "saved");
50    let (mut h_cl, mut v_cl, mut h_o, mut v_o) = (0, 0, 0, 0);
51    for (name, hand, vocab) in cases {
52        let (h, v) = (cl.count(hand), cl.count(vocab));
53        println!("{name:<18} {h:>9} {v:>9} {:>6}%", 100 - 100 * v / h);
54        h_cl += h; v_cl += v;
55        h_o += o2.count(hand); v_o += o2.count(vocab);
56    }
57    println!("\nTOTAL  cl100k {h_cl} → {v_cl} ({}% saved)   o200k {h_o} → {v_o} ({}% saved)",
58        100 - 100 * v_cl / h_cl, 100 - 100 * v_o / h_o);
59
60    println!("\nFINDING");
61    println!("  At the surface floor, abstraction is the only per-call token lever left, and it is");
62    println!("  POSITIVE-SUM: a single-token, total, capability-typed primitive (a) cuts payload");
63    println!("  tokens (above), (b) RAISES reliability (no hand-rolled off-by-one / empty-list bug),");
64    println!("  and (c) preserves safety (the primitive's effect rides its type to the boundary).");
65    println!("  Encoding tricks (binary, dense UTF-8) and layout were all token-neutral-or-worse —");
66    println!("  vocabulary is the one that pays. The discipline: name primitives as single BPE tokens,");
67    println!("  make them total, and choose them by the empirical frequency of SWE intents.");
68}

examples/rain_tokens.rs (line 88)

87fn main() {
88    let exact = Model::OpenAiGpt4.is_exact();
89    println!("=== MechGen 'digital rain' vs token streams (cl100k + o200k BPE) ===");
90    println!(
91        "tokenizer: {}\n",
92        if exact { "REAL tiktoken (exact)" } else { "HEURISTIC (CJK undercounted — rerun with --features real-tokens)" }
93    );
94
95    let samples: &[(&str, &str)] = &[
96        ("net", "net MLP { layer fc1: Linear(8, 16); layer act: ReLU; layer fc2: Linear(16, 4); forward { fc1 } }"),
97        ("fn", "fn factorial(n: u64) -> u64 { if n <= 1 { return 1; } n * factorial(n - 1) }"),
98        ("kb", "kb Family { fact parent(alice, bob); fact parent(bob, carol); rule gp(x: i32, z: i32) where parent(x, y), parent(y, z) { x } }"),
99    ];
100
101    println!("{:<5} {:>10} {:>22} {:>22} {:>14}", "kind", "form", "cl100k tok", "o200k tok", "chars");
102    let cl = Model::OpenAiGpt4;
103    let o2 = Model::OpenAiGpt4o;
104    let (mut s_cl, mut s_o, mut r_cl, mut r_o, mut rl_cl, mut rl_o) = (0, 0, 0, 0, 0, 0);
105    for (name, src) in samples {
106        let (stream, legend) = rain(src);
107        let rl = format!("{legend}\n{stream}"); // rain + legend (one-off, reversible)
108        let bytes = src.as_bytes();
109        let base = b64(bytes);
110        let row = |label: &str, s: &str| {
111            println!("{:<5} {:>10} {:>22} {:>22} {:>14}", "", label, cl.count(s), o2.count(s), chars(s));
112        };
113        println!("[{name}]");
114        row("source", src);
115        row("rain(stream)", &stream);
116        row("rain+legend", &rl);
117        row("base64(bytes)", &base);
118        s_cl += cl.count(src); s_o += o2.count(src);
119        r_cl += cl.count(&stream); r_o += o2.count(&stream);
120        rl_cl += cl.count(&rl); rl_o += o2.count(&rl);
121    }
122
123    println!("\nTOTALS (3 samples) — token ratio vs source (>1.0 = rain is WORSE):");
124    println!("  source            cl100k {s_cl:>4}   o200k {s_o:>4}");
125    println!("  rain(stream)      cl100k {r_cl:>4} ({:.2}x)   o200k {r_o:>4} ({:.2}x)", r_cl as f64 / s_cl as f64, r_o as f64 / s_o as f64);
126    println!("  rain+legend       cl100k {rl_cl:>4} ({:.2}x)   o200k {rl_o:>4} ({:.2}x)", rl_cl as f64 / s_cl as f64, rl_o as f64 / s_o as f64);
127
128    println!("\nVERDICT");
129    println!("  Digital rain shrinks CHARACTERS (~3x — the Matrix look) but the dense glyph");
130    println!("  stream costs MORE BPE tokens than the ASCII source (ratios above), because the");
131    println!("  tokenizer splits each rare multi-byte glyph into several tokens. Adding the legend");
132    println!("  (needed for reversibility) makes a one-off snippet far worse still. Even an");
133    println!("  amortized shared codebook (stream only) does not beat source on tokens.");
134    println!("  This is the project's token-floor finding, re-confirmed on the dense-symbol idea:");
135    println!("  an LLM emits TOKENS, not glyphs/bytes — the information (names/ops/dims) is the floor.");
136    if !exact {
137        println!("\n  (heuristic run: it counts ~1 token/char and UNDER-counts CJK — the real cl100k/o200k");
138        println!("   gap is larger. Rerun with --features real-tokens for the exact, even-worse numbers.)");
139    }
140}

examples/design_tokens.rs (line 24)

18fn main() {
19    let cl = Model::OpenAiGpt4;
20    let o2 = Model::OpenAiGpt4o;
21    println!("=== Token-efficiency design levers (real cl100k + o200k BPE) ===");
22    println!(
23        "tokenizer: {}\n",
24        if cl.is_exact() { "REAL tiktoken (exact)" } else { "HEURISTIC — rerun with --features real-tokens" }
25    );
26
27    // Each task: (name, ceremony-heavy, current-ish, ab-initio).
28    let tasks: &[(&str, &str, &str, &str)] = &[
29        (
30            "word-count",
31            // A — ceremony-heavy
32            "use std::collections::HashMap;\n\nfn count_words(text: &str) -> HashMap<String, u32> {\n    let mut counts: HashMap<String, u32> = HashMap::new();\n    for word in text.split_whitespace() {\n        *counts.entry(word.to_string()).or_insert(0) += 1;\n    }\n    counts\n}",
33            // B — current-MechGen-ish (sigils, var, some inference)
34            "fn count_words(text: &str) -> {s: u32} {\n    var counts = {s: u32}.new()\n    for word in text.split() {\n        counts.entry(word).or(0) += 1\n    }\n    counts\n}",
35            // C — ab-initio (inference + layout + ambient builtins)
36            "count_words text =\n  counts = {}\n  for w in split text\n    counts[w] += 1\n  counts",
37        ),
38        (
39            "factorial",
40            "fn factorial(n: u64) -> u64 {\n    if n <= 1 {\n        return 1;\n    }\n    n * factorial(n - 1)\n}",
41            "fn factorial(n: u64) -> u64 {\n    if n <= 1 { 1 } else { n * factorial(n - 1) }\n}",
42            "fact n =\n  if n <= 1: 1\n  else: n * fact (n - 1)",
43        ),
44        (
45            "safe-divide", // returns optional/result — safety ceremony vs sigil
46            "fn safe_div(a: i32, b: i32) -> Option<i32> {\n    if b == 0 {\n        return None;\n    }\n    Some(a / b)\n}",
47            "fn safe_div(a: i32, b: i32) -> ?i32 {\n    if b == 0 { none } else { a / b }\n}",
48            "div a b =\n  if b == 0: none\n  else: a / b",
49        ),
50    ];
51
52    println!("{:<13} {:>4} {:>9} {:>8} {:>7}", "task", "form", "cl100k", "o200k", "chars");
53    let (mut a_cl, mut b_cl, mut c_cl) = (0, 0, 0);
54    let (mut a_o, mut b_o, mut c_o) = (0, 0, 0);
55    for (name, a, b, c) in tasks {
56        let row = |label: &str, s: &str| {
57            println!("{:<13} {:>4} {:>9} {:>8} {:>7}", "", label, cl.count(s), o2.count(s), s.chars().count());
58        };
59        println!("[{name}]");
60        row("A heavy", a);
61        row("B curr", b);
62        row("C abinit", c);
63        a_cl += cl.count(a); b_cl += cl.count(b); c_cl += cl.count(c);
64        a_o += o2.count(a); b_o += o2.count(b); c_o += o2.count(c);
65    }
66
67    println!("\nTOTALS (3 tasks):");
68    println!("  A ceremony-heavy   cl100k {a_cl:>3}   o200k {a_o:>3}   (baseline)");
69    println!("  B current-ish      cl100k {b_cl:>3} ({:.0}%)   o200k {b_o:>3} ({:.0}%)", 100.0 * b_cl as f64 / a_cl as f64, 100.0 * b_o as f64 / a_o as f64);
70    println!("  C ab-initio        cl100k {c_cl:>3} ({:.0}%)   o200k {c_o:>3} ({:.0}%)", 100.0 * c_cl as f64 / a_cl as f64, 100.0 * c_o as f64 / a_o as f64);
71    println!("\n  → ab-initio cuts ~{:.0}% of cl100k tokens vs ceremony-heavy by REMOVING ceremony",
72        100.0 * (1.0 - c_cl as f64 / a_cl as f64));
73    println!("    (types/mutability/return/imports inferred; layout replaces braces+`;`; terse safety");
74    println!("    sigils; ambient builtins). The remaining tokens are the irreducible payload —");
75    println!("    names/ops/literals — which no design can remove. That residue IS the token floor.");
76}

Additional examples can be found in:

pub fn count(self, text: &str) -> usize

Count the tokens in text under this model.

Examples found in repository ?

examples/tokens_of.rs (line 22)

11fn main() {
12    let cl = Model::OpenAiGpt4;
13    let o2 = Model::OpenAiGpt4o;
14    println!(
15        "tokenizer exact: cl100k={} o200k={}",
16        cl.is_exact(),
17        o2.is_exact()
18    );
19    println!("{:>7} {:>7}   file", "cl100k", "o200k");
20    for path in std::env::args().skip(1) {
21        match fs::read_to_string(&path) {
22            Ok(s) => println!("{:>7} {:>7}   {}", cl.count(&s), o2.count(&s), path),
23            Err(e) => println!("    ERR     ERR   {path}: {e}"),
24        }
25    }
26}

More examples

Hide additional examples

examples/vocabulary_audit.rs (line 33)

18fn main() {
19    let cl = Model::OpenAiGpt4;
20    let o2 = Model::OpenAiGpt4o;
21    println!("=== Standard-vocabulary tokenizer audit (§8b) ===");
22    println!(
23        "tokenizer: {}   names: {}\n",
24        if cl.is_exact() { "REAL tiktoken (exact)" } else { "HEURISTIC — rerun with --features real-tokens" },
25        VOCAB.len()
26    );
27
28    // Agents emit a name with a leading space; BPE is space-aware.
29    let mut single = 0usize;
30    let mut offenders: Vec<(&str, usize, usize)> = Vec::new();
31    for &name in VOCAB {
32        let ctx = format!(" {name}");
33        let (c, o) = (cl.count(&ctx), o2.count(&ctx));
34        if c <= 1 && o <= 1 {
35            single += 1;
36        } else {
37            offenders.push((name, c, o));
38        }
39    }
40
41    println!("SINGLE BPE TOKEN (both tokenizers): {single}/{}", VOCAB.len());
42    if offenders.is_empty() {
43        println!("  ✓ every vocabulary name is a single token — the §8b discipline holds.");
44    } else {
45        println!("\nOFFENDERS (rename or drop — a multi-token name negates the saving):");
46        for (n, c, o) in &offenders {
47            println!("  {n:<12} cl100k {c}  o200k {o}");
48        }
49    }
50
51    println!("\nWHY IT MATTERS");
52    println!("  The vocabulary's win is naming an intent in ~1 token. A 2-token name (e.g.");
53    println!("  `frequencies` = 'frequ'+'encies') halves that. Picking `freq`/`map`/`fold` over");
54    println!("  `frequencies`/`transform`/`accumulate` is tokenizer co-design, audited here.");
55}

examples/keyword_audit.rs (line 42)

25fn main() {
26    let cl = Model::OpenAiGpt4;
27    let o2 = Model::OpenAiGpt4o;
28    println!("=== MechGen keyword tokenizer audit (migration step 4) ===");
29    println!(
30        "tokenizer: {}   keywords: {}\n",
31        if cl.is_exact() { "REAL tiktoken (exact)" } else { "HEURISTIC — rerun with --features real-tokens" },
32        KEYWORDS.len()
33    );
34
35    // A keyword usually appears with a leading space in code; BPE is space-aware,
36    // so " return" can differ from "return". Audit the in-context form (leading
37    // space) — that is what an agent actually emits.
38    let mut offenders: Vec<(&str, usize, usize)> = Vec::new();
39    let mut single = 0usize;
40    for &kw in KEYWORDS {
41        let ctx = format!(" {kw}");
42        let c = cl.count(&ctx);
43        let o = o2.count(&ctx);
44        if c <= 1 && o <= 1 {
45            single += 1;
46        } else {
47            offenders.push((kw, c, o));
48        }
49    }
50
51    println!("SINGLE BPE TOKEN (both tokenizers): {single}/{}", KEYWORDS.len());
52    println!("\nOFFENDERS (>1 token in cl100k or o200k):");
53    if offenders.is_empty() {
54        println!("  (none)");
55    } else {
56        offenders.sort_by(|a, b| b.1.cmp(&a.1));
57        for (kw, c, o) in &offenders {
58            println!("  {kw:<20} cl100k {c}  o200k {o}");
59        }
60    }
61
62    println!("\nVERDICT");
63    println!(
64        "  {}/{} keywords are already single-token (the agent-mode single/double-char forms",
65        single, KEYWORDS.len()
66    );
67    println!("  f/m/v/u/… and common words if/for/match/… cost exactly one token).");
68    if !offenders.is_empty() {
69        println!("  The {} offenders are compound/rare words (snake_case splits on `_`); each should", offenders.len());
70        println!("  get a single-token agent-mode alias. They are specialized (swarm combinators,");
71        println!("  grammar extension) — rare in practice, so the realized token cost is small, but");
72        println!("  the surface is not yet uniformly single-token. This is the concrete step-4 work-list.");
73    }
74}

examples/abstraction_tokens.rs (line 52)

16fn main() {
17    let cl = Model::OpenAiGpt4;
18    let o2 = Model::OpenAiGpt4o;
19    println!("=== Abstraction as the post-floor token lever (real BPE) ===");
20    println!(
21        "tokenizer: {}\n",
22        if cl.is_exact() { "REAL tiktoken (exact)" } else { "HEURISTIC — rerun with --features real-tokens" }
23    );
24
25    // (intent, hand-rolled [compiles today], with-vocabulary [proposed primitive])
26    let cases: &[(&str, &str, &str)] = &[
27        (
28            "sum a list",
29            "f sum(xs)\n  var t = 0\n  for x in xs\n    t = t + x\n  t",
30            "f sum(xs)\n  fold(xs, 0, +)",
31        ),
32        (
33            "word frequencies",
34            "f wc(ws)\n  var m = {}\n  for w in ws\n    m[w] = m[w] + 1\n  m",
35            "f wc(ws)\n  freq(ws)",
36        ),
37        (
38            "evens, doubled",
39            "f f(xs)\n  var out = []\n  for x in xs\n    if x % 2 == 0\n      out.push(x * 2)\n  out",
40            "f f(xs)\n  xs | filter even | map double",
41        ),
42        (
43            "max of a list",
44            "f max(xs)\n  var m = xs[0]\n  for x in xs\n    if x > m\n      m = x\n  m",
45            "f max(xs)\n  reduce(xs, max)",
46        ),
47    ];
48
49    println!("{:<18} {:>9} {:>9} {:>7}", "intent", "handrolled", "vocab", "saved");
50    let (mut h_cl, mut v_cl, mut h_o, mut v_o) = (0, 0, 0, 0);
51    for (name, hand, vocab) in cases {
52        let (h, v) = (cl.count(hand), cl.count(vocab));
53        println!("{name:<18} {h:>9} {v:>9} {:>6}%", 100 - 100 * v / h);
54        h_cl += h; v_cl += v;
55        h_o += o2.count(hand); v_o += o2.count(vocab);
56    }
57    println!("\nTOTAL  cl100k {h_cl} → {v_cl} ({}% saved)   o200k {h_o} → {v_o} ({}% saved)",
58        100 - 100 * v_cl / h_cl, 100 - 100 * v_o / h_o);
59
60    println!("\nFINDING");
61    println!("  At the surface floor, abstraction is the only per-call token lever left, and it is");
62    println!("  POSITIVE-SUM: a single-token, total, capability-typed primitive (a) cuts payload");
63    println!("  tokens (above), (b) RAISES reliability (no hand-rolled off-by-one / empty-list bug),");
64    println!("  and (c) preserves safety (the primitive's effect rides its type to the boundary).");
65    println!("  Encoding tricks (binary, dense UTF-8) and layout were all token-neutral-or-worse —");
66    println!("  vocabulary is the one that pays. The discipline: name primitives as single BPE tokens,");
67    println!("  make them total, and choose them by the empirical frequency of SWE intents.");
68}

examples/rain_tokens.rs (line 111)

87fn main() {
88    let exact = Model::OpenAiGpt4.is_exact();
89    println!("=== MechGen 'digital rain' vs token streams (cl100k + o200k BPE) ===");
90    println!(
91        "tokenizer: {}\n",
92        if exact { "REAL tiktoken (exact)" } else { "HEURISTIC (CJK undercounted — rerun with --features real-tokens)" }
93    );
94
95    let samples: &[(&str, &str)] = &[
96        ("net", "net MLP { layer fc1: Linear(8, 16); layer act: ReLU; layer fc2: Linear(16, 4); forward { fc1 } }"),
97        ("fn", "fn factorial(n: u64) -> u64 { if n <= 1 { return 1; } n * factorial(n - 1) }"),
98        ("kb", "kb Family { fact parent(alice, bob); fact parent(bob, carol); rule gp(x: i32, z: i32) where parent(x, y), parent(y, z) { x } }"),
99    ];
100
101    println!("{:<5} {:>10} {:>22} {:>22} {:>14}", "kind", "form", "cl100k tok", "o200k tok", "chars");
102    let cl = Model::OpenAiGpt4;
103    let o2 = Model::OpenAiGpt4o;
104    let (mut s_cl, mut s_o, mut r_cl, mut r_o, mut rl_cl, mut rl_o) = (0, 0, 0, 0, 0, 0);
105    for (name, src) in samples {
106        let (stream, legend) = rain(src);
107        let rl = format!("{legend}\n{stream}"); // rain + legend (one-off, reversible)
108        let bytes = src.as_bytes();
109        let base = b64(bytes);
110        let row = |label: &str, s: &str| {
111            println!("{:<5} {:>10} {:>22} {:>22} {:>14}", "", label, cl.count(s), o2.count(s), chars(s));
112        };
113        println!("[{name}]");
114        row("source", src);
115        row("rain(stream)", &stream);
116        row("rain+legend", &rl);
117        row("base64(bytes)", &base);
118        s_cl += cl.count(src); s_o += o2.count(src);
119        r_cl += cl.count(&stream); r_o += o2.count(&stream);
120        rl_cl += cl.count(&rl); rl_o += o2.count(&rl);
121    }
122
123    println!("\nTOTALS (3 samples) — token ratio vs source (>1.0 = rain is WORSE):");
124    println!("  source            cl100k {s_cl:>4}   o200k {s_o:>4}");
125    println!("  rain(stream)      cl100k {r_cl:>4} ({:.2}x)   o200k {r_o:>4} ({:.2}x)", r_cl as f64 / s_cl as f64, r_o as f64 / s_o as f64);
126    println!("  rain+legend       cl100k {rl_cl:>4} ({:.2}x)   o200k {rl_o:>4} ({:.2}x)", rl_cl as f64 / s_cl as f64, rl_o as f64 / s_o as f64);
127
128    println!("\nVERDICT");
129    println!("  Digital rain shrinks CHARACTERS (~3x — the Matrix look) but the dense glyph");
130    println!("  stream costs MORE BPE tokens than the ASCII source (ratios above), because the");
131    println!("  tokenizer splits each rare multi-byte glyph into several tokens. Adding the legend");
132    println!("  (needed for reversibility) makes a one-off snippet far worse still. Even an");
133    println!("  amortized shared codebook (stream only) does not beat source on tokens.");
134    println!("  This is the project's token-floor finding, re-confirmed on the dense-symbol idea:");
135    println!("  an LLM emits TOKENS, not glyphs/bytes — the information (names/ops/dims) is the floor.");
136    if !exact {
137        println!("\n  (heuristic run: it counts ~1 token/char and UNDER-counts CJK — the real cl100k/o200k");
138        println!("   gap is larger. Rerun with --features real-tokens for the exact, even-worse numbers.)");
139    }
140}

examples/design_tokens.rs (line 57)

18fn main() {
19    let cl = Model::OpenAiGpt4;
20    let o2 = Model::OpenAiGpt4o;
21    println!("=== Token-efficiency design levers (real cl100k + o200k BPE) ===");
22    println!(
23        "tokenizer: {}\n",
24        if cl.is_exact() { "REAL tiktoken (exact)" } else { "HEURISTIC — rerun with --features real-tokens" }
25    );
26
27    // Each task: (name, ceremony-heavy, current-ish, ab-initio).
28    let tasks: &[(&str, &str, &str, &str)] = &[
29        (
30            "word-count",
31            // A — ceremony-heavy
32            "use std::collections::HashMap;\n\nfn count_words(text: &str) -> HashMap<String, u32> {\n    let mut counts: HashMap<String, u32> = HashMap::new();\n    for word in text.split_whitespace() {\n        *counts.entry(word.to_string()).or_insert(0) += 1;\n    }\n    counts\n}",
33            // B — current-MechGen-ish (sigils, var, some inference)
34            "fn count_words(text: &str) -> {s: u32} {\n    var counts = {s: u32}.new()\n    for word in text.split() {\n        counts.entry(word).or(0) += 1\n    }\n    counts\n}",
35            // C — ab-initio (inference + layout + ambient builtins)
36            "count_words text =\n  counts = {}\n  for w in split text\n    counts[w] += 1\n  counts",
37        ),
38        (
39            "factorial",
40            "fn factorial(n: u64) -> u64 {\n    if n <= 1 {\n        return 1;\n    }\n    n * factorial(n - 1)\n}",
41            "fn factorial(n: u64) -> u64 {\n    if n <= 1 { 1 } else { n * factorial(n - 1) }\n}",
42            "fact n =\n  if n <= 1: 1\n  else: n * fact (n - 1)",
43        ),
44        (
45            "safe-divide", // returns optional/result — safety ceremony vs sigil
46            "fn safe_div(a: i32, b: i32) -> Option<i32> {\n    if b == 0 {\n        return None;\n    }\n    Some(a / b)\n}",
47            "fn safe_div(a: i32, b: i32) -> ?i32 {\n    if b == 0 { none } else { a / b }\n}",
48            "div a b =\n  if b == 0: none\n  else: a / b",
49        ),
50    ];
51
52    println!("{:<13} {:>4} {:>9} {:>8} {:>7}", "task", "form", "cl100k", "o200k", "chars");
53    let (mut a_cl, mut b_cl, mut c_cl) = (0, 0, 0);
54    let (mut a_o, mut b_o, mut c_o) = (0, 0, 0);
55    for (name, a, b, c) in tasks {
56        let row = |label: &str, s: &str| {
57            println!("{:<13} {:>4} {:>9} {:>8} {:>7}", "", label, cl.count(s), o2.count(s), s.chars().count());
58        };
59        println!("[{name}]");
60        row("A heavy", a);
61        row("B curr", b);
62        row("C abinit", c);
63        a_cl += cl.count(a); b_cl += cl.count(b); c_cl += cl.count(c);
64        a_o += o2.count(a); b_o += o2.count(b); c_o += o2.count(c);
65    }
66
67    println!("\nTOTALS (3 tasks):");
68    println!("  A ceremony-heavy   cl100k {a_cl:>3}   o200k {a_o:>3}   (baseline)");
69    println!("  B current-ish      cl100k {b_cl:>3} ({:.0}%)   o200k {b_o:>3} ({:.0}%)", 100.0 * b_cl as f64 / a_cl as f64, 100.0 * b_o as f64 / a_o as f64);
70    println!("  C ab-initio        cl100k {c_cl:>3} ({:.0}%)   o200k {c_o:>3} ({:.0}%)", 100.0 * c_cl as f64 / a_cl as f64, 100.0 * c_o as f64 / a_o as f64);
71    println!("\n  → ab-initio cuts ~{:.0}% of cl100k tokens vs ceremony-heavy by REMOVING ceremony",
72        100.0 * (1.0 - c_cl as f64 / a_cl as f64));
73    println!("    (types/mutability/return/imports inferred; layout replaces braces+`;`; terse safety");
74    println!("    sigils; ambient builtins). The remaining tokens are the irreducible payload —");
75    println!("    names/ops/literals — which no design can remove. That residue IS the token floor.");
76}

Additional examples can be found in:

Trait Implementations§

impl Clone for Model

fn clone(&self) -> Model

Returns a duplicate of the value. Read more

1.0.0 (const: unstable) · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more

impl Copy for Model

impl Debug for Model

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

impl Eq for Model

impl PartialEq for Model

fn eq(&self, other: &Model) -> bool

Tests whether self and other are equal; this method is used by the == operator.

1.0.0 (const: unstable) · Source§

fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.

impl Serialize for Model

fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: Serializer,

Serialize this value into the given Serde serializer. Read more

impl StructuralPartialEq for Model

Auto Trait Implementations§

impl Freeze for Model

impl RefUnwindSafe for Model

impl Send for Model

impl Sync for Model

impl Unpin for Model

impl UnsafeUnpin for Model

impl UnwindSafe for Model

Blanket Implementations§

impl<T> Any for T
where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

impl<T> Borrow<T> for T
where T: ?Sized,

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

impl<T> BorrowMut<T> for T
where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

impl<T> CloneToUninit for T
where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬 This is a nightly-only experimental API (clone_to_uninit).

Performs copy-assignment from self to dest. Read more

impl<T> From<T> for T

fn from(t: T) -> T

Returns the argument unchanged.

impl<T, U> Into<U> for T
where U: From<T>,

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

impl<T> ToOwned for T
where T: Clone,

type Owned = T

The resulting type after obtaining ownership.

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more

impl<T, U> TryFrom<U> for T
where U: Into<T>,

type Error = Infallible

The type returned in the event of a conversion error.

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.