// design_tokens/design_tokens.rs
use agentic_eval::tokens::Model;

18fn main() {
19 let cl = Model::OpenAiGpt4;
20 let o2 = Model::OpenAiGpt4o;
21 println!("=== Token-efficiency design levers (real cl100k + o200k BPE) ===");
22 println!(
23 "tokenizer: {}\n",
24 if cl.is_exact() { "REAL tiktoken (exact)" } else { "HEURISTIC — rerun with --features real-tokens" }
25 );
26
27 let tasks: &[(&str, &str, &str, &str)] = &[
29 (
30 "word-count",
31 "use std::collections::HashMap;\n\nfn count_words(text: &str) -> HashMap<String, u32> {\n let mut counts: HashMap<String, u32> = HashMap::new();\n for word in text.split_whitespace() {\n *counts.entry(word.to_string()).or_insert(0) += 1;\n }\n counts\n}",
33 "fn count_words(text: &str) -> {s: u32} {\n var counts = {s: u32}.new()\n for word in text.split() {\n counts.entry(word).or(0) += 1\n }\n counts\n}",
35 "count_words text =\n counts = {}\n for w in split text\n counts[w] += 1\n counts",
37 ),
38 (
39 "factorial",
40 "fn factorial(n: u64) -> u64 {\n if n <= 1 {\n return 1;\n }\n n * factorial(n - 1)\n}",
41 "fn factorial(n: u64) -> u64 {\n if n <= 1 { 1 } else { n * factorial(n - 1) }\n}",
42 "fact n =\n if n <= 1: 1\n else: n * fact (n - 1)",
43 ),
44 (
45 "safe-divide", "fn safe_div(a: i32, b: i32) -> Option<i32> {\n if b == 0 {\n return None;\n }\n Some(a / b)\n}",
47 "fn safe_div(a: i32, b: i32) -> ?i32 {\n if b == 0 { none } else { a / b }\n}",
48 "div a b =\n if b == 0: none\n else: a / b",
49 ),
50 ];
51
52 println!("{:<13} {:>4} {:>9} {:>8} {:>7}", "task", "form", "cl100k", "o200k", "chars");
53 let (mut a_cl, mut b_cl, mut c_cl) = (0, 0, 0);
54 let (mut a_o, mut b_o, mut c_o) = (0, 0, 0);
55 for (name, a, b, c) in tasks {
56 let row = |label: &str, s: &str| {
57 println!("{:<13} {:>4} {:>9} {:>8} {:>7}", "", label, cl.count(s), o2.count(s), s.chars().count());
58 };
59 println!("[{name}]");
60 row("A heavy", a);
61 row("B curr", b);
62 row("C abinit", c);
63 a_cl += cl.count(a); b_cl += cl.count(b); c_cl += cl.count(c);
64 a_o += o2.count(a); b_o += o2.count(b); c_o += o2.count(c);
65 }
66
67 println!("\nTOTALS (3 tasks):");
68 println!(" A ceremony-heavy cl100k {a_cl:>3} o200k {a_o:>3} (baseline)");
69 println!(" B current-ish cl100k {b_cl:>3} ({:.0}%) o200k {b_o:>3} ({:.0}%)", 100.0 * b_cl as f64 / a_cl as f64, 100.0 * b_o as f64 / a_o as f64);
70 println!(" C ab-initio cl100k {c_cl:>3} ({:.0}%) o200k {c_o:>3} ({:.0}%)", 100.0 * c_cl as f64 / a_cl as f64, 100.0 * c_o as f64 / a_o as f64);
71 println!("\n → ab-initio cuts ~{:.0}% of cl100k tokens vs ceremony-heavy by REMOVING ceremony",
72 100.0 * (1.0 - c_cl as f64 / a_cl as f64));
73 println!(" (types/mutability/return/imports inferred; layout replaces braces+`;`; terse safety");
74 println!(" sigils; ambient builtins). The remaining tokens are the irreducible payload —");
75 println!(" names/ops/literals — which no design can remove. That residue IS the token floor.");
76}