// swe_token_benchmark/swe_token_benchmark.rs
use agentic_eval::tokens::Model;

15fn main() {
16 let cl = Model::OpenAiGpt4;
17 let o2 = Model::OpenAiGpt4o;
18 println!("=== Agentic-SWE token benchmark — 6 languages × 3 tasks (real BPE) ===");
19 println!(
20 "tokenizer: {}\n",
21 if cl.is_exact() { "REAL tiktoken (exact)" } else { "HEURISTIC — rerun with --features real-tokens" }
22 );
23
24 let langs: &[(&str, [&str; 3])] = &[
26 (
27 "MechGen",
28 [
29 "f factorial(n)\n if n <= 1\n 1\n else\n n * factorial(n - 1)",
30 "f sum(xs)\n var t = 0\n for x in xs\n t = t + x\n t",
31 "S Point { x: f64, y: f64 }\nf dist2(p: Point)\n p.x * p.x + p.y * p.y",
32 ],
33 ),
34 (
35 "Python",
36 [
37 "def factorial(n):\n return 1 if n <= 1 else n * factorial(n - 1)",
38 "def sum_list(xs):\n t = 0\n for x in xs:\n t += x\n return t",
39 "from dataclasses import dataclass\n@dataclass\nclass Point:\n x: float\n y: float\ndef dist2(p):\n return p.x * p.x + p.y * p.y",
40 ],
41 ),
42 (
43 "Rust",
44 [
45 "fn factorial(n: u64) -> u64 {\n if n <= 1 { 1 } else { n * factorial(n - 1) }\n}",
46 "fn sum_list(xs: &[i64]) -> i64 {\n let mut t = 0;\n for x in xs {\n t += x;\n }\n t\n}",
47 "struct Point {\n x: f64,\n y: f64,\n}\nfn dist2(p: &Point) -> f64 {\n p.x * p.x + p.y * p.y\n}",
48 ],
49 ),
50 (
51 "Go",
52 [
53 "func factorial(n int) int {\n\tif n <= 1 {\n\t\treturn 1\n\t}\n\treturn n * factorial(n-1)\n}",
54 "func sumList(xs []int) int {\n\tt := 0\n\tfor _, x := range xs {\n\t\tt += x\n\t}\n\treturn t\n}",
55 "type Point struct {\n\tX, Y float64\n}\nfunc dist2(p Point) float64 {\n\treturn p.X*p.X + p.Y*p.Y\n}",
56 ],
57 ),
58 (
59 "TypeScript",
60 [
61 "function factorial(n: number): number {\n return n <= 1 ? 1 : n * factorial(n - 1);\n}",
62 "function sumList(xs: number[]): number {\n let t = 0;\n for (const x of xs) {\n t += x;\n }\n return t;\n}",
63 "interface Point {\n x: number;\n y: number;\n}\nfunction dist2(p: Point): number {\n return p.x * p.x + p.y * p.y;\n}",
64 ],
65 ),
66 (
67 "Java",
68 [
69 "static long factorial(long n) {\n return n <= 1 ? 1 : n * factorial(n - 1);\n}",
70 "static long sumList(long[] xs) {\n long t = 0;\n for (long x : xs) {\n t += x;\n }\n return t;\n}",
71 "record Point(double x, double y) {}\nstatic double dist2(Point p) {\n return p.x() * p.x() + p.y() * p.y();\n}",
72 ],
73 ),
74 ];
75
76 println!("{:<12} {:>9} {:>9} {:>9} {:>9}", "language", "factori", "sum", "point", "TOTAL cl");
77 let mut totals: Vec<(&str, usize, usize)> = Vec::new();
78 for (name, progs) in langs {
79 let c: Vec<usize> = progs.iter().map(|p| cl.count(p)).collect();
80 let o: usize = progs.iter().map(|p| o2.count(p)).sum();
81 let tot: usize = c.iter().sum();
82 println!("{name:<12} {:>9} {:>9} {:>9} {:>9}", c[0], c[1], c[2], tot);
83 totals.push((name, tot, o));
84 }
85
86 println!("\nRANK by total cl100k tokens (lower = terser):");
87 totals.sort_by_key(|t| t.1);
88 let best = totals[0].1 as f64;
89 let mg = totals.iter().find(|t| t.0 == "MechGen").unwrap().1;
90 for (i, (name, tot, o)) in totals.iter().enumerate() {
91 let mark = if *name == "MechGen" { " ← landed ab-initio surface" } else { "" };
92 println!(" {}. {name:<11} {tot:>3} cl100k {o:>3} o200k ({:.2}x){mark}", i + 1, *tot as f64 / best);
93 }
94
95 println!("\nREADING");
96 let py = totals.iter().find(|t| t.0 == "Python").unwrap().1;
97 println!(" MechGen total {mg} cl100k vs Python {py}, Rust {}, Go {}, TS {}, Java {}.",
98 totals.iter().find(|t| t.0 == "Rust").unwrap().1,
99 totals.iter().find(|t| t.0 == "Go").unwrap().1,
100 totals.iter().find(|t| t.0 == "TypeScript").unwrap().1,
101 totals.iter().find(|t| t.0 == "Java").unwrap().1);
102 println!(" Every MechGen snippet compiles (--check). The terseness is from inference +");
103 println!(" `;`-removal (real, landed), NOT layout (token-neutral) — names/ops/literals are");
104 println!(" the irreducible payload floor that bounds all of them.");
105}