use aethershell::parser::parse_program;
use aethershell::transpile::agentic::{describe_ontology, transpile_agentic_to_ae};
struct Task {
name: &'static str,
legible: &'static str,
cipher: &'static str,
}
const CORPUS: &[Task] = &[
Task {
name: "list+filter+project",
legible: r#"ls("./src") | where(fn(f) => f.size > 1000) | map(fn(f) => f.name)"#,
cipher: r#"l./src|w~.size>1k|m~.name"#,
},
Task {
name: "read file",
legible: r#"file.read("README.md")"#,
cipher: r#"F.r"README.md""#,
},
Task {
name: "http+json+select",
legible: r#"http.get("https://api.github.com/repos/nervosys/AetherShell") | json.parse(_) | select("stargazers_count")"#,
cipher: r#"H.g"https://api.github.com/repos/nervosys/AetherShell"|J.p(_)|s"stargazers_count""#,
},
Task {
name: "map+filter+reduce",
legible: r#"[1,2,3,4,5] | map(fn(x) => x * 2) | where(fn(x) => x > 4) | reduce(fn(a,b) => a + b, 0)"#,
cipher: r#"[1,2,3,4,5]|m~x:x*2|w~x:x>4|r~a,b:a+b,0"#,
},
Task {
name: "sys host echo",
legible: r#"echo("Running on ${sys.hostname()}")"#,
cipher: r#"e"Running on ${S.h()}""#,
},
Task {
name: "docker ps names",
legible: r#"docker.ps() | map(fn(c) => c.name)"#,
cipher: r#"DK.p()|m~.name"#,
},
Task {
name: "match status",
legible: r#"match status { 200 => "ok", _ => "err" }"#,
cipher: r#"?status{200=>"ok",_=>"err"}"#,
},
Task {
name: "try/catch fallback",
legible: r#"try { http.get(url) } catch e { "fallback" }"#,
cipher: r#"!{H.g(url)}{"fallback"}"#,
},
Task {
name: "grep+head",
legible: r#"grep("*.rs") | head(10)"#,
cipher: r#"g*.rs|h10"#,
},
Task {
name: "for-each",
legible: r#"([1,2,3]) | each(fn(x) => echo(x))"#,
cipher: r#"*[1,2,3]~x:echo(x)"#,
},
Task {
name: "ls+map+head",
legible: r#"ls(".") | map(fn(f) => f.name) | head(5)"#,
cipher: r#"l.|m~.name|h5"#,
},
Task {
name: "list+filter",
legible: r#"ls("/tmp") | where(fn(f) => f.size > 0)"#,
cipher: r#"l/tmp|w~.size>0"#,
},
Task {
name: "map double",
legible: r#"[1,2,3] | map(fn(x) => x * 2)"#,
cipher: r#"[1,2,3]|m~x:x*2"#,
},
];
const SESSION_TURNS: usize = 30;
use aethershell::builtins::est_token_count as est_tokens;
#[cfg(feature = "real-tokens")]
fn est_tokens_o200k(s: &str) -> usize {
use std::sync::OnceLock;
static BPE: OnceLock<tiktoken_rs::CoreBPE> = OnceLock::new();
let bpe = BPE.get_or_init(|| tiktoken_rs::o200k_base().expect("load o200k_base"));
bpe.encode_with_special_tokens(s).len()
}
#[cfg(not(feature = "real-tokens"))]
fn est_tokens_o200k(s: &str) -> usize {
est_tokens(s)
}
fn cipher_ok(src: &str) -> bool {
match transpile_agentic_to_ae(src) {
Ok(ae) => parse_program(&ae).is_ok(),
Err(_) => false,
}
}
fn legible_ok(src: &str) -> bool {
parse_program(src).is_ok()
}
fn main() {
println!("AetherShell Phase-1 Token Benchmark — cipher vs legible\n");
println!(
"{:<22} {:>6} {:>6} {:>7} {:>6} {:>6} {:>7} {:>5} {:>5}",
"task", "c.chr", "l.chr", "chr-sav", "c.tok", "l.tok", "tok-sav", "c.ok", "l.ok"
);
println!("{}", "-".repeat(92));
let (mut tc_chr, mut tl_chr, mut tc_tok, mut tl_tok) = (0usize, 0usize, 0usize, 0usize);
let (mut c_fail, mut l_fail) = (0usize, 0usize);
for t in CORPUS {
let (cc, lc) = (t.cipher.chars().count(), t.legible.chars().count());
let (ct, lt) = (est_tokens(t.cipher), est_tokens(t.legible));
let chr_sav = pct(cc, lc);
let tok_sav = pct(ct, lt);
let cok = cipher_ok(t.cipher);
let lok = legible_ok(t.legible);
if !cok {
c_fail += 1;
}
if !lok {
l_fail += 1;
}
tc_chr += cc;
tl_chr += lc;
tc_tok += ct;
tl_tok += lt;
println!(
"{:<22} {:>6} {:>6} {:>6.0}% {:>6} {:>6} {:>6.0}% {:>5} {:>5}",
t.name,
cc,
lc,
chr_sav,
ct,
lt,
tok_sav,
yn(cok),
yn(lok)
);
}
println!("{}", "-".repeat(92));
println!(
"{:<22} {:>6} {:>6} {:>6.0}% {:>6} {:>6} {:>6.0}% {:>5} {:>5}",
"TOTAL (input only)",
tc_chr,
tl_chr,
pct(tc_chr, tl_chr),
tc_tok,
tl_tok,
pct(tc_tok, tl_tok),
format!("{}f", c_fail),
format!("{}f", l_fail),
);
let cipher_cheatsheet = describe_ontology();
let cipher_sc_tok = est_tokens(&cipher_cheatsheet);
let legible_sc_tok = 400usize;
println!("\nStanding context (re-sent each turn, est. tokens):");
println!(
" cipher : {:>7} (the describe_ontology cheatsheet an agent must carry to emit valid .aeg)",
cipher_sc_tok
);
println!(
" legible: {:>7} (short module index; names are already high-probability tokens)",
legible_sc_tok
);
let cipher_total = cipher_sc_tok + (tc_tok * SESSION_TURNS) + (tc_tok); let legible_total = legible_sc_tok + (tl_tok * SESSION_TURNS);
let _ = c_fail;
println!(
"\n§4 criterion over {} turns (standing_context + input*turns + retry proxy), est. tokens:",
SESSION_TURNS
);
println!(" cipher : {:>8}", cipher_total);
println!(" legible: {:>8}", legible_total);
let verdict = if legible_total < cipher_total {
"LEGIBLE wins: the cipher's standing-context tax dominates its small per-line input savings."
} else {
"CIPHER wins on this corpus."
};
let tokenizer = if cfg!(feature = "real-tokens") {
"real GPT-4 BPE (cl100k_base via tiktoken-rs)"
} else {
"labeled heuristic (build with --features real-tokens for real tokens)"
};
println!("\nTokenizer: {tokenizer}");
println!("Verdict: {verdict}");
{
let (mut c2_tok, mut l2_tok) = (0usize, 0usize);
for t in CORPUS {
c2_tok += est_tokens_o200k(t.cipher);
l2_tok += est_tokens_o200k(t.legible);
}
let cipher_sc2 = est_tokens_o200k(&cipher_cheatsheet);
let legible_sc2 = legible_sc_tok; let cipher_total2 = cipher_sc2 + (c2_tok * SESSION_TURNS) + c2_tok;
let legible_total2 = legible_sc2 + (l2_tok * SESSION_TURNS);
let verdict2 = if legible_total2 < cipher_total2 {
"LEGIBLE wins (verdict holds under o200k too)"
} else {
"CIPHER wins under o200k"
};
let label = if cfg!(feature = "real-tokens") {
"real GPT-4o BPE (o200k_base)"
} else {
"heuristic (same as cl100k column without --features real-tokens)"
};
println!("\nCross-tokenizer check — {label}:");
println!(
" input tokens (cipher/legible): {} / {} standing-context (cipher/legible): {} / {}",
c2_tok, l2_tok, cipher_sc2, legible_sc2
);
println!(
" §4 over {} turns (cipher/legible): {} / {}",
SESSION_TURNS, cipher_total2, legible_total2
);
println!(" Verdict: {verdict2}");
}
println!(
"\nReliability: cipher round-trip failures = {}/{}, legible parse failures = {}/{}.",
c_fail,
CORPUS.len(),
l_fail,
CORPUS.len()
);
println!(
"\nNOTE: char/standing-context/reliability numbers are EXACT. Token numbers are {}.\n\
The headline finding is structural: input is the smallest cost term, and the cipher\n\
inflates standing context by ~{}x relative to legible.",
if cfg!(feature = "real-tokens") {
"EXACT (real cl100k BPE)"
} else {
"a labeled heuristic"
},
if legible_sc_tok > 0 {
cipher_sc_tok / legible_sc_tok
} else {
0
}
);
}
fn pct(a: usize, b: usize) -> f64 {
if b == 0 {
0.0
} else {
(1.0 - (a as f64 / b as f64)) * 100.0
}
}
fn yn(b: bool) -> &'static str {
if b {
"ok"
} else {
"FAIL"
}
}