use aethershell::builtins::{est_token_count, render_canonical};
use aethershell::env::Env;
use aethershell::eval::eval_program;
use aethershell::parser::parse_program;
use aethershell::safety::{self, SafetyError};
use aethershell::transpile::agentic::describe_ontology;
use aethershell::value::Value;
use agentic_eval::determinism::assess_determinism;
use agentic_eval::reliability::{assess_reliability, Outcome};
use agentic_eval::safety::{assess_safety, Effect, Mode};
use agentic_eval::tokens::AgentCost;
/// Parses `code` as an AetherShell program and evaluates it in a brand-new
/// environment, returning whatever `eval_program` produces.
///
/// # Errors
///
/// Propagates the parse error from `parse_program` (via `?`) or the error
/// returned by `eval_program`.
fn eval_to_value(code: &str) -> anyhow::Result<Value> {
    // Parse first: a parse failure returns before any environment is created.
    let program = parse_program(code)?;
    // Each call gets its own `Env`, so no state carries over between calls.
    let mut fresh_env = Env::new();
    eval_program(&program, &mut fresh_env)
}
/// Runs the four-axis self-evaluation (token efficiency, determinism,
/// reliability, safety) against the AetherShell engine and prints each
/// section, followed by the combined `agentic_eval::Evaluation`, to stdout.
///
/// # Panics
///
/// Panics if the determinism probe program fails to evaluate (`expect("eval")`).
fn main() {
    println!("AetherShell × agentic-eval — four-axis self-evaluation (real engine)\n");

    // ---- [1] Token efficiency -------------------------------------------
    // The same pipeline written in the legible surface and in a terse "cipher"
    // surface; both are measured with AetherShell's own `est_token_count`.
    let legible = r#"ls("./src") | where(fn(f) => f.size > 1000) | map(fn(f) => f.name)"#;
    let cipher = r#"l./src|w~.size>1k|m~.name"#;
    let legible_cost = AgentCost {
        standing_context: est_token_count("ls/where/map are standard high-probability names"),
        input: est_token_count(legible),
        output: 0,
        retries: 0,
    };
    let cipher_cost = AgentCost {
        // The cipher surface is charged the token count of the ontology description.
        standing_context: est_token_count(&describe_ontology()),
        input: est_token_count(cipher),
        output: 0,
        retries: 1,
    };
    let turns = 30;
    let exact = cfg!(feature = "real-tokens");
    // Compute each session total once so the printed figures and the winner
    // comparison below are guaranteed to use the same values.
    let legible_total = legible_cost.total_over(turns);
    let cipher_total = cipher_cost.total_over(turns);
    println!(
        "[1] Token efficiency — AetherShell est_token_count ({}, {} turns):",
        if exact {
            "EXACT cl100k BPE"
        } else {
            "heuristic"
        },
        turns
    );
    println!(
        " legible: input={:>4} standing={:>5} session-total={:>6}",
        legible_cost.input, legible_cost.standing_context, legible_total
    );
    println!(
        " cipher : input={:>4} standing={:>5} session-total={:>6}",
        cipher_cost.input, cipher_cost.standing_context, cipher_total
    );
    // Ties go to the legible surface (`<=`).
    let winner = if legible_total <= cipher_total {
        "legible"
    } else {
        "cipher"
    };
    println!(" → {winner} wins over a session (standing-context tax dominates the input edge)\n");

    // ---- [2] Determinism --------------------------------------------------
    // Evaluate and canonically render the same record literal on every run.
    // NOTE(review): `unwrap_or_default()` turns a render failure into the
    // default value on every run, which would presumably still be reported as
    // deterministic — confirm whether a failure should abort instead.
    let det = assess_determinism(8, || {
        let v = eval_to_value(r#"{ b: 2.0, a: 1, items: [3, 1, 2] }"#).expect("eval");
        render_canonical(&v).unwrap_or_default()
    });
    println!(
        "[2] Determinism — canonical render: deterministic={} ({} distinct / {} runs)",
        det.deterministic, det.distinct, det.runs
    );
    println!(" byte-stable sample: {}\n", det.first);

    // ---- [3] Reliability --------------------------------------------------
    // A small corpus of programs; the last two are presumably expected to fail
    // (a wrong-typed call and an unparsable input) — TODO confirm.
    let programs = [
        r#"len([1, 2, 3])"#,
        r#"upper("hi")"#,
        r#"[1, 2, 3] | map(fn(x) => x + 1)"#,
        r#"env(123)"#,
        r#"((("#,
    ];
    // A failure counts as structured only when the error chain downcasts to
    // `SafetyError`; every other error is recorded as opaque.
    let rel = assess_reliability(&programs, |code| match eval_to_value(code) {
        Ok(_) => Outcome::ok(),
        Err(e) if e.downcast_ref::<SafetyError>().is_some() => Outcome::structured_failure(),
        Err(_) => Outcome::opaque_failure(),
    });
    println!(
        "[3] Reliability — {} programs: pass {:.0}% actionable {:.0}% (structured failures: {})",
        rel.total,
        rel.pass_rate * 100.0,
        rel.actionable_rate * 100.0,
        rel.structured_failures
    );
    println!();

    // ---- [4] Safety -------------------------------------------------------
    let builtins = [
        "len",
        "file_read",
        "file_write",
        "http_get",
        "proc_kill",
        "rm",
        "sh",
    ];
    // Keep every builtin paired with its own effect. Filtering the effects
    // separately and zipping afterwards would shift every later name onto the
    // wrong effect as soon as one builtin failed to classify.
    let mut names = Vec::with_capacity(builtins.len());
    let mut effects: Vec<Effect> = Vec::with_capacity(builtins.len());
    let mut unclassified = Vec::new();
    for b in &builtins {
        match Effect::from_name(safety::effect_of(b).as_str()) {
            Some(effect) => {
                names.push(*b);
                effects.push(effect);
            }
            None => unclassified.push(*b),
        }
    }
    let saf = assess_safety(&effects, Mode::Agent);
    println!(
        "[4] Safety — {} representative builtins under the agent policy:",
        builtins.len()
    );
    for (b, e) in names.iter().zip(&effects) {
        println!(" {:<11} {}", b, e.name());
    }
    // Surface anything that was left out of the grade instead of hiding it.
    for b in &unclassified {
        println!(" {:<11} (unclassified — excluded from the grade)", b);
    }
    println!(
        " grade {} bounded={} (allowed={} approval-gated={} denied={})",
        saf.grade, saf.bounded, saf.allowed, saf.approval_gated, saf.denied
    );

    // ---- Combined report ---------------------------------------------------
    // Only the legible surface's cost is attached to the combined evaluation.
    let eval = agentic_eval::Evaluation::new("AetherShell (legible .ae surface)")
        .with_tokens(legible_cost)
        .with_determinism(det)
        .with_reliability(rel)
        .with_safety(saf);
    println!("\n=== Combined evaluation ===\n{eval}");
    println!("\nResult: AetherShell renders deterministically, surfaces wrong-typed arguments");
    println!("as structured (actionable) errors, and bounds the blast radius of its dangerous");
    println!(
        "builtins under the agent policy — while the legible surface stays token-competitive."
    );
}