use crate::commands::{commands_for, known_command_count};
use crate::safety::{Effect, Mode};
use crate::tokens::Model;
/// Version string of this crate, captured at compile time from Cargo's
/// `CARGO_PKG_VERSION` environment variable.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
/// Static description of one evaluation axis, as returned by [`axes`].
///
/// Derives `serde::Serialize` when the `serde` feature is enabled.
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct AxisDoc {
/// Short identifier of the axis (e.g. `"tokens"`); `describe` matches queries against it.
pub name: &'static str,
/// Human-readable description of what the axis covers.
pub summary: &'static str,
/// Names listed as entry points for this axis (e.g. `"evaluate"`).
/// NOTE(review): presumably public function names of this crate — confirm.
pub entry_points: &'static [&'static str],
/// NOTE(review): presumably whether assessing this axis requires actually
/// running the program under evaluation — confirm against the assessors.
pub needs_execution: bool,
/// Display name(s) of the report type(s) for this axis; several names are
/// separated by `" | "` (e.g. `"AgentCost | ScalingReport | CacheReport"`).
pub output_type: &'static str,
}
/// Description of one effect class, built by [`effects`] from an `Effect` value.
///
/// Derives `serde::Serialize` when the `serde` feature is enabled.
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct EffectDoc {
/// The effect's name, as reported by `Effect::name`.
pub name: &'static str,
/// The effect's summary text, as reported by `Effect::summary`.
pub summary: &'static str,
/// Result of `Effect::is_dangerous` for this effect.
pub dangerous: bool,
/// Name of the decision the effect yields under `Mode::Human`.
pub human_decision: &'static str,
/// Name of the decision the effect yields under `Mode::Agent`.
pub agent_decision: &'static str,
/// Up to four commands returned by `commands_for` for this effect.
pub example_commands: Vec<&'static str>,
}
/// Description of one `Model`, built by [`models`].
///
/// Derives `serde::Serialize` when the `serde` feature is enabled.
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct ModelDoc {
/// The model's name, as reported by `Model::name`.
pub name: &'static str,
/// Result of `Model::is_exact`.
/// NOTE(review): presumably distinguishes exact tokenizers from heuristic
/// estimates — confirm in `crate::tokens`.
pub exact: bool,
}
/// Complete self-description of the crate, assembled by [`ontology`].
///
/// Its `Display` output is the manifest followed by the expanded axes and
/// effects. Derives `serde::Serialize` when the `serde` feature is enabled.
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct Ontology {
/// Name of the crate being described.
pub crate_name: &'static str,
/// Crate version string (see [`VERSION`]).
pub version: &'static str,
/// One-paragraph description of what the crate evaluates.
pub summary: &'static str,
/// Evaluation axes, as returned by [`axes`].
pub axes: Vec<AxisDoc>,
/// Effect classes, as returned by [`effects`].
pub effects: Vec<EffectDoc>,
/// Names of every value in `Mode::all()`.
pub modes: Vec<&'static str>,
/// Models, as returned by [`models`].
pub models: Vec<ModelDoc>,
/// Value of `known_command_count()` at construction time.
pub known_commands: usize,
/// Languages with their fitness, as returned by [`languages`].
pub languages: Vec<SubjectDoc>,
/// Frameworks with their fitness, as returned by [`frameworks`].
pub frameworks: Vec<SubjectDoc>,
/// VM systems with their fitness, as returned by [`vms`].
pub vms: Vec<SubjectDoc>,
/// Web stacks with their fitness, as returned by [`web_stacks`].
pub web_stacks: Vec<SubjectDoc>,
}
/// Index entry for one evaluated subject (a language, framework, VM system,
/// or web stack): its name and its fitness value.
///
/// Derives `serde::Serialize` when the `serde` feature is enabled.
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub struct SubjectDoc {
/// The subject's name, as reported by its own `name()` method.
pub name: &'static str,
/// Value returned by `fitness()` on the subject's profile.
/// NOTE(review): scale and range are not visible in this file — confirm
/// in the profile modules.
pub fitness: f64,
}
/// Returns the evaluation axes in their fixed order: tokens, determinism,
/// reliability, safety.
///
/// Each entry is a static description; nothing is computed or executed here.
pub fn axes() -> Vec<AxisDoc> {
    let tokens = AxisDoc {
        name: "tokens",
        summary: concat!(
            "token efficiency: the four cost terms an agent pays — standing ",
            "context, input, output, retries — amortized over a session; plus ",
            "output scaling (per-item cost) and prompt-cache savings",
        ),
        entry_points: &[
            "evaluate",
            "evaluate_with",
            "compare",
            "rank",
            "rank_with",
            "assess_scaling",
            "assess_cache",
            "cacheable_prefix_tokens",
        ],
        needs_execution: false,
        output_type: "AgentCost | ScalingReport | CacheReport",
    };

    let determinism = AxisDoc {
        name: "determinism",
        summary: concat!(
            "whether a program's output is byte-identical across repeated runs ",
            "(so an agent can parse, cache, and diff it)",
        ),
        entry_points: &["assess_determinism", "stable_across"],
        needs_execution: true,
        output_type: "DeterminismReport",
    };

    let reliability = AxisDoc {
        name: "reliability",
        summary: concat!(
            "success rate over representative invocations, whether failures are ",
            "structured/actionable rather than dead ends, and graded error ",
            "quality (code/message/location/fix)",
        ),
        entry_points: &["assess_reliability", "assess_error_quality"],
        needs_execution: true,
        output_type: "ReliabilityReport | ErrorQualityReport",
    };

    let safety = AxisDoc {
        name: "safety",
        summary: concat!(
            "the fraction of a program's dangerous blast radius that is gated ",
            "(approval/denied) under an agent policy; plus reversibility ",
            "(recoverable blast radius) and data-exfiltration exposure",
        ),
        entry_points: &[
            "assess_safety",
            "assess_safety_named",
            "assess_safety_script",
            "assess_reversibility",
            "assess_exfiltration",
        ],
        needs_execution: false,
        output_type: "SafetyReport | ReversibilityReport | ExfiltrationReport",
    };

    vec![tokens, determinism, reliability, safety]
}
/// Builds one [`EffectDoc`] per value yielded by `Effect::all()`, in that order.
///
/// Each entry records the effect's name, summary, danger flag, the decision
/// names under `Mode::Human` and `Mode::Agent`, and at most four commands
/// from `commands_for`.
pub fn effects() -> Vec<EffectDoc> {
    let mut docs = Vec::new();
    for effect in Effect::all().into_iter() {
        let name = effect.name();
        let summary = effect.summary();
        let dangerous = effect.is_dangerous();
        let human_decision = effect.decision(Mode::Human).name();
        let agent_decision = effect.decision(Mode::Agent).name();
        // Only a short sample is kept per effect.
        let example_commands = commands_for(effect).iter().take(4).copied().collect();
        docs.push(EffectDoc {
            name,
            summary,
            dangerous,
            human_decision,
            agent_decision,
            example_commands,
        });
    }
    docs
}
/// Builds one [`ModelDoc`] per value yielded by `Model::all()`, in that order,
/// recording each model's name and its `is_exact()` flag.
pub fn models() -> Vec<ModelDoc> {
    let mut docs = Vec::new();
    for model in Model::all().into_iter() {
        let name = model.name();
        let exact = model.is_exact();
        docs.push(ModelDoc { name, exact });
    }
    docs
}
/// Lists every language from `Language::all()` with the fitness of its profile.
pub fn languages() -> Vec<SubjectDoc> {
    crate::languages::Language::all()
        .iter()
        .copied()
        .map(|language| {
            let fitness = crate::languages::profile(language).fitness();
            SubjectDoc {
                name: language.name(),
                fitness,
            }
        })
        .collect()
}
/// Lists every framework from `Framework::all()` with the fitness of its profile.
pub fn frameworks() -> Vec<SubjectDoc> {
    crate::frameworks::Framework::all()
        .iter()
        .copied()
        .map(|framework| {
            let fitness = crate::frameworks::profile(framework).fitness();
            SubjectDoc {
                name: framework.name(),
                fitness,
            }
        })
        .collect()
}
/// Lists every VM system from `Vm::all()` with the fitness of its profile.
pub fn vms() -> Vec<SubjectDoc> {
    crate::vms::Vm::all()
        .iter()
        .copied()
        .map(|vm| {
            let fitness = crate::vms::profile(vm).fitness();
            SubjectDoc {
                name: vm.name(),
                fitness,
            }
        })
        .collect()
}
/// Lists every web stack from `WebStack::all()` with the fitness of its profile.
pub fn web_stacks() -> Vec<SubjectDoc> {
    crate::web::WebStack::all()
        .iter()
        .copied()
        .map(|stack| {
            let fitness = crate::web::profile(stack).fitness();
            SubjectDoc {
                name: stack.name(),
                fitness,
            }
        })
        .collect()
}
/// Assembles the full [`Ontology`]: crate name, version, summary, and every
/// section (axes, effects, modes, models, command count, and the four
/// subject indexes).
pub fn ontology() -> Ontology {
    let summary = concat!(
        "evaluate programs, programming languages, AI frameworks, ",
        "VM/sandbox systems, and web stacks / wire protocols for ",
        "agentic AI use across four axes — token efficiency, ",
        "determinism, reliability, and safety (frameworks add ",
        "discoverability; VM systems use agent-native axes: ",
        "start-latency, density, isolation, snapshotting, agent-control; ",
        "web stacks use streaming, tool-discoverability, ",
        "encoding-efficiency, interop, security-primitives)",
    );

    // Sections are gathered in the same order as the struct's fields.
    let axis_docs = axes();
    let effect_docs = effects();
    let mode_names: Vec<&'static str> = Mode::all().iter().map(|mode| mode.name()).collect();
    let model_docs = models();
    let command_count = known_command_count();
    let language_docs = languages();
    let framework_docs = frameworks();
    let vm_docs = vms();
    let web_stack_docs = web_stacks();

    Ontology {
        crate_name: "agentic-eval",
        version: VERSION,
        summary,
        axes: axis_docs,
        effects: effect_docs,
        modes: mode_names,
        models: model_docs,
        known_commands: command_count,
        languages: language_docs,
        frameworks: framework_docs,
        vms: vm_docs,
        web_stacks: web_stack_docs,
    }
}
/// Renders the compact, multi-line manifest of the crate's ontology.
///
/// One line per section: header (name, version, summary), axes, effects,
/// modes, models, command count, languages, frameworks, vms, web stacks, and
/// a closing hint about `describe`. Lines are separated by `\n` with no
/// trailing newline.
pub fn manifest() -> String {
    /// Collects the given names and joins them with `separator`.
    fn names<'a>(items: impl Iterator<Item = &'a str>, separator: &str) -> String {
        items.collect::<Vec<_>>().join(separator)
    }

    let o = ontology();
    let effect_count = o.effects.len();

    let sections = [
        format!("{} {} — {}", o.crate_name, o.version, o.summary),
        format!("axes: {}", names(o.axes.iter().map(|axis| axis.name), ", ")),
        format!(
            "effects({}): {}",
            effect_count,
            names(o.effects.iter().map(|effect| effect.name), " ")
        ),
        format!("modes: {}", o.modes.join(", ")),
        format!(
            "models: {}",
            names(o.models.iter().map(|model| model.name), ", ")
        ),
        format!(
            "commands: {} classified across {} effect classes",
            o.known_commands, effect_count
        ),
        format!(
            "languages({}): {}",
            o.languages.len(),
            names(o.languages.iter().map(|language| language.name), " ")
        ),
        format!(
            "frameworks({}): {}",
            o.frameworks.len(),
            names(o.frameworks.iter().map(|framework| framework.name), " ")
        ),
        format!(
            "vms({}): {}",
            o.vms.len(),
            names(o.vms.iter().map(|vm| vm.name), " ")
        ),
        format!(
            "web_stacks({}): {}",
            o.web_stacks.len(),
            names(o.web_stacks.iter().map(|stack| stack.name), " ")
        ),
        String::from(
            "describe(<axis|effect|model|language|framework|vm|web|\"axes\"|\"effects\"|\"models\"|\"languages\"|\"frameworks\"|\"vms\"|\"web\">) for detail",
        ),
    ];

    sections.join("\n")
}
/// Expands a manifest keyword or a single named item into detail text.
///
/// The query is trimmed and ASCII-lowercased before matching. Lookup order:
///
/// 1. Section keywords: `axes`, `effects`, `models`, `modes`, `commands`,
///    `languages`, `frameworks`, `vms`, and `web` / `web-stacks` /
///    `web_stacks`.
/// 2. A language, framework, VM system, or web stack name (in that order),
///    rendered as its profile followed by one `" - "` bullet per evidence item.
/// 3. An axis name, then an effect name, then a model name.
///
/// Returns `None` when nothing matches.
///
/// The full [`Ontology`] is only built on the paths that read it; ranking
/// keywords and subject-name lookups go straight to their own modules.
pub fn describe(query: &str) -> Option<String> {
    /// Joins already-rendered entries, one per line.
    fn lines(entries: impl Iterator<Item = String>) -> String {
        entries.collect::<Vec<_>>().join("\n")
    }

    // Renders a profile via `Display`, then appends one bullet per evidence
    // item. A macro rather than a function because the four profile types
    // live in separate modules and share no trait visible here.
    macro_rules! profile_with_evidence {
        ($profile:expr) => {{
            let profile = $profile;
            let mut text = profile.to_string();
            for item in &profile.evidence {
                text.push_str("\n - ");
                text.push_str(item);
            }
            text
        }};
    }

    let q = query.trim().to_ascii_lowercase();

    match q.as_str() {
        "axes" => return Some(lines(ontology().axes.iter().map(describe_axis))),
        "effects" => return Some(lines(ontology().effects.iter().map(describe_effect))),
        "models" => {
            return Some(lines(
                ontology()
                    .models
                    .iter()
                    .map(|m| format!("{} (exact={})", m.name, m.exact)),
            ))
        }
        "modes" => return Some(ontology().modes.join(", ")),
        "commands" => {
            return Some(format!(
                "{} CLI commands classified; describe an effect (e.g. \"network\") for examples",
                ontology().known_commands
            ))
        }
        "languages" => {
            return Some(lines(
                crate::languages::rank_languages()
                    .iter()
                    .map(|p| p.to_string()),
            ))
        }
        "frameworks" => {
            return Some(lines(
                crate::frameworks::rank_frameworks()
                    .iter()
                    .map(|p| p.to_string()),
            ))
        }
        "vms" => return Some(lines(crate::vms::rank_vms().iter().map(|p| p.to_string()))),
        "web" | "web-stacks" | "web_stacks" => {
            return Some(lines(
                crate::web::rank_web_stacks().iter().map(|p| p.to_string()),
            ))
        }
        _ => {}
    }

    // Subject names are checked before axes/effects/models.
    if let Some(l) = crate::languages::Language::from_name(&q) {
        return Some(profile_with_evidence!(crate::languages::profile(l)));
    }
    if let Some(fw) = crate::frameworks::Framework::from_name(&q) {
        return Some(profile_with_evidence!(crate::frameworks::profile(fw)));
    }
    if let Some(v) = crate::vms::Vm::from_name(&q) {
        return Some(profile_with_evidence!(crate::vms::profile(v)));
    }
    if let Some(w) = crate::web::WebStack::from_name(&q) {
        return Some(profile_with_evidence!(crate::web::profile(w)));
    }

    // Only the remaining lookups need the assembled ontology.
    let o = ontology();
    if let Some(a) = o.axes.iter().find(|a| a.name == q) {
        return Some(describe_axis(a));
    }
    if let Some(e) =
        Effect::from_name(&q).and_then(|e| o.effects.iter().find(|d| d.name == e.name()))
    {
        return Some(describe_effect(e));
    }
    Model::from_name(&q).map(|m| format!("{} (exact={})", m.name(), m.is_exact()))
}
fn describe_axis(a: &AxisDoc) -> String {
format!(
"axis {}: {}\n output: {} needs_execution: {}\n entry_points: {}",
a.name,
a.summary,
a.output_type,
a.needs_execution,
a.entry_points.join(", ")
)
}
fn describe_effect(e: &EffectDoc) -> String {
format!(
"effect {}: {}\n dangerous: {} human={} agent={}\n e.g. {}",
e.name,
e.summary,
e.dangerous,
e.human_decision,
e.agent_decision,
if e.example_commands.is_empty() {
"(none)".to_string()
} else {
e.example_commands.join(", ")
}
)
}
/// Full text rendering: the manifest, then a `# axes` section and a
/// `# effects` section with one expanded entry per item of `self`.
impl std::fmt::Display for Ontology {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // NOTE(review): `manifest()` takes no arguments, so this header is not
        // derived from `self`; only the two sections below read `self`. An
        // `Ontology` whose public fields were edited would print a header that
        // disagrees with its body — confirm whether that is intended.
        let header = manifest();
        writeln!(f, "{header}")?;

        f.write_str("\n# axes\n")?;
        self.axes
            .iter()
            .try_for_each(|axis| writeln!(f, "{}", describe_axis(axis)))?;

        f.write_str("\n# effects\n")?;
        self.effects
            .iter()
            .try_for_each(|effect| writeln!(f, "{}", describe_effect(effect)))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The manifest names the crate, its version, every axis and effect, and
    /// each subject index, while staying under the size budget.
    #[test]
    fn manifest_is_compact_and_lists_every_section() {
        let text = manifest();

        assert!(text.contains("agentic-eval"));
        assert!(text.contains(VERSION));

        for axis in ["tokens", "determinism", "reliability", "safety"] {
            assert!(text.contains(axis), "manifest lists axis {axis}: {text}");
        }
        for effect in Effect::all() {
            let name = effect.name();
            assert!(text.contains(name), "manifest lists effect {}", name);
        }

        for (marker, label) in [
            ("languages(", "manifest lists languages"),
            ("frameworks(", "manifest lists frameworks"),
            ("vms(", "manifest lists vms"),
            ("web_stacks(", "manifest lists web stacks"),
        ] {
            assert!(text.contains(marker), "{label}");
        }

        assert!(text.contains("mechgen") && text.contains("rmi"));
        assert!(text.contains("aethervm") && text.contains("firecracker"));
        assert!(text.contains("spine") && text.contains("grpc"));

        let size = text.len();
        assert!(size < 1800, "manifest stays compact ({} bytes)", size);
    }

    /// Section keywords list ranked subjects; subject names (and aliases)
    /// expand to a profile with evidence bullets.
    #[test]
    fn describe_expands_languages_frameworks_vms_and_web() {
        let language_listing = describe("languages").unwrap();
        assert!(language_listing.contains("rust") && language_listing.contains("fitness"));

        let framework_listing = describe("frameworks").unwrap();
        assert!(
            framework_listing.contains("pytorch") && framework_listing.contains("discoverability")
        );

        let vm_listing = describe("vms").unwrap();
        assert!(vm_listing.contains("firecracker") && vm_listing.contains("agent-control"));

        let web = describe("web").unwrap();
        assert!(
            web.contains("spine") && web.contains("streaming"),
            "describe(\"web\") should list ranked web stacks with the streaming axis"
        );
        assert_eq!(
            describe("web-stacks").unwrap(),
            web,
            "describe(\"web-stacks\") alias matches describe(\"web\")"
        );

        let rust = describe("rust").unwrap();
        assert!(rust.contains("reliability") && rust.contains("\n - "));

        let torch = describe("torch").unwrap();
        assert!(torch.contains("pytorch"));

        let aether = describe("aethervm").unwrap();
        assert!(aether.contains("snapshot") && aether.contains("\n - "));

        let kvm = describe("kvm").unwrap();
        assert!(kvm.contains("qemu-kvm"));

        let spine = describe("spine").unwrap();
        assert!(
            spine.contains("fitness") && spine.contains("\n - "),
            "describe(\"spine\") expands to profile + evidence bullets"
        );

        let openai = describe("openai").unwrap();
        assert!(openai.contains("openai-api"));

        let destructive = describe("destructive").unwrap();
        assert!(destructive.contains("agent="));
    }

    /// Section sizes and the key safety decisions are pinned.
    #[test]
    fn ontology_is_complete_and_consistent() {
        let o = ontology();

        assert_eq!(o.axes.len(), 4);
        assert_eq!(o.effects.len(), 8);
        assert_eq!(o.modes.len(), 2);
        assert_eq!(o.models.len(), 4);
        assert_eq!(o.web_stacks.len(), 7);
        assert!(
            o.web_stacks.iter().any(|w| w.name == "spine"),
            "web_stacks index includes SPINE"
        );
        assert!(o.known_commands > 100, "classifier ontology is substantial");

        let effect_named = |wanted: &str| o.effects.iter().find(|e| e.name == wanted).unwrap();

        let destructive = effect_named("destructive");
        assert!(destructive.dangerous);
        assert_eq!(destructive.human_decision, "allow");
        assert_eq!(destructive.agent_decision, "approve");

        let privileged = effect_named("privileged");
        assert_eq!(privileged.agent_decision, "deny");
    }

    /// Axis, effect, and model names resolve (case-insensitively); unknown
    /// names yield `None`.
    #[test]
    fn describe_expands_axes_effects_models_and_keywords() {
        assert!(describe("safety").unwrap().contains("assess_safety"));
        assert!(describe("TOKENS").unwrap().contains("AgentCost"));

        let destructive = describe("destructive").unwrap();
        assert!(destructive.contains("agent=approve"));

        assert!(describe("gpt4").unwrap().contains("cl100k"));
        assert!(describe("effects").unwrap().contains("privileged"));
        assert!(describe("models").unwrap().contains("heuristic"));
        assert!(describe("does-not-exist").is_none());
    }

    /// Repeated calls produce identical text.
    #[test]
    fn manifest_and_describe_are_deterministic() {
        assert_eq!(manifest(), manifest());
        assert_eq!(describe("effects"), describe("effects"));
        assert_eq!(ontology().to_string(), ontology().to_string());
    }

    /// `VERSION` is the Cargo package version and appears in the manifest.
    #[test]
    fn version_matches_the_crate() {
        assert_eq!(VERSION, env!("CARGO_PKG_VERSION"));
        assert!(manifest().contains(env!("CARGO_PKG_VERSION")));
    }
}