1use crate::commands::{commands_for, known_command_count};
30use crate::safety::{Effect, Mode};
31use crate::tokens::Model;
32
33pub const VERSION: &str = env!("CARGO_PKG_VERSION");
35
/// Description of one evaluation axis.
///
/// All fields are `'static` data, so values are cheap to clone and can be
/// compared with `==`.
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AxisDoc {
    /// Short axis identifier (for example `"tokens"` or `"safety"`).
    pub name: &'static str,
    /// One-sentence, human-readable explanation of what the axis measures.
    pub summary: &'static str,
    /// Names listed as the entry points for this axis.
    pub entry_points: &'static [&'static str],
    /// Flag recorded per axis; presumably whether assessing the axis requires
    /// actually running the program under evaluation — TODO confirm.
    pub needs_execution: bool,
    /// Name(s) of the report type(s) the axis yields, separated by ` | ` when
    /// there are several.
    pub output_type: &'static str,
}
52
/// Description of one effect class and how it is treated under each mode.
///
/// Values can be cloned and compared with `==`.
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EffectDoc {
    /// Effect identifier (for example `"destructive"`).
    pub name: &'static str,
    /// Human-readable explanation of the effect.
    pub summary: &'static str,
    /// Whether the effect is classified as dangerous.
    pub dangerous: bool,
    /// Name of the decision applied to this effect in human mode.
    pub human_decision: &'static str,
    /// Name of the decision applied to this effect in agent mode.
    pub agent_decision: &'static str,
    /// A few sample commands classified under this effect; may be empty.
    pub example_commands: Vec<&'static str>,
}
70
/// Description of one token-counting model.
///
/// Values can be cloned and compared with `==`.
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ModelDoc {
    /// Model identifier.
    pub name: &'static str,
    /// Exactness flag reported by the model; presumably distinguishes exact
    /// token counts from heuristic estimates — TODO confirm.
    pub exact: bool,
}
80
81#[cfg_attr(feature = "serde", derive(serde::Serialize))]
83#[derive(Debug, Clone)]
84pub struct Ontology {
85 pub crate_name: &'static str,
87 pub version: &'static str,
89 pub summary: &'static str,
91 pub axes: Vec<AxisDoc>,
93 pub effects: Vec<EffectDoc>,
95 pub modes: Vec<&'static str>,
97 pub models: Vec<ModelDoc>,
99 pub known_commands: usize,
101 pub languages: Vec<SubjectDoc>,
103 pub frameworks: Vec<SubjectDoc>,
105 pub vms: Vec<SubjectDoc>,
107 pub web_stacks: Vec<SubjectDoc>,
110}
111
/// Index entry for one evaluated subject (a language, framework, VM, or web
/// stack): its name and overall fitness score.
///
/// Implements `PartialEq` but not `Eq`, because `fitness` is a float; a NaN
/// fitness makes a value compare unequal to itself.
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Debug, Clone, PartialEq)]
pub struct SubjectDoc {
    /// Subject identifier.
    pub name: &'static str,
    /// Fitness score taken from the subject's profile.
    pub fitness: f64,
}
122
123pub fn axes() -> Vec<AxisDoc> {
125 vec![
126 AxisDoc {
127 name: "tokens",
128 summary: "token efficiency: the four cost terms an agent pays — standing \
129 context, input, output, retries — amortized over a session; plus \
130 output scaling (per-item cost) and prompt-cache savings",
131 entry_points: &[
132 "evaluate",
133 "evaluate_with",
134 "compare",
135 "rank",
136 "rank_with",
137 "assess_scaling",
138 "assess_cache",
139 "cacheable_prefix_tokens",
140 ],
141 needs_execution: false,
142 output_type: "AgentCost | ScalingReport | CacheReport",
143 },
144 AxisDoc {
145 name: "determinism",
146 summary: "whether a program's output is byte-identical across repeated runs \
147 (so an agent can parse, cache, and diff it)",
148 entry_points: &["assess_determinism", "stable_across"],
149 needs_execution: true,
150 output_type: "DeterminismReport",
151 },
152 AxisDoc {
153 name: "reliability",
154 summary: "success rate over representative invocations, whether failures are \
155 structured/actionable rather than dead ends, and graded error \
156 quality (code/message/location/fix)",
157 entry_points: &["assess_reliability", "assess_error_quality"],
158 needs_execution: true,
159 output_type: "ReliabilityReport | ErrorQualityReport",
160 },
161 AxisDoc {
162 name: "safety",
163 summary: "the fraction of a program's dangerous blast radius that is gated \
164 (approval/denied) under an agent policy; plus reversibility \
165 (recoverable blast radius) and data-exfiltration exposure",
166 entry_points: &[
167 "assess_safety",
168 "assess_safety_named",
169 "assess_safety_script",
170 "assess_reversibility",
171 "assess_exfiltration",
172 ],
173 needs_execution: false,
174 output_type: "SafetyReport | ReversibilityReport | ExfiltrationReport",
175 },
176 ]
177}
178
179pub fn effects() -> Vec<EffectDoc> {
182 Effect::all()
183 .into_iter()
184 .map(|e| EffectDoc {
185 name: e.name(),
186 summary: e.summary(),
187 dangerous: e.is_dangerous(),
188 human_decision: e.decision(Mode::Human).name(),
189 agent_decision: e.decision(Mode::Agent).name(),
190 example_commands: commands_for(e).iter().take(4).copied().collect(),
191 })
192 .collect()
193}
194
195pub fn models() -> Vec<ModelDoc> {
197 Model::all()
198 .into_iter()
199 .map(|m| ModelDoc {
200 name: m.name(),
201 exact: m.is_exact(),
202 })
203 .collect()
204}
205
206pub fn languages() -> Vec<SubjectDoc> {
208 crate::languages::Language::all()
209 .iter()
210 .map(|&l| {
211 let p = crate::languages::profile(l);
212 SubjectDoc {
213 name: l.name(),
214 fitness: p.fitness(),
215 }
216 })
217 .collect()
218}
219
220pub fn frameworks() -> Vec<SubjectDoc> {
222 crate::frameworks::Framework::all()
223 .iter()
224 .map(|&f| {
225 let p = crate::frameworks::profile(f);
226 SubjectDoc {
227 name: f.name(),
228 fitness: p.fitness(),
229 }
230 })
231 .collect()
232}
233
234pub fn vms() -> Vec<SubjectDoc> {
236 crate::vms::Vm::all()
237 .iter()
238 .map(|&v| {
239 let p = crate::vms::profile(v);
240 SubjectDoc {
241 name: v.name(),
242 fitness: p.fitness(),
243 }
244 })
245 .collect()
246}
247
248pub fn web_stacks() -> Vec<SubjectDoc> {
250 crate::web::WebStack::all()
251 .iter()
252 .map(|&w| {
253 let p = crate::web::profile(w);
254 SubjectDoc {
255 name: w.name(),
256 fitness: p.fitness(),
257 }
258 })
259 .collect()
260}
261
262pub fn ontology() -> Ontology {
264 Ontology {
265 crate_name: "agentic-eval",
266 version: VERSION,
267 summary: "evaluate programs, programming languages, AI frameworks, \
268 VM/sandbox systems, and web stacks / wire protocols for \
269 agentic AI use across four axes — token efficiency, \
270 determinism, reliability, and safety (frameworks add \
271 discoverability; VM systems use agent-native axes: \
272 start-latency, density, isolation, snapshotting, agent-control; \
273 web stacks use streaming, tool-discoverability, \
274 encoding-efficiency, interop, security-primitives)",
275 axes: axes(),
276 effects: effects(),
277 modes: Mode::all().iter().map(|m| m.name()).collect(),
278 models: models(),
279 known_commands: known_command_count(),
280 languages: languages(),
281 frameworks: frameworks(),
282 vms: vms(),
283 web_stacks: web_stacks(),
284 }
285}
286
287pub fn manifest() -> String {
291 let o = ontology();
292 let mut s = String::new();
293 s.push_str(&format!("{} {} — {}\n", o.crate_name, o.version, o.summary));
294 s.push_str("axes: ");
295 s.push_str(&o.axes.iter().map(|a| a.name).collect::<Vec<_>>().join(", "));
296 s.push_str(&format!("\neffects({}): ", o.effects.len()));
297 s.push_str(
298 &o.effects
299 .iter()
300 .map(|e| e.name)
301 .collect::<Vec<_>>()
302 .join(" "),
303 );
304 s.push_str("\nmodes: ");
305 s.push_str(&o.modes.join(", "));
306 s.push_str("\nmodels: ");
307 s.push_str(
308 &o.models
309 .iter()
310 .map(|m| m.name)
311 .collect::<Vec<_>>()
312 .join(", "),
313 );
314 s.push_str(&format!(
315 "\ncommands: {} classified across {} effect classes",
316 o.known_commands,
317 o.effects.len()
318 ));
319 s.push_str(&format!("\nlanguages({}): ", o.languages.len()));
320 s.push_str(
321 &o.languages
322 .iter()
323 .map(|l| l.name)
324 .collect::<Vec<_>>()
325 .join(" "),
326 );
327 s.push_str(&format!("\nframeworks({}): ", o.frameworks.len()));
328 s.push_str(
329 &o.frameworks
330 .iter()
331 .map(|f| f.name)
332 .collect::<Vec<_>>()
333 .join(" "),
334 );
335 s.push_str(&format!("\nvms({}): ", o.vms.len()));
336 s.push_str(&o.vms.iter().map(|v| v.name).collect::<Vec<_>>().join(" "));
337 s.push_str(&format!("\nweb_stacks({}): ", o.web_stacks.len()));
338 s.push_str(
339 &o.web_stacks
340 .iter()
341 .map(|w| w.name)
342 .collect::<Vec<_>>()
343 .join(" "),
344 );
345 s.push_str(
346 "\ndescribe(<axis|effect|model|language|framework|vm|web|\"axes\"|\"effects\"|\"models\"|\"languages\"|\"frameworks\"|\"vms\"|\"web\">) for detail",
347 );
348 s
349}
350
351pub fn describe(query: &str) -> Option<String> {
356 let q = query.trim().to_ascii_lowercase();
357 let o = ontology();
358
359 match q.as_str() {
361 "axes" => {
362 return Some(
363 o.axes
364 .iter()
365 .map(describe_axis)
366 .collect::<Vec<_>>()
367 .join("\n"),
368 )
369 }
370 "effects" => {
371 return Some(
372 o.effects
373 .iter()
374 .map(describe_effect)
375 .collect::<Vec<_>>()
376 .join("\n"),
377 )
378 }
379 "models" => {
380 return Some(
381 o.models
382 .iter()
383 .map(|m| format!("{} (exact={})", m.name, m.exact))
384 .collect::<Vec<_>>()
385 .join("\n"),
386 )
387 }
388 "modes" => return Some(o.modes.join(", ")),
389 "commands" => {
390 return Some(format!(
391 "{} CLI commands classified; describe an effect (e.g. \"network\") for examples",
392 o.known_commands
393 ))
394 }
395 "languages" => {
396 return Some(
397 crate::languages::rank_languages()
398 .iter()
399 .map(|p| p.to_string())
400 .collect::<Vec<_>>()
401 .join("\n"),
402 )
403 }
404 "frameworks" => {
405 return Some(
406 crate::frameworks::rank_frameworks()
407 .iter()
408 .map(|p| p.to_string())
409 .collect::<Vec<_>>()
410 .join("\n"),
411 )
412 }
413 "vms" => {
414 return Some(
415 crate::vms::rank_vms()
416 .iter()
417 .map(|p| p.to_string())
418 .collect::<Vec<_>>()
419 .join("\n"),
420 )
421 }
422 "web" | "web-stacks" | "web_stacks" => {
423 return Some(
424 crate::web::rank_web_stacks()
425 .iter()
426 .map(|p| p.to_string())
427 .collect::<Vec<_>>()
428 .join("\n"),
429 )
430 }
431 _ => {}
432 }
433
434 if let Some(l) = crate::languages::Language::from_name(&q) {
436 let p = crate::languages::profile(l);
437 let mut s = p.to_string();
438 for e in &p.evidence {
439 s.push_str("\n - ");
440 s.push_str(e);
441 }
442 return Some(s);
443 }
444 if let Some(fw) = crate::frameworks::Framework::from_name(&q) {
446 let p = crate::frameworks::profile(fw);
447 let mut s = p.to_string();
448 for e in &p.evidence {
449 s.push_str("\n - ");
450 s.push_str(e);
451 }
452 return Some(s);
453 }
454 if let Some(v) = crate::vms::Vm::from_name(&q) {
456 let p = crate::vms::profile(v);
457 let mut s = p.to_string();
458 for e in &p.evidence {
459 s.push_str("\n - ");
460 s.push_str(e);
461 }
462 return Some(s);
463 }
464 if let Some(w) = crate::web::WebStack::from_name(&q) {
466 let p = crate::web::profile(w);
467 let mut s = p.to_string();
468 for e in &p.evidence {
469 s.push_str("\n - ");
470 s.push_str(e);
471 }
472 return Some(s);
473 }
474
475 if let Some(a) = o.axes.iter().find(|a| a.name == q) {
477 return Some(describe_axis(a));
478 }
479 if let Some(e) =
481 Effect::from_name(&q).and_then(|e| o.effects.iter().find(|d| d.name == e.name()))
482 {
483 return Some(describe_effect(e));
484 }
485 if let Some(m) = Model::from_name(&q) {
487 return Some(format!("{} (exact={})", m.name(), m.is_exact()));
488 }
489 None
490}
491
492fn describe_axis(a: &AxisDoc) -> String {
493 format!(
494 "axis {}: {}\n output: {} needs_execution: {}\n entry_points: {}",
495 a.name,
496 a.summary,
497 a.output_type,
498 a.needs_execution,
499 a.entry_points.join(", ")
500 )
501}
502
503fn describe_effect(e: &EffectDoc) -> String {
504 format!(
505 "effect {}: {}\n dangerous: {} human={} agent={}\n e.g. {}",
506 e.name,
507 e.summary,
508 e.dangerous,
509 e.human_decision,
510 e.agent_decision,
511 if e.example_commands.is_empty() {
512 "(none)".to_string()
513 } else {
514 e.example_commands.join(", ")
515 }
516 )
517}
518
519impl std::fmt::Display for Ontology {
520 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
523 writeln!(f, "{}", manifest())?;
524 writeln!(f, "\n# axes")?;
525 for a in &self.axes {
526 writeln!(f, "{}", describe_axis(a))?;
527 }
528 writeln!(f, "\n# effects")?;
529 for e in &self.effects {
530 writeln!(f, "{}", describe_effect(e))?;
531 }
532 Ok(())
533 }
534}
535
#[cfg(test)]
mod tests {
    use super::*;

    /// The manifest names the crate, every axis and effect, every subject
    /// section, a few representative subjects, and stays small.
    #[test]
    fn manifest_is_compact_and_lists_every_section() {
        let text = manifest();

        assert!(text.contains("agentic-eval"));
        assert!(text.contains(VERSION));

        for axis in ["tokens", "determinism", "reliability", "safety"] {
            assert!(text.contains(axis), "manifest lists axis {axis}: {text}");
        }
        for effect in Effect::all() {
            assert!(
                text.contains(effect.name()),
                "manifest lists effect {}",
                effect.name()
            );
        }

        // Section headers carry a count, hence the trailing parenthesis.
        let sections = [
            ("languages(", "languages"),
            ("frameworks(", "frameworks"),
            ("vms(", "vms"),
            ("web_stacks(", "web stacks"),
        ];
        for (header, label) in sections {
            assert!(text.contains(header), "manifest lists {label}");
        }

        // Representative subject names from the framework, VM, and web sections.
        assert!(text.contains("mechgen") && text.contains("rmi"));
        assert!(text.contains("aethervm") && text.contains("firecracker"));
        assert!(text.contains("spine") && text.contains("grpc"));

        assert!(
            text.len() < 1800,
            "manifest stays compact ({} bytes)",
            text.len()
        );
    }

    /// Collection keywords give ranked listings; individual subject names
    /// give a profile followed by evidence bullets.
    #[test]
    fn describe_expands_languages_frameworks_vms_and_web() {
        // Ranked collections.
        let language_list = describe("languages").unwrap();
        assert!(language_list.contains("rust") && language_list.contains("fitness"));

        let framework_list = describe("frameworks").unwrap();
        assert!(framework_list.contains("pytorch") && framework_list.contains("discoverability"));

        let vm_list = describe("vms").unwrap();
        assert!(vm_list.contains("firecracker") && vm_list.contains("agent-control"));

        let web_list = describe("web").unwrap();
        assert!(
            web_list.contains("spine") && web_list.contains("streaming"),
            "describe(\"web\") should list ranked web stacks with the streaming axis"
        );
        assert_eq!(
            describe("web-stacks").unwrap(),
            web_list,
            "describe(\"web-stacks\") alias matches describe(\"web\")"
        );

        // Individual subjects, including alias lookups.
        let rust_detail = describe("rust").unwrap();
        assert!(rust_detail.contains("reliability") && rust_detail.contains("\n - "));

        let torch_detail = describe("torch").unwrap();
        assert!(torch_detail.contains("pytorch"));

        let aether_detail = describe("aethervm").unwrap();
        assert!(aether_detail.contains("snapshot") && aether_detail.contains("\n - "));

        assert!(describe("kvm").unwrap().contains("qemu-kvm"));

        let spine_detail = describe("spine").unwrap();
        assert!(
            spine_detail.contains("fitness") && spine_detail.contains("\n - "),
            "describe(\"spine\") expands to profile + evidence bullets"
        );

        assert!(describe("openai").unwrap().contains("openai-api"));

        // Effect names still resolve after the subject lookups.
        assert!(describe("destructive").unwrap().contains("agent="));
    }

    /// Section sizes and the policy decisions for the dangerous effects.
    #[test]
    fn ontology_is_complete_and_consistent() {
        let onto = ontology();

        assert_eq!(onto.axes.len(), 4);
        assert_eq!(onto.effects.len(), 8);
        assert_eq!(onto.modes.len(), 2);
        assert_eq!(onto.models.len(), 4);
        assert_eq!(onto.web_stacks.len(), 7);
        assert!(
            onto.web_stacks.iter().any(|stack| stack.name == "spine"),
            "web_stacks index includes SPINE"
        );
        assert!(
            onto.known_commands > 100,
            "classifier ontology is substantial"
        );

        let destructive = onto
            .effects
            .iter()
            .find(|effect| effect.name == "destructive")
            .unwrap();
        assert!(destructive.dangerous);
        assert_eq!(destructive.human_decision, "allow");
        assert_eq!(destructive.agent_decision, "approve");

        let privileged = onto
            .effects
            .iter()
            .find(|effect| effect.name == "privileged")
            .unwrap();
        assert_eq!(privileged.agent_decision, "deny");
    }

    /// Axis, effect, and model names plus collection keywords all resolve;
    /// unknown queries yield `None`.
    #[test]
    fn describe_expands_axes_effects_models_and_keywords() {
        assert!(describe("safety").unwrap().contains("assess_safety"));
        // Queries are case-insensitive.
        assert!(describe("TOKENS").unwrap().contains("AgentCost"));

        let destructive = describe("destructive").unwrap();
        assert!(destructive.contains("agent=approve"));

        assert!(describe("gpt4").unwrap().contains("cl100k"));
        assert!(describe("effects").unwrap().contains("privileged"));
        assert!(describe("models").unwrap().contains("heuristic"));
        assert!(describe("does-not-exist").is_none());
    }

    /// Repeated calls produce identical text.
    #[test]
    fn manifest_and_describe_are_deterministic() {
        assert_eq!(manifest(), manifest());
        assert_eq!(describe("effects"), describe("effects"));
        assert_eq!(ontology().to_string(), ontology().to_string());
    }

    /// `VERSION` is the Cargo package version and appears in the manifest.
    #[test]
    fn version_matches_the_crate() {
        assert_eq!(VERSION, env!("CARGO_PKG_VERSION"));
        assert!(manifest().contains(env!("CARGO_PKG_VERSION")));
    }
}