web_benchmark/
web_benchmark.rs

1//! Benchmark **web stacks / wire protocols** for agentic AI use — the
2//! protocol an agent actually has to speak when it calls another service.
3//!
4//! Ranks SPINE against the OpenAI API, Anthropic API, MCP, gRPC, plain
5//! HTTP+JSON, and GraphQL on five agent-native axes (streaming,
6//! tool-discoverability, encoding-efficiency, interop, security-primitives),
7//! then shows the SPINE-vs-OpenAI head-to-head and the evidence.
8//!
9//! Run: `cargo run -p agentic-eval --example web_benchmark`
10
11use agentic_eval::web::{compare_web_stacks, profile, rank_web_stacks, WebStack};
12
13fn main() {
14    println!("agentic-eval — web stacks / wire protocols for agentic AI use");
15    println!("axes: streaming, tool-discoverability, encoding, interop, security\n");
16
17    // ── Ranked benchmark (best-first by composite agentic fitness) ───────────
18    println!(
19        "{:<15} {:>7}   {:>9} {:>5} {:>8} {:>7} {:>8}",
20        "stack", "fitness", "streaming", "tools", "encoding", "interop", "security"
21    );
22    for p in rank_web_stacks() {
23        println!(
24            "{:<15} {:>7.2}   {:>9.2} {:>5.2} {:>8.2} {:>7.2} {:>8.2}",
25            p.stack.name(),
26            p.fitness(),
27            p.streaming,
28            p.tool_discoverability,
29            p.encoding_efficiency,
30            p.interop,
31            p.security_primitives,
32        );
33    }
34
35    // ── Head-to-head: SPINE vs the OpenAI API (the dominant baseline) ───────
36    println!("\nhead-to-head (positive = SPINE fits agentic use better):");
37    print!("{}", compare_web_stacks(WebStack::Spine, WebStack::OpenAiApi));
38
39    // ── Evidence behind SPINE's profile ─────────────────────────────────────
40    println!("\nwhy SPINE scores where it does:");
41    for e in &profile(WebStack::Spine).evidence {
42        println!("  - {e}");
43    }
44
45    println!(
46        "\nReading: SPINE now leads the composite (0.90), edging gRPC (0.83).\n\
47         It was always strong on the agent-native axes it was designed for\n\
48         (LLM StreamStart/Token/End frames with multiplex-aware StreamCancel +\n\
49         mid-stream usage, a CapabilityQuery handshake, inline W3C TraceContext).\n\
50         v1.4.0's CBOR wire format plus v1.5.0's byte-string tensor payloads\n\
51         bring encoding to parity with protobuf (0.95; 89% smaller embedding\n\
52         frames), and per-message Ed25519 signed frames give message-level\n\
53         non-repudiation beyond channel mTLS (security 0.95). The inflection is\n\
54         three deployable bridges: a runnable MCP stdio server (v1.6.0), the\n\
55         OpenAI-compatible gateway, and a production-grade gRPC AgentService\n\
56         (v1.8.0, reflection-enabled + real-model-backed in v1.9.0) — reachable\n\
57         from the three dominant agent ecosystems with standard client stubs,\n\
58         lifting interop 0.15 -> 0.67. Honest\n\
59         caveat: interop is still SPINE's weakest axis — the bridges map the\n\
60         agentic surface (not SPINE's native binary frames) and SPINE's own\n\
61         protocol has ~zero native install base."
62    );
63}
web_benchmark/web_benchmark.rs

web_benchmark/
web_benchmark.rs