adk_bench/lib.rs
1//! # adk-bench
2//!
3//! A comprehensive benchmarking framework for ADK-Rust that measures
4//! framework-level runtime performance using real LLM APIs.
5//!
6//! `adk-bench` isolates framework overhead from LLM latency through precise
7//! per-call instrumentation. It also supports concurrent agent throughput
8//! testing, memory profiling, and external framework comparison via subprocess
9//! execution with a standardized JSON protocol (External Benchmark Protocol).
10//!
11//! ## Features
12//!
13//! - **Cold start measurement**: Time from binary launch to the first LLM call
14//! - **Agent loop overhead**: Per-turn framework processing latency (excluding LLM time)
15//! - **Concurrent throughput**: Agents/second under Tokio async load
16//! - **Memory footprint**: Platform-specific RSS sampling (Linux/macOS)
17//! - **Tool invocation latency**: Deserialization, validation, and dispatch timing
18//! - **Token overhead**: Framework-injected token cost analysis
19//! - **External comparison**: Subprocess-based competitor framework benchmarking
20//! - **Regression detection**: Baseline save/compare with configurable tolerance
21//!
22//! ## Feature Flags
23//!
24//! - `tau2` — Enables the τ²-bench task quality adapter
25//! - `bfcl` — Enables the BFCL (Berkeley Function Calling Leaderboard) adapter
26//!
27//! ## Quick Start
28//!
29//! ```rust,ignore
30//! use adk_bench::{BenchConfig, BenchRunner};
31//!
32//! let config = BenchConfig::default();
33//! let runner = BenchRunner::new(config);
34//! let results = runner.run().await?;
35//! ```
36
37pub mod config; // crate root re-exports: BenchConfig, ExternalFrameworkConfig, OutputFormat, TaskSuite
38pub mod error; // crate root re-exports: BenchError, Result
39pub mod external; // crate root re-exports: ExternalRunner, load_external_configs, and the other External* items
40pub mod formatter; // crate root re-exports: ComparisonResult, format_comparison, format_result
41pub mod instrumented_llm; // crate root re-exports: DeterministicConfig, InstrumentedLlm, LlmCallRecord
42pub mod memory; // no crate-root re-export in this file; items are reached via the `memory::` path
43pub mod metrics; // crate root re-exports: BenchmarkResult, MetricCollector, compute_stats, and more (see `pub use metrics`)
44pub mod runner; // crate root re-exports: BenchRunner, RegressionReport
45pub mod workload; // crate root re-exports: AgentConfig, ToolDefinition, Workload, and the *_workload(s) items
46
47/// Task quality adapters for established benchmark suites.
48pub mod adapters;
49pub use adapters::{CaseResult, TaskQualityAdapter, TaskQualityResult};
50
51// Public re-exports
52pub use config::{BenchConfig, ExternalFrameworkConfig, OutputFormat, TaskSuite};
53pub use error::{BenchError, Result};
54pub use external::{
55 ExternalConfigFile, ExternalDurationStats, ExternalMetricsOutput, ExternalRunner,
56 ExternalTokenOverhead, load_external_configs,
57};
58pub use formatter::{ComparisonResult, format_comparison, format_result};
59pub use instrumented_llm::{DeterministicConfig, InstrumentedLlm, LlmCallRecord};
60pub use metrics::{
61 BenchmarkResult, ConcurrencyLevel, DurationStats, MemoryMetrics, MetricCollector, RunMetadata,
62 ThroughputMetrics, TokenBreakdown, TokenOverheadMetrics, ToolInvocationMetrics, compute_stats,
63};
64pub use runner::{BenchRunner, RegressionReport};
65pub use workload::{
66 AgentConfig, ToolDefinition, Workload, builtin_workloads, load_workload,
67 multi_agent_delegation_workload,
68};