use serde::{Deserialize, Serialize};
use std::time::{Duration, Instant};
/// Summary statistics for a set of duration samples, in whole microseconds.
///
/// Produced by `compute_stats`. Serialized with camelCase keys (for example
/// `minUs`, `coefficientOfVariation`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct DurationStats {
/// Smallest sample.
pub min_us: u64,
/// Largest sample.
pub max_us: u64,
/// Arithmetic mean, computed with integer division.
pub mean_us: u64,
/// 50th percentile (nearest-rank).
pub median_us: u64,
/// 95th percentile (nearest-rank).
pub p95_us: u64,
/// 99th percentile (nearest-rank).
pub p99_us: u64,
/// Population standard deviation, truncated to whole microseconds.
pub std_dev_us: u64,
/// Number of samples the statistics were computed from.
pub count: usize,
/// Standard deviation divided by the mean; `0.0` when the mean is zero.
pub coefficient_of_variation: f64,
}
/// Top-level record of one benchmark run, serialized with camelCase keys.
///
/// The optional metric groups are left out of the serialized output when they
/// are `None` and deserialize to `None` when the key is missing.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct BenchmarkResult {
/// Version of the result schema; falls back to `default_schema_version()`
/// when the key is absent from the input.
#[serde(default = "default_schema_version")]
pub schema_version: u32,
/// Name of the workload that was run.
pub workload_name: String,
/// Model identifier (the tests use `"gemini-2.5-flash"`).
pub model: String,
/// Environment and version information for the run.
pub metadata: RunMetadata,
/// Cold-start duration statistics.
pub cold_start: DurationStats,
/// Agent-loop overhead duration statistics.
pub agent_loop_overhead: DurationStats,
/// Tool invocation latency statistics, if collected.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub tool_invocation: Option<ToolInvocationMetrics>,
/// Throughput measurements, if collected.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub throughput: Option<ThroughputMetrics>,
/// Memory measurements, if collected.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub memory: Option<MemoryMetrics>,
/// Token overhead measurements, if collected.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub token_overhead: Option<TokenOverheadMetrics>,
/// Reproducibility rate, if measured.
/// NOTE(review): the tests use `0.95`, which suggests a 0..=1 fraction — confirm.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub reproducibility_rate: Option<f64>,
/// Number of iterations in the run.
pub iterations: usize,
}
/// Schema version assumed when a serialized `BenchmarkResult` has no
/// `schemaVersion` key (wired in through `#[serde(default = ...)]`).
fn default_schema_version() -> u32 {
    const FALLBACK_SCHEMA_VERSION: u32 = 1;
    FALLBACK_SCHEMA_VERSION
}
/// Environment description attached to a benchmark result.
///
/// Every field is a free-form string; nothing in this file validates them.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct RunMetadata {
/// When the run happened (the tests use an RFC 3339-style value such as
/// `"2025-01-15T10:30:00Z"`).
pub timestamp: String,
/// ADK version string (e.g. `"0.5.0"` in the tests).
pub adk_version: String,
/// Rust toolchain version string (e.g. `"1.85.0"` in the tests).
pub rust_version: String,
/// Operating system name (e.g. `"linux"` in the tests).
pub os: String,
/// CPU architecture name (e.g. `"x86_64"` in the tests).
pub arch: String,
}
/// Duration statistics for tool invocations, split by phase.
///
/// The fields mirror those of `ToolLatencyRecord`; presumably each one
/// aggregates the matching field across records — the aggregation itself is
/// not in this file.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct ToolInvocationMetrics {
/// Statistics for the total invocation time.
pub total: DurationStats,
/// Statistics for the deserialization phase.
pub deserialization: DurationStats,
/// Statistics for the schema validation phase.
pub schema_validation: DurationStats,
/// Statistics for the execution dispatch phase.
pub execution_dispatch: DurationStats,
}
/// Throughput measurements, one entry per concurrency level.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct ThroughputMetrics {
/// Measurements per concurrency level.
pub levels: Vec<ConcurrencyLevel>,
}
/// Throughput measurement at a single concurrency setting.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct ConcurrencyLevel {
/// Concurrency setting this entry describes.
pub concurrency: usize,
/// Agents completed per second at this concurrency.
pub agents_per_second: f64,
/// Statistics over completion times at this concurrency.
pub completion_time: DurationStats,
}
/// Memory usage measurements for a run.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct MemoryMetrics {
/// Peak resident set size, in bytes.
pub peak_rss_bytes: u64,
/// Per-agent memory in bytes, if available; left out of the serialized
/// output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub per_agent_bytes: Option<u64>,
/// Number of memory samples behind these figures.
pub sample_count: usize,
}
/// Token accounting for a run: how many tokens were user content and how many
/// were overhead.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct TokenOverheadMetrics {
/// Total token count.
pub total_tokens: u64,
/// Tokens attributed to user content.
pub user_content_tokens: u64,
/// Tokens attributed to overhead.
pub overhead_tokens: u64,
/// Overhead as a percentage.
/// NOTE(review): the test fixture has 250 overhead out of 1200 total and
/// stores 20.83, i.e. overhead / total * 100 — confirm against the producer.
pub overhead_percentage: f64,
/// Overhead tokens split by source.
pub breakdown: TokenBreakdown,
}
/// Overhead token counts split by source.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct TokenBreakdown {
/// Tokens attributed to the system prompt.
pub system_prompt_tokens: u64,
/// Tokens attributed to tool schemas.
pub tool_schema_tokens: u64,
/// Tokens attributed to framework wrapping.
pub framework_wrapper_tokens: u64,
}
pub fn compute_stats(durations: &[Duration]) -> DurationStats {
if durations.is_empty() {
return DurationStats {
min_us: 0,
max_us: 0,
mean_us: 0,
median_us: 0,
p95_us: 0,
p99_us: 0,
std_dev_us: 0,
count: 0,
coefficient_of_variation: 0.0,
};
}
let mut micros: Vec<u64> = durations.iter().map(|d| d.as_micros() as u64).collect();
micros.sort_unstable();
let count = micros.len();
let min_us = micros[0];
let max_us = micros[count - 1];
let sum: u64 = micros.iter().sum();
let mean_us = sum / count as u64;
let median_us = percentile_nearest_rank(µs, 50.0);
let p95_us = percentile_nearest_rank(µs, 95.0);
let p99_us = percentile_nearest_rank(µs, 99.0);
let mean_f64 = sum as f64 / count as f64;
let variance: f64 = micros
.iter()
.map(|&v| {
let diff = v as f64 - mean_f64;
diff * diff
})
.sum::<f64>()
/ count as f64;
let std_dev_f64 = variance.sqrt();
let std_dev_us = std_dev_f64 as u64;
let coefficient_of_variation = if mean_f64 == 0.0 { 0.0 } else { std_dev_f64 / mean_f64 };
DurationStats {
min_us,
max_us,
mean_us,
median_us,
p95_us,
p99_us,
std_dev_us,
count,
coefficient_of_variation,
}
}
/// Returns the value at `percentile` (0–100) of an ascending-sorted slice
/// using the nearest-rank method.
///
/// The 1-based rank is `ceil(percentile / 100 * len)`, clamped to `1..=len`,
/// so out-of-range percentiles resolve to the first or last element. The
/// slice is expected to be sorted already; this is not checked.
///
/// An empty slice returns `0`, matching the all-zero statistics that
/// `compute_stats` reports for empty input, instead of panicking.
fn percentile_nearest_rank(sorted: &[u64], percentile: f64) -> u64 {
    if sorted.is_empty() {
        return 0;
    }
    let count = sorted.len();
    // Float-to-int casts saturate, so negative or NaN ranks become 0 here and
    // are then lifted to 1 by the clamp below.
    let rank = ((percentile / 100.0) * count as f64).ceil() as usize;
    sorted[rank.clamp(1, count) - 1]
}
/// Timing breakdown recorded for a single tool invocation.
#[derive(Debug, Clone)]
pub struct ToolLatencyRecord {
    /// Total time recorded for the invocation.
    pub total: Duration,
    /// Time recorded for the deserialization phase.
    pub deserialization: Duration,
    /// Time recorded for the schema validation phase.
    pub schema_validation: Duration,
    /// Time recorded for the execution dispatch phase.
    pub execution_dispatch: Duration,
}

/// Accumulates raw measurements during a run: two timing markers plus lists
/// of turn overheads, tool latency records and memory samples.
///
/// The collector only stores what it is given; turning the samples into
/// statistics is left to the caller.
#[derive(Debug)]
pub struct MetricCollector {
    run_start: Option<Instant>,
    first_llm_call: Option<Instant>,
    turn_overheads: Vec<Duration>,
    tool_latencies: Vec<ToolLatencyRecord>,
    memory_samples: Vec<u64>,
}

impl MetricCollector {
    /// Creates a collector with no markers set and no samples recorded.
    pub fn new() -> Self {
        Self {
            run_start: None,
            first_llm_call: None,
            turn_overheads: Vec::new(),
            tool_latencies: Vec::new(),
            memory_samples: Vec::new(),
        }
    }

    /// Records the current instant as the start of the run, replacing any
    /// previously recorded start.
    pub fn mark_run_start(&mut self) {
        self.run_start = Some(Instant::now());
    }

    /// Records the current instant as the first LLM call.
    ///
    /// Only the first invocation takes effect; later calls leave the stored
    /// instant untouched.
    pub fn mark_first_llm_call(&mut self) {
        if self.first_llm_call.is_none() {
            self.first_llm_call = Some(Instant::now());
        }
    }

    /// Appends one turn overhead sample.
    pub fn record_turn_overhead(&mut self, overhead: Duration) {
        self.turn_overheads.push(overhead);
    }

    /// Appends one tool latency record.
    pub fn record_tool_latency(&mut self, record: ToolLatencyRecord) {
        self.tool_latencies.push(record);
    }

    /// Appends one memory sample, given as an RSS value in bytes.
    pub fn record_memory_sample(&mut self, rss_bytes: u64) {
        self.memory_samples.push(rss_bytes);
    }

    /// Returns the time from the run start marker to the first LLM call
    /// marker, or `None` until both have been recorded.
    ///
    /// If the run start was marked after the first LLM call, the result is a
    /// zero duration.
    pub fn cold_start_duration(&self) -> Option<Duration> {
        let start = self.run_start?;
        let first = self.first_llm_call?;
        // `duration_since` is documented as allowed to panic when `start` is
        // later than `first`; the saturating variant never does.
        Some(first.saturating_duration_since(start))
    }

    /// Turn overhead samples in the order they were recorded.
    pub fn turn_overheads(&self) -> &[Duration] {
        &self.turn_overheads
    }

    /// Tool latency records in the order they were recorded.
    pub fn tool_latencies(&self) -> &[ToolLatencyRecord] {
        &self.tool_latencies
    }

    /// Memory samples in the order they were recorded.
    pub fn memory_samples(&self) -> &[u64] {
        &self.memory_samples
    }
}

impl Default for MetricCollector {
    /// Equivalent to [`MetricCollector::new`].
    fn default() -> Self {
        Self::new()
    }
}
// Unit tests for the statistics helper, the metric collector and the serde
// representation of the result types.
#[cfg(test)]
mod tests {
use super::*;
// Empty input produces an all-zero summary.
#[test]
fn test_compute_stats_empty() {
let stats = compute_stats(&[]);
assert_eq!(stats.count, 0);
assert_eq!(stats.min_us, 0);
assert_eq!(stats.max_us, 0);
assert_eq!(stats.mean_us, 0);
assert_eq!(stats.median_us, 0);
assert_eq!(stats.p95_us, 0);
assert_eq!(stats.p99_us, 0);
assert_eq!(stats.std_dev_us, 0);
assert_eq!(stats.coefficient_of_variation, 0.0);
}
// With one sample every positional statistic equals that sample and the
// spread is zero.
#[test]
fn test_compute_stats_single_element() {
let durations = vec![Duration::from_micros(500)];
let stats = compute_stats(&durations);
assert_eq!(stats.count, 1);
assert_eq!(stats.min_us, 500);
assert_eq!(stats.max_us, 500);
assert_eq!(stats.mean_us, 500);
assert_eq!(stats.median_us, 500);
assert_eq!(stats.p95_us, 500);
assert_eq!(stats.p99_us, 500);
assert_eq!(stats.std_dev_us, 0);
assert_eq!(stats.coefficient_of_variation, 0.0);
}
// Five evenly spaced samples: with nearest-rank percentiles, p95 and p99 both
// land on the largest sample.
#[test]
fn test_compute_stats_multiple_elements() {
let durations = vec![
Duration::from_micros(100),
Duration::from_micros(200),
Duration::from_micros(300),
Duration::from_micros(400),
Duration::from_micros(500),
];
let stats = compute_stats(&durations);
assert_eq!(stats.count, 5);
assert_eq!(stats.min_us, 100);
assert_eq!(stats.max_us, 500);
assert_eq!(stats.mean_us, 300);
assert_eq!(stats.median_us, 300);
assert_eq!(stats.p95_us, 500);
assert_eq!(stats.p99_us, 500);
}
// The positional statistics must be ordered: min <= median <= p95 <= p99 <= max.
#[test]
fn test_compute_stats_ordering_invariant() {
let durations = vec![
Duration::from_micros(50),
Duration::from_micros(100),
Duration::from_micros(150),
Duration::from_micros(200),
Duration::from_micros(250),
Duration::from_micros(300),
Duration::from_micros(350),
Duration::from_micros(400),
Duration::from_micros(450),
Duration::from_micros(500),
];
let stats = compute_stats(&durations);
assert!(stats.min_us <= stats.median_us);
assert!(stats.median_us <= stats.p95_us);
assert!(stats.p95_us <= stats.p99_us);
assert!(stats.p99_us <= stats.max_us);
}
// Input order must not matter: the helper sorts its own copy of the samples.
#[test]
fn test_compute_stats_unsorted_input() {
let durations = vec![
Duration::from_micros(500),
Duration::from_micros(100),
Duration::from_micros(300),
Duration::from_micros(200),
Duration::from_micros(400),
];
let stats = compute_stats(&durations);
assert_eq!(stats.min_us, 100);
assert_eq!(stats.max_us, 500);
assert_eq!(stats.mean_us, 300);
}
// The cold-start duration is only available once both markers are set. The
// 1 ms sleep gives the measured interval a known lower bound.
#[test]
fn test_metric_collector_cold_start() {
let mut collector = MetricCollector::new();
assert!(collector.cold_start_duration().is_none());
collector.mark_run_start();
assert!(collector.cold_start_duration().is_none());
std::thread::sleep(Duration::from_millis(1));
collector.mark_first_llm_call();
let cold_start = collector.cold_start_duration().unwrap();
assert!(cold_start >= Duration::from_millis(1));
}
// A second `mark_first_llm_call` must not move the marker, so the reported
// duration stays the same.
#[test]
fn test_metric_collector_first_llm_call_only_once() {
let mut collector = MetricCollector::new();
collector.mark_run_start();
std::thread::sleep(Duration::from_millis(1));
collector.mark_first_llm_call();
let first_duration = collector.cold_start_duration().unwrap();
std::thread::sleep(Duration::from_millis(10));
collector.mark_first_llm_call();
let second_duration = collector.cold_start_duration().unwrap();
assert_eq!(first_duration, second_duration);
}
// Recorded turn overheads are all retained.
#[test]
fn test_metric_collector_turn_overheads() {
let mut collector = MetricCollector::new();
collector.record_turn_overhead(Duration::from_micros(100));
collector.record_turn_overhead(Duration::from_micros(200));
assert_eq!(collector.turn_overheads().len(), 2);
}
// Memory samples are returned in the order they were recorded.
#[test]
fn test_metric_collector_memory_samples() {
let mut collector = MetricCollector::new();
collector.record_memory_sample(1024);
collector.record_memory_sample(2048);
collector.record_memory_sample(4096);
assert_eq!(collector.memory_samples(), &[1024, 2048, 4096]);
}
// A recorded tool latency shows up in the collector.
#[test]
fn test_metric_collector_tool_latencies() {
let mut collector = MetricCollector::new();
collector.record_tool_latency(ToolLatencyRecord {
total: Duration::from_micros(500),
deserialization: Duration::from_micros(100),
schema_validation: Duration::from_micros(150),
execution_dispatch: Duration::from_micros(250),
});
assert_eq!(collector.tool_latencies().len(), 1);
}
// `DurationStats` survives a JSON round trip unchanged.
#[test]
fn test_duration_stats_serialization_round_trip() {
let stats = DurationStats {
min_us: 100,
max_us: 500,
mean_us: 300,
median_us: 300,
p95_us: 480,
p99_us: 499,
std_dev_us: 141,
count: 5,
coefficient_of_variation: 0.47,
};
let json = serde_json::to_string(&stats).unwrap();
let deserialized: DurationStats = serde_json::from_str(&json).unwrap();
assert_eq!(stats, deserialized);
}
// An all-zero input has a zero mean; the coefficient of variation must then be
// reported as 0.0 rather than the result of a division by zero.
#[test]
fn test_coefficient_of_variation_zero_mean() {
let durations = vec![Duration::from_micros(0), Duration::from_micros(0)];
let stats = compute_stats(&durations);
assert_eq!(stats.coefficient_of_variation, 0.0);
}
// Shared fixture: a result with `token_overhead` and `reproducibility_rate`
// set and the other optional groups left as `None`.
fn sample_benchmark_result() -> BenchmarkResult {
BenchmarkResult {
schema_version: 1,
workload_name: "simple_tool_call".to_string(),
model: "gemini-2.5-flash".to_string(),
metadata: RunMetadata {
timestamp: "2025-01-15T10:30:00Z".to_string(),
adk_version: "0.5.0".to_string(),
rust_version: "1.85.0".to_string(),
os: "linux".to_string(),
arch: "x86_64".to_string(),
},
cold_start: DurationStats {
min_us: 1000,
max_us: 5000,
mean_us: 2500,
median_us: 2400,
p95_us: 4800,
p99_us: 4950,
std_dev_us: 800,
count: 5,
coefficient_of_variation: 0.32,
},
agent_loop_overhead: DurationStats {
min_us: 100,
max_us: 500,
mean_us: 250,
median_us: 240,
p95_us: 480,
p99_us: 495,
std_dev_us: 80,
count: 10,
coefficient_of_variation: 0.32,
},
tool_invocation: None,
throughput: None,
memory: None,
token_overhead: Some(TokenOverheadMetrics {
total_tokens: 1200,
user_content_tokens: 950,
overhead_tokens: 250,
overhead_percentage: 20.83,
breakdown: TokenBreakdown {
system_prompt_tokens: 100,
tool_schema_tokens: 100,
framework_wrapper_tokens: 50,
},
}),
reproducibility_rate: Some(0.95),
iterations: 5,
}
}
// The fixture survives a JSON round trip unchanged.
#[test]
fn test_benchmark_result_serialization_round_trip() {
let result = sample_benchmark_result();
let json = serde_json::to_string(&result).unwrap();
let deserialized: BenchmarkResult = serde_json::from_str(&json).unwrap();
assert_eq!(result, deserialized);
}
// The serialized form always carries `schemaVersion` under its camelCase key.
#[test]
fn test_benchmark_result_schema_version_always_present() {
let result = sample_benchmark_result();
let json = serde_json::to_string(&result).unwrap();
let value: serde_json::Value = serde_json::from_str(&json).unwrap();
assert_eq!(value["schemaVersion"], serde_json::json!(1));
}
// Input without `schemaVersion` still parses and falls back to version 1.
#[test]
fn test_benchmark_result_deserialize_missing_schema_version() {
let json = r#"{
"workloadName": "simple_tool_call",
"model": "gemini-2.5-flash",
"metadata": {
"timestamp": "2025-01-15T10:30:00Z",
"adkVersion": "0.4.0",
"rustVersion": "1.85.0",
"os": "linux",
"arch": "x86_64"
},
"coldStart": {
"minUs": 1000, "maxUs": 5000, "meanUs": 2500,
"medianUs": 2400, "p95Us": 4800, "p99Us": 4950,
"stdDevUs": 800, "count": 5, "coefficientOfVariation": 0.32
},
"agentLoopOverhead": {
"minUs": 100, "maxUs": 500, "meanUs": 250,
"medianUs": 240, "p95Us": 480, "p99Us": 495,
"stdDevUs": 80, "count": 10, "coefficientOfVariation": 0.32
},
"iterations": 5
}"#;
let result: BenchmarkResult = serde_json::from_str(json).unwrap();
assert_eq!(result.schema_version, 1);
}
// Input that omits every optional metric group parses, with each group `None`.
#[test]
fn test_benchmark_result_deserialize_missing_optional_fields() {
let json = r#"{
"schemaVersion": 1,
"workloadName": "simple_tool_call",
"model": "gemini-2.5-flash",
"metadata": {
"timestamp": "2025-01-15T10:30:00Z",
"adkVersion": "0.4.0",
"rustVersion": "1.85.0",
"os": "linux",
"arch": "x86_64"
},
"coldStart": {
"minUs": 1000, "maxUs": 5000, "meanUs": 2500,
"medianUs": 2400, "p95Us": 4800, "p99Us": 4950,
"stdDevUs": 800, "count": 5, "coefficientOfVariation": 0.32
},
"agentLoopOverhead": {
"minUs": 100, "maxUs": 500, "meanUs": 250,
"medianUs": 240, "p95Us": 480, "p99Us": 495,
"stdDevUs": 80, "count": 10, "coefficientOfVariation": 0.32
},
"iterations": 5
}"#;
let result: BenchmarkResult = serde_json::from_str(json).unwrap();
assert_eq!(result.token_overhead, None);
assert_eq!(result.reproducibility_rate, None);
assert_eq!(result.memory, None);
assert_eq!(result.throughput, None);
assert_eq!(result.tool_invocation, None);
}
// Round trip with `memory` and `throughput` populated on top of the fixture.
// NOTE(review): `tool_invocation` stays `None` here, so despite the test name
// a `ToolInvocationMetrics` round trip is not exercised — consider adding it.
#[test]
fn test_benchmark_result_with_all_optional_fields() {
let mut result = sample_benchmark_result();
result.memory = Some(MemoryMetrics {
peak_rss_bytes: 52_428_800,
per_agent_bytes: Some(2_097_152),
sample_count: 50,
});
result.throughput = Some(ThroughputMetrics {
levels: vec![ConcurrencyLevel {
concurrency: 4,
agents_per_second: 12.5,
completion_time: DurationStats {
min_us: 800_000,
max_us: 1_200_000,
mean_us: 1_000_000,
median_us: 980_000,
p95_us: 1_150_000,
p99_us: 1_190_000,
std_dev_us: 100_000,
count: 4,
coefficient_of_variation: 0.1,
},
}],
});
let json = serde_json::to_string(&result).unwrap();
let deserialized: BenchmarkResult = serde_json::from_str(&json).unwrap();
assert_eq!(result, deserialized);
}
}