1use std::path::Path;
27use std::time::Duration;
28
29use serde::{Deserialize, Serialize};
30use tokio::process::Command;
31use tracing::{debug, info, warn};
32
33use crate::config::ExternalFrameworkConfig;
34
/// Metrics reported by an external framework benchmark subprocess.
///
/// `ExternalRunner::run` deserializes this from the JSON the subprocess writes to
/// stdout. The `Option` fields may be absent from the input and are omitted from
/// the serialized form when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ExternalMetricsOutput {
    /// Framework name as reported by the subprocess itself.
    pub framework: String,

    /// Cold-start time in microseconds.
    ///
    /// Must be present in the input, but `ExternalRunner::run` overwrites it with a
    /// value derived from `first_llm_call_epoch_ns` and its own start timestamp.
    pub cold_start_us: u64,

    /// Timestamp of the first LLM call, in nanoseconds since the Unix epoch.
    ///
    /// `ExternalRunner::run` subtracts its own pre-spawn timestamp from this value
    /// to compute `cold_start_us`.
    pub first_llm_call_epoch_ns: u64,

    /// Summary statistics for the framework's loop overhead.
    pub loop_overhead: ExternalDurationStats,

    /// Peak RSS in bytes (per the field name), if the framework reports it.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub peak_rss_bytes: Option<u64>,

    /// Throughput in agents per second (per the field name), if reported.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub throughput_agents_per_sec: Option<f64>,

    /// Token accounting breakdown, if the framework reports it.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub token_overhead: Option<ExternalTokenOverhead>,
}
87
/// Summary statistics over a set of duration samples reported by an external framework.
///
/// The `_us` suffix indicates microseconds. Values are taken verbatim from the
/// subprocess output and are not validated or recomputed in this file.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ExternalDurationStats {
    /// Smallest sample.
    pub min_us: u64,
    /// Largest sample.
    pub max_us: u64,
    /// Arithmetic mean of the samples.
    pub mean_us: u64,
    /// Median (50th percentile) sample.
    pub median_us: u64,
    /// 95th percentile sample.
    pub p95_us: u64,
    /// 99th percentile sample.
    pub p99_us: u64,
    /// Number of samples the statistics were computed from.
    pub count: u64,
}
106
/// Token accounting reported by an external framework.
///
/// NOTE(review): the fixtures in this file use `overhead_tokens == total_tokens -
/// user_content_tokens`, but nothing here enforces that relationship — confirm
/// against the protocol definition.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ExternalTokenOverhead {
    /// Total number of tokens.
    pub total_tokens: u64,
    /// Tokens attributed to user content.
    pub user_content_tokens: u64,
    /// Tokens attributed to framework overhead.
    pub overhead_tokens: u64,
}
117
118pub struct ExternalRunner {
133 timeout: Duration,
134}
135
136impl ExternalRunner {
137 pub fn new(timeout_secs: u64) -> Self {
139 Self { timeout: Duration::from_secs(timeout_secs) }
140 }
141
142 pub fn timeout(&self) -> Duration {
144 self.timeout
145 }
146
147 pub async fn run(
160 &self,
161 config: &ExternalFrameworkConfig,
162 workload_path: &str,
163 ) -> crate::Result<ExternalMetricsOutput> {
164 info!(
165 framework = %config.name,
166 command = %config.command,
167 workload = %workload_path,
168 "starting external framework benchmark"
169 );
170
171 let start_epoch_ns = std::time::SystemTime::now()
175 .duration_since(std::time::UNIX_EPOCH)
176 .unwrap_or_default()
177 .as_nanos() as u64;
178
179 let mut cmd = Command::new(&config.command);
181
182 cmd.args(&config.args);
184
185 cmd.arg(workload_path);
187
188 cmd.env("BENCH_START_EPOCH_NS", start_epoch_ns.to_string());
190
191 for (key, value) in &config.env {
193 cmd.env(key, value);
194 }
195
196 if let Some(working_dir) = &config.working_dir {
198 cmd.current_dir(working_dir);
199 }
200
201 cmd.stdout(std::process::Stdio::piped());
203 cmd.stderr(std::process::Stdio::piped());
204
205 debug!(
206 framework = %config.name,
207 start_epoch_ns = start_epoch_ns,
208 timeout_secs = self.timeout.as_secs(),
209 "spawning external framework subprocess"
210 );
211
212 let output = match tokio::time::timeout(self.timeout, cmd.output()).await {
214 Ok(Ok(output)) => output,
215 Ok(Err(io_err)) => {
216 return Err(crate::BenchError::ExternalRunner {
217 framework: config.name.clone(),
218 reason: format!("failed to spawn subprocess: {io_err}"),
219 });
220 }
221 Err(_elapsed) => {
222 warn!(
223 framework = %config.name,
224 timeout_secs = self.timeout.as_secs(),
225 "external framework timed out"
226 );
227 return Err(crate::BenchError::ExternalTimeout {
228 framework: config.name.clone(),
229 timeout_secs: self.timeout.as_secs(),
230 });
231 }
232 };
233
234 if !output.status.success() {
236 let stderr = String::from_utf8_lossy(&output.stderr);
237 let exit_code = output.status.code().unwrap_or(-1);
238 warn!(
239 framework = %config.name,
240 exit_code = exit_code,
241 stderr = %stderr,
242 "external framework exited with non-zero status"
243 );
244 return Err(crate::BenchError::ExternalRunner {
245 framework: config.name.clone(),
246 reason: format!("subprocess exited with code {exit_code}: {}", stderr.trim()),
247 });
248 }
249
250 let stdout = String::from_utf8_lossy(&output.stdout);
252 let stdout_trimmed = stdout.trim();
253
254 debug!(
255 framework = %config.name,
256 stdout_len = stdout_trimmed.len(),
257 "parsing external framework EBP output"
258 );
259
260 let mut metrics: ExternalMetricsOutput =
261 serde_json::from_str(stdout_trimmed).map_err(|e| {
262 crate::BenchError::ExternalRunner {
263 framework: config.name.clone(),
264 reason: format!("failed to parse EBP JSON output: {e}"),
265 }
266 })?;
267
268 let computed_cold_start_ns = metrics.first_llm_call_epoch_ns.saturating_sub(start_epoch_ns);
272 let computed_cold_start_us = computed_cold_start_ns / 1000;
273
274 metrics.cold_start_us = computed_cold_start_us;
276
277 info!(
278 framework = %metrics.framework,
279 cold_start_us = computed_cold_start_us,
280 loop_overhead_mean_us = metrics.loop_overhead.mean_us,
281 "external framework benchmark completed"
282 );
283
284 Ok(metrics)
285 }
286}
287
/// On-disk layout of the external framework configuration file.
///
/// `rename_all = "camelCase"` applies only to this struct's own fields; the
/// single field `frameworks` is spelled the same in camelCase.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ExternalConfigFile {
    /// Framework definitions, in file order.
    pub frameworks: Vec<ExternalFrameworkConfig>,
}
311
312pub fn load_external_configs(path: &Path) -> crate::Result<Vec<ExternalFrameworkConfig>> {
331 let content = std::fs::read_to_string(path)?;
332 let config_file: ExternalConfigFile = serde_json::from_str(&content).map_err(|e| {
333 crate::BenchError::Serialization(format!(
334 "failed to parse external config file '{}': {e}",
335 path.display()
336 ))
337 })?;
338 Ok(config_file.frameworks)
339}
340
#[cfg(test)]
mod tests {
    use super::*;

    /// Workload path handed to every subprocess; the test scripts never read it.
    const WORKLOAD: &str = "/tmp/workload.json";

    /// Builds a framework config with no working directory and no extra environment.
    fn framework_config(name: &str, command: &str, args: &[&str]) -> ExternalFrameworkConfig {
        ExternalFrameworkConfig {
            name: name.to_string(),
            command: command.to_string(),
            args: args.iter().map(|&arg| arg.to_owned()).collect(),
            working_dir: None,
            env: vec![],
        }
    }

    /// Asserts that `result` is an `ExternalRunner` error and returns `(framework, reason)`.
    fn expect_runner_error(result: crate::Result<ExternalMetricsOutput>) -> (String, String) {
        assert!(result.is_err());
        let err = result.unwrap_err();
        match err {
            crate::BenchError::ExternalRunner { framework, reason } => (framework, reason),
            _ => panic!("expected ExternalRunner error, got: {err:?}"),
        }
    }

    // ---- serde round-trips -------------------------------------------------

    #[test]
    fn test_external_metrics_output_deserialize() {
        let payload = r#"{
            "framework": "langgraph",
            "cold_start_us": 45000,
            "first_llm_call_epoch_ns": 1705312800000045000,
            "loop_overhead": {
                "min_us": 120,
                "max_us": 890,
                "mean_us": 340,
                "median_us": 310,
                "p95_us": 780,
                "p99_us": 870,
                "count": 10
            },
            "throughput_agents_per_sec": 12.5,
            "peak_rss_bytes": 52428800,
            "token_overhead": {
                "total_tokens": 1200,
                "user_content_tokens": 950,
                "overhead_tokens": 250
            }
        }"#;

        let parsed: ExternalMetricsOutput = serde_json::from_str(payload).unwrap();

        assert_eq!(parsed.framework, "langgraph");
        assert_eq!(parsed.cold_start_us, 45000);
        assert_eq!(parsed.first_llm_call_epoch_ns, 1705312800000045000);
        assert_eq!(
            parsed.loop_overhead,
            ExternalDurationStats {
                min_us: 120,
                max_us: 890,
                mean_us: 340,
                median_us: 310,
                p95_us: 780,
                p99_us: 870,
                count: 10,
            }
        );
        assert_eq!(parsed.throughput_agents_per_sec, Some(12.5));
        assert_eq!(parsed.peak_rss_bytes, Some(52428800));
        assert_eq!(
            parsed.token_overhead.unwrap(),
            ExternalTokenOverhead {
                total_tokens: 1200,
                user_content_tokens: 950,
                overhead_tokens: 250,
            }
        );
    }

    #[test]
    fn test_external_metrics_output_deserialize_minimal() {
        // Only the required fields are present; every optional one must come back as `None`.
        let payload = r#"{
            "framework": "crewai",
            "cold_start_us": 120000,
            "first_llm_call_epoch_ns": 1705312800000120000,
            "loop_overhead": {
                "min_us": 500,
                "max_us": 2000,
                "mean_us": 1000,
                "median_us": 900,
                "p95_us": 1800,
                "p99_us": 1950,
                "count": 5
            }
        }"#;

        let parsed: ExternalMetricsOutput = serde_json::from_str(payload).unwrap();

        assert_eq!(parsed.framework, "crewai");
        assert_eq!(parsed.cold_start_us, 120000);
        assert_eq!(parsed.peak_rss_bytes, None);
        assert_eq!(parsed.throughput_agents_per_sec, None);
        assert_eq!(parsed.token_overhead, None);
    }

    #[test]
    fn test_external_metrics_output_serialize_roundtrip() {
        let loop_overhead = ExternalDurationStats {
            min_us: 100,
            max_us: 500,
            mean_us: 250,
            median_us: 230,
            p95_us: 450,
            p99_us: 490,
            count: 20,
        };
        let token_overhead = ExternalTokenOverhead {
            total_tokens: 1000,
            user_content_tokens: 800,
            overhead_tokens: 200,
        };
        let original = ExternalMetricsOutput {
            framework: "test-framework".to_string(),
            cold_start_us: 5000,
            first_llm_call_epoch_ns: 1000000005000000,
            loop_overhead,
            peak_rss_bytes: Some(1024 * 1024 * 50),
            throughput_agents_per_sec: Some(8.5),
            token_overhead: Some(token_overhead),
        };

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: ExternalMetricsOutput = serde_json::from_str(&encoded).unwrap();

        assert_eq!(original, decoded);
    }

    // ---- runner construction -----------------------------------------------

    #[test]
    fn test_external_runner_new() {
        assert_eq!(ExternalRunner::new(120).timeout(), Duration::from_secs(120));
    }

    #[test]
    fn test_external_runner_default_timeout() {
        assert_eq!(ExternalRunner::new(300).timeout(), Duration::from_secs(300));
    }

    // ---- config file handling ----------------------------------------------

    #[test]
    fn test_external_config_file_deserialize() {
        let payload = r#"{
            "frameworks": [
                {
                    "name": "adk-python",
                    "command": "python",
                    "args": ["-m", "adk_bench", "--workload"],
                    "workingDir": "../adk-python",
                    "env": [["GOOGLE_API_KEY", "test-key"]]
                },
                {
                    "name": "langgraph",
                    "command": "python",
                    "args": ["bench_runner.py"],
                    "env": []
                }
            ]
        }"#;

        let parsed: ExternalConfigFile = serde_json::from_str(payload).unwrap();
        assert_eq!(parsed.frameworks.len(), 2);

        let first = &parsed.frameworks[0];
        assert_eq!(first.name, "adk-python");
        assert_eq!(first.command, "python");
        assert_eq!(first.args, vec!["-m", "adk_bench", "--workload"]);
        assert_eq!(first.working_dir, Some(std::path::PathBuf::from("../adk-python")));

        let second = &parsed.frameworks[1];
        assert_eq!(second.name, "langgraph");
        assert_eq!(second.working_dir, None);
    }

    #[test]
    fn test_load_external_configs_file_not_found() {
        let missing = Path::new("/nonexistent/path/config.json");
        assert!(load_external_configs(missing).is_err());
    }

    // ---- subprocess behaviour ----------------------------------------------

    #[tokio::test]
    async fn test_external_runner_spawn_failure() {
        let config = framework_config("nonexistent", "/this/command/does/not/exist/anywhere", &[]);

        let outcome = ExternalRunner::new(10).run(&config, WORKLOAD).await;

        let (framework, reason) = expect_runner_error(outcome);
        assert_eq!(framework, "nonexistent");
        assert!(reason.contains("failed to spawn subprocess"));
    }

    #[tokio::test]
    async fn test_external_runner_non_zero_exit() {
        let config = framework_config("failing-script", "sh", &["-c", "exit 1"]);

        let outcome = ExternalRunner::new(10).run(&config, WORKLOAD).await;

        let (framework, _reason) = expect_runner_error(outcome);
        assert_eq!(framework, "failing-script");
    }

    #[tokio::test]
    async fn test_external_runner_invalid_json() {
        let config = framework_config("bad-json", "echo", &["not valid json"]);

        let outcome = ExternalRunner::new(10).run(&config, WORKLOAD).await;

        let (framework, reason) = expect_runner_error(outcome);
        assert_eq!(framework, "bad-json");
        assert!(reason.contains("failed to parse EBP JSON output"));
    }

    #[tokio::test]
    async fn test_external_runner_timeout() {
        // One-second budget against a ten-second sleep.
        let runner = ExternalRunner::new(1);
        let config = framework_config("slow-script", "sh", &["-c", "sleep 10; #"]);

        let outcome = runner.run(&config, WORKLOAD).await;

        assert!(outcome.is_err());
        let err = outcome.unwrap_err();
        match err {
            crate::BenchError::ExternalTimeout { framework, timeout_secs } => {
                assert_eq!(framework, "slow-script");
                assert_eq!(timeout_secs, 1);
            }
            _ => panic!("expected ExternalTimeout error, got: {err:?}"),
        }
    }

    #[tokio::test]
    async fn test_external_runner_valid_output() {
        let ebp_json = r#"{"framework":"test","cold_start_us":1000,"first_llm_call_epoch_ns":99999999999999999,"loop_overhead":{"min_us":10,"max_us":100,"mean_us":50,"median_us":45,"p95_us":90,"p99_us":95,"count":5}}"#;
        let script = format!("echo '{}'; #", ebp_json);
        let config = framework_config("test-framework", "sh", &["-c", script.as_str()]);

        let outcome = ExternalRunner::new(10).run(&config, WORKLOAD).await;

        assert!(outcome.is_ok());
        let metrics = outcome.unwrap();
        assert_eq!(metrics.framework, "test");
        assert_eq!(metrics.loop_overhead.min_us, 10);
        assert_eq!(metrics.loop_overhead.count, 5);
        assert_eq!(metrics.peak_rss_bytes, None);
        assert_eq!(metrics.throughput_agents_per_sec, None);
        assert_eq!(metrics.token_overhead, None);
    }

    #[tokio::test]
    async fn test_external_runner_env_injection() {
        // The script reports a first call exactly 5 ms after the injected start timestamp.
        let script = r#"FIRST_CALL=$(expr $BENCH_START_EPOCH_NS + 5000000); echo "{\"framework\":\"env-test\",\"cold_start_us\":0,\"first_llm_call_epoch_ns\":$FIRST_CALL,\"loop_overhead\":{\"min_us\":1,\"max_us\":2,\"mean_us\":1,\"median_us\":1,\"p95_us\":2,\"p99_us\":2,\"count\":1}}"; #"#;
        let config = ExternalFrameworkConfig {
            env: vec![("CUSTOM_VAR".to_string(), "hello".to_string())],
            ..framework_config("env-test", "sh", &["-c", script])
        };

        let metrics = match ExternalRunner::new(10).run(&config, WORKLOAD).await {
            Ok(metrics) => metrics,
            Err(err) => panic!("run failed: {:?}", err),
        };

        assert_eq!(metrics.framework, "env-test");
        assert_eq!(metrics.cold_start_us, 5000);
    }
}