use crate::{
CreateJavascriptContextRequest, JavascriptExecutionEngine, JavascriptExecutionError,
StartJavascriptExecutionRequest,
};
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
use std::env;
use std::fmt;
use std::fmt::Write as _;
use std::fs;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
// Prefix string for benchmark marker lines.
// NOTE(review): presumably used to pick benchmark output out of guest stdout; the
// consumer is not visible in this part of the file — confirm.
const BENCHMARK_MARKER_PREFIX: &str = "__AGENT_OS_BENCH__:";
// NOTE(review): presumably the number of modules in the generated local import graph
// fixture — usage is not visible here, confirm.
const LOCAL_GRAPH_MODULE_COUNT: usize = 24;
// Written as `artifact_version` into the JSON artifact.
const BENCHMARK_ARTIFACT_VERSION: u32 = 5;
// Relative directory path for benchmark report artifacts.
const BENCHMARK_ARTIFACT_DIR: &str = "target/benchmark-reports/node-import-bench";
// File name of the run-state JSON file.
// NOTE(review): presumably backs resumable runs — confirm against its reader/writer.
const BENCHMARK_RUN_STATE_FILE: &str = "run-state.json";
// Channel label for the transport RTT measurements.
const TRANSPORT_RTT_CHANNEL: &str = "execution-stdio-echo";
// Payload sizes for the transport RTT measurements: 32 B, 4 KiB and 64 KiB.
const TRANSPORT_RTT_PAYLOAD_BYTES: [usize; 3] = [32, 4 * 1024, 64 * 1024];
// Five-second timeout for transport polling.
const TRANSPORT_POLL_TIMEOUT: Duration = Duration::from_secs(5);
/// Iteration settings for a benchmark run.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct JavascriptBenchmarkConfig {
/// Number of recorded iterations (reported as "recorded" in the markdown report).
pub iterations: usize,
/// Number of warmup iterations (reported as "warmup" in the markdown report).
pub warmup_iterations: usize,
}
impl Default for JavascriptBenchmarkConfig {
    /// Default configuration: five recorded iterations preceded by one warmup iteration.
    fn default() -> Self {
        let iterations = 5;
        let warmup_iterations = 1;
        Self {
            iterations,
            warmup_iterations,
        }
    }
}
/// Description of the machine and Node installation a benchmark report was produced on.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct BenchmarkHost {
/// Node binary, rendered as "Node binary" in the markdown report.
pub node_binary: String,
/// Node version string; trimmed before being rendered in the markdown report.
pub node_version: String,
/// Operating system label.
pub os: &'static str,
/// CPU architecture label.
pub arch: &'static str,
/// Number of logical CPUs.
pub logical_cpus: usize,
}
/// Per-phase values for one scenario, generic over the value type so the same shape
/// can hold raw samples, statistics, or deltas.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct BenchmarkScenarioPhases<T> {
/// Context setup phase.
pub context_setup_ms: T,
/// Runtime startup phase.
pub startup_ms: T,
/// Guest execution phase; omitted from serialized output when `None` and defaulted
/// to `None` when absent on deserialization.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub guest_execution_ms: Option<T>,
/// Completion phase (described as "completion/stdio work" in the guidance text).
pub completion_ms: T,
}
/// Summary statistics for a set of millisecond samples.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
pub struct BenchmarkStats {
/// Mean, in milliseconds.
pub mean_ms: f64,
/// 50th percentile, in milliseconds.
pub p50_ms: f64,
/// 95th percentile, in milliseconds.
pub p95_ms: f64,
/// Minimum, in milliseconds.
pub min_ms: f64,
/// Maximum, in milliseconds.
pub max_ms: f64,
/// Standard deviation, in milliseconds.
pub stddev_ms: f64,
}
/// Unit-agnostic counterpart of [`BenchmarkStats`]; used for resource usage metrics,
/// whose unit is given by the field that holds the value (bytes, microseconds).
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
pub struct BenchmarkDistributionStats {
/// Mean.
pub mean: f64,
/// 50th percentile.
pub p50: f64,
/// 95th percentile.
pub p95: f64,
/// Minimum.
pub min: f64,
/// Maximum.
pub max: f64,
/// Standard deviation.
pub stddev: f64,
}
/// Optional resource usage metrics, generic over the value type (single sample,
/// sample list, or statistics). Every field is omitted from serialized output when
/// `None` and defaults to `None` when absent on deserialization.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
pub struct BenchmarkResourceUsage<T> {
/// Resident set size in bytes (rendered as MiB in the markdown report).
#[serde(skip_serializing_if = "Option::is_none", default)]
pub rss_bytes: Option<T>,
/// Used heap in bytes (rendered as MiB in the markdown report).
#[serde(skip_serializing_if = "Option::is_none", default)]
pub heap_used_bytes: Option<T>,
/// User CPU time in microseconds.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub cpu_user_us: Option<T>,
/// System CPU time in microseconds.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub cpu_system_us: Option<T>,
/// Total CPU time in microseconds (rendered as milliseconds in the markdown report).
#[serde(skip_serializing_if = "Option::is_none", default)]
pub cpu_total_us: Option<T>,
}
/// Transport round-trip-time measurements for one payload size.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct BenchmarkTransportRttReport {
/// Channel label.
pub channel: &'static str,
/// Payload size in bytes.
pub payload_bytes: usize,
/// Raw RTT samples in milliseconds.
pub samples_ms: Vec<f64>,
/// Summary statistics for the RTT measurements.
pub stats: BenchmarkStats,
}
/// Measurements and metadata for a single benchmark scenario.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct BenchmarkScenarioReport {
/// Scenario identifier (e.g. `"warm-local-import"`); used to look scenarios up and to
/// match them against a baseline artifact.
pub id: &'static str,
/// Workload label; scenarios are grouped by it in the control matrix.
pub workload: &'static str,
/// Runtime label.
pub runtime: &'static str,
/// Mode label.
pub mode: &'static str,
/// Human-readable description of the scenario.
pub description: &'static str,
/// Fixture label shown in the scenario summary table.
pub fixture: &'static str,
/// Compile-cache label shown in the "Cache" column of the scenario summary table.
pub compile_cache: &'static str,
/// Raw wall-time samples in milliseconds.
pub wall_samples_ms: Vec<f64>,
/// Summary statistics for wall time.
pub wall_stats: BenchmarkStats,
/// Raw guest import samples in milliseconds, when the scenario reports them.
pub guest_import_samples_ms: Option<Vec<f64>>,
/// Summary statistics for guest import time, when available.
pub guest_import_stats: Option<BenchmarkStats>,
/// Raw startup overhead samples in milliseconds, when available.
pub startup_overhead_samples_ms: Option<Vec<f64>>,
/// Summary statistics for startup overhead, when available.
pub startup_overhead_stats: Option<BenchmarkStats>,
/// Raw per-phase samples in milliseconds.
pub phase_samples_ms: BenchmarkScenarioPhases<Vec<f64>>,
/// Per-phase summary statistics.
pub phase_stats: BenchmarkScenarioPhases<BenchmarkStats>,
/// Raw resource usage samples; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub resource_usage_samples: Option<BenchmarkResourceUsage<Vec<f64>>>,
/// Resource usage statistics; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub resource_usage_stats: Option<BenchmarkResourceUsage<BenchmarkDistributionStats>>,
}
/// Complete result of a benchmark run; renders to markdown and JSON artifacts.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct JavascriptBenchmarkReport {
/// Generation timestamp in Unix milliseconds.
pub generated_at_unix_ms: u128,
/// Iteration settings the run used.
pub config: JavascriptBenchmarkConfig,
/// Host the run was executed on.
pub host: BenchmarkHost,
/// Repository root path.
pub repo_root: PathBuf,
/// Transport RTT measurements, one entry per payload size.
pub transport_rtt: Vec<BenchmarkTransportRttReport>,
/// Per-scenario measurements.
pub scenarios: Vec<BenchmarkScenarioReport>,
}
/// Comparison of a current report against a stored baseline artifact.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct BenchmarkComparison {
/// Identification of the baseline artifact.
pub baseline: BenchmarkComparisonBaseline,
/// Headline figures derived from the scenario deltas.
pub summary: BenchmarkComparisonSummary,
/// Deltas for scenarios present in both the current report and the baseline.
pub scenario_deltas: Vec<BenchmarkScenarioDelta>,
/// Ids of current scenarios that have no counterpart in the baseline.
pub scenarios_missing_from_baseline: Vec<String>,
/// Ids of baseline scenarios that have no counterpart in the current report.
pub baseline_only_scenarios: Vec<String>,
}
/// Identification of the baseline artifact used in a comparison.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct BenchmarkComparisonBaseline {
/// Artifact version recorded in the baseline.
pub artifact_version: u32,
/// Generation timestamp of the baseline in Unix milliseconds.
pub generated_at_unix_ms: u128,
/// Path of the baseline artifact (canonicalized when possible).
pub path: PathBuf,
}
/// Headline figures of a baseline comparison.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct BenchmarkComparisonSummary {
/// Number of scenarios present in both the current report and the baseline.
pub compared_scenario_count: usize,
/// Scenario with the most negative wall-time delta; omitted when no scenario improved.
#[serde(skip_serializing_if = "Option::is_none")]
pub largest_wall_improvement: Option<BenchmarkDeltaHighlight>,
/// Scenario with the most positive wall-time delta; omitted when no scenario regressed.
#[serde(skip_serializing_if = "Option::is_none")]
pub largest_wall_regression: Option<BenchmarkDeltaHighlight>,
}
/// A single scenario's wall-time delta, singled out in the comparison summary.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct BenchmarkDeltaHighlight {
/// Scenario id.
pub id: String,
/// Wall-time delta in milliseconds (current minus baseline).
pub delta_ms: f64,
/// Wall-time delta as a percentage of the baseline.
pub delta_pct: f64,
}
/// Mean-value deltas between the current and baseline measurements of one scenario.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct BenchmarkScenarioDelta {
/// Scenario id.
pub id: String,
/// Scenario description, copied from the current report.
pub description: String,
/// Delta of the mean wall time.
pub wall_mean_ms: BenchmarkMetricDelta,
/// Delta of the mean guest import time; present only when both sides report it.
#[serde(skip_serializing_if = "Option::is_none")]
pub guest_import_mean_ms: Option<BenchmarkMetricDelta>,
/// Delta of the mean startup overhead; present only when both sides report it.
#[serde(skip_serializing_if = "Option::is_none")]
pub startup_overhead_mean_ms: Option<BenchmarkMetricDelta>,
/// Per-phase mean deltas; present only when the baseline carries phase statistics.
#[serde(skip_serializing_if = "Option::is_none")]
pub phase_mean_ms: Option<BenchmarkScenarioPhases<BenchmarkMetricDelta>>,
}
/// Baseline-versus-current comparison of a single millisecond metric.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct BenchmarkMetricDelta {
/// Baseline value in milliseconds.
pub baseline_ms: f64,
/// Current value in milliseconds.
pub current_ms: f64,
/// `current_ms - baseline_ms`.
pub delta_ms: f64,
/// `delta_ms` as a percentage of `baseline_ms`; `0.0` when the baseline is not positive.
pub delta_pct: f64,
}
impl JavascriptBenchmarkReport {
/// Renders the markdown report without a baseline comparison section.
pub fn render_markdown(&self) -> String {
self.render_markdown_with_comparison(None)
}
/// Renders the full markdown report.
///
/// Sections are emitted in this order: header bullets, "Transport RTT",
/// "Control Matrix", "Scenario Summary", "Stability And Resource Summary",
/// "Ranked Hotspots", "Hotspot Guidance", the optional "Baseline Comparison"
/// (only when `comparison` is `Some`), and "Raw Samples".
///
/// Writing into a `String` cannot fail, so the `writeln!` results are
/// deliberately discarded.
pub fn render_markdown_with_comparison(
    &self,
    comparison: Option<&BenchmarkComparison>,
) -> String {
    // Placeholder for table cells whose metric was not recorded.
    let not_available = || String::from("n/a");
    let mut markdown = String::new();

    // --- Header ---
    let _ = writeln!(&mut markdown, "# Agent OS Node Import Benchmark");
    let _ = writeln!(&mut markdown);
    let _ = writeln!(
        &mut markdown,
        "- Generated at unix ms: `{}`",
        self.generated_at_unix_ms
    );
    let _ = writeln!(&mut markdown, "- Node binary: `{}`", self.host.node_binary);
    let _ = writeln!(
        &mut markdown,
        "- Node version: `{}`",
        self.host.node_version.trim()
    );
    let _ = writeln!(
        &mut markdown,
        "- Host: `{}` / `{}` / `{}` logical CPUs",
        self.host.os, self.host.arch, self.host.logical_cpus
    );
    let _ = writeln!(&mut markdown, "- Repo root: `{}`", self.repo_root.display());
    let _ = writeln!(
        &mut markdown,
        "- Iterations: `{}` recorded, `{}` warmup",
        self.config.iterations, self.config.warmup_iterations
    );
    let _ = writeln!(
        &mut markdown,
        "- Reproduce: `cargo run -p agent-os-execution --bin node-import-bench -- --iterations {} --warmup-iterations {}`",
        self.config.iterations, self.config.warmup_iterations
    );
    let _ = writeln!(&mut markdown);

    // --- Transport RTT ---
    let _ = writeln!(&mut markdown, "## Transport RTT");
    let _ = writeln!(&mut markdown);
    let _ = writeln!(
        &mut markdown,
        "| Channel | Payload (bytes) | Mean RTT (ms) | P50 | P95 |"
    );
    let _ = writeln!(&mut markdown, "| --- | ---: | ---: | ---: | ---: |");
    for transport in &self.transport_rtt {
        let _ = writeln!(
            &mut markdown,
            "| `{}` | {} | {} | {} | {} |",
            transport.channel,
            transport.payload_bytes,
            format_ms(transport.stats.mean_ms),
            format_ms(transport.stats.p50_ms),
            format_ms(transport.stats.p95_ms),
        );
    }
    // Separate the table from the next heading, as every other section does.
    let _ = writeln!(&mut markdown);

    // --- Control Matrix ---
    let _ = writeln!(&mut markdown, "## Control Matrix");
    let _ = writeln!(&mut markdown);
    for row in self.control_matrix() {
        let _ = writeln!(
            &mut markdown,
            "- Workload `{}`: runtimes {}, modes {}, scenarios {}",
            row.workload,
            format_label_list(&row.runtimes),
            format_label_list(&row.modes),
            format_label_list(&row.scenario_ids),
        );
    }
    let _ = writeln!(&mut markdown);

    // --- Scenario Summary ---
    let _ = writeln!(&mut markdown, "## Scenario Summary");
    let _ = writeln!(&mut markdown);
    let _ = writeln!(
        &mut markdown,
        "| Scenario | Workload | Runtime | Mode | Fixture | Cache | Mean wall (ms) | Mean context (ms) | Mean startup (ms) | Mean guest exec (ms) | Mean completion (ms) | Mean startup overhead (ms) |"
    );
    let _ = writeln!(
        &mut markdown,
        "| --- | --- | --- | --- | --- | --- | ---: | ---: | ---: | ---: | ---: | ---: |"
    );
    for scenario in &self.scenarios {
        let guest_execution_mean = scenario
            .phase_stats
            .guest_execution_ms
            .as_ref()
            .map(|stats| format_ms(stats.mean_ms))
            .unwrap_or_else(not_available);
        let startup_overhead_mean = scenario
            .startup_overhead_stats
            .as_ref()
            .map(|stats| format_ms(stats.mean_ms))
            .unwrap_or_else(not_available);
        let _ = writeln!(
            &mut markdown,
            "| `{}` | `{}` | `{}` | `{}` | {} | {} | {} | {} | {} | {} | {} | {} |",
            scenario.id,
            scenario.workload,
            scenario.runtime,
            scenario.mode,
            scenario.fixture,
            scenario.compile_cache,
            format_ms(scenario.wall_stats.mean_ms),
            format_ms(scenario.phase_stats.context_setup_ms.mean_ms),
            format_ms(scenario.phase_stats.startup_ms.mean_ms),
            guest_execution_mean,
            format_ms(scenario.phase_stats.completion_ms.mean_ms),
            startup_overhead_mean,
        );
    }
    let _ = writeln!(&mut markdown);

    // --- Stability And Resource Summary ---
    let _ = writeln!(&mut markdown, "## Stability And Resource Summary");
    let _ = writeln!(&mut markdown);
    let _ = writeln!(
        &mut markdown,
        "| Scenario | Wall P50 (ms) | Wall min-max (ms) | Wall stddev (ms) | Mean RSS (MiB) | Mean heap (MiB) | Mean total CPU (ms) |"
    );
    let _ = writeln!(
        &mut markdown,
        "| --- | ---: | --- | ---: | ---: | ---: | ---: |"
    );
    for scenario in &self.scenarios {
        let resources = scenario.resource_usage_stats.as_ref();
        let rss_mean = resources
            .and_then(|stats| stats.rss_bytes.as_ref())
            .map(|stats| format_mib(bytes_to_mib(stats.mean)))
            .unwrap_or_else(not_available);
        let heap_mean = resources
            .and_then(|stats| stats.heap_used_bytes.as_ref())
            .map(|stats| format_mib(bytes_to_mib(stats.mean)))
            .unwrap_or_else(not_available);
        let cpu_mean = resources
            .and_then(|stats| stats.cpu_total_us.as_ref())
            .map(|stats| format_ms(micros_to_ms(stats.mean)))
            .unwrap_or_else(not_available);
        let _ = writeln!(
            &mut markdown,
            "| `{}` | {} | {}-{} | {} | {} | {} | {} |",
            scenario.id,
            format_ms(scenario.wall_stats.p50_ms),
            format_ms(scenario.wall_stats.min_ms),
            format_ms(scenario.wall_stats.max_ms),
            format_ms(scenario.wall_stats.stddev_ms),
            rss_mean,
            heap_mean,
            cpu_mean,
        );
    }
    let _ = writeln!(&mut markdown);

    // --- Ranked Hotspots ---
    let _ = writeln!(&mut markdown, "## Ranked Hotspots");
    let _ = writeln!(&mut markdown);
    for ranking in self.hotspot_rankings() {
        let _ = writeln!(
            &mut markdown,
            "### {} (`{}`, `{}`)",
            ranking.label, ranking.dimension, ranking.unit
        );
        let _ = writeln!(&mut markdown);
        let _ = writeln!(
            &mut markdown,
            "| Rank | Scenario | Workload | Runtime | Mode | Value |"
        );
        let _ = writeln!(&mut markdown, "| ---: | --- | --- | --- | --- | ---: |");
        for scenario in &ranking.ranked_scenarios {
            let _ = writeln!(
                &mut markdown,
                "| {} | `{}` | `{}` | `{}` | `{}` | {} |",
                scenario.rank,
                scenario.id,
                scenario.workload,
                scenario.runtime,
                scenario.mode,
                format_hotspot_value(ranking.unit, scenario.value),
            );
        }
        if !ranking.scenarios_without_metric.is_empty() {
            let _ = writeln!(&mut markdown);
            let _ = writeln!(
                &mut markdown,
                "Missing metric for: {}",
                format_string_label_list(&ranking.scenarios_without_metric),
            );
        }
        let _ = writeln!(&mut markdown);
    }

    // --- Hotspot Guidance ---
    let _ = writeln!(&mut markdown, "## Hotspot Guidance");
    let _ = writeln!(&mut markdown);
    for line in self.guidance_lines() {
        let _ = writeln!(&mut markdown, "- {line}");
    }

    // --- Baseline Comparison (optional) ---
    if let Some(comparison) = comparison {
        let _ = writeln!(&mut markdown);
        let _ = writeln!(&mut markdown, "## Baseline Comparison");
        let _ = writeln!(&mut markdown);
        let _ = writeln!(
            &mut markdown,
            "- Baseline artifact: `{}`",
            comparison.baseline.path.display()
        );
        let _ = writeln!(
            &mut markdown,
            "- Baseline generated at unix ms: `{}`",
            comparison.baseline.generated_at_unix_ms
        );
        let _ = writeln!(
            &mut markdown,
            "- Compared scenarios: `{}`",
            comparison.summary.compared_scenario_count
        );
        if let Some(improvement) = &comparison.summary.largest_wall_improvement {
            let _ = writeln!(
                &mut markdown,
                "- Largest wall-time improvement: `{}` at {} ({})",
                improvement.id,
                format_delta_ms(improvement.delta_ms),
                format_delta_pct(improvement.delta_pct),
            );
        }
        if let Some(regression) = &comparison.summary.largest_wall_regression {
            let _ = writeln!(
                &mut markdown,
                "- Largest wall-time regression: `{}` at {} ({})",
                regression.id,
                format_delta_ms(regression.delta_ms),
                format_delta_pct(regression.delta_pct),
            );
        }
        if !comparison.scenarios_missing_from_baseline.is_empty() {
            let _ = writeln!(
                &mut markdown,
                "- Scenarios missing from baseline: {}",
                comparison.scenarios_missing_from_baseline.join(", ")
            );
        }
        if !comparison.baseline_only_scenarios.is_empty() {
            let _ = writeln!(
                &mut markdown,
                "- Baseline-only scenarios: {}",
                comparison.baseline_only_scenarios.join(", ")
            );
        }
        let _ = writeln!(&mut markdown);
        let _ = writeln!(
            &mut markdown,
            "| Scenario | Wall delta (ms) | Wall delta % | Import delta (ms) | Startup delta (ms) | Context delta (ms) | Completion delta (ms) |"
        );
        let _ = writeln!(
            &mut markdown,
            "| --- | ---: | ---: | ---: | ---: | ---: | ---: |"
        );
        for scenario in &comparison.scenario_deltas {
            let import_delta = scenario
                .guest_import_mean_ms
                .as_ref()
                .map(|delta| format_delta_ms(delta.delta_ms))
                .unwrap_or_else(not_available);
            let startup_delta = scenario
                .startup_overhead_mean_ms
                .as_ref()
                .map(|delta| format_delta_ms(delta.delta_ms))
                .unwrap_or_else(not_available);
            let context_delta = scenario
                .phase_mean_ms
                .as_ref()
                .map(|delta| format_delta_ms(delta.context_setup_ms.delta_ms))
                .unwrap_or_else(not_available);
            let completion_delta = scenario
                .phase_mean_ms
                .as_ref()
                .map(|delta| format_delta_ms(delta.completion_ms.delta_ms))
                .unwrap_or_else(not_available);
            let _ = writeln!(
                &mut markdown,
                "| `{}` | {} | {} | {} | {} | {} | {} |",
                scenario.id,
                format_delta_ms(scenario.wall_mean_ms.delta_ms),
                format_delta_pct(scenario.wall_mean_ms.delta_pct),
                import_delta,
                startup_delta,
                context_delta,
                completion_delta,
            );
        }
    }
    let _ = writeln!(&mut markdown);

    // --- Raw Samples ---
    let _ = writeln!(&mut markdown, "## Raw Samples");
    let _ = writeln!(&mut markdown);
    for scenario in &self.scenarios {
        let _ = writeln!(&mut markdown, "### `{}`", scenario.id);
        let _ = writeln!(&mut markdown, "- Workload: `{}`", scenario.workload);
        let _ = writeln!(&mut markdown, "- Runtime: `{}`", scenario.runtime);
        let _ = writeln!(&mut markdown, "- Mode: `{}`", scenario.mode);
        let _ = writeln!(&mut markdown, "- Description: {}", scenario.description);
        let _ = writeln!(
            &mut markdown,
            "- Wall samples (ms): {}",
            format_sample_list(&scenario.wall_samples_ms)
        );
        if let Some(samples) = &scenario.guest_import_samples_ms {
            let _ = writeln!(
                &mut markdown,
                "- Guest import samples (ms): {}",
                format_sample_list(samples)
            );
        }
        if let Some(samples) = &scenario.startup_overhead_samples_ms {
            let _ = writeln!(
                &mut markdown,
                "- Startup overhead samples (ms): {}",
                format_sample_list(samples)
            );
        }
        let _ = writeln!(
            &mut markdown,
            "- Context setup samples (ms): {}",
            format_sample_list(&scenario.phase_samples_ms.context_setup_ms)
        );
        let _ = writeln!(
            &mut markdown,
            "- Startup samples (ms): {}",
            format_sample_list(&scenario.phase_samples_ms.startup_ms)
        );
        if let Some(samples) = &scenario.phase_samples_ms.guest_execution_ms {
            let _ = writeln!(
                &mut markdown,
                "- Guest execution samples (ms): {}",
                format_sample_list(samples)
            );
        }
        let _ = writeln!(
            &mut markdown,
            "- Completion samples (ms): {}",
            format_sample_list(&scenario.phase_samples_ms.completion_ms)
        );
        if let Some(samples) = &scenario.resource_usage_samples {
            if let Some(rss_samples) = &samples.rss_bytes {
                let _ = writeln!(
                    &mut markdown,
                    "- RSS samples (MiB): {}",
                    format_scaled_sample_list(rss_samples, bytes_to_mib)
                );
            }
            if let Some(heap_samples) = &samples.heap_used_bytes {
                let _ = writeln!(
                    &mut markdown,
                    "- Heap samples (MiB): {}",
                    format_scaled_sample_list(heap_samples, bytes_to_mib)
                );
            }
            if let Some(cpu_samples) = &samples.cpu_total_us {
                let _ = writeln!(
                    &mut markdown,
                    "- Total CPU samples (ms): {}",
                    format_scaled_sample_list(cpu_samples, micros_to_ms)
                );
            }
        }
        let _ = writeln!(&mut markdown);
    }
    markdown
}
/// Renders the JSON artifact without a baseline comparison.
///
/// # Errors
/// Returns the `serde_json::Error` produced by serialization.
pub fn render_json(&self) -> Result<String, serde_json::Error> {
self.render_json_with_comparison(None)
}
/// Renders the JSON artifact as pretty-printed JSON, embedding `comparison` when given.
///
/// # Errors
/// Returns the `serde_json::Error` produced by serialization.
pub fn render_json_with_comparison(
&self,
comparison: Option<&BenchmarkComparison>,
) -> Result<String, serde_json::Error> {
serde_json::to_string_pretty(&self.json_artifact(comparison))
}
/// Writes the markdown and JSON artifacts into `output_dir` without a baseline comparison.
///
/// # Errors
/// Propagates any error from `write_artifacts_with_comparison`.
pub fn write_artifacts(
&self,
output_dir: &Path,
) -> Result<JavascriptBenchmarkArtifactPaths, JavascriptBenchmarkError> {
self.write_artifacts_with_comparison(output_dir, None)
}
/// Creates `output_dir` (including parents) and writes `report.md` and
/// `report.json` into it, in that order.
///
/// # Errors
/// Fails when the directory cannot be created, when JSON rendering fails, or
/// when either file cannot be written. The markdown file is written before
/// the JSON is rendered, so a JSON failure leaves `report.md` in place.
pub fn write_artifacts_with_comparison(
    &self,
    output_dir: &Path,
    comparison: Option<&BenchmarkComparison>,
) -> Result<JavascriptBenchmarkArtifactPaths, JavascriptBenchmarkError> {
    fs::create_dir_all(output_dir)?;
    let artifact_paths = JavascriptBenchmarkArtifactPaths {
        markdown_path: output_dir.join("report.md"),
        json_path: output_dir.join("report.json"),
    };

    let markdown = self.render_markdown_with_comparison(comparison);
    write_string_atomic(&artifact_paths.markdown_path, &markdown)?;

    let json = self.render_json_with_comparison(comparison)?;
    write_string_atomic(&artifact_paths.json_path, &json)?;

    Ok(artifact_paths)
}
/// Loads the baseline artifact at `baseline_path` and compares this report against it.
///
/// # Errors
/// Propagates the error from `load_benchmark_artifact` when the baseline
/// cannot be loaded.
pub fn compare_to_baseline_path(
    &self,
    baseline_path: &Path,
) -> Result<BenchmarkComparison, JavascriptBenchmarkError> {
    let stored_baseline = load_benchmark_artifact(baseline_path)?;
    let comparison = BenchmarkComparison::from_reports(self, baseline_path, &stored_baseline);
    Ok(comparison)
}
/// Builds the human-readable guidance bullets shown under "Hotspot Guidance" and
/// stored in the JSON summary.
///
/// Each bullet is emitted only when every scenario and metric it references is
/// present in the report; the closing bullet is always appended.
fn guidance_lines(&self) -> Vec<String> {
// Scenarios the guidance text refers to, looked up by id; each may be absent.
let isolate = self.scenario("isolate-startup");
let cold_local = self.scenario("cold-local-import");
let warm_local = self.scenario("warm-local-import");
let prewarmed_local = self.scenario("prewarmed-local-import");
let builtin = self.scenario("builtin-import");
let large = self.scenario("large-package-import");
let mut guidance = Vec::new();
// Cold versus warm local import: requires all eight means to be available.
if let (
Some(cold_import),
Some(warm_import),
Some(warm_context),
Some(warm_startup_phase),
Some(warm_completion),
Some(warm_startup_overhead),
Some(warm_wall),
Some(isolate_wall),
) = (
cold_local
.and_then(|scenario| scenario.guest_import_stats.as_ref())
.map(|stats| stats.mean_ms),
warm_local
.and_then(|scenario| scenario.guest_import_stats.as_ref())
.map(|stats| stats.mean_ms),
warm_local.map(|scenario| scenario.phase_stats.context_setup_ms.mean_ms),
warm_local.map(|scenario| scenario.phase_stats.startup_ms.mean_ms),
warm_local.map(|scenario| scenario.phase_stats.completion_ms.mean_ms),
warm_local
.and_then(|scenario| scenario.startup_overhead_stats.as_ref())
.map(|stats| stats.mean_ms),
warm_local.map(|scenario| scenario.wall_stats.mean_ms),
isolate.map(|scenario| scenario.wall_stats.mean_ms),
) {
guidance.push(format!(
"Compile-cache reuse cuts the local import graph from {} to {} on average ({:.1}% faster), but the warm path still spends {} outside guest module evaluation. That keeps startup prewarm work in `ARC-021D` and sidecar warm-pool/snapshot work in `ARC-022` on the critical path above the `{}` empty-isolate floor.",
format_ms(cold_import),
format_ms(warm_import),
percentage_reduction(cold_import, warm_import),
format_ms(warm_startup_overhead),
format_ms(isolate_wall),
));
// The share bullet is skipped when the warm wall mean is not positive.
if warm_wall > 0.0 {
guidance.push(format!(
"Warm local imports still spend {:.1}% of wall time in process startup, wrapper evaluation, and stdio handling instead of guest import work. Optimizations that only touch module compilation will not remove that floor.",
percentage_share(warm_startup_overhead, warm_wall),
));
}
// A missing guest execution phase is reported as 0.0 in the phase split.
let warm_guest = warm_local
.and_then(|scenario| scenario.phase_stats.guest_execution_ms.as_ref())
.map(|stats| stats.mean_ms)
.unwrap_or(0.0);
guidance.push(format!(
"The warm path phase split is {} context setup, {} runtime startup, {} guest execution, and {} completion/stdio work. Future attribution can now separate bootstrap wins from pure transport/collection wins instead of treating them as one startup bucket.",
format_ms(warm_context),
format_ms(warm_startup_phase),
format_ms(warm_guest),
format_ms(warm_completion),
));
}
// Warm versus prewarmed startup overhead.
if let (Some(warm_startup_overhead), Some(prewarmed_startup_overhead), Some(isolate_wall)) = (
warm_local
.and_then(|scenario| scenario.startup_overhead_stats.as_ref())
.map(|stats| stats.mean_ms),
prewarmed_local
.and_then(|scenario| scenario.startup_overhead_stats.as_ref())
.map(|stats| stats.mean_ms),
isolate.map(|scenario| scenario.wall_stats.mean_ms),
) {
guidance.push(format!(
"Keeping the current import-cache materialization and builtin/polyfill prewarm alive inside one execution engine cuts warm local startup overhead from {} to {} ({:.1}% faster). The remaining {} of non-import work is the post-prewarm floor that broader warm-pool/snapshot work would still need to attack above the `{}` empty-isolate baseline.",
format_ms(warm_startup_overhead),
format_ms(prewarmed_startup_overhead),
percentage_reduction(warm_startup_overhead, prewarmed_startup_overhead),
format_ms(prewarmed_startup_overhead),
format_ms(isolate_wall),
));
}
// Builtin versus large-package import.
if let (Some(builtin_import), Some(large_import)) = (
builtin
.and_then(|scenario| scenario.guest_import_stats.as_ref())
.map(|stats| stats.mean_ms),
large
.and_then(|scenario| scenario.guest_import_stats.as_ref())
.map(|stats| stats.mean_ms),
) {
guidance.push(format!(
"The large real-world package import (`typescript`) is {:.1}x the builtin path ({} versus {}). That makes `ARC-021C` the right next import-path optimization story: cache sidecar-scoped resolution results, package-type lookups, and module-format classification before attempting deeper structural rewrites.",
safe_ratio(large_import, builtin_import),
format_ms(large_import),
format_ms(builtin_import),
));
}
// Transport RTT: compares the first and last entries of `transport_rtt`.
// NOTE(review): the wording assumes the entries are ordered by ascending payload
// size; with a single entry, first and last are the same element.
if let (Some(smallest), Some(largest)) =
(self.transport_rtt.first(), self.transport_rtt.last())
{
guidance.push(format!(
"Execution-transport RTT over the stdio bridge rises from {} at {} bytes to {} at {} bytes. That gives later work a direct transport floor to compare against the larger startup and import phases.",
format_ms(smallest.stats.mean_ms),
smallest.payload_bytes,
format_ms(largest.stats.mean_ms),
largest.payload_bytes,
));
}
// Scenario with the largest wall-time standard deviation.
if let Some(noisiest) = self.scenarios.iter().max_by(|lhs, rhs| {
lhs.wall_stats
.stddev_ms
.total_cmp(&rhs.wall_stats.stddev_ms)
}) {
guidance.push(format!(
"Wall-time noise is now surfaced directly in the same artifact set: `{}` currently shows the largest spread at {} stddev over a {}-{} wall range, so future deltas on that path should be judged against stability as well as mean time.",
noisiest.id,
format_ms(noisiest.wall_stats.stddev_ms),
format_ms(noisiest.wall_stats.min_ms),
format_ms(noisiest.wall_stats.max_ms),
));
}
// Scenario with the highest mean RSS; scenarios without RSS stats sort lowest.
if let Some(heaviest) = self.scenarios.iter().max_by(|lhs, rhs| {
lhs.resource_usage_stats
.as_ref()
.and_then(|stats| stats.rss_bytes.as_ref())
.map(|stats| stats.mean)
.unwrap_or(f64::NEG_INFINITY)
.total_cmp(
&rhs.resource_usage_stats
.as_ref()
.and_then(|stats| stats.rss_bytes.as_ref())
.map(|stats| stats.mean)
.unwrap_or(f64::NEG_INFINITY),
)
}) {
// Guards against the case where no scenario carries RSS stats at all.
if let Some(rss_mean) = heaviest
.resource_usage_stats
.as_ref()
.and_then(|stats| stats.rss_bytes.as_ref())
{
guidance.push(format!(
"Per-scenario resource reporting is now attached to the benchmark rows themselves: `{}` currently has the highest mean RSS at {} MiB, so import-path changes can now be judged for memory regressions without a separate memory-only pass.",
heaviest.id,
format_mib(bytes_to_mib(rss_mean.mean)),
));
}
}
// Closing bullet, always present.
guidance.push(String::from(
"No new PRD stories were added from this run. The measured hotspots already map cleanly onto existing follow-ons: `ARC-021C` for safe resolution and metadata caches, `ARC-021D` for builtin/polyfill prewarm, and `ARC-022` for broader warm-pool and timing-mitigation execution work.",
));
guidance
}
/// Returns the first scenario whose `id` equals `id`, or `None` when there is none.
fn scenario(&self, id: &str) -> Option<&BenchmarkScenarioReport> {
self.scenarios.iter().find(|scenario| scenario.id == id)
}
/// Assembles the borrowed, serializable view of this report that is written
/// as the JSON artifact, optionally embedding a baseline `comparison`.
fn json_artifact<'a>(
    &'a self,
    comparison: Option<&'a BenchmarkComparison>,
) -> JavascriptBenchmarkArtifact<'a> {
    // Command line that reproduces this run with the same iteration settings.
    let command = format!(
        "cargo run -p agent-os-execution --bin node-import-bench -- --iterations {} --warmup-iterations {}",
        self.config.iterations, self.config.warmup_iterations
    );

    let mut transport_rtt = Vec::with_capacity(self.transport_rtt.len());
    for transport in &self.transport_rtt {
        transport_rtt.push(BenchmarkTransportRttArtifact {
            channel: transport.channel,
            payload_bytes: transport.payload_bytes,
            samples_ms: &transport.samples_ms,
            stats: &transport.stats,
        });
    }

    let mut scenarios = Vec::with_capacity(self.scenarios.len());
    for scenario in &self.scenarios {
        scenarios.push(BenchmarkScenarioArtifact {
            id: scenario.id,
            workload: scenario.workload,
            runtime: scenario.runtime,
            mode: scenario.mode,
            description: scenario.description,
            fixture: scenario.fixture,
            compile_cache: scenario.compile_cache,
            wall_samples_ms: &scenario.wall_samples_ms,
            wall_stats: &scenario.wall_stats,
            guest_import_samples_ms: scenario.guest_import_samples_ms.as_deref(),
            guest_import_stats: scenario.guest_import_stats.as_ref(),
            startup_overhead_samples_ms: scenario.startup_overhead_samples_ms.as_deref(),
            startup_overhead_stats: scenario.startup_overhead_stats.as_ref(),
            mean_startup_share_pct: scenario.mean_startup_share_pct(),
            phase_samples_ms: &scenario.phase_samples_ms,
            phase_stats: &scenario.phase_stats,
            resource_usage_samples: scenario.resource_usage_samples.as_ref(),
            resource_usage_stats: scenario.resource_usage_stats.as_ref(),
        });
    }

    JavascriptBenchmarkArtifact {
        artifact_version: BENCHMARK_ARTIFACT_VERSION,
        generated_at_unix_ms: self.generated_at_unix_ms,
        command,
        config: &self.config,
        host: &self.host,
        repo_root: &self.repo_root,
        summary: self.summary(),
        comparison,
        transport_rtt,
        scenarios,
    }
}
/// Builds the summary section of the JSON artifact.
///
/// The "slowest guest import" and "highest startup share" highlights only
/// consider scenarios that actually carry the metric. If no scenario has it,
/// the highlight is `None` rather than an arbitrary scenario.
fn summary(&self) -> BenchmarkSummaryArtifact<'_> {
    // Pair each scenario with its metric so scenarios lacking it are excluded
    // up front instead of competing with a sentinel value.
    let slowest_guest_import_scenario = self
        .scenarios
        .iter()
        .filter_map(|scenario| {
            scenario
                .guest_import_stats
                .as_ref()
                .map(|stats| (scenario, stats.mean_ms))
        })
        .max_by(|(_, lhs), (_, rhs)| lhs.total_cmp(rhs))
        .map(|(scenario, _)| scenario);
    let highest_startup_share_scenario = self
        .scenarios
        .iter()
        .filter_map(|scenario| {
            scenario
                .mean_startup_share_pct()
                .map(|share| (scenario, share))
        })
        .max_by(|(_, lhs), (_, rhs)| lhs.total_cmp(rhs))
        .map(|(scenario, _)| scenario);

    BenchmarkSummaryArtifact {
        scenario_count: self.scenarios.len(),
        recorded_samples_per_scenario: self.config.iterations,
        warmup_iterations: self.config.warmup_iterations,
        control_matrix: self.control_matrix(),
        slowest_wall_scenario: self.slowest_scenario_by(|scenario| scenario.wall_stats.mean_ms),
        slowest_guest_import_scenario,
        highest_startup_share_scenario,
        hotspot_rankings: self.hotspot_rankings(),
        guidance_lines: self.guidance_lines(),
    }
}
/// Groups scenarios by workload, one row per workload in order of first
/// appearance. Each row lists the distinct runtimes and modes seen for that
/// workload plus every scenario id belonging to it.
fn control_matrix(&self) -> Vec<BenchmarkControlMatrixArtifact<'_>> {
    let mut rows = Vec::new();
    // Maps a workload label to the index of its row in `rows`.
    let mut index_by_workload = BTreeMap::new();
    for scenario in &self.scenarios {
        let slot = match index_by_workload.get(scenario.workload) {
            Some(&existing) => existing,
            None => {
                let created = rows.len();
                rows.push(BenchmarkControlMatrixArtifact {
                    workload: scenario.workload,
                    runtimes: Vec::new(),
                    modes: Vec::new(),
                    scenario_ids: Vec::new(),
                });
                index_by_workload.insert(scenario.workload, created);
                created
            }
        };
        let target = &mut rows[slot];
        push_unique_label(&mut target.runtimes, scenario.runtime);
        push_unique_label(&mut target.modes, scenario.mode);
        target.scenario_ids.push(scenario.id);
    }
    rows
}
/// Returns the scenario for which `value` yields the largest number under
/// `f64::total_cmp`, or `None` when the report has no scenarios.
fn slowest_scenario_by(
&self,
value: impl Fn(&BenchmarkScenarioReport) -> f64,
) -> Option<&BenchmarkScenarioReport> {
self.scenarios
.iter()
.max_by(|lhs, rhs| value(lhs).total_cmp(&value(rhs)))
}
/// Produces one ranking per entry of `HOTSPOT_METRICS`.
///
/// Scenarios that report the metric are ordered by descending value (ties
/// broken by ascending id) and numbered from 1. Scenarios that do not report
/// it are listed separately in `scenarios_without_metric`.
fn hotspot_rankings(&self) -> Vec<BenchmarkHotspotRankingArtifact<'_>> {
    let mut rankings = Vec::new();
    for metric in HOTSPOT_METRICS.iter() {
        // Scenarios that report this metric, paired with the reported value.
        let mut measured = self
            .scenarios
            .iter()
            .filter_map(|scenario| (metric.value)(scenario).map(|value| (scenario, value)))
            .collect::<Vec<_>>();
        measured.sort_by(|(lhs, lhs_value), (rhs, rhs_value)| {
            rhs_value
                .total_cmp(lhs_value)
                .then_with(|| lhs.id.cmp(rhs.id))
        });

        let ranked_scenarios = measured
            .into_iter()
            .enumerate()
            .map(|(index, (scenario, value))| BenchmarkHotspotScenarioArtifact {
                rank: index + 1,
                id: scenario.id,
                workload: scenario.workload,
                runtime: scenario.runtime,
                mode: scenario.mode,
                value,
            })
            .collect::<Vec<_>>();

        let scenarios_without_metric = self
            .scenarios
            .iter()
            .filter(|scenario| (metric.value)(scenario).is_none())
            .map(|scenario| scenario.id)
            .collect();

        rankings.push(BenchmarkHotspotRankingArtifact {
            metric: metric.metric,
            label: metric.label,
            dimension: metric.dimension,
            unit: metric.unit,
            ranked_scenarios,
            scenarios_without_metric,
        });
    }
    rankings
}
}
impl BenchmarkScenarioReport {
    /// Mean startup overhead as a percentage of mean wall time.
    ///
    /// Returns `None` when the scenario has no startup overhead statistics and
    /// `Some(0.0)` when the mean wall time is not positive.
    fn mean_startup_share_pct(&self) -> Option<f64> {
        self.startup_overhead_stats.as_ref().map(|startup| {
            let wall_mean = self.wall_stats.mean_ms;
            if wall_mean <= 0.0 {
                0.0
            } else {
                (startup.mean_ms / wall_mean) * 100.0
            }
        })
    }

    /// Spread between the largest and smallest wall-time values, in milliseconds.
    fn wall_range_ms(&self) -> f64 {
        let stats = &self.wall_stats;
        stats.max_ms - stats.min_ms
    }
}
impl BenchmarkResourceUsage<Vec<f64>> {
    /// Forwards each metric of `sample` to `push_optional_sample` together
    /// with the matching sample list of `self`.
    fn push_sample(&mut self, sample: &BenchmarkResourceUsage<f64>) {
        let slots = [
            (&mut self.rss_bytes, sample.rss_bytes),
            (&mut self.heap_used_bytes, sample.heap_used_bytes),
            (&mut self.cpu_user_us, sample.cpu_user_us),
            (&mut self.cpu_system_us, sample.cpu_system_us),
            (&mut self.cpu_total_us, sample.cpu_total_us),
        ];
        for (slot, value) in slots {
            push_optional_sample(slot, value);
        }
    }

    /// Returns `Some(self)` when at least one metric is present, `None` otherwise.
    fn into_populated(self) -> Option<Self> {
        if self.is_empty() {
            None
        } else {
            Some(self)
        }
    }
}
impl<T> BenchmarkResourceUsage<T> {
    /// Returns `true` when none of the resource metrics is present.
    fn is_empty(&self) -> bool {
        let any_present = self.rss_bytes.is_some()
            || self.heap_used_bytes.is_some()
            || self.cpu_user_us.is_some()
            || self.cpu_system_us.is_some()
            || self.cpu_total_us.is_some();
        !any_present
    }
}
impl BenchmarkComparison {
    /// Compares `current` against a stored `baseline` artifact.
    ///
    /// Scenarios are matched by id. A scenario present in both gets a
    /// [`BenchmarkScenarioDelta`]; optional metrics produce a delta only when
    /// both sides report them, and phase deltas only when the baseline carries
    /// phase statistics. Ids present on one side only are reported in
    /// `scenarios_missing_from_baseline` and `baseline_only_scenarios`.
    ///
    /// `baseline_path` is canonicalized on a best-effort basis; when that
    /// fails the path is recorded as given.
    fn from_reports(
        current: &JavascriptBenchmarkReport,
        baseline_path: &Path,
        baseline: &StoredBenchmarkArtifact,
    ) -> Self {
        let baseline_path =
            fs::canonicalize(baseline_path).unwrap_or_else(|_| baseline_path.to_path_buf());
        let baseline_by_id = baseline
            .scenarios
            .iter()
            .map(|scenario| (scenario.id.as_str(), scenario))
            .collect::<BTreeMap<_, _>>();

        // Delta of an optional metric: only defined when both sides have a mean.
        let optional_delta = |baseline_mean: Option<f64>, current_mean: Option<f64>| {
            baseline_mean
                .zip(current_mean)
                .map(|(baseline_ms, current_ms)| {
                    BenchmarkMetricDelta::from_means(baseline_ms, current_ms)
                })
        };

        let mut scenario_deltas = Vec::new();
        let mut scenarios_missing_from_baseline = Vec::new();
        for scenario in &current.scenarios {
            let baseline_scenario = match baseline_by_id.get(scenario.id) {
                Some(baseline_scenario) => baseline_scenario,
                None => {
                    scenarios_missing_from_baseline.push(scenario.id.to_owned());
                    continue;
                }
            };
            let current_phase = &scenario.phase_stats;
            scenario_deltas.push(BenchmarkScenarioDelta {
                id: scenario.id.to_owned(),
                description: scenario.description.to_owned(),
                wall_mean_ms: BenchmarkMetricDelta::from_means(
                    baseline_scenario.wall_stats.mean_ms,
                    scenario.wall_stats.mean_ms,
                ),
                guest_import_mean_ms: optional_delta(
                    baseline_scenario
                        .guest_import_stats
                        .as_ref()
                        .map(|stats| stats.mean_ms),
                    scenario
                        .guest_import_stats
                        .as_ref()
                        .map(|stats| stats.mean_ms),
                ),
                startup_overhead_mean_ms: optional_delta(
                    baseline_scenario
                        .startup_overhead_stats
                        .as_ref()
                        .map(|stats| stats.mean_ms),
                    scenario
                        .startup_overhead_stats
                        .as_ref()
                        .map(|stats| stats.mean_ms),
                ),
                // The current report always has phase stats; only the baseline
                // side is optional.
                phase_mean_ms: baseline_scenario.phase_stats.as_ref().map(|baseline_phase| {
                    BenchmarkScenarioPhases {
                        context_setup_ms: BenchmarkMetricDelta::from_means(
                            baseline_phase.context_setup_ms.mean_ms,
                            current_phase.context_setup_ms.mean_ms,
                        ),
                        startup_ms: BenchmarkMetricDelta::from_means(
                            baseline_phase.startup_ms.mean_ms,
                            current_phase.startup_ms.mean_ms,
                        ),
                        guest_execution_ms: optional_delta(
                            baseline_phase
                                .guest_execution_ms
                                .as_ref()
                                .map(|stats| stats.mean_ms),
                            current_phase
                                .guest_execution_ms
                                .as_ref()
                                .map(|stats| stats.mean_ms),
                        ),
                        completion_ms: BenchmarkMetricDelta::from_means(
                            baseline_phase.completion_ms.mean_ms,
                            current_phase.completion_ms.mean_ms,
                        ),
                    }
                }),
            });
        }

        let current_ids = current
            .scenarios
            .iter()
            .map(|scenario| scenario.id)
            .collect::<std::collections::BTreeSet<&str>>();
        let baseline_only_scenarios = baseline
            .scenarios
            .iter()
            .filter(|scenario| !current_ids.contains(scenario.id.as_str()))
            .map(|scenario| scenario.id.clone())
            .collect::<Vec<_>>();

        // Improvement = most negative wall delta; regression = most positive.
        let largest_wall_improvement = scenario_deltas
            .iter()
            .filter(|scenario| scenario.wall_mean_ms.delta_ms < 0.0)
            .min_by(|lhs, rhs| {
                lhs.wall_mean_ms
                    .delta_ms
                    .total_cmp(&rhs.wall_mean_ms.delta_ms)
            })
            .map(BenchmarkDeltaHighlight::from_wall_delta);
        let largest_wall_regression = scenario_deltas
            .iter()
            .filter(|scenario| scenario.wall_mean_ms.delta_ms > 0.0)
            .max_by(|lhs, rhs| {
                lhs.wall_mean_ms
                    .delta_ms
                    .total_cmp(&rhs.wall_mean_ms.delta_ms)
            })
            .map(BenchmarkDeltaHighlight::from_wall_delta);

        Self {
            baseline: BenchmarkComparisonBaseline {
                artifact_version: baseline.artifact_version,
                generated_at_unix_ms: baseline.generated_at_unix_ms,
                path: baseline_path,
            },
            summary: BenchmarkComparisonSummary {
                compared_scenario_count: scenario_deltas.len(),
                largest_wall_improvement,
                largest_wall_regression,
            },
            scenario_deltas,
            scenarios_missing_from_baseline,
            baseline_only_scenarios,
        }
    }
}
impl BenchmarkDeltaHighlight {
    /// Builds a summary highlight from one scenario's wall-time delta.
    ///
    /// Copies the scenario id together with the absolute (`delta_ms`) and
    /// relative (`delta_pct`) change of the scenario's mean wall time.
    fn from_wall_delta(delta: &BenchmarkScenarioDelta) -> Self {
        let wall = &delta.wall_mean_ms;
        Self {
            delta_pct: wall.delta_pct,
            delta_ms: wall.delta_ms,
            id: delta.id.clone(),
        }
    }
}
impl BenchmarkMetricDelta {
    /// Computes the absolute and percentage change between two means.
    ///
    /// `delta_ms` is `current_ms - baseline_ms`. `delta_pct` is that change
    /// relative to the baseline, in percent; it is reported as `0.0` when the
    /// baseline is zero or negative so no division by a non-positive value
    /// takes place.
    fn from_means(baseline_ms: f64, current_ms: f64) -> Self {
        let change_ms = current_ms - baseline_ms;
        // The `<=` form is intentional: a NaN baseline falls through to the
        // division branch exactly as it always has.
        let change_pct = if baseline_ms <= 0.0 {
            0.0
        } else {
            (change_ms / baseline_ms) * 100.0
        };
        Self {
            baseline_ms,
            current_ms,
            delta_ms: change_ms,
            delta_pct: change_pct,
        }
    }
}
/// Locations of the two artifact files written for a benchmark report.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct JavascriptBenchmarkArtifactPaths {
/// Path of the Markdown artifact.
pub markdown_path: PathBuf,
/// Path of the JSON artifact.
pub json_path: PathBuf,
}
/// Outcome of `run_javascript_benchmarks_with_recovery`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct JavascriptBenchmarkRunOutput {
/// Where the report artifacts were written.
pub artifact_paths: JavascriptBenchmarkArtifactPaths,
/// Number of stages (the transport RTT probe plus individual scenarios)
/// that were restored from a saved run state instead of being re-measured.
pub resumed_stage_count: usize,
}
/// Serialize-only, borrowing view of a complete benchmark report.
///
/// NOTE(review): presumably this is the document written as the JSON
/// artifact; confirm at the construction site, which is outside this section.
#[derive(Debug, Serialize)]
struct JavascriptBenchmarkArtifact<'a> {
artifact_version: u32,
generated_at_unix_ms: u128,
command: String,
config: &'a JavascriptBenchmarkConfig,
host: &'a BenchmarkHost,
repo_root: &'a Path,
summary: BenchmarkSummaryArtifact<'a>,
// Omitted from the serialized output when no baseline comparison exists.
#[serde(skip_serializing_if = "Option::is_none")]
comparison: Option<&'a BenchmarkComparison>,
transport_rtt: Vec<BenchmarkTransportRttArtifact<'a>>,
scenarios: Vec<BenchmarkScenarioArtifact<'a>>,
}
/// Serialize-only summary section of the benchmark artifact.
///
/// The three `Option` highlights are left out of the serialized output when
/// they are `None`.
#[derive(Debug, Serialize)]
struct BenchmarkSummaryArtifact<'a> {
scenario_count: usize,
recorded_samples_per_scenario: usize,
warmup_iterations: usize,
control_matrix: Vec<BenchmarkControlMatrixArtifact<'a>>,
#[serde(skip_serializing_if = "Option::is_none")]
slowest_wall_scenario: Option<&'a BenchmarkScenarioReport>,
#[serde(skip_serializing_if = "Option::is_none")]
slowest_guest_import_scenario: Option<&'a BenchmarkScenarioReport>,
#[serde(skip_serializing_if = "Option::is_none")]
highest_startup_share_scenario: Option<&'a BenchmarkScenarioReport>,
hotspot_rankings: Vec<BenchmarkHotspotRankingArtifact<'a>>,
guidance_lines: Vec<String>,
}
/// Serialize-only view of one scenario's results.
///
/// Static descriptive fields are `&'static str`; sample and statistic fields
/// borrow from data that lives for `'a`. Every `Option` field is skipped in
/// the serialized output when it is `None`.
#[derive(Debug, Serialize)]
struct BenchmarkScenarioArtifact<'a> {
id: &'static str,
workload: &'static str,
runtime: &'static str,
mode: &'static str,
description: &'static str,
fixture: &'static str,
compile_cache: &'static str,
wall_samples_ms: &'a [f64],
wall_stats: &'a BenchmarkStats,
#[serde(skip_serializing_if = "Option::is_none")]
guest_import_samples_ms: Option<&'a [f64]>,
#[serde(skip_serializing_if = "Option::is_none")]
guest_import_stats: Option<&'a BenchmarkStats>,
#[serde(skip_serializing_if = "Option::is_none")]
startup_overhead_samples_ms: Option<&'a [f64]>,
#[serde(skip_serializing_if = "Option::is_none")]
startup_overhead_stats: Option<&'a BenchmarkStats>,
#[serde(skip_serializing_if = "Option::is_none")]
mean_startup_share_pct: Option<f64>,
phase_samples_ms: &'a BenchmarkScenarioPhases<Vec<f64>>,
phase_stats: &'a BenchmarkScenarioPhases<BenchmarkStats>,
#[serde(skip_serializing_if = "Option::is_none")]
resource_usage_samples: Option<&'a BenchmarkResourceUsage<Vec<f64>>>,
#[serde(skip_serializing_if = "Option::is_none")]
resource_usage_stats: Option<&'a BenchmarkResourceUsage<BenchmarkDistributionStats>>,
}
/// Serialize-only row of the summary's control matrix: one workload together
/// with lists of runtimes, modes and scenario ids.
///
/// NOTE(review): presumably the lists cover every scenario sharing the
/// workload; confirm where the matrix is built (outside this section).
#[derive(Debug, Serialize)]
struct BenchmarkControlMatrixArtifact<'a> {
workload: &'a str,
runtimes: Vec<&'a str>,
modes: Vec<&'a str>,
scenario_ids: Vec<&'a str>,
}
/// Serialize-only view of the transport round-trip measurements for one
/// payload size.
#[derive(Debug, Serialize)]
struct BenchmarkTransportRttArtifact<'a> {
channel: &'static str,
payload_bytes: usize,
samples_ms: &'a [f64],
stats: &'a BenchmarkStats,
}
/// Serialize-only ranking of scenarios for a single hotspot metric.
///
/// `metric`, `label`, `dimension` and `unit` mirror the fields of
/// `HotspotMetricDefinition`.
#[derive(Debug, Serialize)]
struct BenchmarkHotspotRankingArtifact<'a> {
metric: &'static str,
label: &'static str,
dimension: &'static str,
unit: &'static str,
ranked_scenarios: Vec<BenchmarkHotspotScenarioArtifact<'a>>,
// Left out of the serialized output when the list is empty.
#[serde(skip_serializing_if = "Vec::is_empty")]
scenarios_without_metric: Vec<&'a str>,
}
/// Serialize-only entry of a hotspot ranking: a scenario's identifying labels,
/// its rank, and the metric value it was ranked by.
#[derive(Debug, Serialize)]
struct BenchmarkHotspotScenarioArtifact<'a> {
rank: usize,
id: &'a str,
workload: &'a str,
runtime: &'a str,
mode: &'a str,
value: f64,
}
/// Static description of one hotspot metric, as listed in `HOTSPOT_METRICS`.
struct HotspotMetricDefinition {
/// Machine-readable metric identifier (for example `"wall_mean_ms"`).
metric: &'static str,
/// Human-readable metric name.
label: &'static str,
/// Category of the metric (for example `"time"`, `"memory"`).
dimension: &'static str,
/// Unit of the extracted value (for example `"ms"`, `"MiB"`).
unit: &'static str,
/// Extracts the metric from a scenario report; the `Option` return lets an
/// extractor report that a scenario has no value for the metric.
value: fn(&BenchmarkScenarioReport) -> Option<f64>,
}
/// The 13 hotspot metrics, each pairing its identifiers (`metric`, `label`,
/// `dimension`, `unit`) with the function that extracts the value from a
/// scenario report.
const HOTSPOT_METRICS: [HotspotMetricDefinition; 13] = [
// --- Wall time and its stability ---
HotspotMetricDefinition {
metric: "wall_mean_ms",
label: "Wall Time",
dimension: "time",
unit: "ms",
value: hotspot_wall_mean_ms,
},
HotspotMetricDefinition {
metric: "wall_stddev_ms",
label: "Wall Time Stddev",
dimension: "stability",
unit: "ms",
value: hotspot_wall_stddev_ms,
},
HotspotMetricDefinition {
metric: "wall_range_ms",
label: "Wall Time Range",
dimension: "stability",
unit: "ms",
value: hotspot_wall_range_ms,
},
// --- Guest import and startup overhead ---
HotspotMetricDefinition {
metric: "guest_import_mean_ms",
label: "Guest Import Time",
dimension: "time",
unit: "ms",
value: hotspot_guest_import_mean_ms,
},
HotspotMetricDefinition {
metric: "startup_overhead_mean_ms",
label: "Startup Overhead",
dimension: "time",
unit: "ms",
value: hotspot_startup_overhead_mean_ms,
},
// --- Per-phase timings ---
HotspotMetricDefinition {
metric: "context_setup_mean_ms",
label: "Context Setup Phase",
dimension: "time",
unit: "ms",
value: hotspot_context_setup_mean_ms,
},
HotspotMetricDefinition {
metric: "startup_phase_mean_ms",
label: "Runtime Startup Phase",
dimension: "time",
unit: "ms",
value: hotspot_startup_phase_mean_ms,
},
HotspotMetricDefinition {
metric: "guest_execution_mean_ms",
label: "Guest Execution Phase",
dimension: "time",
unit: "ms",
value: hotspot_guest_execution_mean_ms,
},
HotspotMetricDefinition {
metric: "completion_mean_ms",
label: "Completion/Stdio Phase",
dimension: "time",
unit: "ms",
value: hotspot_completion_mean_ms,
},
// --- Share, memory and CPU ---
HotspotMetricDefinition {
metric: "startup_share_pct",
label: "Startup Share Of Wall",
dimension: "share",
unit: "pct",
value: hotspot_startup_share_pct,
},
HotspotMetricDefinition {
metric: "rss_mean_mib",
label: "RSS",
dimension: "memory",
unit: "MiB",
value: hotspot_rss_mean_mib,
},
HotspotMetricDefinition {
metric: "heap_mean_mib",
label: "Heap Used",
dimension: "memory",
unit: "MiB",
value: hotspot_heap_mean_mib,
},
HotspotMetricDefinition {
metric: "cpu_total_mean_ms",
label: "Total CPU",
dimension: "cpu",
unit: "ms",
value: hotspot_total_cpu_mean_ms,
},
];
/// Errors that can occur while running or reporting the JavaScript benchmarks.
#[derive(Debug)]
pub enum JavascriptBenchmarkError {
/// The benchmark configuration was rejected; carries the reason.
InvalidConfig(&'static str),
/// The workspace root could not be resolved from the given crate path.
InvalidWorkspaceRoot(PathBuf),
/// A baseline artifact at `path` could not be parsed.
InvalidBaselineReport {
path: PathBuf,
message: String,
},
/// An I/O operation failed (also used for wrapped `serde_json` errors).
Io(std::io::Error),
/// Benchmark output was not valid UTF-8.
Utf8(std::string::FromUtf8Error),
/// The execution engine reported a failure.
Execution(JavascriptExecutionError),
/// Querying the Node version failed.
NodeVersion(std::io::Error),
/// The named scenario did not emit a metric marker.
MissingBenchmarkMetric(&'static str),
/// The named scenario emitted a metric value that could not be accepted.
InvalidBenchmarkMetric {
scenario: &'static str,
raw_value: String,
},
/// The transport probe timed out waiting for a round-trip of this size.
TransportProbeTimeout {
payload_bytes: usize,
},
/// The transport probe process exited; carries its exit code and stderr.
TransportProbeExited {
exit_code: i32,
stderr: String,
},
/// The transport probe returned a payload different from the expected one.
InvalidTransportProbeResponse {
payload_bytes: usize,
expected: String,
actual: String,
},
/// A benchmark scenario exited with the given code; carries its stderr.
NonZeroExit {
scenario: &'static str,
exit_code: i32,
stderr: String,
},
}
impl fmt::Display for JavascriptBenchmarkError {
    /// Writes a single-line, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // Configuration and environment problems.
            Self::InvalidConfig(message) => write!(f, "invalid benchmark config: {message}"),
            Self::InvalidWorkspaceRoot(path) => write!(
                f,
                "failed to resolve workspace root from execution crate path: {}",
                path.display()
            ),
            Self::InvalidBaselineReport { path, message } => write!(
                f,
                "failed to parse benchmark baseline artifact {}: {message}",
                path.display()
            ),
            Self::NodeVersion(err) => write!(f, "failed to query node version: {err}"),

            // Wrapped lower-level failures.
            Self::Io(err) => write!(f, "benchmark I/O failure: {err}"),
            Self::Utf8(err) => write!(f, "benchmark output was not valid UTF-8: {err}"),
            Self::Execution(err) => write!(f, "benchmark execution failed: {err}"),

            // Scenario-level failures.
            Self::MissingBenchmarkMetric(scenario) => write!(
                f,
                "benchmark scenario `{scenario}` did not emit a metric marker"
            ),
            Self::InvalidBenchmarkMetric {
                scenario,
                raw_value,
            } => write!(
                f,
                "benchmark scenario `{scenario}` emitted an invalid metric: {raw_value}"
            ),
            Self::NonZeroExit {
                scenario,
                exit_code,
                stderr,
            } => write!(
                f,
                "benchmark scenario `{scenario}` exited with code {exit_code}: {stderr}"
            ),

            // Transport probe failures.
            Self::TransportProbeTimeout { payload_bytes } => write!(
                f,
                "transport probe timed out waiting for {payload_bytes}-byte round-trip"
            ),
            Self::TransportProbeExited { exit_code, stderr } => {
                write!(f, "transport probe exited with code {exit_code}: {stderr}")
            }
            Self::InvalidTransportProbeResponse {
                payload_bytes,
                expected,
                actual,
            } => write!(
                f,
                "transport probe returned unexpected payload for {payload_bytes}-byte round-trip: expected {expected:?}, got {actual:?}"
            ),
        }
    }
}
// Uses the trait's default methods only; `source()` is not overridden, so the
// wrapped errors are exposed through the `Display` text rather than a chain.
impl std::error::Error for JavascriptBenchmarkError {}
impl From<std::io::Error> for JavascriptBenchmarkError {
fn from(err: std::io::Error) -> Self {
Self::Io(err)
}
}
impl From<std::string::FromUtf8Error> for JavascriptBenchmarkError {
fn from(err: std::string::FromUtf8Error) -> Self {
Self::Utf8(err)
}
}
impl From<serde_json::Error> for JavascriptBenchmarkError {
fn from(err: serde_json::Error) -> Self {
Self::Io(std::io::Error::new(std::io::ErrorKind::InvalidData, err))
}
}
impl From<JavascriptExecutionError> for JavascriptBenchmarkError {
fn from(err: JavascriptExecutionError) -> Self {
Self::Execution(err)
}
}
pub fn run_javascript_benchmarks(
config: &JavascriptBenchmarkConfig,
) -> Result<JavascriptBenchmarkReport, JavascriptBenchmarkError> {
if config.iterations == 0 {
return Err(JavascriptBenchmarkError::InvalidConfig(
"iterations must be greater than zero",
));
}
let repo_root = workspace_root()?;
let host = benchmark_host()?;
let workspace = BenchmarkWorkspace::create(&repo_root)?;
let transport_rtt = measure_transport_rtt(&workspace, config)?;
let mut scenarios = Vec::new();
for scenario in benchmark_scenarios() {
scenarios.push(run_scenario(&workspace, config, scenario)?);
}
Ok(JavascriptBenchmarkReport {
generated_at_unix_ms: SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_millis(),
config: config.clone(),
host,
repo_root,
transport_rtt,
scenarios,
})
}
/// Returns the directory, under `repo_root`, that holds benchmark artifacts.
fn benchmark_artifact_dir(repo_root: &Path) -> PathBuf {
    let mut artifact_dir = repo_root.to_path_buf();
    artifact_dir.push(BENCHMARK_ARTIFACT_DIR);
    artifact_dir
}
/// Returns the path of the resumable run-state file inside `artifact_dir`.
fn benchmark_run_state_path(artifact_dir: &Path) -> PathBuf {
    let mut state_path = artifact_dir.to_path_buf();
    state_path.push(BENCHMARK_RUN_STATE_FILE);
    state_path
}
/// Loads the saved run state from `state_path`, or starts a fresh one.
///
/// A fresh state is returned when the file does not exist, cannot be parsed,
/// or is not compatible with the current `config`, `host` and `repo_root`.
/// A compatible state is sanitized against `definitions` before it is
/// returned.
///
/// # Errors
///
/// Returns `Io` for any read failure other than "file not found".
fn load_benchmark_run_state(
    state_path: &Path,
    config: &JavascriptBenchmarkConfig,
    host: &BenchmarkHost,
    repo_root: &Path,
    definitions: &[ScenarioDefinition],
) -> Result<StoredBenchmarkRunState, JavascriptBenchmarkError> {
    let raw = match fs::read_to_string(state_path) {
        Ok(raw) => raw,
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
            return Ok(StoredBenchmarkRunState::new(config, host, repo_root));
        }
        Err(err) => return Err(JavascriptBenchmarkError::Io(err)),
    };

    // Unparseable or incompatible state is discarded rather than reported.
    let reusable = serde_json::from_str::<StoredBenchmarkRunState>(&raw)
        .ok()
        .filter(|state| state.is_compatible(config, host, repo_root));

    Ok(match reusable {
        Some(state) => state.sanitized(definitions),
        None => StoredBenchmarkRunState::new(config, host, repo_root),
    })
}
/// Serializes `state` as pretty-printed JSON and writes it to `state_path`
/// through `write_string_atomic`.
///
/// # Errors
///
/// Propagates serialization and I/O failures.
fn persist_benchmark_run_state(
    state_path: &Path,
    state: &StoredBenchmarkRunState,
) -> Result<(), JavascriptBenchmarkError> {
    let serialized = serde_json::to_string_pretty(state)?;
    write_string_atomic(state_path, &serialized)
}
/// Writes `contents` to `path` by writing a sibling temporary file and then
/// renaming it over the destination.
///
/// Missing parent directories are created first. The temporary file name is
/// derived from the destination file name, the process id and the current
/// time in nanoseconds.
///
/// The temporary file is removed (best effort) when either the write or the
/// rename fails, so a failed call does not leave a partial `.tmp-` file
/// behind.
///
/// # Errors
///
/// Returns `Io` when the parent directory cannot be created, or when writing
/// or renaming the temporary file fails.
fn write_string_atomic(path: &Path, contents: &str) -> Result<(), JavascriptBenchmarkError> {
    if let Some(parent) = path.parent() {
        fs::create_dir_all(parent)?;
    }
    let temp_path = path.with_file_name(format!(
        ".{}.tmp-{}-{}",
        path.file_name()
            .and_then(|name| name.to_str())
            .unwrap_or("artifact"),
        std::process::id(),
        SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap_or_default()
            .as_nanos()
    ));
    // Treat write + rename as one unit so both failure paths share the same
    // cleanup; a failed write may already have created a partial temp file.
    let outcome = fs::write(&temp_path, contents).and_then(|()| fs::rename(&temp_path, path));
    if let Err(err) = outcome {
        // Best effort: the original error is more useful than a cleanup error.
        let _ = fs::remove_file(&temp_path);
        return Err(JavascriptBenchmarkError::Io(err));
    }
    Ok(())
}
/// Deletes the file at `path`, treating an already-missing file as success.
///
/// # Errors
///
/// Returns `Io` for any removal failure other than "file not found".
fn remove_file_if_exists(path: &Path) -> Result<(), JavascriptBenchmarkError> {
    if let Err(err) = fs::remove_file(path) {
        if err.kind() != std::io::ErrorKind::NotFound {
            return Err(JavascriptBenchmarkError::Io(err));
        }
    }
    Ok(())
}
/// Returns the current wall-clock time as milliseconds since the Unix epoch.
///
/// Yields `0` if the system clock reports a time before the epoch.
fn current_unix_ms() -> u128 {
    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH);
    since_epoch
        .map(|elapsed| elapsed.as_millis())
        .unwrap_or(0)
}
/// Static definition of one benchmark scenario, as listed by
/// `benchmark_scenarios`.
#[derive(Debug, Clone, Copy)]
struct ScenarioDefinition {
/// Unique scenario identifier; also used to match saved run-state entries.
id: &'static str,
/// Workload label used to group related scenarios.
workload: &'static str,
/// Runtime the scenario is labelled with in reports.
runtime: ScenarioRuntime,
/// Mode the scenario is labelled with in reports.
mode: ScenarioMode,
/// Human-readable description of what the scenario measures.
description: &'static str,
/// Short description of the fixture being exercised.
fixture: &'static str,
/// Entrypoint module path (relative, e.g. `./bench/<name>.mjs`).
entrypoint: &'static str,
/// Whether a priming sample is run first so the compile cache is populated.
compile_cache: CompileCacheStrategy,
/// Whether samples share an execution engine or use a fresh one each time.
engine_reuse: EngineReuseStrategy,
/// Whether guest import samples are collected for this scenario.
expect_import_metric: bool,
/// Extra environment setup for the scenario.
/// NOTE(review): its consumer is outside this section; confirm the effect.
env: ScenarioEnvironment,
}
/// How a scenario treats the compile cache.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CompileCacheStrategy {
    /// The scenario is labelled as running without a compile cache.
    Disabled,
    /// A priming sample is run before measurements so the cache is populated.
    Primed,
}

impl CompileCacheStrategy {
    /// Returns the lowercase label used for this strategy in reports.
    fn label(self) -> &'static str {
        let text: &'static str = match self {
            CompileCacheStrategy::Disabled => "disabled",
            CompileCacheStrategy::Primed => "primed",
        };
        text
    }
}
/// How execution-engine instances are shared between samples of a scenario.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum EngineReuseStrategy {
/// No shared engine is created for the scenario.
FreshPerSample,
/// One engine is created up front and passed to every sample.
SharedAcrossScenario,
/// One engine is created up front and passed to every sample.
/// NOTE(review): presumably the JavaScript context is reused as well;
/// the distinguishing logic is outside this section, so confirm in `run_sample`.
SharedContextAcrossScenario,
}
/// Extra environment setup requested by a scenario.
///
/// NOTE(review): the code that interprets these variants is outside this
/// section; the descriptions below are inferred from the names and from which
/// scenarios use them, so confirm against the consumer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ScenarioEnvironment {
/// No extra environment setup.
None,
/// Used by the "projected package" scenarios.
ProjectedWorkspaceNodeModules,
}
/// Runtime a scenario is attributed to in reports.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ScenarioRuntime {
    /// Reported as `native-execution`.
    NativeExecution,
    /// Reported as `host-node`.
    HostNode,
}

impl ScenarioRuntime {
    /// Returns the kebab-case label used for this runtime in reports.
    fn label(self) -> &'static str {
        let text: &'static str = match self {
            ScenarioRuntime::HostNode => "host-node",
            ScenarioRuntime::NativeExecution => "native-execution",
        };
        text
    }
}
/// Measurement mode a scenario is attributed to in reports.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ScenarioMode {
    /// Reported as `baseline-control`.
    BaselineControl,
    /// Reported as `true-cold-start`.
    TrueColdStart,
    /// Reported as `new-session-replay`.
    NewSessionReplay,
    /// Reported as `same-session-replay`.
    SameSessionReplay,
    /// Reported as `same-engine-replay`.
    SameEngineReplay,
    /// Reported as `host-control`.
    HostControl,
}

impl ScenarioMode {
    /// Returns the kebab-case label used for this mode in reports.
    fn label(self) -> &'static str {
        let text: &'static str = match self {
            ScenarioMode::BaselineControl => "baseline-control",
            ScenarioMode::HostControl => "host-control",
            ScenarioMode::TrueColdStart => "true-cold-start",
            ScenarioMode::NewSessionReplay => "new-session-replay",
            ScenarioMode::SameSessionReplay => "same-session-replay",
            ScenarioMode::SameEngineReplay => "same-engine-replay",
        };
        text
    }
}
/// Measurements collected from a single sample run of a scenario.
#[derive(Debug)]
struct SampleMeasurement {
/// Wall time of the sample, in milliseconds.
wall_ms: f64,
/// Guest-reported import time in milliseconds, when one was captured.
guest_import_ms: Option<f64>,
/// Context setup phase duration, in milliseconds.
context_setup_ms: f64,
/// Startup phase duration, in milliseconds.
startup_ms: f64,
/// Completion phase duration, in milliseconds.
completion_ms: f64,
/// Resource usage figures for the sample, when available.
resource_usage: Option<BenchmarkResourceUsage<f64>>,
}
/// Temporary on-disk workspace for a benchmark run.
///
/// The directory at `root` is removed when the value is dropped.
#[derive(Debug)]
struct BenchmarkWorkspace {
/// Temporary directory created under the repository root.
root: PathBuf,
/// Repository root the workspace was created from.
repo_root: PathBuf,
}
/// Deserialize-only subset of a previously written benchmark artifact.
///
/// NOTE(review): appears to be the shape read when loading a baseline for
/// comparison; confirm at the loading site, which is outside this section.
#[derive(Debug, Deserialize)]
struct StoredBenchmarkArtifact {
artifact_version: u32,
generated_at_unix_ms: u128,
scenarios: Vec<StoredBenchmarkScenario>,
}
/// Deserialize-only subset of one scenario inside a stored artifact.
///
/// Only `id` and `wall_stats` are required; the remaining statistics default
/// to `None` when absent from the input.
#[derive(Debug, Deserialize)]
struct StoredBenchmarkScenario {
id: String,
wall_stats: BenchmarkStats,
#[serde(default)]
guest_import_stats: Option<BenchmarkStats>,
#[serde(default)]
startup_overhead_stats: Option<BenchmarkStats>,
#[serde(default)]
phase_stats: Option<BenchmarkScenarioPhases<BenchmarkStats>>,
}
/// Owned, (de)serializable copy of `BenchmarkHost` kept in the run-state file.
///
/// `os` and `arch` are `String` here (they are `&'static str` on
/// `BenchmarkHost`) so the struct can be deserialized.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
struct StoredBenchmarkRunHost {
node_binary: String,
node_version: String,
os: String,
arch: String,
logical_cpus: usize,
}
/// Resumable progress of a benchmark run, persisted between stages.
///
/// `artifact_version`, `config`, `host` and `repo_root` identify the run the
/// state belongs to; a saved state is reused only when all of them match the
/// current run.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
struct StoredBenchmarkRunState {
artifact_version: u32,
config: JavascriptBenchmarkConfig,
host: StoredBenchmarkRunHost,
repo_root: PathBuf,
// `None` until the transport RTT stage has been recorded.
#[serde(default)]
transport_rtt: Option<Vec<StoredBenchmarkTransportRttReport>>,
// Scenarios recorded so far.
#[serde(default)]
scenarios: Vec<StoredBenchmarkScenarioReport>,
}
/// Persisted form of a transport RTT report for one payload size.
///
/// The channel is not stored; it is filled in from `TRANSPORT_RTT_CHANNEL`
/// when converting back to a report.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
struct StoredBenchmarkTransportRttReport {
payload_bytes: usize,
samples_ms: Vec<f64>,
stats: BenchmarkStats,
}
/// Persisted form of one scenario's measurements.
///
/// Only measured data and the scenario `id` are stored; the descriptive
/// labels are restored from the matching `ScenarioDefinition` when converting
/// back to a report. Optional fields default to `None` when absent.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
struct StoredBenchmarkScenarioReport {
id: String,
wall_samples_ms: Vec<f64>,
wall_stats: BenchmarkStats,
#[serde(default)]
guest_import_samples_ms: Option<Vec<f64>>,
#[serde(default)]
guest_import_stats: Option<BenchmarkStats>,
#[serde(default)]
startup_overhead_samples_ms: Option<Vec<f64>>,
#[serde(default)]
startup_overhead_stats: Option<BenchmarkStats>,
phase_samples_ms: BenchmarkScenarioPhases<Vec<f64>>,
phase_stats: BenchmarkScenarioPhases<BenchmarkStats>,
#[serde(default)]
resource_usage_samples: Option<BenchmarkResourceUsage<Vec<f64>>>,
#[serde(default)]
resource_usage_stats: Option<BenchmarkResourceUsage<BenchmarkDistributionStats>>,
}
impl BenchmarkWorkspace {
    /// Creates a uniquely named temporary workspace directory under
    /// `repo_root` and populates it with the benchmark fixtures.
    ///
    /// The directory name combines the process id with the current time in
    /// nanoseconds. The directory is removed when the returned value is
    /// dropped, and also when populating it fails part-way.
    ///
    /// # Errors
    ///
    /// Propagates failures from creating the directory or writing the
    /// workspace contents.
    fn create(repo_root: &Path) -> Result<Self, JavascriptBenchmarkError> {
        let root = repo_root.join(format!(
            ".tmp-agent-os-execution-bench-{}-{}",
            std::process::id(),
            SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .unwrap_or_default()
                .as_nanos()
        ));
        fs::create_dir_all(&root)?;
        // Take ownership of the directory before filling it: if populating
        // fails, dropping `workspace` on the error path deletes the directory
        // instead of leaking it in the repository root.
        let workspace = Self {
            root,
            repo_root: repo_root.to_path_buf(),
        };
        write_benchmark_workspace(&workspace.root, repo_root)?;
        Ok(workspace)
    }
}
impl Drop for BenchmarkWorkspace {
    /// Removes the workspace directory; failures are ignored because `drop`
    /// has no way to report them.
    fn drop(&mut self) {
        fs::remove_dir_all(&self.root).ok();
    }
}
impl StoredBenchmarkRunHost {
    /// Creates the owned, storable copy of `host`.
    fn from_host(host: &BenchmarkHost) -> Self {
        Self {
            logical_cpus: host.logical_cpus,
            arch: String::from(host.arch),
            os: String::from(host.os),
            node_version: host.node_version.clone(),
            node_binary: host.node_binary.clone(),
        }
    }

    /// Returns `true` when every stored field equals the corresponding field
    /// of `host`.
    fn matches_host(&self, host: &BenchmarkHost) -> bool {
        let stored = (
            self.node_binary.as_str(),
            self.node_version.as_str(),
            self.os.as_str(),
            self.arch.as_str(),
            self.logical_cpus,
        );
        let current = (
            host.node_binary.as_str(),
            host.node_version.as_str(),
            host.os,
            host.arch,
            host.logical_cpus,
        );
        stored == current
    }
}
impl StoredBenchmarkRunState {
fn new(config: &JavascriptBenchmarkConfig, host: &BenchmarkHost, repo_root: &Path) -> Self {
Self {
artifact_version: BENCHMARK_ARTIFACT_VERSION,
config: config.clone(),
host: StoredBenchmarkRunHost::from_host(host),
repo_root: repo_root.to_path_buf(),
transport_rtt: None,
scenarios: Vec::new(),
}
}
fn is_compatible(
&self,
config: &JavascriptBenchmarkConfig,
host: &BenchmarkHost,
repo_root: &Path,
) -> bool {
self.artifact_version == BENCHMARK_ARTIFACT_VERSION
&& self.config == *config
&& self.host.matches_host(host)
&& self.repo_root == repo_root
}
fn sanitized(mut self, definitions: &[ScenarioDefinition]) -> Self {
if let Some(transport_rtt) = &self.transport_rtt {
let payloads = transport_rtt
.iter()
.map(|report| report.payload_bytes)
.collect::<Vec<_>>();
if payloads != TRANSPORT_RTT_PAYLOAD_BYTES {
self.transport_rtt = None;
}
}
let mut scenarios_by_id = self
.scenarios
.into_iter()
.map(|scenario| (scenario.id.clone(), scenario))
.collect::<BTreeMap<_, _>>();
self.scenarios = definitions
.iter()
.filter_map(|definition| scenarios_by_id.remove(definition.id))
.collect();
self
}
fn resumed_stage_count(&self, definitions: &[ScenarioDefinition]) -> usize {
usize::from(self.transport_rtt.is_some())
+ definitions
.iter()
.filter(|definition| self.has_scenario(definition.id))
.count()
}
fn has_scenario(&self, id: &str) -> bool {
self.scenarios.iter().any(|scenario| scenario.id == id)
}
fn record_transport_rtt(&mut self, transport_rtt: &[BenchmarkTransportRttReport]) {
self.transport_rtt = Some(
transport_rtt
.iter()
.map(StoredBenchmarkTransportRttReport::from_report)
.collect(),
);
}
fn record_scenario(&mut self, scenario: &BenchmarkScenarioReport) {
self.scenarios.retain(|stored| stored.id != scenario.id);
self.scenarios
.push(StoredBenchmarkScenarioReport::from_report(scenario));
}
fn to_report(
&self,
config: &JavascriptBenchmarkConfig,
host: &BenchmarkHost,
repo_root: &Path,
definitions: &[ScenarioDefinition],
) -> JavascriptBenchmarkReport {
let scenarios_by_id = self
.scenarios
.iter()
.map(|scenario| (scenario.id.as_str(), scenario))
.collect::<BTreeMap<_, _>>();
JavascriptBenchmarkReport {
generated_at_unix_ms: current_unix_ms(),
config: config.clone(),
host: host.clone(),
repo_root: repo_root.to_path_buf(),
transport_rtt: self
.transport_rtt
.clone()
.unwrap_or_default()
.into_iter()
.map(StoredBenchmarkTransportRttReport::into_report)
.collect(),
scenarios: definitions
.iter()
.filter_map(|definition| {
scenarios_by_id
.get(definition.id)
.map(|scenario| scenario.to_report(*definition))
})
.collect(),
}
}
}
impl StoredBenchmarkTransportRttReport {
    /// Copies the persistable parts of a transport RTT report.
    fn from_report(report: &BenchmarkTransportRttReport) -> Self {
        let samples_ms = report.samples_ms.to_vec();
        let stats = report.stats.clone();
        Self {
            payload_bytes: report.payload_bytes,
            samples_ms,
            stats,
        }
    }

    /// Rebuilds the report, filling the channel from `TRANSPORT_RTT_CHANNEL`.
    fn into_report(self) -> BenchmarkTransportRttReport {
        let Self {
            payload_bytes,
            samples_ms,
            stats,
        } = self;
        BenchmarkTransportRttReport {
            channel: TRANSPORT_RTT_CHANNEL,
            payload_bytes,
            samples_ms,
            stats,
        }
    }
}
impl StoredBenchmarkScenarioReport {
    /// Copies the measured data of a scenario report into its persistable
    /// form; descriptive labels are not stored.
    fn from_report(report: &BenchmarkScenarioReport) -> Self {
        Self {
            id: String::from(report.id),
            // Wall time.
            wall_samples_ms: report.wall_samples_ms.clone(),
            wall_stats: report.wall_stats.clone(),
            // Optional guest import and startup overhead measurements.
            guest_import_samples_ms: report.guest_import_samples_ms.clone(),
            guest_import_stats: report.guest_import_stats.clone(),
            startup_overhead_samples_ms: report.startup_overhead_samples_ms.clone(),
            startup_overhead_stats: report.startup_overhead_stats.clone(),
            // Per-phase timings.
            phase_samples_ms: report.phase_samples_ms.clone(),
            phase_stats: report.phase_stats.clone(),
            // Optional resource usage.
            resource_usage_samples: report.resource_usage_samples.clone(),
            resource_usage_stats: report.resource_usage_stats.clone(),
        }
    }

    /// Rebuilds a scenario report by combining the stored measurements with
    /// the descriptive labels of `definition`.
    ///
    /// The report's id comes from `definition`, not from the stored id.
    fn to_report(&self, definition: ScenarioDefinition) -> BenchmarkScenarioReport {
        let Self {
            id: _,
            wall_samples_ms,
            wall_stats,
            guest_import_samples_ms,
            guest_import_stats,
            startup_overhead_samples_ms,
            startup_overhead_stats,
            phase_samples_ms,
            phase_stats,
            resource_usage_samples,
            resource_usage_stats,
        } = self;
        BenchmarkScenarioReport {
            id: definition.id,
            workload: definition.workload,
            runtime: definition.runtime.label(),
            mode: definition.mode.label(),
            description: definition.description,
            fixture: definition.fixture,
            compile_cache: definition.compile_cache.label(),
            wall_samples_ms: wall_samples_ms.clone(),
            wall_stats: wall_stats.clone(),
            guest_import_samples_ms: guest_import_samples_ms.clone(),
            guest_import_stats: guest_import_stats.clone(),
            startup_overhead_samples_ms: startup_overhead_samples_ms.clone(),
            startup_overhead_stats: startup_overhead_stats.clone(),
            phase_samples_ms: phase_samples_ms.clone(),
            phase_stats: phase_stats.clone(),
            resource_usage_samples: resource_usage_samples.clone(),
            resource_usage_stats: resource_usage_stats.clone(),
        }
    }
}
pub fn run_javascript_benchmarks_with_recovery(
config: &JavascriptBenchmarkConfig,
baseline_path: Option<&Path>,
) -> Result<JavascriptBenchmarkRunOutput, JavascriptBenchmarkError> {
if config.iterations == 0 {
return Err(JavascriptBenchmarkError::InvalidConfig(
"iterations must be greater than zero",
));
}
let repo_root = workspace_root()?;
let host = benchmark_host()?;
let artifact_dir = benchmark_artifact_dir(&repo_root);
let workspace = BenchmarkWorkspace::create(&repo_root)?;
let (report, resumed_stage_count, state_path) = orchestrate_javascript_benchmark_report(
config,
&repo_root,
&host,
&artifact_dir,
|| measure_transport_rtt(&workspace, config),
|scenario| run_scenario(&workspace, config, scenario),
)?;
let comparison = baseline_path
.map(|path| report.compare_to_baseline_path(path))
.transpose()?;
let artifact_paths =
report.write_artifacts_with_comparison(&artifact_dir, comparison.as_ref())?;
remove_file_if_exists(&state_path)?;
Ok(JavascriptBenchmarkRunOutput {
artifact_paths,
resumed_stage_count,
})
}
/// Drives a resumable benchmark run and returns the report, the number of
/// stages restored from saved state, and the path of the run-state file.
///
/// The saved state (if compatible) is loaded from `artifact_dir`; the
/// transport RTT stage and each scenario are executed only when missing from
/// it, and the state is persisted after every executed stage.
/// `measure_transport` and `run_scenario` perform the actual measurements.
///
/// # Errors
///
/// Returns `InvalidConfig` when `config.iterations` is zero, and otherwise
/// propagates the first failure from I/O or from either callback.
fn orchestrate_javascript_benchmark_report<MeasureTransport, RunScenario>(
    config: &JavascriptBenchmarkConfig,
    repo_root: &Path,
    host: &BenchmarkHost,
    artifact_dir: &Path,
    mut measure_transport: MeasureTransport,
    mut run_scenario: RunScenario,
) -> Result<(JavascriptBenchmarkReport, usize, PathBuf), JavascriptBenchmarkError>
where
    MeasureTransport: FnMut() -> Result<Vec<BenchmarkTransportRttReport>, JavascriptBenchmarkError>,
    RunScenario:
        FnMut(ScenarioDefinition) -> Result<BenchmarkScenarioReport, JavascriptBenchmarkError>,
{
    if config.iterations == 0 {
        return Err(JavascriptBenchmarkError::InvalidConfig(
            "iterations must be greater than zero",
        ));
    }

    fs::create_dir_all(artifact_dir)?;
    let definitions = benchmark_scenarios();
    let state_path = benchmark_run_state_path(artifact_dir);
    let mut state = load_benchmark_run_state(&state_path, config, host, repo_root, &definitions)?;

    // Counted before any stage runs, so it reflects restored work only.
    let resumed_stage_count = state.resumed_stage_count(&definitions);

    if state.transport_rtt.is_none() {
        let measured = measure_transport()?;
        state.record_transport_rtt(&measured);
        persist_benchmark_run_state(&state_path, &state)?;
    }

    for definition in definitions.iter().copied() {
        if !state.has_scenario(definition.id) {
            let measured = run_scenario(definition)?;
            state.record_scenario(&measured);
            persist_benchmark_run_state(&state_path, &state)?;
        }
    }

    // `definitions` is a `Copy` array of constants, so it is reused here
    // rather than rebuilt.
    let report = state.to_report(config, host, repo_root, &definitions);
    Ok((report, resumed_stage_count, state_path))
}
/// Returns the fixed, ordered table of the 21 benchmark scenarios.
///
/// The order matters: scenarios are executed and reported in this order, and
/// saved run state is matched to entries by `id`.
///
/// NOTE(review): `hot-projected-package-file-import` is the only
/// `SameEngineReplay` entry that uses `EngineReuseStrategy::FreshPerSample`
/// (every other one uses `SharedAcrossScenario`), and several `HostNode`
/// entries have descriptions that talk about isolates or contexts. Confirm
/// that these runtime/mode labels are intended.
fn benchmark_scenarios() -> [ScenarioDefinition; 21] {
[
// --- Startup floor ---
ScenarioDefinition {
id: "isolate-startup",
workload: "startup-floor",
runtime: ScenarioRuntime::NativeExecution,
mode: ScenarioMode::BaselineControl,
description: "Minimal guest with no extra imports. Measures the current startup floor for create-context plus node process bootstrap.",
fixture: "empty entrypoint",
entrypoint: "./bench/isolate-startup.mjs",
compile_cache: CompileCacheStrategy::Disabled,
engine_reuse: EngineReuseStrategy::FreshPerSample,
expect_import_metric: false,
env: ScenarioEnvironment::None,
},
ScenarioDefinition {
id: "prewarmed-isolate-startup",
workload: "startup-floor",
runtime: ScenarioRuntime::NativeExecution,
mode: ScenarioMode::SameEngineReplay,
description: "Minimal guest after a priming pass while one execution engine keeps materialized assets and builtin/polyfill prewarm state alive, isolating the hot startup floor from import work.",
fixture: "empty entrypoint",
entrypoint: "./bench/isolate-startup.mjs",
compile_cache: CompileCacheStrategy::Primed,
engine_reuse: EngineReuseStrategy::SharedAcrossScenario,
expect_import_metric: false,
env: ScenarioEnvironment::None,
},
// --- Local ESM graph imports ---
ScenarioDefinition {
id: "cold-local-import",
workload: "local-import",
runtime: ScenarioRuntime::NativeExecution,
mode: ScenarioMode::TrueColdStart,
description: "Cold import of a repo-local ESM graph that simulates layered application modules without compile-cache reuse.",
fixture: "24-module local ESM graph",
entrypoint: "./bench/cold-local-import.mjs",
compile_cache: CompileCacheStrategy::Disabled,
engine_reuse: EngineReuseStrategy::FreshPerSample,
expect_import_metric: true,
env: ScenarioEnvironment::None,
},
ScenarioDefinition {
id: "warm-local-import",
workload: "local-import",
runtime: ScenarioRuntime::NativeExecution,
mode: ScenarioMode::NewSessionReplay,
description: "Warm import of the same local ESM graph after a compile-cache priming pass in an earlier isolate.",
fixture: "24-module local ESM graph",
entrypoint: "./bench/warm-local-import.mjs",
compile_cache: CompileCacheStrategy::Primed,
engine_reuse: EngineReuseStrategy::FreshPerSample,
expect_import_metric: true,
env: ScenarioEnvironment::None,
},
ScenarioDefinition {
id: "same-context-local-import",
workload: "local-import",
runtime: ScenarioRuntime::NativeExecution,
mode: ScenarioMode::SameSessionReplay,
description: "Warm import of the same local ESM graph by replaying executions against one reused JavaScript context after a compile-cache priming pass.",
fixture: "24-module local ESM graph",
entrypoint: "./bench/warm-local-import.mjs",
compile_cache: CompileCacheStrategy::Primed,
engine_reuse: EngineReuseStrategy::SharedContextAcrossScenario,
expect_import_metric: true,
env: ScenarioEnvironment::None,
},
ScenarioDefinition {
id: "prewarmed-local-import",
workload: "local-import",
runtime: ScenarioRuntime::NativeExecution,
mode: ScenarioMode::SameEngineReplay,
description: "Warm import of the same local ESM graph after compile-cache priming while one execution engine keeps materialized assets and builtin/polyfill prewarm state alive.",
fixture: "24-module local ESM graph",
entrypoint: "./bench/warm-local-import.mjs",
compile_cache: CompileCacheStrategy::Primed,
engine_reuse: EngineReuseStrategy::SharedAcrossScenario,
expect_import_metric: true,
env: ScenarioEnvironment::None,
},
ScenarioDefinition {
id: "host-local-import",
workload: "local-import",
runtime: ScenarioRuntime::HostNode,
mode: ScenarioMode::HostControl,
description: "Direct host-Node control for the same local ESM graph so later runs can separate native executor overhead from guest import work.",
fixture: "24-module local ESM graph",
entrypoint: "./bench/cold-local-import.mjs",
compile_cache: CompileCacheStrategy::Disabled,
engine_reuse: EngineReuseStrategy::FreshPerSample,
expect_import_metric: true,
env: ScenarioEnvironment::None,
},
// --- Builtin module imports ---
ScenarioDefinition {
id: "builtin-import",
workload: "builtin-import",
runtime: ScenarioRuntime::NativeExecution,
mode: ScenarioMode::TrueColdStart,
description: "Import of the common builtin path used by the wrappers and polyfill-adjacent bootstrap code.",
fixture: "node:path + node:url + node:fs/promises",
entrypoint: "./bench/builtin-import.mjs",
compile_cache: CompileCacheStrategy::Disabled,
engine_reuse: EngineReuseStrategy::FreshPerSample,
expect_import_metric: true,
env: ScenarioEnvironment::None,
},
ScenarioDefinition {
id: "hot-builtin-stream-import",
workload: "builtin-hot-import",
runtime: ScenarioRuntime::NativeExecution,
mode: ScenarioMode::SameEngineReplay,
description: "Hot single-import microbench for `node:stream` after a priming pass inside one reused execution engine.",
fixture: "node:stream",
entrypoint: "./bench/hot-builtin-stream-import.mjs",
compile_cache: CompileCacheStrategy::Primed,
engine_reuse: EngineReuseStrategy::SharedAcrossScenario,
expect_import_metric: true,
env: ScenarioEnvironment::None,
},
ScenarioDefinition {
id: "hot-builtin-stream-web-import",
workload: "builtin-hot-import",
runtime: ScenarioRuntime::NativeExecution,
mode: ScenarioMode::SameEngineReplay,
description: "Hot single-import microbench for `node:stream/web` after a priming pass inside one reused execution engine.",
fixture: "node:stream/web",
entrypoint: "./bench/hot-builtin-stream-web-import.mjs",
compile_cache: CompileCacheStrategy::Primed,
engine_reuse: EngineReuseStrategy::SharedAcrossScenario,
expect_import_metric: true,
env: ScenarioEnvironment::None,
},
ScenarioDefinition {
id: "hot-builtin-crypto-import",
workload: "builtin-hot-import",
runtime: ScenarioRuntime::NativeExecution,
mode: ScenarioMode::SameEngineReplay,
description: "Hot single-import microbench for `node:crypto` after a priming pass inside one reused execution engine.",
fixture: "node:crypto",
entrypoint: "./bench/hot-builtin-crypto-import.mjs",
compile_cache: CompileCacheStrategy::Primed,
engine_reuse: EngineReuseStrategy::SharedAcrossScenario,
expect_import_metric: true,
env: ScenarioEnvironment::None,
},
ScenarioDefinition {
id: "hot-builtin-zlib-import",
workload: "builtin-hot-import",
runtime: ScenarioRuntime::NativeExecution,
mode: ScenarioMode::SameEngineReplay,
description: "Hot single-import microbench for `node:zlib` after a priming pass inside one reused execution engine.",
fixture: "node:zlib",
entrypoint: "./bench/hot-builtin-zlib-import.mjs",
compile_cache: CompileCacheStrategy::Primed,
engine_reuse: EngineReuseStrategy::SharedAcrossScenario,
expect_import_metric: true,
env: ScenarioEnvironment::None,
},
ScenarioDefinition {
id: "hot-builtin-assert-import",
workload: "builtin-hot-import",
runtime: ScenarioRuntime::NativeExecution,
mode: ScenarioMode::SameEngineReplay,
description: "Hot single-import microbench for `node:assert/strict` after a priming pass inside one reused execution engine.",
fixture: "node:assert/strict",
entrypoint: "./bench/hot-builtin-assert-import.mjs",
compile_cache: CompileCacheStrategy::Primed,
engine_reuse: EngineReuseStrategy::SharedAcrossScenario,
expect_import_metric: true,
env: ScenarioEnvironment::None,
},
ScenarioDefinition {
id: "hot-builtin-url-import",
workload: "builtin-hot-import",
runtime: ScenarioRuntime::NativeExecution,
mode: ScenarioMode::SameEngineReplay,
description: "Hot single-import microbench for `node:url` after a priming pass inside one reused execution engine.",
fixture: "node:url",
entrypoint: "./bench/hot-builtin-url-import.mjs",
compile_cache: CompileCacheStrategy::Primed,
engine_reuse: EngineReuseStrategy::SharedAcrossScenario,
expect_import_metric: true,
env: ScenarioEnvironment::None,
},
// --- Package imports (TypeScript, projected packages) ---
ScenarioDefinition {
id: "hot-projected-package-file-import",
workload: "projected-package-hot-import",
runtime: ScenarioRuntime::HostNode,
mode: ScenarioMode::SameEngineReplay,
description: "Hot projected-package single-import microbench for the TypeScript compiler file with compile cache and projected-source manifest reuse enabled across repeated contexts.",
fixture: "projected TypeScript compiler file",
entrypoint: "./bench/hot-projected-package-file-import.mjs",
compile_cache: CompileCacheStrategy::Primed,
engine_reuse: EngineReuseStrategy::FreshPerSample,
expect_import_metric: true,
env: ScenarioEnvironment::ProjectedWorkspaceNodeModules,
},
ScenarioDefinition {
id: "large-package-import",
workload: "large-package-import",
runtime: ScenarioRuntime::HostNode,
mode: ScenarioMode::TrueColdStart,
description: "Cold import of the real-world `typescript` package from the workspace root `node_modules` tree.",
fixture: "typescript",
entrypoint: "./bench/large-package-import.mjs",
compile_cache: CompileCacheStrategy::Disabled,
engine_reuse: EngineReuseStrategy::FreshPerSample,
expect_import_metric: true,
env: ScenarioEnvironment::None,
},
ScenarioDefinition {
id: "projected-package-import",
workload: "projected-package-import",
runtime: ScenarioRuntime::HostNode,
mode: ScenarioMode::HostControl,
description: "Projected-package guest-path import of TypeScript with compile cache and projected-source manifest reuse enabled across repeated contexts.",
fixture: "projected TypeScript guest-path import",
entrypoint: "./bench/projected-package-import.mjs",
compile_cache: CompileCacheStrategy::Primed,
engine_reuse: EngineReuseStrategy::FreshPerSample,
expect_import_metric: true,
env: ScenarioEnvironment::ProjectedWorkspaceNodeModules,
},
// --- Library workloads (pdf-lib, jszip) ---
ScenarioDefinition {
id: "pdf-lib-startup",
workload: "pdf-lib-startup",
runtime: ScenarioRuntime::HostNode,
mode: ScenarioMode::HostControl,
description: "Cold import of `pdf-lib` plus representative document setup that creates a PDF page and embeds a standard font.",
fixture: "pdf-lib document creation",
entrypoint: "./bench/pdf-lib-startup.mjs",
compile_cache: CompileCacheStrategy::Disabled,
engine_reuse: EngineReuseStrategy::FreshPerSample,
expect_import_metric: true,
env: ScenarioEnvironment::None,
},
ScenarioDefinition {
id: "jszip-startup",
workload: "jszip-startup",
runtime: ScenarioRuntime::HostNode,
mode: ScenarioMode::HostControl,
description: "Cold import of `jszip` plus representative archive staging that builds a nested archive structure.",
fixture: "jszip archive staging",
entrypoint: "./bench/jszip-startup.mjs",
compile_cache: CompileCacheStrategy::Disabled,
engine_reuse: EngineReuseStrategy::FreshPerSample,
expect_import_metric: true,
env: ScenarioEnvironment::None,
},
ScenarioDefinition {
id: "jszip-end-to-end",
workload: "jszip-end-to-end",
runtime: ScenarioRuntime::HostNode,
mode: ScenarioMode::HostControl,
description: "Cold import of `jszip` plus a full compressed archive roundtrip that writes, compresses, reloads, and validates nested archive contents.",
fixture: "jszip end-to-end archive roundtrip",
entrypoint: "./bench/jszip-end-to-end.mjs",
compile_cache: CompileCacheStrategy::Disabled,
engine_reuse: EngineReuseStrategy::FreshPerSample,
expect_import_metric: true,
env: ScenarioEnvironment::None,
},
ScenarioDefinition {
id: "jszip-repeated-session-compressed",
workload: "jszip-repeated-session-compressed",
runtime: ScenarioRuntime::HostNode,
mode: ScenarioMode::HostControl,
description: "Repeated-session `jszip` workload after a compile-cache priming pass that compresses and reloads a nested archive in each fresh isolate.",
fixture: "jszip compressed archive roundtrip",
entrypoint: "./bench/jszip-repeated-session-compressed.mjs",
compile_cache: CompileCacheStrategy::Primed,
engine_reuse: EngineReuseStrategy::FreshPerSample,
expect_import_metric: true,
env: ScenarioEnvironment::None,
},
]
}
fn run_scenario(
workspace: &BenchmarkWorkspace,
config: &JavascriptBenchmarkConfig,
scenario: ScenarioDefinition,
) -> Result<BenchmarkScenarioReport, JavascriptBenchmarkError> {
let compile_cache_root = workspace
.root
.join("compile-cache")
.join(scenario.id.replace('-', "_"));
let mut shared_engine = match scenario.engine_reuse {
EngineReuseStrategy::FreshPerSample => None,
EngineReuseStrategy::SharedAcrossScenario
| EngineReuseStrategy::SharedContextAcrossScenario => {
Some(JavascriptExecutionEngine::default())
}
};
let mut shared_context = None;
if scenario.compile_cache == CompileCacheStrategy::Primed {
run_sample(
workspace,
&scenario,
Some(compile_cache_root.clone()),
shared_engine.as_mut(),
&mut shared_context,
)?;
}
for _ in 0..config.warmup_iterations {
run_sample(
workspace,
&scenario,
compile_cache_root_for_strategy(scenario.compile_cache, &compile_cache_root),
shared_engine.as_mut(),
&mut shared_context,
)?;
}
let mut wall_samples_ms = Vec::with_capacity(config.iterations);
let mut guest_import_samples_ms = if scenario.expect_import_metric {
Some(Vec::with_capacity(config.iterations))
} else {
None
};
let mut context_setup_samples_ms = Vec::with_capacity(config.iterations);
let mut startup_samples_ms = Vec::with_capacity(config.iterations);
let mut completion_samples_ms = Vec::with_capacity(config.iterations);
let mut resource_usage_samples = BenchmarkResourceUsage::<Vec<f64>>::default();
for _ in 0..config.iterations {
let sample = run_sample(
workspace,
&scenario,
compile_cache_root_for_strategy(scenario.compile_cache, &compile_cache_root),
shared_engine.as_mut(),
&mut shared_context,
)?;
wall_samples_ms.push(sample.wall_ms);
context_setup_samples_ms.push(sample.context_setup_ms);
startup_samples_ms.push(sample.startup_ms);
completion_samples_ms.push(sample.completion_ms);
if let (Some(import_ms), Some(samples)) =
(sample.guest_import_ms, guest_import_samples_ms.as_mut())
{
samples.push(import_ms);
}
if let Some(resource_usage) = sample.resource_usage.as_ref() {
resource_usage_samples.push_sample(resource_usage);
}
}
let startup_overhead_samples_ms = guest_import_samples_ms.as_ref().map(|guest_samples| {
context_setup_samples_ms
.iter()
.zip(startup_samples_ms.iter())
.zip(completion_samples_ms.iter())
.zip(guest_samples.iter())
.map(|(((context_ms, startup_ms), completion_ms), _guest_ms)| {
context_ms + startup_ms + completion_ms
})
.collect::<Vec<_>>()
});
let phase_samples_ms = BenchmarkScenarioPhases {
context_setup_ms: context_setup_samples_ms,
startup_ms: startup_samples_ms,
guest_execution_ms: guest_import_samples_ms.clone(),
completion_ms: completion_samples_ms,
};
let resource_usage_samples = resource_usage_samples.into_populated();
Ok(BenchmarkScenarioReport {
id: scenario.id,
workload: scenario.workload,
runtime: scenario.runtime.label(),
mode: scenario.mode.label(),
description: scenario.description,
fixture: scenario.fixture,
compile_cache: scenario.compile_cache.label(),
wall_stats: compute_stats(&wall_samples_ms),
guest_import_stats: guest_import_samples_ms
.as_ref()
.map(|samples| compute_stats(samples)),
startup_overhead_stats: startup_overhead_samples_ms
.as_ref()
.map(|samples| compute_stats(samples)),
phase_stats: BenchmarkScenarioPhases {
context_setup_ms: compute_stats(&phase_samples_ms.context_setup_ms),
startup_ms: compute_stats(&phase_samples_ms.startup_ms),
guest_execution_ms: phase_samples_ms
.guest_execution_ms
.as_ref()
.map(|samples| compute_stats(samples)),
completion_ms: compute_stats(&phase_samples_ms.completion_ms),
},
resource_usage_stats: resource_usage_samples
.as_ref()
.and_then(compute_resource_usage_stats),
wall_samples_ms,
guest_import_samples_ms,
startup_overhead_samples_ms,
phase_samples_ms,
resource_usage_samples,
})
}
/// Returns an owned copy of `root` when the strategy is `Primed`, and `None`
/// when the compile cache is disabled.
fn compile_cache_root_for_strategy(strategy: CompileCacheStrategy, root: &Path) -> Option<PathBuf> {
    let uses_cache = matches!(strategy, CompileCacheStrategy::Primed);
    uses_cache.then(|| root.to_path_buf())
}
/// Takes one sample of `scenario`, dispatching on its runtime.
///
/// Host-node scenarios only need the workspace and the scenario; the compile
/// cache root and the shared engine/context are forwarded to the native
/// execution path only.
fn run_sample(
    workspace: &BenchmarkWorkspace,
    scenario: &ScenarioDefinition,
    compile_cache_root: Option<PathBuf>,
    shared_engine: Option<&mut JavascriptExecutionEngine>,
    shared_context: &mut Option<crate::JavascriptContext>,
) -> Result<SampleMeasurement, JavascriptBenchmarkError> {
    match scenario.runtime {
        ScenarioRuntime::HostNode => run_host_node_sample(workspace, scenario),
        ScenarioRuntime::NativeExecution => run_native_sample(
            workspace,
            scenario,
            compile_cache_root,
            shared_engine,
            shared_context,
        ),
    }
}
/// Takes one sample through the in-crate `JavascriptExecutionEngine`.
///
/// Three phases are timed separately: context setup, execution startup, and
/// completion (waiting for the guest to exit). When the guest reports an
/// import time, it is subtracted from the completion phase (clamped at zero);
/// the reported wall time is the sum of the three raw phase durations.
///
/// With `SharedContextAcrossScenario` the first sample creates the context and
/// stores it in `shared_context`; later samples reuse it and report a context
/// setup time of `0.0`.
fn run_native_sample(
    workspace: &BenchmarkWorkspace,
    scenario: &ScenarioDefinition,
    compile_cache_root: Option<PathBuf>,
    shared_engine: Option<&mut JavascriptExecutionEngine>,
    shared_context: &mut Option<crate::JavascriptContext>,
) -> Result<SampleMeasurement, JavascriptBenchmarkError> {
    const VM_ID: &str = "vm-bench";
    let elapsed_ms = |since: Instant| since.elapsed().as_secs_f64() * 1000.0;

    // A fresh engine is always constructed; it is only used when no shared one was supplied.
    let mut fresh_engine = JavascriptExecutionEngine::default();
    let engine = match shared_engine {
        Some(shared) => shared,
        None => &mut fresh_engine,
    };

    let context_started_at = Instant::now();
    let reuse_context = matches!(
        scenario.engine_reuse,
        EngineReuseStrategy::SharedContextAcrossScenario
    );
    let cached_context = if reuse_context {
        shared_context.as_ref().cloned()
    } else {
        None
    };
    let (context, context_setup_ms) = match cached_context {
        // Reusing an existing context counts as zero setup time.
        Some(context) => (context, 0.0),
        None => {
            let context = engine.create_context(CreateJavascriptContextRequest {
                vm_id: String::from(VM_ID),
                bootstrap_module: None,
                compile_cache_root,
            });
            let context_setup_ms = elapsed_ms(context_started_at);
            if reuse_context {
                *shared_context = Some(context.clone());
            }
            (context, context_setup_ms)
        }
    };

    let startup_started_at = Instant::now();
    let execution = engine.start_execution(StartJavascriptExecutionRequest {
        vm_id: String::from(VM_ID),
        context_id: context.context_id,
        argv: vec![String::from(scenario.entrypoint)],
        env: scenario_env(workspace, scenario),
        cwd: workspace.root.clone(),
        inline_code: None,
    })?;
    let startup_ms = elapsed_ms(startup_started_at);

    let completion_started_at = Instant::now();
    let result = execution.wait()?;
    let completion_total_ms = elapsed_ms(completion_started_at);

    let stdout = String::from_utf8(result.stdout)?;
    let stderr = String::from_utf8(result.stderr)?;
    if result.exit_code != 0 {
        return Err(JavascriptBenchmarkError::NonZeroExit {
            scenario: scenario.id,
            exit_code: result.exit_code,
            stderr,
        });
    }

    let parsed_metrics =
        parse_benchmark_metrics(scenario.id, &stdout, scenario.expect_import_metric)?;
    let guest_import_ms = parsed_metrics.import_ms;
    // Completion excludes the guest-reported import time when one is available.
    let completion_ms = match guest_import_ms {
        Some(guest_ms) => saturating_delta_ms(completion_total_ms, guest_ms),
        None => completion_total_ms,
    };

    Ok(SampleMeasurement {
        wall_ms: context_setup_ms + startup_ms + completion_total_ms,
        guest_import_ms,
        context_setup_ms,
        startup_ms,
        completion_ms,
        resource_usage: parsed_metrics.resource_usage,
    })
}
/// Takes one sample by spawning the host `node` binary on the scenario entrypoint.
///
/// The whole process lifetime is measured as wall time. Everything that is
/// not guest-reported import time is attributed to the startup phase; context
/// setup and completion are reported as `0.0`.
fn run_host_node_sample(
    workspace: &BenchmarkWorkspace,
    scenario: &ScenarioDefinition,
) -> Result<SampleMeasurement, JavascriptBenchmarkError> {
    let started_at = Instant::now();
    let mut command = Command::new(crate::host_node::node_binary());
    command
        .arg(scenario.entrypoint)
        .current_dir(&workspace.root)
        .envs(scenario_env(workspace, scenario));
    let output = command.output()?;
    let wall_ms = started_at.elapsed().as_secs_f64() * 1000.0;

    let stdout = String::from_utf8(output.stdout)?;
    let stderr = String::from_utf8(output.stderr)?;
    if !output.status.success() {
        // No exit code is available for some terminations; report -1 in that case.
        let exit_code = output.status.code().unwrap_or(-1);
        return Err(JavascriptBenchmarkError::NonZeroExit {
            scenario: scenario.id,
            exit_code,
            stderr,
        });
    }

    let parsed_metrics =
        parse_benchmark_metrics(scenario.id, &stdout, scenario.expect_import_metric)?;
    let guest_import_ms = parsed_metrics.import_ms;
    let startup_ms = match guest_import_ms {
        Some(guest_ms) => saturating_delta_ms(wall_ms, guest_ms),
        None => wall_ms,
    };

    Ok(SampleMeasurement {
        wall_ms,
        guest_import_ms,
        context_setup_ms: 0.0,
        startup_ms,
        completion_ms: 0.0,
        resource_usage: parsed_metrics.resource_usage,
    })
}
/// Builds the environment variables a scenario's process is started with.
///
/// Scenarios without a special environment get an empty map. Scenarios using
/// projected workspace `node_modules` get two JSON-encoded variables derived
/// from `<repo_root>/node_modules`: the extra read paths and the guest path
/// mapping onto `/root/node_modules`.
fn scenario_env(
    workspace: &BenchmarkWorkspace,
    scenario: &ScenarioDefinition,
) -> BTreeMap<String, String> {
    match scenario.env {
        ScenarioEnvironment::None => return BTreeMap::new(),
        ScenarioEnvironment::ProjectedWorkspaceNodeModules => {}
    }

    let host_path = workspace
        .repo_root
        .join("node_modules")
        .display()
        .to_string();
    // A one-element JSON array holding the host path.
    let read_paths_json = serde_json::to_string(std::slice::from_ref(&host_path))
        .expect("serialize projected node_modules read path");
    let guest_path_mappings = serde_json::json!([{
        "guestPath": "/root/node_modules",
        "hostPath": host_path,
    }])
    .to_string();

    let mut env = BTreeMap::new();
    env.insert(
        String::from("AGENT_OS_EXTRA_FS_READ_PATHS"),
        read_paths_json,
    );
    env.insert(
        String::from("AGENT_OS_GUEST_PATH_MAPPINGS"),
        guest_path_mappings,
    );
    env
}
/// Measures stdin→stdout round-trip times against the `transport-echo.mjs` probe.
///
/// One long-lived execution is started with stdin kept open. For every payload
/// size in `TRANSPORT_RTT_PAYLOAD_BYTES` it performs `config.warmup_iterations`
/// unrecorded round trips followed by `config.iterations` recorded ones. After
/// all sizes are done, stdin is closed and the probe must exit with code 0.
fn measure_transport_rtt(
    workspace: &BenchmarkWorkspace,
    config: &JavascriptBenchmarkConfig,
) -> Result<Vec<BenchmarkTransportRttReport>, JavascriptBenchmarkError> {
    const VM_ID: &str = "vm-transport";

    let mut engine = JavascriptExecutionEngine::default();
    let context = engine.create_context(CreateJavascriptContextRequest {
        vm_id: String::from(VM_ID),
        bootstrap_module: None,
        compile_cache_root: None,
    });
    let mut execution = engine.start_execution(StartJavascriptExecutionRequest {
        vm_id: String::from(VM_ID),
        context_id: context.context_id,
        argv: vec![String::from("./bench/transport-echo.mjs")],
        env: BTreeMap::from([(String::from("AGENT_OS_KEEP_STDIN_OPEN"), String::from("1"))]),
        cwd: workspace.root.clone(),
        inline_code: None,
    })?;

    // Output buffers persist across round trips so leftover bytes are not lost.
    let mut stdout_buffer = String::new();
    let mut stderr_buffer = String::new();
    let mut reports = Vec::with_capacity(TRANSPORT_RTT_PAYLOAD_BYTES.len());

    for payload_bytes in TRANSPORT_RTT_PAYLOAD_BYTES.iter().copied() {
        for warmup_index in 0..config.warmup_iterations {
            let label = format!("warmup-{payload_bytes}-{warmup_index}");
            measure_transport_roundtrip(
                &mut execution,
                payload_bytes,
                &label,
                &mut stdout_buffer,
                &mut stderr_buffer,
            )?;
        }

        // Stops at the first failing round trip, like an explicit loop with `?`.
        let samples_ms = (0..config.iterations)
            .map(|iteration| {
                let label = format!("measure-{payload_bytes}-{iteration}");
                measure_transport_roundtrip(
                    &mut execution,
                    payload_bytes,
                    &label,
                    &mut stdout_buffer,
                    &mut stderr_buffer,
                )
            })
            .collect::<Result<Vec<_>, _>>()?;

        let stats = compute_stats(&samples_ms);
        reports.push(BenchmarkTransportRttReport {
            channel: TRANSPORT_RTT_CHANNEL,
            payload_bytes,
            stats,
            samples_ms,
        });
    }

    execution.close_stdin()?;
    let result = execution.wait()?;
    if result.exit_code == 0 {
        return Ok(reports);
    }

    // Append whatever stderr the exit result carries to what was seen while polling.
    stderr_buffer.push_str(&String::from_utf8(result.stderr)?);
    Err(JavascriptBenchmarkError::TransportProbeExited {
        exit_code: result.exit_code,
        stderr: stderr_buffer,
    })
}
/// Sends one newline-terminated probe payload over stdin and waits for the echo.
///
/// Returns the elapsed milliseconds between the write and the moment a
/// complete line equal to the payload is available in `stdout_buffer`.
///
/// # Errors
///
/// Fails when the first complete line differs from the payload, when the
/// execution raises a sync RPC request or exits, or when a poll returns no
/// event within `TRANSPORT_POLL_TIMEOUT`.
fn measure_transport_roundtrip(
    execution: &mut crate::JavascriptExecution,
    payload_bytes: usize,
    label: &str,
    stdout_buffer: &mut String,
    stderr_buffer: &mut String,
) -> Result<f64, JavascriptBenchmarkError> {
    let payload = transport_probe_payload(payload_bytes, label);
    let outgoing = format!("{payload}\n");
    let started_at = Instant::now();
    execution.write_stdin(outgoing.as_bytes())?;

    loop {
        // Check buffered output first: a complete line may already be present.
        match take_complete_line(stdout_buffer) {
            Some(line) if line == payload => {
                return Ok(started_at.elapsed().as_secs_f64() * 1000.0);
            }
            Some(line) => {
                return Err(JavascriptBenchmarkError::InvalidTransportProbeResponse {
                    payload_bytes,
                    expected: payload,
                    actual: line,
                });
            }
            None => {}
        }

        let event = execution.poll_event_blocking(TRANSPORT_POLL_TIMEOUT)?;
        match event {
            None => {
                return Err(JavascriptBenchmarkError::TransportProbeTimeout { payload_bytes });
            }
            Some(crate::JavascriptExecutionEvent::Exited(exit_code)) => {
                return Err(JavascriptBenchmarkError::TransportProbeExited {
                    exit_code,
                    stderr: stderr_buffer.clone(),
                });
            }
            Some(crate::JavascriptExecutionEvent::SyncRpcRequest(request)) => {
                return Err(JavascriptBenchmarkError::Execution(
                    JavascriptExecutionError::PendingSyncRpcRequest(request.id),
                ));
            }
            Some(crate::JavascriptExecutionEvent::Stdout(chunk)) => {
                stdout_buffer.push_str(&String::from_utf8(chunk)?);
            }
            Some(crate::JavascriptExecutionEvent::Stderr(chunk)) => {
                stderr_buffer.push_str(&String::from_utf8(chunk)?);
            }
            // Signal state changes are irrelevant to the probe; keep polling.
            Some(crate::JavascriptExecutionEvent::SignalState { .. }) => {}
        }
    }
}
/// Builds a probe payload of the form `transport:<label>:` padded with `x`
/// up to `payload_bytes` bytes.
///
/// When the header alone is already at least `payload_bytes` long (including
/// the `payload_bytes == 0` case), the header is returned without padding.
fn transport_probe_payload(payload_bytes: usize, label: &str) -> String {
    let mut payload = format!("transport:{label}:");
    let padding = payload_bytes.saturating_sub(payload.len());
    payload.extend(std::iter::repeat('x').take(padding));
    payload
}
/// Removes and returns the first complete line from `buffer`.
///
/// Returns `None` (leaving `buffer` untouched) when it holds no `\n`. The
/// returned line excludes the newline and any trailing `\r` characters; the
/// bytes after the newline remain in `buffer`.
fn take_complete_line(buffer: &mut String) -> Option<String> {
    let newline_index = buffer.find('\n')?;
    // Split right after the newline and swap so `buffer` keeps the remainder.
    let remainder = buffer.split_off(newline_index + 1);
    let mut line = std::mem::replace(buffer, remainder);
    line.pop(); // the '\n' terminator
    let kept = line.trim_end_matches('\r').len();
    line.truncate(kept);
    Some(line)
}
/// Metrics decoded from the benchmark marker line of a guest's stdout.
///
/// Both fields default to `None` when absent from the JSON payload.
#[derive(Debug, Default, Deserialize)]
struct ParsedBenchmarkMetrics {
/// Guest-reported import duration in milliseconds, if emitted.
#[serde(default)]
import_ms: Option<f64>,
/// Guest-reported resource usage, if emitted.
#[serde(default)]
resource_usage: Option<BenchmarkResourceUsage<f64>>,
}
/// Extracts benchmark metrics from the last marker line in `stdout`.
///
/// The last line starting with `BENCHMARK_MARKER_PREFIX` is taken and its
/// trimmed remainder is interpreted in two ways, in order:
///
/// 1. As a JSON object. It is accepted when it carries an import time or
///    non-empty resource usage.
/// 2. As a bare floating-point import time in milliseconds.
///
/// # Errors
///
/// * `MissingBenchmarkMetric` when no marker line exists, or when the JSON
///   payload lacks `import_ms` although `expect_import_metric` is set.
/// * `InvalidBenchmarkMetric` when the value is neither usable JSON nor a
///   finite number.
fn parse_benchmark_metrics(
    scenario_id: &'static str,
    stdout: &str,
    expect_import_metric: bool,
) -> Result<ParsedBenchmarkMetrics, JavascriptBenchmarkError> {
    let raw_value = stdout
        .lines()
        .rev()
        .find_map(|line| line.strip_prefix(BENCHMARK_MARKER_PREFIX))
        .ok_or(JavascriptBenchmarkError::MissingBenchmarkMetric(
            scenario_id,
        ))?
        .trim();

    if let Ok(parsed) = serde_json::from_str::<ParsedBenchmarkMetrics>(raw_value) {
        let has_resource_usage = parsed
            .resource_usage
            .as_ref()
            .map_or(false, |resource_usage| !resource_usage.is_empty());
        if parsed.import_ms.is_some() || has_resource_usage {
            if expect_import_metric && parsed.import_ms.is_none() {
                return Err(JavascriptBenchmarkError::MissingBenchmarkMetric(
                    scenario_id,
                ));
            }
            return Ok(parsed);
        }
    }

    // `str::parse::<f64>` also accepts `NaN`, `inf` and `infinity`. A
    // non-finite import time would poison every derived statistic and does
    // not survive JSON serialization, so it is rejected as invalid.
    match raw_value.parse::<f64>() {
        Ok(import_ms) if import_ms.is_finite() => Ok(ParsedBenchmarkMetrics {
            import_ms: Some(import_ms),
            resource_usage: None,
        }),
        _ => Err(JavascriptBenchmarkError::InvalidBenchmarkMetric {
            scenario: scenario_id,
            raw_value: raw_value.to_owned(),
        }),
    }
}
/// Resolves the workspace root as the grandparent of this crate's manifest directory.
///
/// # Errors
///
/// Returns `InvalidWorkspaceRoot` carrying the manifest directory when it has
/// fewer than two ancestors.
fn workspace_root() -> Result<PathBuf, JavascriptBenchmarkError> {
    let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    // `ancestors()` yields the path itself, then its parent, then its grandparent.
    let grandparent = manifest_dir.ancestors().nth(2).map(Path::to_path_buf);
    match grandparent {
        Some(root) => Ok(root),
        None => Err(JavascriptBenchmarkError::InvalidWorkspaceRoot(manifest_dir)),
    }
}
/// Reads and deserializes a stored benchmark artifact from `baseline_path`.
///
/// # Errors
///
/// I/O failures are propagated as-is; JSON decoding failures become
/// `InvalidBaselineReport` carrying the path and the decoder's message.
fn load_benchmark_artifact(
    baseline_path: &Path,
) -> Result<StoredBenchmarkArtifact, JavascriptBenchmarkError> {
    let contents = fs::read_to_string(baseline_path)?;
    match serde_json::from_str::<StoredBenchmarkArtifact>(&contents) {
        Ok(artifact) => Ok(artifact),
        Err(err) => Err(JavascriptBenchmarkError::InvalidBaselineReport {
            path: baseline_path.to_path_buf(),
            message: err.to_string(),
        }),
    }
}
/// Collects the host description recorded with every benchmark run.
///
/// Runs `<node binary> --version` to obtain the Node.js version and pairs it
/// with the compile-time OS/architecture constants and the available
/// parallelism (falling back to `1` when that cannot be determined).
///
/// # Errors
///
/// Returns `NodeVersion` when the node binary cannot be spawned, and a UTF-8
/// conversion error when its output is not valid UTF-8.
fn benchmark_host() -> Result<BenchmarkHost, JavascriptBenchmarkError> {
    let node_binary = crate::host_node::node_binary();
    let output = Command::new(&node_binary)
        .arg("--version")
        .output()
        .map_err(JavascriptBenchmarkError::NodeVersion)?;
    // `node --version` terminates its output with a newline; strip surrounding
    // whitespace so the version renders cleanly in reports and comparisons.
    let node_version = String::from_utf8(output.stdout)?.trim().to_owned();
    let logical_cpus = std::thread::available_parallelism()
        .map(usize::from)
        .unwrap_or(1);
    Ok(BenchmarkHost {
        node_binary,
        node_version,
        os: env::consts::OS,
        arch: env::consts::ARCH,
        logical_cpus,
    })
}
/// Materializes the benchmark workspace under `root`.
///
/// Creates `bench/` and `bench/local-graph/`, links `<repo_root>/node_modules`
/// into the workspace when it exists and the link target is absent, writes a
/// minimal ESM `package.json`, generates the chained local module graph, and
/// writes every benchmark entrypoint script plus the transport echo probe.
///
/// # Errors
///
/// Any filesystem failure is propagated.
fn write_benchmark_workspace(
    root: &Path,
    repo_root: &Path,
) -> Result<(), JavascriptBenchmarkError> {
    fs::create_dir_all(root.join("bench"))?;
    fs::create_dir_all(root.join("bench/local-graph"))?;

    // Reuse the repository's installed packages via a symlink (Unix-only API).
    let host_node_modules = repo_root.join("node_modules");
    let workspace_node_modules = root.join("node_modules");
    if host_node_modules.exists() && !workspace_node_modules.exists() {
        std::os::unix::fs::symlink(&host_node_modules, &workspace_node_modules)?;
    }

    fs::write(
        root.join("package.json"),
        "{\n \"name\": \"agent-os-execution-bench\",\n \"private\": true,\n \"type\": \"module\"\n}\n",
    )?;

    // Local graph: mod-00 exports 1, and every later module imports its
    // predecessor and adds its own index.
    for index in 0..LOCAL_GRAPH_MODULE_COUNT {
        let path = root
            .join("bench/local-graph")
            .join(format!("mod-{index:02}.mjs"));
        let source = if index == 0 {
            String::from("export const value = 1;\n")
        } else {
            format!(
                "import {{ value as previous }} from './mod-{previous:02}.mjs';\nexport const value = previous + {index};\n",
                previous = index - 1
            )
        };
        fs::write(path, source)?;
    }
    let final_value = local_graph_terminal_value();
    fs::write(
        root.join("bench/local-graph/root.mjs"),
        format!(
            "import {{ value }} from './mod-{last:02}.mjs';\nexport {{ value }};\nexport const expected = {final_value};\n",
            last = LOCAL_GRAPH_MODULE_COUNT - 1
        ),
    )?;

    fs::write(
        root.join("bench/benchmark-metrics.mjs"),
        benchmark_metrics_module_source(),
    )?;
    fs::write(
        root.join("bench/isolate-startup.mjs"),
        resource_only_entrypoint_source("console.log('isolate-ready');"),
    )?;
    fs::write(
        root.join("bench/cold-local-import.mjs"),
        local_import_entrypoint_source(final_value),
    )?;
    fs::write(
        root.join("bench/warm-local-import.mjs"),
        local_import_entrypoint_source(final_value),
    )?;
    fs::write(
        root.join("bench/builtin-import.mjs"),
        timed_entrypoint_source(
            "const [pathMod, fsMod, urlMod] = await Promise.all([\n import('node:path'),\n import('node:fs/promises'),\n import('node:url'),\n]);\nif (typeof pathMod.basename !== 'function' || typeof fsMod.readFile !== 'function' || typeof urlMod.pathToFileURL !== 'function') {\n throw new Error('builtin import fixture did not load expected exports');\n}",
        ),
    )?;
    fs::write(
        root.join("bench/hot-builtin-stream-import.mjs"),
        single_import_entrypoint_source(
            "node:stream",
            "typeof imported.Readable === 'function'",
            "node:stream import did not expose Readable",
        ),
    )?;
    fs::write(
        root.join("bench/hot-builtin-stream-web-import.mjs"),
        single_import_entrypoint_source(
            "node:stream/web",
            "typeof imported.ReadableStream === 'function'",
            "node:stream/web import did not expose ReadableStream",
        ),
    )?;
    fs::write(
        root.join("bench/hot-builtin-crypto-import.mjs"),
        single_import_entrypoint_source(
            "node:crypto",
            "typeof imported.createHash === 'function'",
            "node:crypto import did not expose createHash",
        ),
    )?;
    fs::write(
        root.join("bench/hot-builtin-zlib-import.mjs"),
        single_import_entrypoint_source(
            "node:zlib",
            "typeof imported.gzipSync === 'function'",
            "node:zlib import did not expose gzipSync",
        ),
    )?;
    fs::write(
        root.join("bench/hot-builtin-assert-import.mjs"),
        single_import_entrypoint_source(
            "node:assert/strict",
            "typeof imported.strictEqual === 'function'",
            "node:assert/strict import did not expose strictEqual",
        ),
    )?;
    fs::write(
        root.join("bench/hot-builtin-url-import.mjs"),
        single_import_entrypoint_source(
            "node:url",
            "typeof imported.pathToFileURL === 'function'",
            "node:url import did not expose pathToFileURL",
        ),
    )?;
    fs::write(
        root.join("bench/large-package-import.mjs"),
        timed_entrypoint_source(
            "const typescript = await import('typescript');\nif (typeof typescript.transpileModule !== 'function') {\n throw new Error('typescript import did not expose transpileModule');\n}",
        ),
    )?;
    fs::write(
        root.join("bench/hot-projected-package-file-import.mjs"),
        projected_package_file_import_entrypoint_source(),
    )?;
    fs::write(
        root.join("bench/projected-package-import.mjs"),
        projected_package_import_entrypoint_source(),
    )?;
    fs::write(
        root.join("bench/pdf-lib-startup.mjs"),
        pdf_lib_startup_entrypoint_source(),
    )?;
    fs::write(
        root.join("bench/jszip-startup.mjs"),
        jszip_startup_entrypoint_source(),
    )?;
    fs::write(
        root.join("bench/jszip-end-to-end.mjs"),
        jszip_end_to_end_entrypoint_source(),
    )?;
    fs::write(
        root.join("bench/jszip-repeated-session-compressed.mjs"),
        jszip_repeated_session_compressed_entrypoint_source(),
    )?;

    // Transport echo probe. Each complete input line is echoed back WITH a
    // trailing newline: the Rust side of the probe only recognises a response
    // once it sees a `\n`-terminated line, so echoing the bare line would make
    // every round trip wait until the poll timeout.
    fs::write(
        root.join("bench/transport-echo.mjs"),
        "process.stdin.setEncoding('utf8');\nlet buffered = '';\nconst flushLines = () => {\n let newlineIndex = buffered.indexOf('\\n');\n while (newlineIndex >= 0) {\n const line = buffered.slice(0, newlineIndex).replace(/\\r$/, '');\n buffered = buffered.slice(newlineIndex + 1);\n process.stdout.write(line + '\\n');\n newlineIndex = buffered.indexOf('\\n');\n }\n};\nprocess.stdin.on('data', (chunk) => {\n buffered += chunk;\n flushLines();\n});\nprocess.stdin.on('end', () => {\n if (buffered.length > 0) {\n process.stdout.write(buffered.replace(/\\r$/, ''));\n }\n});\n",
    )?;
    Ok(())
}
/// Returns the timed entrypoint that imports the local module graph and
/// throws unless both `value` and `expected` equal `final_value`.
fn local_import_entrypoint_source(final_value: usize) -> String {
    let body = format!(
        "const graph = await import('./local-graph/root.mjs');\nif (graph.value !== {final_value} || graph.expected !== {final_value}) {{\n throw new Error(`local graph import returned ${{\n graph.value\n }} instead of {final_value}`);\n}}"
    );
    timed_entrypoint_source(&body)
}
/// Returns a timed entrypoint that dynamically imports `specifier` as
/// `imported` and throws `error_message` unless `validation_expression`
/// evaluates truthy.
///
/// The three arguments are spliced into the JavaScript source verbatim.
fn single_import_entrypoint_source(
    specifier: &str,
    validation_expression: &str,
    error_message: &str,
) -> String {
    let body = format!(
        "const imported = await import('{specifier}');\nif (!({validation_expression})) {{\n throw new Error('{error_message}');\n}}"
    );
    timed_entrypoint_source(&body)
}
/// Returns the timed entrypoint that imports TypeScript by its file path under
/// `../node_modules` and checks that `transpileModule` is exposed.
fn projected_package_file_import_entrypoint_source() -> String {
    const BODY: &str = "const typescriptModule = await import('../node_modules/typescript/lib/typescript.js');\nconst typescript = typescriptModule.default ?? typescriptModule;\nif (typeof typescript.transpileModule !== 'function') {\n throw new Error('projected package file import did not expose transpileModule');\n}";
    timed_entrypoint_source(BODY)
}
/// Returns the timed entrypoint that imports TypeScript by its file path under
/// `../node_modules`, parses a one-statement source file, and checks that the
/// compiler APIs it relies on are exposed.
fn projected_package_import_entrypoint_source() -> String {
    const BODY: &str = "const typescriptModule = await import('../node_modules/typescript/lib/typescript.js');\nconst typescript = typescriptModule.default ?? typescriptModule;\nconst sourceFile = typescript.createSourceFile(\n 'bench.ts',\n 'const answer: number = 42;',\n typescript.ScriptTarget.ES2022,\n true,\n);\nif (\n typeof typescript.transpileModule !== 'function' ||\n typeof typescript.createSourceFile !== 'function' ||\n !sourceFile ||\n sourceFile.statements.length !== 1\n) {\n throw new Error('projected package import did not expose TypeScript compiler APIs');\n}";
    timed_entrypoint_source(BODY)
}
/// Returns the timed entrypoint that imports `pdf-lib`, creates a one-page
/// document, embeds Helvetica, draws a line of text, and validates the result.
fn pdf_lib_startup_entrypoint_source() -> String {
    const BODY: &str = "const pdfLib = await import('pdf-lib');\nconst pdfDoc = await pdfLib.PDFDocument.create();\nconst page = pdfDoc.addPage([612, 792]);\nconst font = await pdfDoc.embedFont(pdfLib.StandardFonts.Helvetica);\npage.drawText('Agent OS pdf-lib benchmark', {\n x: 50,\n y: 750,\n font,\n size: 18,\n});\nif (pdfDoc.getPageCount() !== 1 || page.getSize().width !== 612) {\n throw new Error('pdf-lib fixture did not create the expected document');\n}";
    timed_entrypoint_source(BODY)
}
/// Returns the timed entrypoint that imports `jszip` and stages a two-file
/// archive (one file in a nested folder) without generating it.
fn jszip_startup_entrypoint_source() -> String {
    const BODY: &str = "const jszipModule = await import('jszip');\nconst JSZip = jszipModule.default ?? jszipModule;\nconst zip = new JSZip();\nzip.file('README.txt', 'agent-os benchmark archive');\nconst notes = zip.folder('notes');\nif (!notes) {\n throw new Error('jszip fixture failed to create nested folder');\n}\nnotes.file('todo.txt', 'benchmark staging payload');\nconst fileCount = Object.values(zip.files).filter((entry) => !entry.dir).length;\nif (typeof zip.generateAsync !== 'function' || fileCount !== 2) {\n throw new Error('jszip fixture did not stage the expected archive');\n}";
    timed_entrypoint_source(BODY)
}
/// Returns the timed entrypoint that imports `jszip`, builds a three-file
/// archive, DEFLATE-compresses it, reloads it, and validates the restored
/// contents.
fn jszip_end_to_end_entrypoint_source() -> String {
    const BODY: &str = "const jszipModule = await import('jszip');\nconst JSZip = jszipModule.default ?? jszipModule;\nconst zip = new JSZip();\nconst repeatedPayload = 'agent-os benchmark payload '.repeat(512);\nzip.file('README.txt', repeatedPayload);\nconst notes = zip.folder('notes');\nif (!notes) {\n throw new Error('jszip end-to-end fixture failed to create notes folder');\n}\nnotes.file('todo.txt', 'complete the archive roundtrip');\nconst data = zip.folder('data');\nif (!data) {\n throw new Error('jszip end-to-end fixture failed to create data folder');\n}\ndata.file('payload.json', JSON.stringify({\n repeatedPayloadLength: repeatedPayload.length,\n mode: 'cold-end-to-end',\n}));\nconst archiveBytes = await zip.generateAsync({\n type: 'uint8array',\n compression: 'DEFLATE',\n compressionOptions: { level: 6 },\n});\nconst restored = await JSZip.loadAsync(archiveBytes);\nconst restoredFileCount = Object.values(restored.files).filter((entry) => !entry.dir).length;\nconst restoredReadme = await restored.file('README.txt')?.async('string');\nconst restoredTodo = await restored.file('notes/todo.txt')?.async('string');\nconst restoredPayload = await restored.file('data/payload.json')?.async('string');\nif (\n archiveBytes.byteLength >= repeatedPayload.length ||\n restoredFileCount !== 3 ||\n restoredReadme !== repeatedPayload ||\n restoredTodo !== 'complete the archive roundtrip' ||\n !restoredPayload?.includes('cold-end-to-end')\n) {\n throw new Error('jszip end-to-end fixture did not complete the compressed archive roundtrip');\n}";
    timed_entrypoint_source(BODY)
}
/// Returns the timed entrypoint for the repeated-session `jszip` workload:
/// build a three-file archive, DEFLATE-compress it, reload it, and validate
/// the restored README and todo entries.
fn jszip_repeated_session_compressed_entrypoint_source() -> String {
    const BODY: &str = "const jszipModule = await import('jszip');\nconst JSZip = jszipModule.default ?? jszipModule;\nconst zip = new JSZip();\nconst repeatedPayload = 'agent-os benchmark payload '.repeat(512);\nzip.file('README.txt', repeatedPayload);\nconst notes = zip.folder('notes');\nif (!notes) {\n throw new Error('jszip repeated-session fixture failed to create notes folder');\n}\nnotes.file('todo.txt', 'repeat this session workload');\nconst data = zip.folder('data');\nif (!data) {\n throw new Error('jszip repeated-session fixture failed to create data folder');\n}\ndata.file('payload.json', JSON.stringify({\n repeatedPayloadLength: repeatedPayload.length,\n repeatedSessions: true,\n}));\nconst archiveBytes = await zip.generateAsync({\n type: 'uint8array',\n compression: 'DEFLATE',\n compressionOptions: { level: 6 },\n});\nconst restored = await JSZip.loadAsync(archiveBytes);\nconst restoredFileCount = Object.values(restored.files).filter((entry) => !entry.dir).length;\nconst restoredReadme = await restored.file('README.txt')?.async('string');\nconst restoredTodo = await restored.file('notes/todo.txt')?.async('string');\nif (\n archiveBytes.byteLength >= repeatedPayload.length ||\n restoredFileCount !== 3 ||\n restoredReadme !== repeatedPayload ||\n restoredTodo !== 'repeat this session workload'\n) {\n throw new Error('jszip repeated-session fixture did not complete the compressed archive roundtrip');\n}";
    timed_entrypoint_source(BODY)
}
/// Returns the JavaScript source of the shared `benchmark-metrics.mjs` helper.
///
/// The generated module exports `emitBenchmarkMetrics(importMs)`, which logs
/// one line made of `BENCHMARK_MARKER_PREFIX` followed by a JSON payload. The
/// payload always carries `resource_usage` (RSS and heap usage, plus CPU times
/// when `process.resourceUsage` is available) and carries `import_ms` only
/// when `importMs` is a number.
fn benchmark_metrics_module_source() -> String {
format!(
"const BENCHMARK_MARKER_PREFIX = '{BENCHMARK_MARKER_PREFIX}';\n\nexport function emitBenchmarkMetrics(importMs) {{\n const memoryUsage = process.memoryUsage();\n const resourceUsage = typeof process.resourceUsage === 'function'\n ? process.resourceUsage()\n : null;\n const payload = {{\n resource_usage: {{\n rss_bytes: memoryUsage.rss,\n heap_used_bytes: memoryUsage.heapUsed,\n ...(resourceUsage\n ? {{\n cpu_user_us: resourceUsage.userCPUTime,\n cpu_system_us: resourceUsage.systemCPUTime,\n cpu_total_us: resourceUsage.userCPUTime + resourceUsage.systemCPUTime,\n }}\n : {{}}),\n }},\n }};\n\n if (typeof importMs === 'number') {{\n payload.import_ms = importMs;\n }}\n\n console.log(BENCHMARK_MARKER_PREFIX + JSON.stringify(payload));\n}}\n"
)
}
/// Wraps `body` in an entrypoint that imports the metrics helper, runs `body`,
/// and then emits metrics without an import time.
///
/// `body` is spliced into the generated JavaScript verbatim.
fn resource_only_entrypoint_source(body: &str) -> String {
format!(
"import {{ emitBenchmarkMetrics }} from './benchmark-metrics.mjs';\n{body}\nemitBenchmarkMetrics();\n"
)
}
/// Wraps `body` in an entrypoint that times it with `performance.now()` and
/// emits the elapsed milliseconds as the import time via the metrics helper.
///
/// `body` is spliced into the generated JavaScript verbatim.
fn timed_entrypoint_source(body: &str) -> String {
format!(
"import {{ performance }} from 'node:perf_hooks';\nimport {{ emitBenchmarkMetrics }} from './benchmark-metrics.mjs';\nconst started = performance.now();\n{body}\nemitBenchmarkMetrics(performance.now() - started);\n"
)
}
/// Returns `1` plus the sum of every index in `1..LOCAL_GRAPH_MODULE_COUNT`.
fn local_graph_terminal_value() -> usize {
    1 + (1..LOCAL_GRAPH_MODULE_COUNT).sum::<usize>()
}
fn compute_distribution_stats(samples: &[f64]) -> BenchmarkDistributionStats {
let mut sorted = samples.to_vec();
sorted.sort_by(|a, b| a.total_cmp(b));
let mean = sorted.iter().sum::<f64>() / sorted.len() as f64;
BenchmarkDistributionStats {
mean,
p50: percentile(&sorted, 50.0),
p95: percentile(&sorted, 95.0),
min: *sorted.first().unwrap_or(&0.0),
max: *sorted.last().unwrap_or(&0.0),
stddev: standard_deviation(&sorted, mean),
}
}
fn compute_stats(samples: &[f64]) -> BenchmarkStats {
let stats = compute_distribution_stats(samples);
BenchmarkStats {
mean_ms: stats.mean,
p50_ms: stats.p50,
p95_ms: stats.p95,
min_ms: stats.min,
max_ms: stats.max,
stddev_ms: stats.stddev,
}
}
fn compute_resource_usage_stats(
samples: &BenchmarkResourceUsage<Vec<f64>>,
) -> Option<BenchmarkResourceUsage<BenchmarkDistributionStats>> {
let stats = BenchmarkResourceUsage {
rss_bytes: samples
.rss_bytes
.as_ref()
.map(|samples| compute_distribution_stats(samples)),
heap_used_bytes: samples
.heap_used_bytes
.as_ref()
.map(|samples| compute_distribution_stats(samples)),
cpu_user_us: samples
.cpu_user_us
.as_ref()
.map(|samples| compute_distribution_stats(samples)),
cpu_system_us: samples
.cpu_system_us
.as_ref()
.map(|samples| compute_distribution_stats(samples)),
cpu_total_us: samples
.cpu_total_us
.as_ref()
.map(|samples| compute_distribution_stats(samples)),
};
(!stats.is_empty()).then_some(stats)
}
/// Returns the population standard deviation of `samples` around `mean`
/// (the divisor is the sample count), or `0.0` for an empty slice.
fn standard_deviation(samples: &[f64], mean: f64) -> f64 {
    match samples.len() {
        0 => 0.0,
        count => {
            let squared_error_sum = samples
                .iter()
                .map(|&sample| {
                    let deviation = sample - mean;
                    deviation * deviation
                })
                .sum::<f64>();
            (squared_error_sum / count as f64).sqrt()
        }
    }
}
/// Returns the nearest-rank percentile `p` (0–100) of an ascending slice.
///
/// The rank is `ceil(p / 100 * len)`, converted to a zero-based index and
/// clamped into the slice. An empty slice yields `0.0`.
fn percentile(sorted: &[f64], p: f64) -> f64 {
    match sorted.len() {
        0 => 0.0,
        len => {
            let rank = ((p / 100.0) * len as f64).ceil() as usize;
            let last_index = len - 1;
            sorted[rank.saturating_sub(1).min(last_index)]
        }
    }
}
/// Returns by what percentage `current` is smaller than `original`.
///
/// Yields `0.0` when `original` is zero or negative; the result is negative
/// when `current` exceeds `original`.
fn percentage_reduction(original: f64, current: f64) -> f64 {
    if original <= 0.0 {
        return 0.0;
    }
    let saved = original - current;
    (saved / original) * 100.0
}
/// Returns `part` as a percentage of `total`, or `0.0` when `total` is zero
/// or negative.
fn percentage_share(part: f64, total: f64) -> f64 {
    if total <= 0.0 {
        return 0.0;
    }
    let fraction = part / total;
    fraction * 100.0
}
/// Divides `lhs` by `rhs`, returning `0.0` instead when `rhs` is zero or
/// negative.
fn safe_ratio(lhs: f64, rhs: f64) -> f64 {
    if rhs <= 0.0 {
        return 0.0;
    }
    lhs / rhs
}
/// Returns `total_ms - subtracted_ms`, clamped so it never goes below `0.0`.
fn saturating_delta_ms(total_ms: f64, subtracted_ms: f64) -> f64 {
    let delta = total_ms - subtracted_ms;
    f64::max(delta, 0.0)
}
/// Formats a millisecond value with two decimal places and no unit suffix.
fn format_ms(value: f64) -> String {
    format!("{:.2}", value)
}
/// Formats a hotspot value according to its unit label.
///
/// `"pct"` renders with one decimal and a `%` suffix, `"MiB"` goes through
/// `format_mib`, and every other unit is formatted as milliseconds.
fn format_hotspot_value(unit: &str, value: f64) -> String {
    if unit == "pct" {
        format!("{:.1}%", value)
    } else if unit == "MiB" {
        format_mib(value)
    } else {
        format_ms(value)
    }
}
/// Formats `samples` as a bracketed list without rescaling the values.
fn format_sample_list(samples: &[f64]) -> String {
    format_scaled_sample_list(samples, |sample| sample)
}
/// Formats `samples` as `[a, b, ...]`, passing each value through `scale` and
/// rendering it with two decimal places.
fn format_scaled_sample_list(samples: &[f64], scale: impl Fn(f64) -> f64) -> String {
    let rendered: Vec<String> = samples
        .iter()
        .map(|&sample| format!("{:.2}", scale(sample)))
        .collect();
    format!("[{}]", rendered.join(", "))
}
/// Formats a MiB value with two decimal places and no unit suffix.
fn format_mib(value: f64) -> String {
    format!("{:.2}", value)
}
/// Renders labels as a comma-separated list with each label wrapped in backticks.
fn format_label_list(labels: &[&str]) -> String {
    let mut rendered = String::new();
    for (position, label) in labels.iter().enumerate() {
        if position > 0 {
            rendered.push_str(", ");
        }
        rendered.push('`');
        rendered.push_str(label);
        rendered.push('`');
    }
    rendered
}
/// Renders labels as a comma-separated list with each label wrapped in backticks.
fn format_string_label_list(labels: &[&str]) -> String {
    let mut rendered = String::new();
    for (position, label) in labels.iter().enumerate() {
        if position > 0 {
            rendered.push_str(", ");
        }
        rendered.push('`');
        rendered.push_str(label);
        rendered.push('`');
    }
    rendered
}
/// Appends `value` to `labels` unless an equal label is already present.
fn push_unique_label<'a>(labels: &mut Vec<&'a str>, value: &'a str) {
    let already_present = labels.iter().any(|existing| *existing == value);
    if already_present {
        return;
    }
    labels.push(value);
}
/// Formats a millisecond delta with an explicit sign and two decimal places.
fn format_delta_ms(value: f64) -> String {
    format!("{:+.2}", value)
}
/// Formats a percentage delta with an explicit sign, one decimal place and a
/// `%` suffix.
fn format_delta_pct(value: f64) -> String {
    format!("{:+.1}%", value)
}
/// Appends `value` to `samples` when it is `Some`, creating the vector on
/// first use. A `None` value leaves `samples` untouched.
fn push_optional_sample(samples: &mut Option<Vec<f64>>, value: Option<f64>) {
    match value {
        Some(sample) => samples.get_or_insert_with(Vec::new).push(sample),
        None => {}
    }
}
/// Converts a byte count to mebibytes (1 MiB = 1024 × 1024 bytes).
fn bytes_to_mib(value: f64) -> f64 {
    const BYTES_PER_MIB: f64 = 1024.0 * 1024.0;
    value / BYTES_PER_MIB
}
/// Converts microseconds to milliseconds.
fn micros_to_ms(value: f64) -> f64 {
    const MICROS_PER_MS: f64 = 1000.0;
    value / MICROS_PER_MS
}
/// Hotspot metric: mean wall time in milliseconds. Always `Some`.
fn hotspot_wall_mean_ms(scenario: &BenchmarkScenarioReport) -> Option<f64> {
    let mean_ms = scenario.wall_stats.mean_ms;
    Some(mean_ms)
}
/// Hotspot metric: standard deviation of wall time in milliseconds. Always `Some`.
fn hotspot_wall_stddev_ms(scenario: &BenchmarkScenarioReport) -> Option<f64> {
    let stddev_ms = scenario.wall_stats.stddev_ms;
    Some(stddev_ms)
}
/// Hotspot metric: the scenario's wall-time range as reported by
/// `wall_range_ms()`. Always `Some`.
fn hotspot_wall_range_ms(scenario: &BenchmarkScenarioReport) -> Option<f64> {
    let range_ms = scenario.wall_range_ms();
    Some(range_ms)
}
/// Hotspot metric: mean guest import time in milliseconds, or `None` when the
/// scenario has no guest import statistics.
fn hotspot_guest_import_mean_ms(scenario: &BenchmarkScenarioReport) -> Option<f64> {
    let stats = scenario.guest_import_stats.as_ref()?;
    Some(stats.mean_ms)
}
/// Hotspot metric: mean startup overhead in milliseconds, or `None` when the
/// scenario has no startup overhead statistics.
fn hotspot_startup_overhead_mean_ms(scenario: &BenchmarkScenarioReport) -> Option<f64> {
    let stats = scenario.startup_overhead_stats.as_ref()?;
    Some(stats.mean_ms)
}
/// Hotspot metric: mean context-setup phase time in milliseconds. Always `Some`.
fn hotspot_context_setup_mean_ms(scenario: &BenchmarkScenarioReport) -> Option<f64> {
    let mean_ms = scenario.phase_stats.context_setup_ms.mean_ms;
    Some(mean_ms)
}
/// Hotspot metric: mean startup phase time in milliseconds. Always `Some`.
fn hotspot_startup_phase_mean_ms(scenario: &BenchmarkScenarioReport) -> Option<f64> {
    let mean_ms = scenario.phase_stats.startup_ms.mean_ms;
    Some(mean_ms)
}
/// Reads `mean_ms` from the optional `phase_stats.guest_execution_ms` entry.
///
/// Returns `None` when the scenario has no guest-execution phase statistics.
fn hotspot_guest_execution_mean_ms(scenario: &BenchmarkScenarioReport) -> Option<f64> {
    let guest_execution = scenario.phase_stats.guest_execution_ms.as_ref()?;
    Some(guest_execution.mean_ms)
}
/// Reads `phase_stats.completion_ms.mean_ms` from the scenario.
///
/// Always returns `Some`; the `Option` wrapper matches the signature of the
/// other `hotspot_*` accessors.
fn hotspot_completion_mean_ms(scenario: &BenchmarkScenarioReport) -> Option<f64> {
    let completion = &scenario.phase_stats.completion_ms;
    Some(completion.mean_ms)
}
fn hotspot_startup_share_pct(scenario: &BenchmarkScenarioReport) -> Option<f64> {
scenario.mean_startup_share_pct()
}
/// Returns the mean of `resource_usage_stats.rss_bytes`, converted from bytes
/// to MiB via `bytes_to_mib`.
///
/// Yields `None` when either the resource-usage statistics or the RSS entry
/// is absent.
fn hotspot_rss_mean_mib(scenario: &BenchmarkScenarioReport) -> Option<f64> {
    let usage = scenario.resource_usage_stats.as_ref()?;
    let rss = usage.rss_bytes.as_ref()?;
    Some(bytes_to_mib(rss.mean))
}
/// Returns the mean of `resource_usage_stats.heap_used_bytes`, converted from
/// bytes to MiB via `bytes_to_mib`.
///
/// Yields `None` when either the resource-usage statistics or the heap entry
/// is absent.
fn hotspot_heap_mean_mib(scenario: &BenchmarkScenarioReport) -> Option<f64> {
    scenario
        .resource_usage_stats
        .as_ref()
        .and_then(|usage| usage.heap_used_bytes.as_ref())
        .map(|heap| bytes_to_mib(heap.mean))
}
/// Returns the mean of `resource_usage_stats.cpu_total_us`, converted from
/// microseconds to milliseconds via `micros_to_ms`.
///
/// Yields `None` when either the resource-usage statistics or the total-CPU
/// entry is absent.
fn hotspot_total_cpu_mean_ms(scenario: &BenchmarkScenarioReport) -> Option<f64> {
    let usage = scenario.resource_usage_stats.as_ref()?;
    match usage.cpu_total_us.as_ref() {
        Some(cpu_total) => Some(micros_to_ms(cpu_total.mean)),
        None => None,
    }
}
// Unit tests for the resumable benchmark orchestration. They drive
// `orchestrate_javascript_benchmark_report` with synthetic transport and
// scenario closures and inspect the run-state file written under a temp dir.
#[cfg(test)]
mod tests {
use super::*;
use std::cell::RefCell;
use tempfile::tempdir;
/// Builds one single-sample transport RTT report per entry in
/// `TRANSPORT_RTT_PAYLOAD_BYTES`. The sample for the entry at position
/// `index` is `index + 1`, so each payload size gets a distinct value.
fn synthetic_transport_reports() -> Vec<BenchmarkTransportRttReport> {
TRANSPORT_RTT_PAYLOAD_BYTES
.iter()
.enumerate()
.map(|(index, payload_bytes)| {
let sample = index as f64 + 1.0;
BenchmarkTransportRttReport {
channel: TRANSPORT_RTT_CHANNEL,
payload_bytes: *payload_bytes,
samples_ms: vec![sample],
stats: compute_stats(&[sample]),
}
})
.collect()
}
/// Builds a single-sample scenario report for `definition` whose wall time is
/// `wall_sample_ms`.
///
/// The wall sample is split into phases: context setup is 1/5, startup is
/// 1/4, guest execution is 1/3 (only when `definition.expect_import_metric`
/// is true), and completion is whatever remains, so the phases add up to the
/// wall sample. Guest-import and startup-overhead data are populated only
/// when `expect_import_metric` is true.
fn synthetic_scenario_report(
definition: ScenarioDefinition,
wall_sample_ms: f64,
) -> BenchmarkScenarioReport {
let context_setup_ms = wall_sample_ms / 5.0;
let startup_ms = wall_sample_ms / 4.0;
let guest_execution_ms = definition
.expect_import_metric
.then_some(wall_sample_ms / 3.0);
// Completion absorbs the remainder of the wall sample.
let completion_ms =
wall_sample_ms - context_setup_ms - startup_ms - guest_execution_ms.unwrap_or(0.0);
// Startup overhead is the sum of every phase except guest execution.
let startup_overhead_ms = definition
.expect_import_metric
.then_some(context_setup_ms + startup_ms + completion_ms);
// Fixed 64 MiB RSS and 12 MiB heap; total CPU is the wall sample scaled by
// 1000 (milliseconds to microseconds, per the `_ms` / `_us` field names).
let resource_usage_samples = BenchmarkResourceUsage {
rss_bytes: Some(vec![64.0 * 1024.0 * 1024.0]),
heap_used_bytes: Some(vec![12.0 * 1024.0 * 1024.0]),
cpu_user_us: None,
cpu_system_us: None,
cpu_total_us: Some(vec![wall_sample_ms * 1000.0]),
};
BenchmarkScenarioReport {
id: definition.id,
workload: definition.workload,
runtime: definition.runtime.label(),
mode: definition.mode.label(),
description: definition.description,
fixture: definition.fixture,
compile_cache: definition.compile_cache.label(),
wall_samples_ms: vec![wall_sample_ms],
wall_stats: compute_stats(&[wall_sample_ms]),
guest_import_samples_ms: guest_execution_ms.map(|sample| vec![sample]),
guest_import_stats: guest_execution_ms.map(|sample| compute_stats(&[sample])),
startup_overhead_samples_ms: startup_overhead_ms.map(|sample| vec![sample]),
startup_overhead_stats: startup_overhead_ms.map(|sample| compute_stats(&[sample])),
phase_samples_ms: BenchmarkScenarioPhases {
context_setup_ms: vec![context_setup_ms],
startup_ms: vec![startup_ms],
guest_execution_ms: guest_execution_ms.map(|sample| vec![sample]),
completion_ms: vec![completion_ms],
},
phase_stats: BenchmarkScenarioPhases {
context_setup_ms: compute_stats(&[context_setup_ms]),
startup_ms: compute_stats(&[startup_ms]),
guest_execution_ms: guest_execution_ms.map(|sample| compute_stats(&[sample])),
completion_ms: compute_stats(&[completion_ms]),
},
// Stats are computed before the samples are moved into the report.
resource_usage_stats: compute_resource_usage_stats(&resource_usage_samples),
resource_usage_samples: Some(resource_usage_samples),
}
}
/// Returns a fixed `BenchmarkHost` description so tests do not probe the
/// real machine.
fn synthetic_host() -> BenchmarkHost {
BenchmarkHost {
node_binary: String::from("node"),
node_version: String::from("v22.0.0"),
os: "linux",
arch: "x86_64",
logical_cpus: 8,
}
}
// Seeds a persisted run state containing the transport RTT stage and the
// first scenario, then checks that orchestration reuses both (two resumed
// stages), never invokes the transport closure, and runs only the remaining
// scenario definitions.
#[test]
fn javascript_benchmark_orchestration_resumes_completed_stages_from_run_state() {
let tempdir = tempdir().expect("create tempdir");
let repo_root = tempdir.path().join("repo");
let artifact_dir = tempdir.path().join("artifacts");
fs::create_dir_all(&repo_root).expect("create repo root");
let config = JavascriptBenchmarkConfig {
iterations: 1,
warmup_iterations: 0,
};
let host = synthetic_host();
let definitions = benchmark_scenarios();
// Pre-populate and persist the state that a previous, interrupted run
// would have left behind.
let mut state = StoredBenchmarkRunState::new(&config, &host, &repo_root);
state.record_transport_rtt(&synthetic_transport_reports());
state.record_scenario(&synthetic_scenario_report(definitions[0], 10.0));
persist_benchmark_run_state(&benchmark_run_state_path(&artifact_dir), &state)
.expect("persist initial run state");
// `RefCell` lets the closures below record their invocations while the
// test keeps shared access for the assertions afterwards.
let transport_calls = RefCell::new(0usize);
let scenario_calls = RefCell::new(Vec::new());
let (report, resumed_stage_count, _) = orchestrate_javascript_benchmark_report(
&config,
&repo_root,
&host,
&artifact_dir,
|| {
*transport_calls.borrow_mut() += 1;
Ok(synthetic_transport_reports())
},
|definition| {
scenario_calls.borrow_mut().push(definition.id.to_owned());
Ok(synthetic_scenario_report(definition, 20.0))
},
)
.expect("resume benchmark orchestration");
// Transport RTT plus the first scenario were taken from the stored state.
assert_eq!(resumed_stage_count, 2);
assert_eq!(*transport_calls.borrow(), 0);
// Only the definitions after the first one were actually executed, in order.
assert_eq!(
scenario_calls.borrow().as_slice(),
&definitions[1..]
.iter()
.map(|definition| definition.id.to_owned())
.collect::<Vec<_>>()
);
assert_eq!(
report.transport_rtt.len(),
TRANSPORT_RTT_PAYLOAD_BYTES.len()
);
// The final report still covers every definition, resumed or freshly run.
assert_eq!(report.scenarios.len(), definitions.len());
assert_eq!(report.scenarios[0].id, definitions[0].id);
assert_eq!(report.scenarios[1].id, definitions[1].id);
}
// Makes the scenario closure fail for the `cold-local-import` definition and
// checks that the error is propagated while the stages completed before the
// failure are present in the persisted run-state file.
#[test]
fn javascript_benchmark_orchestration_persists_completed_stages_before_failure() {
let tempdir = tempdir().expect("create tempdir");
let repo_root = tempdir.path().join("repo");
let artifact_dir = tempdir.path().join("artifacts");
fs::create_dir_all(&repo_root).expect("create repo root");
let config = JavascriptBenchmarkConfig {
iterations: 1,
warmup_iterations: 0,
};
let host = synthetic_host();
let state_path = benchmark_run_state_path(&artifact_dir);
let failure = orchestrate_javascript_benchmark_report(
&config,
&repo_root,
&host,
&artifact_dir,
|| Ok(synthetic_transport_reports()),
|definition| {
if definition.id == "cold-local-import" {
Err(JavascriptBenchmarkError::InvalidConfig("synthetic failure"))
} else {
Ok(synthetic_scenario_report(definition, 15.0))
}
},
)
.expect_err("expected synthetic orchestration failure");
assert!(matches!(
failure,
JavascriptBenchmarkError::InvalidConfig("synthetic failure")
));
// Read the state file back from disk to verify what was persisted.
let stored_state = serde_json::from_str::<StoredBenchmarkRunState>(
&fs::read_to_string(&state_path).expect("read persisted run state"),
)
.expect("parse persisted run state");
assert!(stored_state.transport_rtt.is_some());
// Exactly these two scenario ids are expected in the stored state;
// presumably they are the ones ordered before `cold-local-import` in
// `benchmark_scenarios()` (confirm against that function).
assert_eq!(
stored_state
.scenarios
.iter()
.map(|scenario| scenario.id.as_str())
.collect::<Vec<_>>(),
vec!["isolate-startup", "prewarmed-isolate-startup"]
);
}
}