import argparse
import subprocess
import sys
from statistics import median
from typing import Dict, List, Optional
def run_benchmark_docker() -> str:
cmd = [
"docker", "run", "--rm",
"--privileged",
"--cpuset-cpus=0,1",
"--memory=4g",
"--memory-swappiness=0",
"clockworker-bench"
]
result = subprocess.run(cmd, capture_output=True, text=True)
return result.stdout + result.stderr
def run_benchmark_local() -> str:
cmd = ["cargo", "bench", "--bench", "priority", "--", "--nocapture"]
result = subprocess.run(cmd, capture_output=True, text=True)
return result.stdout + result.stderr
def parse_benchmark_output(output: str) -> Dict[str, List[float]]:
metrics = {}
for line in output.split('\n'):
line = line.strip()
if not line.startswith('|'):
continue
if 'name' in line.lower() or '---' in line:
continue
parts = [p.strip() for p in line.split('|')[1:-1]]
if len(parts) < 6:
continue
metric = parts[0]
if not metric or metric == 'name':
continue
try:
values = []
for p in parts[1:6]:
p = p.replace('ms', '').replace('/s', '').strip()
values.append(float(p))
metrics[metric] = values
except (ValueError, IndexError):
continue
return metrics
def aggregate_runs(all_runs: List[Dict[str, List[float]]]) -> Dict[str, List[Optional[float]]]:
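    """Compute the per-config median of each metric across all runs.

    A config column with no data in any run yields None for that position.
    """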
if not all_runs:
return {}
aggregated = {}
all_metrics = set()
for run in all_runs:
all_metrics.update(run.keys())
for metric in all_metrics:
medians = []
        for config_idx in range(5):
            values = [
                run[metric][config_idx]
                for run in all_runs
                if metric in run and len(run[metric]) > config_idx
            ]
            if values:
                medians.append(median(values))
            else:
                medians.append(None)
aggregated[metric] = medians
return aggregated
def format_value(value: Optional[float], metric: str, baseline: Optional[float] = None) -> str:
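    """Format a value (ms, or a raw count for throughput) and, when `baseline` is given, append the % change."""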
if value is None:
return "-"
if 'bg_iter' in metric:
formatted = f"{value:.0f}"
else:
formatted = f"{value:.2f}ms"
if baseline is not None and baseline > 0:
pct_change = ((value - baseline) / baseline) * 100
if pct_change > 0:
formatted += f" (+{pct_change:.0f}%)"
elif pct_change < 0:
formatted += f" ({pct_change:.0f}%)"
else:
formatted += " (0%)"
return formatted
def print_table(aggregated: Dict[str, List[Optional[float]]], show_pct: bool = True,
                include_queue: bool = True, include_latency: bool = True,
                include_throughput: bool = True):
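    """Print a pipe-delimited table of aggregated metrics.

    Columns are reordered so 'Tokio (fg only)' comes first and serves as the
    baseline for percentage changes; throughput rows show no percentage.
    """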
orig_configs = ['CW (fg+bg)', 'CW (fg only)', 'Tokio (fg+bg)', 'Tokio (fg only)', 'Two-RT/OS']
    new_order = [3, 0, 1, 2, 4]
    configs = [orig_configs[i] for i in new_order]
baseline_idx = 0
all_metrics = [
('p50_queue_delay', 'p50 queue delay', 'queue'),
('p90_queue_delay', 'p90 queue delay', 'queue'),
('p99_queue_delay', 'p99 queue delay', 'queue'),
('p50_total_latency', 'p50 total latency', 'latency'),
('p90_total_latency', 'p90 total latency', 'latency'),
('p99_total_latency', 'p99 total latency', 'latency'),
('bg_iter_s', 'BG throughput', 'throughput'),
]
metrics_to_show = []
for key, name, category in all_metrics:
if category == 'queue' and not include_queue:
continue
if category == 'latency' and not include_latency:
continue
if category == 'throughput' and not include_throughput:
continue
metrics_to_show.append((key, name))
col_widths = [18] + [22] * 5
header = "| {:^{}} |".format("Metric", col_widths[0])
for i, config in enumerate(configs):
header += " {:^{}} |".format(config, col_widths[i+1])
print(header)
sep = "|" + "|".join(["-" * (w + 2) for w in col_widths]) + "|"
print(sep)
for metric_key, metric_name in metrics_to_show:
if metric_key not in aggregated:
continue
orig_values = aggregated[metric_key]
values = [orig_values[i] if i < len(orig_values) else None for i in new_order]
baseline = values[baseline_idx] if show_pct else None
row = "| {:^{}} |".format(metric_name, col_widths[0])
for i, val in enumerate(values):
if i == baseline_idx or 'bg_iter' in metric_key:
formatted = format_value(val, metric_key, None)
else:
formatted = format_value(val, metric_key, baseline)
row += " {:^{}} |".format(formatted, col_widths[i+1])
print(row)
def main():
parser = argparse.ArgumentParser(
description='Aggregate priority benchmark results',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
%(prog)s --runs 3 --docker # Run 3 times in Docker
%(prog)s --runs 3 --docker --no-queue # Exclude queue delay metrics
%(prog)s --runs 3 --docker --latency-only # Only show total latency metrics
%(prog)s --runs 3 --docker --no-pct # Hide percentage changes
"""
)
parser.add_argument('--runs', type=int, default=3, help='Number of benchmark runs')
parser.add_argument('--docker', action='store_true', help='Run in Docker')
parser.add_argument('--no-pct', action='store_true', help='Hide percentage changes')
parser.add_argument('--no-queue', action='store_true', help='Exclude queue delay metrics')
parser.add_argument('--no-latency', action='store_true', help='Exclude total latency metrics')
parser.add_argument('--no-throughput', action='store_true', help='Exclude throughput metrics')
parser.add_argument('--latency-only', action='store_true', help='Only show total latency + throughput')
parser.add_argument('--queue-only', action='store_true', help='Only show queue delay + throughput')
args = parser.parse_args()
include_queue = not args.no_queue
include_latency = not args.no_latency
include_throughput = not args.no_throughput
if args.latency_only:
include_queue = False
include_latency = True
include_throughput = True
if args.queue_only:
include_queue = True
include_latency = False
include_throughput = True
all_runs = []
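    # Collect fresh benchmark runs; piped stdin is only parsed as a fallback
    # when no runs were collected (e.g. `--runs 0` with saved output piped in).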
if sys.stdin.isatty() or args.docker or args.runs > 0:
for i in range(args.runs):
print(f"Running benchmark {i+1}/{args.runs}...", file=sys.stderr)
if args.docker:
output = run_benchmark_docker()
else:
output = run_benchmark_local()
metrics = parse_benchmark_output(output)
if metrics:
all_runs.append(metrics)
print(f" Collected {len(metrics)} metrics", file=sys.stderr)
else:
print(f" Warning: No metrics parsed from run {i+1}", file=sys.stderr)
if not all_runs and not sys.stdin.isatty():
output = sys.stdin.read()
if '========== RUN' in output:
parts = output.split('========== RUN')
            for part in parts[1:]:
                metrics = parse_benchmark_output(part)
                if metrics:
                    all_runs.append(metrics)
else:
metrics = parse_benchmark_output(output)
if metrics:
all_runs.append(metrics)
if not all_runs:
print("Error: No benchmark data collected", file=sys.stderr)
sys.exit(1)
print(f"\nAggregating {len(all_runs)} runs (median):\n", file=sys.stderr)
aggregated = aggregate_runs(all_runs)
print_table(
aggregated,
show_pct=not args.no_pct,
include_queue=include_queue,
include_latency=include_latency,
include_throughput=include_throughput
)
if __name__ == '__main__':
main()