import csv
import sys
import math
from pathlib import Path
from collections import defaultdict
# Human-readable label for every benchmarked operation, keyed by the operation
# prefix (the CSV "operation" value without its trailing duration suffix).
# Insertion order matters: the summary table and the chart both iterate this
# dict to decide the order in which operations are shown.
OP_LABELS = {
    # Analysis
    "rms_and_peak": "rms + peak",
    # Channel layout
    "stereo_to_mono": "stereo → mono",
    "mono_to_stereo": "mono → stereo",
    # Sample format
    "format_f32_to_i16": "f32 → i16",
    "format_i16_to_f32": "i16 → f32",
    # Filtering and resampling
    "lowpass_1000hz_order2": "lowpass (order-2)",
    "highpass_1000hz_order2": "highpass (order-2)",
    "resample_44100_to_16000": "resample 44.1 → 16 kHz",
    # Gain and editing
    "scale_by_half": "scale × 0.5",
    "clip": "clip [−0.5, 0.5]",
    "normalize_peak": "normalize (peak)",
    "trim_middle": "trim (middle 50 %)",
    "pad_end": "pad (+ 0.5 s end)",
    "fade_in": "fade-in (0.5 s)",
}
# For each operation prefix, the (our implementation, reference implementation)
# pairs that the summary table compares. One table row is produced per pair
# and per duration, provided both implementations have a result.
COMPARISONS = {
    "rms_and_peak": [("audio_samples", "c_native")],
    "stereo_to_mono": [("audio_samples", "ffmpeg_swresample")],
    "mono_to_stereo": [("audio_samples", "ffmpeg_swresample")],
    "format_f32_to_i16": [("audio_samples", "ffmpeg_swresample")],
    "format_i16_to_f32": [("audio_samples", "ffmpeg_swresample")],
    "lowpass_1000hz_order2": [
        ("audio_samples_butterworth", "ffmpeg_lowpass_order2"),
    ],
    "highpass_1000hz_order2": [
        ("audio_samples_butterworth", "ffmpeg_highpass_order2"),
    ],
    # Each resampler quality level is paired with its own reference.
    "resample_44100_to_16000": [
        ("audio_samples_fast", "ffmpeg_default"),
        ("audio_samples_medium", "ffmpeg_soxr"),
        ("audio_samples_high", "ffmpeg_soxr"),
    ],
    "scale_by_half": [("audio_samples", "c_native")],
    "clip": [("audio_samples", "c_native")],
    "normalize_peak": [("audio_samples", "c_native")],
    "trim_middle": [("audio_samples", "c_native")],
    "pad_end": [("audio_samples", "c_native")],
    "fade_in": [("audio_samples", "c_native")],
}
# Left-to-right bar order inside each chart group, per operation prefix.
# When exactly two implementations are drawn, the chart code treats the first
# entry as the reference and the second as ours for the speed-up annotation.
IMPL_ORDER = {
    "rms_and_peak": ["c_native", "audio_samples"],
    "stereo_to_mono": ["ffmpeg_swresample", "audio_samples"],
    "mono_to_stereo": ["ffmpeg_swresample", "audio_samples"],
    "format_f32_to_i16": ["ffmpeg_swresample", "audio_samples"],
    "format_i16_to_f32": ["ffmpeg_swresample", "audio_samples"],
    "lowpass_1000hz_order2": [
        "ffmpeg_lowpass_order2",
        "audio_samples_butterworth",
    ],
    "highpass_1000hz_order2": [
        "ffmpeg_highpass_order2",
        "audio_samples_butterworth",
    ],
    "resample_44100_to_16000": [
        "ffmpeg_default",
        "ffmpeg_soxr",
        "audio_samples_fast",
        "audio_samples_medium",
        "audio_samples_high",
    ],
    "scale_by_half": ["c_native", "audio_samples"],
    "clip": ["c_native", "audio_samples"],
    "normalize_peak": ["c_native", "audio_samples"],
    "trim_middle": ["c_native", "audio_samples"],
    "pad_end": ["c_native", "audio_samples"],
    "fade_in": ["c_native", "audio_samples"],
}
# Common prefix of the implementation keys that belong to this project (every
# "audio_samples*" key in the tables of this file).
# NOTE(review): nothing in the code visible here reads this constant — confirm
# whether another module uses it before relying on or removing it.
OUR_IMPL_PREFIX = "audio_samples"
# Display name for each implementation key, used in the table's reference
# column and in chart legends. Keys missing here fall back to the raw key.
IMPL_LABELS = {
    # Reference implementations
    "c_native": "C (gcc -O3 -march=native -ffast-math)",
    "ffmpeg_swresample": "FFmpeg (libswresample)",
    "ffmpeg_lowpass_order2": "FFmpeg (libavfilter)",
    "ffmpeg_highpass_order2": "FFmpeg (libavfilter)",
    "ffmpeg_default": "FFmpeg (default)",
    "ffmpeg_soxr": "FFmpeg (SoXr)",
    # Our implementations
    "audio_samples": "audio_samples",
    "audio_samples_butterworth": "audio_samples",
    "audio_samples_fast": "audio_samples (fast)",
    "audio_samples_medium": "audio_samples (medium)",
    "audio_samples_high": "audio_samples (high)",
}
# Bar colour (hex RGB) for each implementation key in the chart.
IMPL_COLORS = {
    # Reference implementations: green for C, orange/brown shades for FFmpeg.
    "c_native": "#2ca02c",
    "ffmpeg_swresample": "#DD8452",
    "ffmpeg_lowpass_order2": "#c45e28",
    "ffmpeg_highpass_order2": "#c45e28",
    "ffmpeg_default": "#DD8452",
    "ffmpeg_soxr": "#a04020",
    # Our implementations: blue shades, one per resampler quality level.
    "audio_samples": "#4C72B0",
    "audio_samples_butterworth": "#4C72B0",
    "audio_samples_fast": "#8ab0d8",
    "audio_samples_medium": "#4C72B0",
    "audio_samples_high": "#2d4f82",
}
def load(path: Path) -> list[dict]:
    """Read the benchmark results CSV at *path* into a list of row dicts.

    The columns ``duration_s``, ``n_samples``, ``iterations`` and ``warmup``
    are converted to ``int``; ``min_us``, ``mean_us``, ``median_us``,
    ``max_us`` and ``stddev_us`` are converted to ``float``. Every other
    column keeps the string value produced by ``csv.DictReader``.

    Raises:
        KeyError: a numeric column is absent from the CSV header.
        TypeError: a row is too short, so a numeric cell is missing.
        ValueError: a numeric cell cannot be parsed.
    """
    int_fields = ("duration_s", "n_samples", "iterations", "warmup")
    float_fields = ("min_us", "mean_us", "median_us", "max_us", "stddev_us")

    rows = []
    # newline="" hands line-ending handling to the csv module (needed for
    # newlines inside quoted fields); the explicit encoding keeps parsing
    # independent of the machine's locale.
    with path.open(newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            for name in int_fields:
                row[name] = int(row[name])
            for name in float_fields:
                row[name] = float(row[name])
            rows.append(row)
    return rows
def op_prefix(operation: str) -> str:
    """Return *operation* without a trailing ``_<digits>s`` duration suffix.

    For example ``"clip_10s"`` becomes ``"clip"``. Names whose last
    underscore-separated part is not digits followed by ``s`` are returned
    unchanged.
    """
    head, sep, tail = operation.rpartition("_")
    if not sep:
        # No underscore at all, so there is no suffix to strip.
        return operation
    has_duration_suffix = tail.endswith("s") and tail[:-1].isdigit()
    return head if has_duration_suffix else operation
def group(rows: list[dict]):
    """Index *rows* as ``data[op_prefix][duration_s][implementation] -> row``.

    Rows whose operation prefix is not a key of ``OP_LABELS`` are dropped.
    If several rows share the same (prefix, duration, implementation), the
    last one wins. The result is a nested ``defaultdict``.
    """
    grouped: dict[str, dict[int, dict[str, dict]]] = defaultdict(
        lambda: defaultdict(dict)
    )
    for record in rows:
        key = op_prefix(record["operation"])
        if key in OP_LABELS:
            by_duration = grouped[key]
            by_duration[record["duration_s"]][record["implementation"]] = record
    return grouped
def fmt_us(v: float) -> str:
    """Format a latency given in microseconds for display.

    Values of 1000 and above are shown in milliseconds with one decimal,
    values from 10 up to 1000 as whole microseconds, and anything smaller as
    microseconds with two decimals.
    """
    if v >= 1000:
        amount, digits, unit = v / 1000, 1, "ms"
    elif v >= 10:
        amount, digits, unit = v, 0, "µs"
    else:
        amount, digits, unit = v, 2, "µs"
    return f"{amount:.{digits}f} {unit}"
def ratio_str(ours: float, ref: float) -> str:
    """Describe how *ours* compares with *ref* (both latencies, lower is better).

    Returns:
        ``"**N.N× faster**"`` when ours takes less than 95 % of the reference
        time, ``"**≈ tied**"`` when it is within 95 %–105 %,
        ``"N.N× slower"`` above that, and ``"n/a"`` when either value is
        zero, negative, or not finite and no meaningful ratio exists.
    """
    # A zero median (timer resolution) or NaN would otherwise raise
    # ZeroDivisionError or print "nan× slower"; report it as not comparable.
    comparable = math.isfinite(ours) and math.isfinite(ref) and ours > 0 and ref > 0
    if not comparable:
        return "n/a"

    r = ours / ref
    if r > 1.05:
        return f"{r:.1f}× slower"
    if r < 0.95:
        return f"**{1/r:.1f}× faster**"
    return "**≈ tied**"
def _quality_suffix(impl: str) -> str:
for q in ("_fast", "_medium", "_high"):
if impl.endswith(q):
return f" ({q[1:]})"
return ""
def markdown_table(data) -> str:
    """Build the markdown summary comparing our medians against the references.

    *data* is the nested mapping produced by ``group``. Operations appear in
    ``OP_LABELS`` order and durations in ascending order. One row is emitted
    for each (ours, reference) pair listed in ``COMPARISONS``; a pair is
    skipped when either implementation has no result for that duration.
    """
    out = [
        "## Benchmark Summary\n",
        "Metric: **median latency** (lower is better).\n",
        "| Operation | Duration | audio_samples | Reference | vs Reference |",
        "|---|---|---|---|---|",
    ]
    for prefix, op_label in OP_LABELS.items():
        if prefix not in data:
            continue
        pairs = COMPARISONS.get(prefix, [])
        by_duration = data[prefix]
        for dur in sorted(by_duration):
            results = by_duration[dur]
            for our_impl, ref_impl in pairs:
                ours = results.get(our_impl)
                reference = results.get(ref_impl)
                if ours is None or reference is None:
                    continue
                our_med = ours["median_us"]
                ref_med = reference["median_us"]
                # Resampler rows carry their quality level in the label.
                row_label = op_label + _quality_suffix(our_impl)
                ref_name = IMPL_LABELS.get(ref_impl, ref_impl)
                cells = (
                    f"| {row_label} | {dur}s | {fmt_us(our_med)} "
                    f"| {fmt_us(ref_med)} ({ref_name}) "
                    f"| {ratio_str(our_med, ref_med)} |"
                )
                out.append(cells)
    return "\n".join(out)
def _impl_color(impl: str) -> str:
    """Return the chart colour for *impl*, or neutral grey for unknown keys."""
    try:
        return IMPL_COLORS[impl]
    except KeyError:
        return "#888888"
def plot(data, out_path: Path) -> None:
    """Draw one grouped bar chart per operation and save the figure.

    *data* is the nested mapping produced by ``group``. Each subplot shows
    median latency per duration, with one bar per implementation in
    ``IMPL_ORDER`` order (first-seen order when the operation has no entry).
    When exactly two implementations are drawn, a speed-up or slow-down
    label is placed above each pair of bars.

    The figure is written to *out_path* and to a PDF with the same stem.
    If matplotlib or numpy cannot be imported, or *data* contains no known
    operation, a notice is printed to stderr and nothing is written.
    """
    try:
        import matplotlib
        matplotlib.use("Agg")
        import matplotlib.pyplot as plt
        import matplotlib.ticker as ticker
        import numpy as np
    except ImportError:
        print("matplotlib not available — skipping plot (pip install matplotlib)", file=sys.stderr)
        return

    prefixes = [p for p in OP_LABELS if p in data]
    if not prefixes:
        # plt.subplots() rejects a grid with zero rows.
        print("No known operations in data — skipping plot", file=sys.stderr)
        return

    n_ops = len(prefixes)
    ncols = 2
    nrows = math.ceil(n_ops / ncols)
    # squeeze=False always returns a 2-D array of Axes, so flattening works
    # for every grid size, including a single row.
    fig, axes = plt.subplots(nrows, ncols, figsize=(13, 3.5 * nrows), squeeze=False)
    axes = axes.flatten()

    def _format_tick(v, _pos):
        return f"{v/1000:.0f} ms" if v >= 1000 else f"{v:.0f} µs"

    try:
        for ax, prefix in zip(axes, prefixes):
            dur_map = data[prefix]
            durations = sorted(dur_map)

            order = IMPL_ORDER.get(prefix)
            if order is None:
                # No configured order: keep implementations in first-seen order.
                order = list(dict.fromkeys(k for d in durations for k in dur_map[d]))
            avail = {impl for d in durations for impl in dur_map[d]}
            impls = [im for im in order if im in avail]
            if not impls:
                # None of the configured implementations has data; the bar
                # width below would divide by zero.
                print(f"No known implementations for {prefix} — skipping panel", file=sys.stderr)
                ax.set_visible(False)
                continue

            n_impls = len(impls)
            width = min(0.3, 0.75 / n_impls)
            # Centre the group of bars on each integer x position.
            offsets = np.arange(n_impls) * width - (n_impls - 1) * width / 2
            x = np.arange(len(durations))
            tick_labels = [f"{d}s" for d in durations]

            for impl, offset in zip(impls, offsets):
                medians = [
                    dur_map[d].get(impl, {}).get("median_us", float("nan"))
                    for d in durations
                ]
                ax.bar(x + offset, medians, width,
                       label=IMPL_LABELS.get(impl, impl),
                       color=_impl_color(impl), alpha=0.87)

            if n_impls == 2:
                # IMPL_ORDER lists the reference first and ours second.
                ref_impl, our_impl = impls
                for xi, dur in enumerate(durations):
                    rm = dur_map[dur].get(ref_impl, {}).get("median_us", float("nan"))
                    om = dur_map[dur].get(our_impl, {}).get("median_us", float("nan"))
                    if math.isnan(rm) or math.isnan(om) or rm == 0:
                        continue
                    r = om / rm
                    if r < 0.95:
                        note, note_color = f"{1/r:.1f}×↑", _impl_color(our_impl)
                    elif r > 1.05:
                        note, note_color = f"{r:.1f}×↓", "red"
                    else:
                        note, note_color = "≈", "gray"
                    ax.text(xi, max(rm, om) * 1.06, note, ha="center", va="bottom",
                            fontsize=8, color=note_color, fontweight="bold")

            ax.set_title(OP_LABELS[prefix], fontsize=11, fontweight="bold")
            ax.set_xticks(x)
            ax.set_xticklabels(tick_labels)
            ax.set_ylabel("Median latency (µs)")
            ax.legend(fontsize=7, loc="upper left")
            ax.yaxis.set_major_formatter(ticker.FuncFormatter(_format_tick))
            ax.set_ylim(bottom=0)
            ax.spines["top"].set_visible(False)
            ax.spines["right"].set_visible(False)
            ax.grid(axis="y", linestyle="--", alpha=0.4)

        # Hide the unused cell when the number of operations is odd.
        for ax in axes[n_ops:]:
            ax.set_visible(False)

        fig.suptitle("audio_samples vs FFmpeg/C — median latency (lower is better)",
                     fontsize=13, fontweight="bold", y=1.01)
        fig.tight_layout()
        fig.savefig(out_path, dpi=300, bbox_inches="tight")
        fig.savefig(out_path.with_suffix(".pdf"), dpi=300, bbox_inches="tight")
    finally:
        # pyplot keeps figures alive until they are closed explicitly.
        plt.close(fig)
    print(f"Chart saved to {out_path}", file=sys.stderr)
def main() -> None:
    """Print the markdown summary and write the chart for a results CSV.

    The CSV path is the first command-line argument, defaulting to
    ``results.csv`` next to this script. The chart is saved beside the CSV
    with a ``.png`` suffix. Exits with an error message if the CSV is missing.
    """
    if len(sys.argv) > 1:
        csv_path = Path(sys.argv[1])
    else:
        csv_path = Path(__file__).parent / "results.csv"

    if not csv_path.exists():
        sys.exit(f"results.csv not found at {csv_path}. Run benchmarks/run.sh first.")

    data = group(load(csv_path))
    print(markdown_table(data))
    plot(data, csv_path.with_suffix(".png"))


if __name__ == "__main__":
    main()