name: Benchmark suite

# Triggers: nightly at 03:00 UTC (cron schedules are evaluated in UTC) and
# manual runs from the Actions tab.
on:
  schedule:
    - cron: "0 3 * * *"
  workflow_dispatch:

# Write access to repository contents is needed because the job commits the
# refreshed benchmark result files back to the repository.
permissions:
  contents: write
jobs:
  # Single job: runs the benchmark suite, gates on hardware drift and on
  # >10% regressions versus the previous run, then commits the result files.
  bench:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install Rust toolchain
        uses: dtolnay/rust-toolchain@stable

      - name: Cache cargo
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            target
          key: ${{ runner.os }}-bench-${{ hashFiles('Cargo.lock') }}
          restore-keys: ${{ runner.os }}-bench-

      - name: Install Node.js
        uses: actions/setup-node@v4
        with:
          node-version: "20"

      # Records CPU model, CPU count and total memory, then compares them with
      # bench/hw-baseline.json. A missing baseline is seeded from this run; a
      # mismatch fails the step so results from different hardware are
      # rejected.
      # NOTE(review): hosted runners may not report identical hardware on
      # every run - confirm that a hard failure is the intended behavior.
      - name: Collect hardware info and check baseline
        run: |
          python3 - << 'PYEOF'
          import json, os, pathlib, sys

          cpu_model = "unknown"
          cpu_count = os.cpu_count() or 0
          mem_gb = 0

          # Best-effort probes: on failure keep the fallback value, but say
          # why instead of swallowing the error silently.
          try:
              lines = pathlib.Path("/proc/cpuinfo").read_text().splitlines()
              for l in lines:
                  if l.startswith("model name"):
                      cpu_model = l.split(":", 1)[1].strip()
                      break
          except Exception as exc:
              print(f"::warning::Could not read CPU model from /proc/cpuinfo: {exc!r}")

          try:
              mem_kb = int(next(
                  l.split()[1] for l in pathlib.Path("/proc/meminfo").read_text().splitlines()
                  if l.startswith("MemTotal")
              ))
              # MemTotal is reported in kB; convert to GiB with one decimal.
              mem_gb = round(mem_kb / 1024 / 1024, 1)
          except Exception as exc:
              print(f"::warning::Could not read MemTotal from /proc/meminfo: {exc!r}")

          hw = {"cpu_model": cpu_model, "cpu_count": cpu_count, "mem_gb": mem_gb}
          print(f"Hardware detected: {hw}")

          baseline_path = pathlib.Path("bench/hw-baseline.json")
          if not baseline_path.exists():
              print("No hw-baseline.json found — seeding from current run.")
              baseline_path.write_text(json.dumps(hw, indent=2) + "\n")
          else:
              baseline = json.loads(baseline_path.read_text())
              mismatches = []
              if baseline["cpu_model"] != hw["cpu_model"]:
                  mismatches.append(
                      f" cpu_model: baseline={baseline['cpu_model']!r} actual={hw['cpu_model']!r}"
                  )
              if baseline["cpu_count"] != hw["cpu_count"]:
                  mismatches.append(
                      f" cpu_count: baseline={baseline['cpu_count']} actual={hw['cpu_count']}"
                  )
              # Memory is compared with a 0.5 GiB tolerance.
              if abs(baseline["mem_gb"] - hw["mem_gb"]) > 0.5:
                  mismatches.append(
                      f" mem_gb: baseline={baseline['mem_gb']} actual={hw['mem_gb']}"
                  )
              if mismatches:
                  print("HARDWARE MISMATCH — results rejected:")
                  for m in mismatches:
                      print(m)
                  print()
                  print("Re-seed by deleting bench/hw-baseline.json and re-running.")
                  sys.exit(1)
              else:
                  print("Hardware matches baseline — proceeding.")

          # Write hw info to a temp file so run.sh can embed it
          pathlib.Path("bench/.hw-info.json").write_text(json.dumps(hw))
          PYEOF

      # First pass, before any Rust baseline binaries have been built.
      # NOTE(review): presumably --no-rust makes run.sh skip the Rust
      # baselines - confirm against bench/run.sh.
      - name: Run benchmark suite
        run: bash bench/run.sh --no-rust

      # Builds bench/<name>/<name>.rs into bench/.build/<name>_rs for every
      # benchmark directory that has one, then runs the suite again.
      - name: Compile Rust baselines and re-run
        run: |
          mkdir -p bench/.build
          for d in bench/*/; do
            b=$(basename "$d")
            rs="$d$b.rs"
            if [ -f "$rs" ]; then
              # A failed build is non-fatal, but it is surfaced as a warning
              # annotation rather than being discarded.
              if ! rustc -O -o "bench/.build/${b}_rs" "$rs"; then
                echo "::warning file=${rs}::rustc failed for ${rs}; no Rust baseline binary was built for '${b}'."
              fi
            fi
          done
          bash bench/run.sh

      # Fails the job when any benchmark/language timing is more than 10%
      # slower than in bench/results-prev.json. The check is skipped when
      # there is no previous file or when both files carry differing
      # "hardware" sections.
      - name: Check for regression (>10% vs previous)
        run: |
          # Guard first so the here-document below stays at the top level of
          # the script: its terminator and the Python code must be unindented.
          if [ ! -f bench/results-prev.json ]; then
            echo "No bench/results-prev.json found; skipping regression check."
            exit 0
          fi
          python3 - bench/results.json bench/results-prev.json << 'PYEOF'
          import sys, json

          def load(path):
              # Context manager so the file handle is closed deterministically.
              with open(path) as fh:
                  return json.load(fh)

          cur_doc = load(sys.argv[1])
          prev_doc = load(sys.argv[2])

          # Skip comparison if hardware shape changed between runs
          cur_hw = cur_doc.get("hardware", {})
          prev_hw = prev_doc.get("hardware", {})
          if cur_hw and prev_hw and cur_hw != prev_hw:
              print(f"Hardware changed ({prev_hw} -> {cur_hw}); skipping regression check.")
              sys.exit(0)

          cur = cur_doc["benchmarks"]
          prev = prev_doc["benchmarks"]
          failures = []
          for bench, langs in cur.items():
              for lang, ns in langs.items():
                  prev_ns = prev.get(bench, {}).get(lang)
                  # Entries with no (or zero) previous timing are not compared.
                  if prev_ns and ns > prev_ns * 1.10:
                      pct = (ns - prev_ns) / prev_ns * 100
                      failures.append(f" {bench}/{lang}: {prev_ns}ns -> {ns}ns (+{pct:.1f}%)")

          if failures:
              print("REGRESSION DETECTED (>10%):")
              for f in failures:
                  print(f)
              sys.exit(1)
          else:
              print("No regressions detected.")
          PYEOF

      # Copies the current results over results-prev.json, the file the
      # regression check reads as its reference.
      - name: Rotate results
        run: |
          if [ -f bench/results.json ]; then
            # Still non-fatal, as before, but a failed copy is now visible.
            cp bench/results.json bench/results-prev.json \
              || echo "::warning::Failed to copy bench/results.json to bench/results-prev.json."
          else
            echo "::warning::bench/results.json not found; nothing to rotate."
          fi

      - name: Commit updated results
        uses: stefanzweifel/git-auto-commit-action@v5
        with:
          commit_message: "chore(bench): update nightly results [skip ci]"
          file_pattern: "bench/results.json bench/results-prev.json bench/hw-baseline.json"
          branch: main