1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# Benchmark regression check.
#
# Runs on non-draft PRs, on manual dispatch, and when called by other workflows (e.g. release.yml).
#
# How regression detection works:
#   1. baseline.yml saves a baseline JSON after each merge to main (cached by commit SHA).
#   2. This workflow restores that baseline and passes it via --baseline to fluxbench.
#      If no cached baseline exists, one is generated from main on-the-fly.
#   3. Each benchmark has a per-bench threshold — regressions beyond it are flagged.
#   4. Exit codes are controlled by #[verify] expressions with severity levels:
#      - critical: exits non-zero -> job fails -> PR blocked
#      - warning:  exits zero -> shows warnings in summary
#      - info:     logged in the summary only
name: Benchmark

# Triggers: pull requests, calls from other workflows, and manual dispatch.
on:
  pull_request:
    # NOTE(review): the branch/type filters were empty in the reviewed copy.
    # `main` is assumed because the baseline is produced from main; confirm.
    branches: [main]
    # Default PR activity types plus `ready_for_review`, so a PR that leaves
    # draft state gets a run (the jobs skip draft PRs). TODO confirm the list.
    types: [opened, synchronize, reopened, ready_for_review]
  workflow_call:
  workflow_dispatch:
# At most one benchmark run per ref: a newer run in the same group cancels
# the one still in progress.
concurrency:
  group: 'benchmark-${{ github.ref }}'
  cancel-in-progress: true
# The workflow token is limited to read-only access to repository contents.
permissions:
  contents: read

# Keep cargo's colored output enabled in the job logs.
env:
  CARGO_TERM_COLOR: always
jobs:
  # Runs the shared test workflow first. Draft PRs are skipped; on non-PR
  # events `github.event.pull_request` is null, which compares equal to
  # `false` under the expression language's loose equality, so the job runs.
  test:
    name: Test Suite
    if: github.event.pull_request.draft == false
    uses: ./.github/workflows/test.yml

  # Benchmarks the checked-out code against the baseline from main.
  # Depends on `test`, so it is skipped whenever `test` is skipped or fails.
  benchmark:
    name: Regression Check
    needs: test
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
        with:
          # Full history for all branches; the baseline-generation step
          # below checks out origin/main.
          fetch-depth: 0

      - name: Install Rust
        uses: dtolnay/rust-toolchain@stable

      - uses: Swatinem/rust-cache@v2
        with:
          prefix-key: bench

      - name: Build benchmarks
        run: cargo build --bench ci_regression --release

      # Restore the most recent baseline saved by baseline.yml on main.
      # Uses prefix matching — the exact key never matches, but restore-keys
      # picks the latest cache entry starting with "numr-bench-baseline-".
      - name: Restore baseline from main
        id: baseline-cache
        uses: actions/cache/restore@v5
        with:
          path: target/fluxbench/baseline.json
          key: numr-bench-baseline-dummy
          restore-keys: numr-bench-baseline-

      # If no cached baseline exists (expired or first run), generate one
      # from main. Checks out main, runs benchmarks to save baseline, then
      # returns to the PR commit.
      #
      # `cache-hit` is only 'true' on an exact key match, which the dummy key
      # above never produces, so the hashFiles() check is what actually
      # detects whether a baseline file was restored.
      - name: Generate baseline from main (if none cached)
        if: >-
          steps.baseline-cache.outputs.cache-hit != 'true'
          && !hashFiles('target/fluxbench/baseline.json')
        run: |
          # Remember the commit under test so it can be restored afterwards.
          pr_commit=$(git rev-parse HEAD)
          git checkout origin/main
          cargo bench --bench ci_regression -- --save-baseline
          git checkout "$pr_commit"

      # --format github-summary: renders a markdown table for the step summary.
      # --baseline (if file exists): enables regression comparison against main.
      # Exit code reflects critical verification failures (see flux.toml: fail_on_critical).
      - name: Run benchmarks
        run: |
          # Build the argument list as an array so each option stays a
          # separate word without relying on unquoted expansion.
          bench_args=(--format github-summary)
          if [ -f target/fluxbench/baseline.json ]; then
            bench_args+=(--baseline target/fluxbench/baseline.json)
          fi
          # Plain redirect (not a pipe): cargo's exit status is the step's status.
          cargo bench --bench ci_regression -- "${bench_args[@]}" >> "$GITHUB_STEP_SUMMARY"