name: Performance Regression Testing
on:
workflow_dispatch:
pull_request:
branches:
- main
paths:
- "src/**"
- "benches/**"
- "Cargo.toml"
- "Cargo.lock"
push:
branches:
- main
paths:
- "src/**"
- "benches/**"
- "Cargo.toml"
- "Cargo.lock"
permissions:
contents: read
actions: read
pull-requests: read
concurrency:
group: perf-regress-${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
CARGO_TERM_COLOR: always
RUST_BACKTRACE: 1
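  # BENCHMARK_TIMEOUT is forwarded to `benchmark-utils compare --bench-timeout` in the comparison step below.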
  BENCHMARK_TIMEOUT: 7200
  DELAUNAY_BENCH_SEED_SEARCH_LIMIT: 4096
jobs:
performance-regression:
runs-on: macos-15
timeout-minutes: 135
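    # Note: BENCHMARK_TIMEOUT (7200, presumably seconds, i.e. ~120 min) must stay below this 135-minute job limit.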
steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
        with:
fetch-depth: 0
- name: Install Rust toolchain
        uses: actions-rust-lang/setup-rust-toolchain@a0b538fa0b742a6aa35d6e2c169b4bd06d225a98
        with:
cache: true
- name: Install uv (Python package manager)
        uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098
        with:
version: "latest"
- name: Verify uv installation
run: uv --version
- name: Find baseline artifact (latest semver tag baseline)
id: find_baseline
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd
        with:
script: |
try {
const prefix = 'performance-baseline-';
              // Accept both legacy dotted names (vX.Y.Z) and underscore-separated names (vX_Y_Z).
const semverRe = /^performance-baseline-v(\d+)[._](\d+)[._](\d+)(?:[._-].*)?$/;
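              // Matches e.g. "performance-baseline-v1.2.3" or "performance-baseline-v1_2_3-foo" (optional suffix after the patch number).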
const parseSemver = (name) => {
const m = name.match(semverRe);
if (!m) return null;
return [parseInt(m[1], 10), parseInt(m[2], 10), parseInt(m[3], 10)];
};
const compareSemver = (a, b) => {
for (let i = 0; i < 3; i += 1) {
if (a[i] !== b[i]) return a[i] - b[i];
}
return 0;
};
// Fetch successful generate-baseline.yml runs (most recent first)
// Note: Each run requires a follow-up listWorkflowRunArtifacts call, so keep this cap conservative
// to avoid slowdowns and API rate limits.
const MAX_RUNS = 50;
console.log(`Fetching recent generate-baseline.yml runs (up to ${MAX_RUNS})...`);
let count = 0;
const runs = await github.paginate(
github.rest.actions.listWorkflowRuns,
{
owner: context.repo.owner,
repo: context.repo.repo,
workflow_id: 'generate-baseline.yml',
                  status: 'success',
per_page: 100
},
(response, done) => {
// Limit to MAX_RUNS runs total across pages (no overshoot)
const remaining = Math.max(0, MAX_RUNS - count);
if (remaining === 0) { done(); return []; }
const slice = response.data.slice(0, remaining);
count += slice.length;
if (count >= MAX_RUNS) done();
return slice;
}
);
console.log(`Found ${runs.length} successful generate-baseline runs`);
// Build artifact cache: artifact name → {run_id, run_created_at}
// Note: We keep the *newest* run for a given artifact name (runs are newest-first).
const artifactCache = new Map();
for (const run of runs) {
try {
const artifacts = await github.rest.actions.listWorkflowRunArtifacts({
owner: context.repo.owner,
repo: context.repo.repo,
run_id: run.id
});
for (const artifact of artifacts.data.artifacts) {
if (artifact.expired === true) continue;
if (!artifact.name.startsWith(prefix)) continue;
if (!artifactCache.has(artifact.name)) {
artifactCache.set(artifact.name, {
run_id: run.id,
run_created_at: run.created_at
});
}
}
} catch (error) {
console.log(`Warning: Could not fetch artifacts for run ${run.id}: ${error.message}`);
continue;
}
}
console.log(`Built cache of ${artifactCache.size} baseline artifacts`);
// Prefer the highest semver tag baseline present in the cache.
let best = null;
for (const [artifactName, artifactInfo] of artifactCache.entries()) {
const ver = parseSemver(artifactName);
if (!ver) continue;
const tag = artifactName.slice(prefix.length);
if (!best || compareSemver(ver, best.ver) > 0) {
best = { name: artifactName, info: artifactInfo, ver, tag };
}
}
if (best) {
console.log(`Selected baseline ${best.name} (tag ${best.tag}) from run ${best.info.run_id}`);
core.setOutput('found', 'true');
core.setOutput('artifact_name', best.name);
core.setOutput('run_id', best.info.run_id.toString());
core.setOutput('tag', best.tag);
return;
}
// Fallback: pick the most recent baseline artifact (including manual runs)
if (artifactCache.size > 0) {
let mostRecent = null;
let mostRecentTime = null;
for (const [artifactName, artifactInfo] of artifactCache.entries()) {
const runTime = new Date(artifactInfo.run_created_at);
if (!mostRecentTime || runTime > mostRecentTime) {
mostRecentTime = runTime;
mostRecent = { name: artifactName, info: artifactInfo };
}
}
if (mostRecent) {
console.log(
`Fallback: selected most recent baseline ${mostRecent.name} in run ${mostRecent.info.run_id} ` +
`(created: ${mostRecent.info.run_created_at})`
);
core.setOutput('found', 'true');
core.setOutput('artifact_name', mostRecent.name);
core.setOutput('run_id', mostRecent.info.run_id.toString());
core.setOutput('tag', mostRecent.name.slice(prefix.length));
return;
}
}
console.log('No baseline artifacts found');
core.setOutput('found', 'false');
} catch (error) {
console.error(`Error searching for baseline artifacts: ${error.message}`);
core.setOutput('found', 'false');
}
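      # The download may still fail (e.g. the artifact expired after the lookup above), hence continue-on-error below.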
- name: Download latest baseline artifact
if: steps.find_baseline.outputs.found == 'true'
        uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3
        continue-on-error: true
with:
name: ${{ steps.find_baseline.outputs.artifact_name }}
path: baseline-artifact/
run-id: ${{ steps.find_baseline.outputs.run_id }}
github-token: ${{ secrets.GITHUB_TOKEN }}
- name: Prepare baseline for comparison
if: steps.find_baseline.outputs.found == 'true'
run: uv run benchmark-utils prepare-baseline
- name: Set baseline status if none found
if: steps.find_baseline.outputs.found != 'true'
run: uv run benchmark-utils set-no-baseline
- name: Skip benchmarks - no baseline available
if: env.BASELINE_EXISTS != 'true'
run: uv run benchmark-utils display-no-baseline
- name: Run performance regression test (compare vs tag baseline)
id: compare_regression
if: env.BASELINE_EXISTS == 'true'
continue-on-error: true
run: |
set -euo pipefail
# Ensure regression-summary reports this as a real run.
echo "SKIP_BENCHMARKS=false" >> "$GITHUB_ENV"
echo "SKIP_REASON=running" >> "$GITHUB_ENV"
echo " Baseline origin: ${BASELINE_ORIGIN:-unknown}"
echo " Baseline tag: ${BASELINE_TAG:-unknown}"
uv run benchmark-utils compare \
--baseline "baseline-artifact/baseline_results.txt" \
--bench-timeout "${BENCHMARK_TIMEOUT}" \
--dev
- name: Classify benchmark comparison outcome
if: env.BASELINE_EXISTS == 'true' && env.SKIP_BENCHMARKS == 'false'
run: |
set -euo pipefail
results_file="benches/compare_results.txt"
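          # compare_results.txt is expected to be written by `benchmark-utils compare` above; it is also uploaded as an artifact later.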
# Successful compare step => no regressions beyond configured threshold.
if [ "${{ steps.compare_regression.outcome }}" = "success" ]; then
echo "BENCHMARK_REGRESSION_DETECTED=false" >> "$GITHUB_ENV"
exit 0
fi
# Compare step failed. Distinguish "expected regression" from real benchmark errors.
if [ ! -f "$results_file" ]; then
echo "::error::Benchmark comparison failed and produced no results file."
exit 1
fi
if grep -q "❌ Error:" "$results_file"; then
echo "::error::Benchmark comparison failed due to benchmark execution error."
echo "::group::Benchmark comparison error details"
cat "$results_file"
echo "::endgroup::"
exit 1
fi
if grep -q "REGRESSION" "$results_file"; then
echo "BENCHMARK_REGRESSION_DETECTED=true" >> "$GITHUB_ENV"
warning_msg="Performance regressions detected vs baseline ${BASELINE_TAG:-unknown};"
warning_msg="${warning_msg} workflow allowed to pass by policy."
echo "::warning::${warning_msg}"
{
echo "### ⚠️ Performance Regression Detected"
echo ""
echo "- Baseline tag: \`${BASELINE_TAG:-unknown}\`"
echo "- Policy: regressions are warning-only in this workflow."
echo "- See uploaded artifact \`performance-regression-results-${{ github.run_number }}\`"
echo " and logs for details."
} >> "$GITHUB_STEP_SUMMARY"
exit 0
fi
echo "::error::Benchmark comparison failed for an unknown reason."
echo "::group::Benchmark comparison output"
cat "$results_file"
echo "::endgroup::"
exit 1
- name: Display regression test results
if: env.BASELINE_EXISTS == 'true' && env.SKIP_BENCHMARKS == 'false' && always()
run: uv run benchmark-utils display-results
- name: Upload regression test results
if: env.BASELINE_EXISTS == 'true' && env.SKIP_BENCHMARKS == 'false' && always()
        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        with:
name: performance-regression-results-${{ github.run_number }}
path: |
benches/compare_results.txt
baseline-artifact/baseline_results.txt
if-no-files-found: warn
retention-days: 30
- name: Summary
if: always()
run: uv run benchmark-utils regression-summary