# ruchy 4.2.0
#
# A systems scripting language that transpiles to idiomatic Rust with extreme
# quality engineering.
# Documentation
# Display name of this workflow.
name: Benchmark CI

# Triggers: pushes to `main` and pull requests that target `main`.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

# Environment variables exported to every step of every job below.
env:
  CARGO_TERM_COLOR: always
  RUST_BACKTRACE: "1"
  # Fixed seed values; presumably consumed by the project's tests/benchmarks
  # to keep runs reproducible -- confirm against the code that reads them.
  RUCHY_SEED: "42"
  RUCHY_BENCHMARK_SEED: "42"

jobs:
  # Runs the benchmark suite through cargo-criterion, writes a Markdown report
  # skeleton, and uploads both files as the `benchmark-results` artifact.
  benchmark:
    name: Performance Benchmarks
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Install Rust toolchain
        uses: dtolnay/rust-toolchain@stable

      - name: Cache cargo
        uses: Swatinem/rust-cache@v2

      - name: Install cargo-criterion
        run: cargo install cargo-criterion

      - name: Run benchmarks with statistical output
        run: |
          # Capture stdout only, so benchmark_results.json holds nothing but the
          # JSON messages; stderr (build output, errors) stays in the job log.
          # The run is best-effort: a failure does not fail the job, but it is
          # surfaced as a warning annotation instead of being silently dropped.
          cargo criterion --message-format json > benchmark_results.json \
            || echo "::warning::cargo criterion exited with a non-zero status"
          cat benchmark_results.json

      - name: Generate benchmark report
        run: |
          # Writes the static report header and an empty summary table.
          # NOTE(review): the configuration values below are hard-coded text,
          # not read from the benchmark run -- confirm they match the harness.
          {
            echo "## Benchmark Results"
            echo ""
            echo "### Configuration"
            echo "- Samples: 100 (minimum)"
            echo "- Confidence Level: 95%"
            echo "- Warmup: 3 iterations"
            # Expanded by the shell from the workflow-level env block.
            echo "- Random Seed: ${RUCHY_BENCHMARK_SEED}"
            echo ""
            echo "### Statistical Summary"
            echo "| Benchmark | Mean | Std Dev | 95% CI Lower | 95% CI Upper | Effect Size |"
            echo "|-----------|------|---------|--------------|--------------|-------------|"
          } > benchmark_report.md

      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results
          path: |
            benchmark_results.json
            benchmark_report.md

      - name: Check for regressions
        run: |
          echo "Checking for performance regressions..."
          # TODO: not implemented. This step only prints the message above and
          # always succeeds. The intended policy is to fail the job when any
          # benchmark regresses by more than 10%.

  # Pull-request-only job: runs the benchmarks with the base branch's
  # `benches/` directory, saves them as a Criterion baseline, then re-runs with
  # the PR's `benches/` directory and compares against that baseline.
  # NOTE(review): only `benches/` is swapped; the rest of the tree stays at the
  # PR revision for both runs -- confirm this is the intended comparison.
  baseline-comparison:
    name: Baseline Comparison
    runs-on: ubuntu-latest
    if: github.event_name == 'pull_request'

    steps:
      - uses: actions/checkout@v4
        with:
          # Full history, so the base branch is available as origin/<base_ref>.
          fetch-depth: 0

      - name: Install Rust toolchain
        uses: dtolnay/rust-toolchain@stable

      - name: Cache cargo
        uses: Swatinem/rust-cache@v2

      - name: Checkout baseline
        # The ref name is passed through an environment variable rather than
        # interpolated into the script, so a crafted branch name cannot inject
        # shell commands. The base branch is addressed by its remote-tracking
        # ref because the checkout step does not create local branches.
        env:
          BASE_REF: ${{ github.base_ref }}
        run: git checkout "origin/${BASE_REF}" -- benches/

      - name: Run baseline benchmarks
        run: cargo bench -- --save-baseline baseline

      - name: Checkout PR
        # HEAD is still the commit checked out for this pull request, so this
        # restores the PR's benches/ without using the untrusted head branch
        # name (and also works for PRs opened from forks).
        run: git checkout HEAD -- benches/

      - name: Run PR benchmarks and compare
        run: |
          cargo bench -- --baseline baseline
          # The lines below are informational text only; this step does not
          # compute or enforce the stated threshold itself.
          echo "### Comparison Results"
          echo "Effect sizes (Cohen's d) computed for all benchmarks"
          echo "Threshold: |d| > 0.5 triggers review"