#!/usr/bin/env bash
set -euo pipefail

# Print the command-line help text to stdout.
usage() {
    printf '%s\n' \
        'Usage:' \
        '  scripts/update_readme_bench.sh [--check]' \
        '' \
        'Reads Criterion outputs from target/criterion generated by:' \
        '  cargo bench --bench vs_bitvec' \
        '' \
        'and updates the README benchmark table block delimited by:' \
        '  <!-- BENCH_TABLES:BEGIN -->' \
        '  <!-- BENCH_TABLES:END -->'
}

# CHECK_MODE is 1 when the script should only verify that the README is
# current (exit non-zero if stale) instead of rewriting it.
CHECK_MODE=0

# Parse the command line.
# Globals:   CHECK_MODE (written)
# Arguments: the script's own arguments: nothing, --check, or -h/--help
# Outputs:   help text on stdout for -h/--help, on stderr for bad usage
# Returns:   0 on success; exits 0 after printing help, exits 1 on any
#            unrecognised or surplus argument
parse_args() {
    CHECK_MODE=0
    if [[ $# -eq 0 ]]; then
        return 0
    fi
    # Exactly one argument is accepted; anything after it is a usage error
    # rather than being silently ignored.
    if [[ $# -eq 1 ]]; then
        case "$1" in
            --check)
                CHECK_MODE=1
                return 0
                ;;
            -h | --help)
                usage
                exit 0
                ;;
        esac
    fi
    # Diagnostics belong on stderr so they never pollute piped stdout.
    usage >&2
    exit 1
}

parse_args "$@"

# Resolve every path from this script's own location so the script behaves the
# same regardless of the caller's working directory. The repository root is
# taken to be the parent of the directory containing this script.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd -- "${SCRIPT_DIR}/.." && pwd)"
README_PATH="${REPO_ROOT}/README.md"
# Lines in the README that fence the generated benchmark section; they are
# matched as whole lines by the awk programs below.
START_MARKER="<!-- BENCH_TABLES:BEGIN -->"
END_MARKER="<!-- BENCH_TABLES:END -->"
# Frozen in a separate step: `readonly VAR="$(cmd)"` would report readonly's
# exit status and hide a failing `cd` from `set -e`.
readonly SCRIPT_DIR REPO_ROOT README_PATH START_MARKER END_MARKER

# jq is used to read Criterion's estimates.json files; stop immediately with a
# clear message when it is not installed.
command -v jq >/dev/null 2>&1 || {
    echo "jq is required but was not found in PATH." >&2
    exit 1
}

# Print the median point estimate recorded by Criterion for one benchmark.
# Globals:   REPO_ROOT (read)
# Arguments: $1 - Criterion group directory name
#            $2 - benchmark directory name inside that group
# Outputs:   the numeric value of .median.point_estimate on stdout
#            (presumably nanoseconds, going by the function name);
#            diagnostics on stderr
# Returns:   exits 1 when the estimates file is missing or does not contain a
#            numeric median. Callers invoke this inside $(...), so the exit
#            ends that subshell and `set -e` aborts the script.
median_ns() {
    local group="$1"
    local bench="$2"
    local file="${REPO_ROOT}/target/criterion/${group}/${bench}/new/estimates.json"
    local value
    if [[ ! -f "${file}" ]]; then
        cat >&2 <<EOF
Missing benchmark file:
  ${file}

Run:
  cargo bench --bench vs_bitvec
EOF
        exit 1
    fi
    # Plain `jq -r` prints the literal "null" and exits 0 when the key is
    # absent, which would silently flow into the table arithmetic. -e makes jq
    # fail on null/false/no output, and select() rejects non-numeric values.
    if ! value="$(jq -er '.median.point_estimate | select(type == "number")' "${file}")"; then
        {
            echo "Benchmark file has no numeric .median.point_estimate:"
            echo "  ${file}"
        } >&2
        exit 1
    fi
    printf '%s\n' "${value}"
}

# Render a duration given in nanoseconds with two decimals and the largest
# fitting unit among ns, us and ms. No trailing newline is printed.
# Arguments: $1 - duration in nanoseconds
format_time() {
    awk -v ns="$1" 'BEGIN {
        # Start from the largest unit and step down while the value is small.
        divisor = 1000000.0; suffix = "ms";
        if (ns < 1000.0) {
            divisor = 1; suffix = "ns";
        } else if (ns < 1000000.0) {
            divisor = 1000.0; suffix = "us";
        }
        printf "%.2f %s", ns / divisor, suffix;
    }'
}

# Print how many times faster "ours" is than bitvec, as bitvec/ours with one
# decimal and an "x" suffix. Prints "n/a" when ours is zero or negative, which
# also avoids dividing by zero.
# Arguments: $1 - our timing
#            $2 - bitvec timing
format_speedup() {
    awk -v ours="$1" -v bitvec="$2" 'BEGIN {
        if (!(ours <= 0.0)) {
            printf "%.1fx", bitvec / ours;
        } else {
            print "n/a";
        }
    }'
}

# Print the first **YYYY-MM-DD** date (without the asterisks) found between
# the benchmark markers of the README; print nothing when there is none.
# Globals:   START_MARKER, END_MARKER, README_PATH (read)
# Outputs:   the date on stdout, or nothing
existing_bench_date() {
    # The digits are spelled out instead of using {4}/{2} interval
    # expressions: some awk implementations (older mawk and one-true-awk
    # releases) do not support intervals and would never match.
    awk -v start="${START_MARKER}" -v end="${END_MARKER}" '
        $0 == start {
            in_block = 1
            next
        }
        $0 == end {
            in_block = 0
        }
        in_block && match($0, /\*\*[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]\*\*/) {
            # Strip the two leading and two trailing asterisks.
            print substr($0, RSTART + 2, RLENGTH - 4)
            exit
        }
    ' "${README_PATH}"
}

# Print the date to embed in the generated block.
# In check mode the date already present in the README is reused, so that a
# different calendar day alone does not make the README look stale; in every
# other case today's UTC date (YYYY-MM-DD) is printed.
# Globals: CHECK_MODE (read)
block_date() {
    local previous=""
    if [[ "${CHECK_MODE}" -eq 1 ]]; then
        # A failed lookup is not fatal; it simply falls through to today's date.
        previous="$(existing_bench_date || true)"
    fi
    if [[ -n "${previous}" ]]; then
        printf '%s\n' "${previous}"
    else
        date -u +%F
    fi
}

# Print one Markdown table row comparing our benchmark with bitvec's.
# Arguments: $1 - label shown in the Operation column
#            $2 - Criterion group name
#            $3 - benchmark name of our implementation
#            $4 - benchmark name of the bitvec implementation
# Outputs:   "| `label` | ours | bitvec | **speedup** |" on stdout
row() {
    local label="$1" group="$2"
    local lhs_ns rhs_ns
    # Plain assignments (not `local x=$(...)`) so a failing lookup is seen by
    # `set -e`.
    lhs_ns="$(median_ns "${group}" "$3")"
    rhs_ns="$(median_ns "${group}" "$4")"
    local -a cells=(
        "$(format_time "${lhs_ns}")"
        "$(format_time "${rhs_ns}")"
        "$(format_speedup "${lhs_ns}" "${rhs_ns}")"
    )
    printf '| `%s` | %s | %s | **%s** |\n' "${label}" "${cells[@]}"
}

# Write the complete Markdown benchmark section (intro, two non-atomic tables,
# two atomic tables and the closing notes) to stdout. The date comes from
# block_date and every table row from row.
generate_block() {
    local collected_on
    collected_on="$(block_date)"

    # Header and separator lines shared by all four tables.
    local table_head='| Operation | bitflagset | bitvec | Speedup |
|-----------|-----------|--------|---------|'
    local spec bits words pair op

    # The first line deliberately ends in two spaces (a Markdown hard line
    # break); keeping them inside quotes protects them from whitespace trimming.
    printf '%s**%s**%s\n' \
        'All numbers below are Criterion medians from `cargo bench --bench vs_bitvec` on Apple M-series (AArch64), collected on ' \
        "${collected_on}" \
        '. Compared against bitvec `BitArray` (non-atomic) and `BitArray<AtomicU64>` / `BitVec<AtomicU64>` (atomic).  '
    printf '%s\n' \
        '`iter` rows measure `.iter().count()`.' \
        '' \
        '### Non-atomic: `BitSet` vs bitvec `BitArray`'

    # Each spec is "<bit width>:<number of u64 words>"; each pair is
    # "<our operation>:<bitvec benchmark suffix>".
    for spec in 256:4 1024:16; do
        bits="${spec%%:*}"
        words="${spec#*:}"
        printf '\n**%s-bit** (`[u64; %s]`):\n\n%s\n' "${bits}" "${words}" "${table_head}"
        for pair in bitor:bitor bitand:bitand bitxor:bitxor not:not iter:iter_ones; do
            op="${pair%%:*}"
            row "${op}" "${bits}bit" "ours_${op}" "bitvec_${pair#*:}"
        done
    done

    printf '%s\n' \
        '' \
        'Binary operators benefit from LLVM auto-vectorization of word-level loops into SIMD instructions.' \
        '' \
        '### Atomic: `AtomicBitSet` vs bitvec `BitArray<AtomicU64>`'

    for spec in 256:4 1024:16; do
        bits="${spec%%:*}"
        words="${spec#*:}"
        printf '\n**%s-bit** (`[AtomicU64; %s]`):\n\n%s\n' "${bits}" "${words}" "${table_head}"
        for pair in len:count_ones is_empty:not_any contains:get insert:set_aliased iter:iter_ones; do
            op="${pair%%:*}"
            row "${op}" "${bits}bit_atomic_vs_bitvec" "atomic_${op}" "bitvec_${pair#*:}"
        done
    done

    printf '%s\n' \
        '' \
        '`is_empty` uses short-circuit evaluation (early return on first non-zero word).'
}

# Copy a file while swapping the text between the benchmark markers for the
# contents of another file. The marker lines themselves are kept.
# Globals:   START_MARKER, END_MARKER (read)
# Arguments: $1 - file to read
#            $2 - file to write (overwritten)
#            $3 - file holding the replacement block
# Returns:   non-zero, with a message on stderr, when either marker line was
#            not found in the input
replace_block() {
    local src="$1"
    local dest="$2"
    local block_file="$3"
    awk -v start="${START_MARKER}" -v end="${END_MARKER}" -v block_file="${block_file}" '
        # Copy the replacement file to the output verbatim.
        function emit_block(    text) {
            while ((getline text < block_file) > 0)
                print text
            close(block_file)
        }
        BEGIN {
            skipping = 0
            found_start = 0
            found_end = 0
        }
        $0 == start {
            print
            emit_block()
            skipping = 1
            found_start = 1
            next
        }
        $0 == end {
            skipping = 0
            found_end = 1
            print
            next
        }
        # Old block content between the markers is dropped.
        skipping {
            next
        }
        {
            print
        }
        END {
            if (!(found_start && found_end)) {
                print "Missing benchmark markers in README.md" > "/dev/stderr"
                exit 1
            }
        }
    ' "${src}" > "${dest}"
}

# Main flow: build the new block, splice it into a copy of the README, then
# either compare (check mode) or install the result.
if [[ ! -f "${README_PATH}" ]]; then
    echo "README not found: ${README_PATH}" >&2
    exit 1
fi

# Scratch files: the generated block and the candidate README. Both are
# removed on every exit path.
block_tmp="$(mktemp)"
readme_tmp="$(mktemp)"
trap 'rm -f "${block_tmp}" "${readme_tmp}"' EXIT

generate_block > "${block_tmp}"
replace_block "${README_PATH}" "${readme_tmp}" "${block_tmp}"

if [[ "${CHECK_MODE}" -eq 1 ]]; then
    if cmp -s "${README_PATH}" "${readme_tmp}"; then
        echo "README benchmark tables are up to date."
        exit 0
    fi
    echo "README benchmark tables are stale. Run: scripts/update_readme_bench.sh" >&2
    # diff exits 1 when the files differ, which is the expected case here.
    diff -u "${README_PATH}" "${readme_tmp}" || true
    exit 1
fi

# Copy the new contents into the existing README instead of moving the temp
# file over it: mktemp creates files readable by the owner only, so `mv` would
# leave README.md with those restrictive permissions and would replace a
# symlinked README rather than write through it.
cp -- "${readme_tmp}" "${README_PATH}"
echo "Updated README benchmark tables from target/criterion."
