#!/usr/bin/env bash
#
# Resolves the project root (the directory one level above this script),
# makes it the working directory, and sets the report paths used by the rest
# of the script.
#
# Environment overrides (each keeps its value when already set and non-empty):
#   ARTIFACTS_DIR  default: $ROOT_DIR/artifacts
#   BASELINE_JSON  default: $ARTIFACTS_DIR/perf_baseline_report.json
#   TUNED_JSON     default: $ARTIFACTS_DIR/perf_tuned_report.json
#   OUT_JSON       default: $ARTIFACTS_DIR/perf_comparison_report.json
#   OUT_MD         default: $ARTIFACTS_DIR/perf_comparison_report.md
set -euo pipefail

# CDPATH is cleared for this one `cd`: with an exported CDPATH and a relative
# script path, `cd` may resolve the directory through CDPATH and echo it to
# stdout, which would leave ROOT_DIR holding two lines (or the wrong tree).
# `--` protects against a path that starts with a dash.
ROOT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
cd -- "$ROOT_DIR"

ARTIFACTS_DIR="${ARTIFACTS_DIR:-$ROOT_DIR/artifacts}"
BASELINE_JSON="${BASELINE_JSON:-$ARTIFACTS_DIR/perf_baseline_report.json}"
TUNED_JSON="${TUNED_JSON:-$ARTIFACTS_DIR/perf_tuned_report.json}"
OUT_JSON="${OUT_JSON:-$ARTIFACTS_DIR/perf_comparison_report.json}"
OUT_MD="${OUT_MD:-$ARTIFACTS_DIR/perf_comparison_report.md}"

#######################################
# Print the value that follows "KEY": on the first line of FILE containing
# that key. The value is the run of characters after the colon (and any
# whitespace) up to the next comma, closing brace, or whitespace.
# Arguments:
#   $1 - key name (interpolated into an extended regex, so it should not
#        contain regex metacharacters)
#   $2 - path of the file to search
# Outputs:
#   The value on stdout; a diagnostic on stderr when nothing is found.
# Returns:
#   0 when a value was printed, 1 otherwise.
#######################################
extract_json_number() {
  local key="$1"
  local file="$2"
  local line
  # Anchoring the capture to the key keeps other "name": value pairs on the
  # same line from being picked up instead.
  local value_re="\"$key\"[[:space:]]*:[[:space:]]*([^,}[:space:]]+)"

  # -m1 stops at the first matching line, so no `head` is needed and grep
  # cannot be killed by SIGPIPE under pipefail.
  if ! line="$(grep -m1 -E "\"$key\"[[:space:]]*:" -- "$file")"; then
    printf 'extract_json_number: key "%s" not found in %s\n' "$key" "$file" >&2
    return 1
  fi

  if [[ "$line" =~ $value_re ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
    return 0
  fi

  printf 'extract_json_number: key "%s" has no value in %s\n' "$key" "$file" >&2
  return 1
}

#######################################
# Print the numeric value of METRIC for WORKLOAD.
# Looks at the first line of FILE that contains "WORKLOAD": and returns the
# first "METRIC": <digits/dots> that appears after that key on the same line,
# so the workload's metrics are expected to share a line with its key.
# Arguments:
#   $1 - path of the file to search
#   $2 - workload key name (interpolated into an extended regex)
#   $3 - metric key name (interpolated into an extended regex)
# Outputs:
#   The value on stdout; a diagnostic on stderr when nothing is found.
# Returns:
#   0 when a value was printed, 1 otherwise.
#######################################
extract_workload_percentile() {
  local file="$1"
  local workload="$2"
  local metric="$3"
  local line rest
  local workload_re="\"$workload\"[[:space:]]*:"
  local metric_re="\"$metric\"[[:space:]]*:[[:space:]]*([0-9.]+)"

  if ! line="$(grep -m1 -E "$workload_re" -- "$file")"; then
    printf 'extract_workload_percentile: workload "%s" not found in %s\n' \
      "$workload" "$file" >&2
    return 1
  fi

  if [[ "$line" =~ $workload_re ]]; then
    # Keep only the text after the workload key. The regex match below is
    # leftmost, so it yields the first metric after this workload rather
    # than one belonging to a workload mentioned earlier or later.
    rest="${line#*"${BASH_REMATCH[0]}"}"
    if [[ "$rest" =~ $metric_re ]]; then
      printf '%s\n' "${BASH_REMATCH[1]}"
      return 0
    fi
  fi

  printf 'extract_workload_percentile: metric "%s" not found for workload "%s" in %s\n' \
    "$metric" "$workload" "$file" >&2
  return 1
}

#######################################
# Print the relative change from BASELINE to TUNED as a percentage with two
# decimals: ((tuned - baseline) / baseline) * 100.
# Prints "0.00" when either input is empty or does not start like a number,
# or when the baseline is zero (the change is undefined in those cases).
# Arguments:
#   $1 - baseline value
#   $2 - tuned value
# Outputs:
#   The formatted percentage on stdout, followed by a newline.
#######################################
percent_delta() {
  local baseline="$1"
  local tuned="$2"
  # LC_ALL=C keeps "." as the decimal separator regardless of the caller's
  # locale.
  LC_ALL=C awk -v b="$baseline" -v t="$tuned" '
    # True when v begins (after optional blanks and sign) with a digit or
    # with a dot followed by a digit.
    function looks_numeric(v) { return v ~ /^[ \t]*[-+]?[.]?[0-9]/ }
    BEGIN {
      # (b + 0) forces a numeric comparison; a plain b == 0 compares as
      # strings when b is not a number and would let a zero divisor through.
      if (!looks_numeric(b) || !looks_numeric(t) || (b + 0) == 0) {
        print "0.00"
      } else {
        printf "%.2f\n", ((t - b) / b) * 100.0
      }
    }'
}

# Both input reports must be regular files before anything is read from
# them; otherwise say which one is missing on stderr and stop with status 1.
[[ -f "$BASELINE_JSON" ]] || {
  echo "missing baseline report: $BASELINE_JSON" >&2
  exit 1
}
[[ -f "$TUNED_JSON" ]] || {
  echo "missing tuned report: $TUNED_JSON" >&2
  exit 1
}

# Report metadata: generation time in UTC and the short commit hash
# ("unknown" is appended when git cannot resolve HEAD).
TIMESTAMP_UTC=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
COMMIT_SHA=$(git rev-parse --short HEAD 2>/dev/null || echo "unknown")

# Naming: B_* is read from the baseline report, T_* from the tuned report,
# and D_* is the percentage change between the two.
# Assignments stay as plain statements so that a failing extraction still
# aborts the script under `set -e`.

# Fraud workload, p95.
B_FRAUD_P95=$(extract_workload_percentile "$BASELINE_JSON" "fraud" "p95")
T_FRAUD_P95=$(extract_workload_percentile "$TUNED_JSON" "fraud" "p95")
D_FRAUD_P95=$(percent_delta "$B_FRAUD_P95" "$T_FRAUD_P95")

# Recommendation workload, p95.
B_RECO_P95=$(extract_workload_percentile "$BASELINE_JSON" "recommendation" "p95")
T_RECO_P95=$(extract_workload_percentile "$TUNED_JSON" "recommendation" "p95")
D_RECO_P95=$(percent_delta "$B_RECO_P95" "$T_RECO_P95")

# Supply-chain workload, p95.
B_SUPPLY_P95=$(extract_workload_percentile "$BASELINE_JSON" "supply_chain" "p95")
T_SUPPLY_P95=$(extract_workload_percentile "$TUNED_JSON" "supply_chain" "p95")
D_SUPPLY_P95=$(percent_delta "$B_SUPPLY_P95" "$T_SUPPLY_P95")

# Ingest throughput (events per second).
B_INGEST_EPS=$(extract_json_number "events_per_sec" "$BASELINE_JSON")
T_INGEST_EPS=$(extract_json_number "events_per_sec" "$TUNED_JSON")
D_INGEST_EPS=$(percent_delta "$B_INGEST_EPS" "$T_INGEST_EPS")

# Write the machine-readable comparison to OUT_JSON. String fields are
# emitted inside double quotes; metric values are emitted bare, exactly as
# they were extracted or computed.
{
  printf '{\n'
  printf '  "timestamp_utc": "%s",\n' "$TIMESTAMP_UTC"
  printf '  "commit_sha": "%s",\n' "$COMMIT_SHA"
  printf '  "baseline_report": "%s",\n' "$(basename "$BASELINE_JSON")"
  printf '  "tuned_report": "%s",\n' "$(basename "$TUNED_JSON")"
  printf '  "deltas_percent": {\n'
  printf '    "fraud_p95_latency": %s,\n' "$D_FRAUD_P95"
  printf '    "recommendation_p95_latency": %s,\n' "$D_RECO_P95"
  printf '    "supply_chain_p95_latency": %s,\n' "$D_SUPPLY_P95"
  printf '    "ingest_events_per_sec": %s\n' "$D_INGEST_EPS"
  printf '  },\n'
  printf '  "raw": {\n'
  printf '    "baseline": {\n'
  printf '      "fraud_p95_latency_micros": %s,\n' "$B_FRAUD_P95"
  printf '      "recommendation_p95_latency_micros": %s,\n' "$B_RECO_P95"
  printf '      "supply_chain_p95_latency_micros": %s,\n' "$B_SUPPLY_P95"
  printf '      "ingest_events_per_sec": %s\n' "$B_INGEST_EPS"
  printf '    },\n'
  printf '    "tuned": {\n'
  printf '      "fraud_p95_latency_micros": %s,\n' "$T_FRAUD_P95"
  printf '      "recommendation_p95_latency_micros": %s,\n' "$T_RECO_P95"
  printf '      "supply_chain_p95_latency_micros": %s,\n' "$T_SUPPLY_P95"
  printf '      "ingest_events_per_sec": %s\n' "$T_INGEST_EPS"
  printf '    }\n'
  printf '  }\n'
  printf '}\n'
} >"$OUT_JSON"

# Write the human-readable summary to OUT_MD; each argument below becomes
# one line of the Markdown file.
printf '%s\n' \
  "# Performance Comparison Report" \
  "" \
  "- Timestamp (UTC): $TIMESTAMP_UTC" \
  "- Commit: $COMMIT_SHA" \
  "- Baseline: $(basename "$BASELINE_JSON")" \
  "- Tuned: $(basename "$TUNED_JSON")" \
  "" \
  "## Delta (%)" \
  "- Fraud p95 latency: $D_FRAUD_P95" \
  "- Recommendation p95 latency: $D_RECO_P95" \
  "- Supply-chain p95 latency: $D_SUPPLY_P95" \
  "- Ingest events/sec: $D_INGEST_EPS" \
  >"$OUT_MD"

# Tell the caller which files were produced.
printf 'Wrote:\n  %s\n  %s\n' "$OUT_JSON" "$OUT_MD"
