# overgraph 0.11.0

from __future__ import annotations

import json
import subprocess
import sys
import tempfile
import unittest
from pathlib import Path


# Fourth ancestor (parents[3]) of this file's resolved path, i.e. the directory
# three levels above the one containing this module. Presumably the repository
# root, as the name suggests — adjust the index if the file is moved.
REPO_ROOT = Path(__file__).resolve().parents[3]
# Path of the comparison script, located relative to REPO_ROOT.
COMPARE = REPO_ROOT / "tools/bench/compare_baseline.py"


def make_run_payload(
    *,
    language: str,
    profile_name: str,
    harness_stage: str,
    scenarios: list[dict[str, object]],
) -> dict[str, object]:
    """Build a benchmark-run payload dict for use as test input.

    The run identity (schema version, language, profile name, harness stage)
    appears twice: once at the top level next to ``"status": "ok"`` and once
    inside ``"parsed_stdout_json"``, where it is followed by the scenarios.

    Args:
        language: Value stored under ``"language"`` at both levels.
        profile_name: Value stored under ``"profile_name"`` at both levels.
        harness_stage: Value stored under ``"harness_stage"`` at both levels.
        scenarios: Scenario records stored, without copying, under
            ``parsed_stdout_json["scenarios"]``.

    Returns:
        The assembled payload; it contains only plain dicts, strings, ints and
        whatever ``scenarios`` holds.
    """
    identity: dict[str, object] = {
        "schema_version": 1,
        "language": language,
        "profile_name": profile_name,
        "harness_stage": harness_stage,
    }
    # Copy the identity fields first so key order matches at both levels.
    envelope: dict[str, object] = dict(identity)
    envelope["status"] = "ok"
    envelope["parsed_stdout_json"] = {**identity, "scenarios": scenarios}
    return envelope


class CompareBaselineTests(unittest.TestCase):
    """End-to-end checks for the ``compare_baseline.py`` command-line script.

    Every test writes a baseline and a candidate run payload into a scratch
    directory, executes the script in a subprocess, and asserts on the exit
    code plus either a generated report file or the captured output.
    """

    def setUp(self) -> None:
        """Create a scratch directory (removed after the test) and input paths."""
        scratch = tempfile.TemporaryDirectory()
        # addCleanup runs even when the test fails, like the ``with`` form would.
        self.addCleanup(scratch.cleanup)
        self.tmpdir = Path(scratch.name)
        self.baseline = self.tmpdir / "baseline.json"
        self.candidate = self.tmpdir / "candidate.json"

    @staticmethod
    def _scenario(
        scenario_id: str, name: str, category: str, p95_us: float
    ) -> dict[str, object]:
        """Return one scenario record carrying only a ``p95_us`` statistic."""
        return {
            "scenario_id": scenario_id,
            "name": name,
            "category": category,
            "stats": {"p95_us": p95_us},
        }

    @staticmethod
    def _write_run(
        path: Path,
        *,
        language: str,
        profile_name: str,
        harness_stage: str,
        scenarios: list[dict[str, object]],
    ) -> None:
        """Serialize a run payload built by ``make_run_payload`` to *path*."""
        payload = make_run_payload(
            language=language,
            profile_name=profile_name,
            harness_stage=harness_stage,
            scenarios=scenarios,
        )
        path.write_text(json.dumps(payload), encoding="utf-8")

    def _run_compare(self, *extra_args: str) -> subprocess.CompletedProcess[str]:
        """Run the compare script on this test's baseline and candidate files.

        Args:
            *extra_args: Additional command-line arguments, appended after the
                ``--baseline`` and ``--candidate`` options.

        Returns:
            The completed process with stdout and stderr captured as text. A
            non-zero exit code does not raise; callers assert on it.
        """
        command = [
            sys.executable,
            str(COMPARE),
            "--baseline",
            str(self.baseline),
            "--candidate",
            str(self.candidate),
            *extra_args,
        ]
        return subprocess.run(
            command,
            cwd=REPO_ROOT,
            capture_output=True,
            text=True,
            check=False,
        )

    def test_compare_passes_for_small_delta(self) -> None:
        """Deltas of +4% and +3% with no explicit thresholds yield a clean report."""
        report = self.tmpdir / "report.json"
        self._write_run(
            self.baseline,
            language="python",
            profile_name="small",
            harness_stage="connector-benchmark-v1",
            scenarios=[
                self._scenario("S-CRUD-001", "upsert_node", "crud", 100.0),
                self._scenario("S-CRUD-002", "upsert_edge", "crud", 200.0),
            ],
        )
        self._write_run(
            self.candidate,
            language="python",
            profile_name="small",
            harness_stage="connector-benchmark-v1",
            scenarios=[
                self._scenario("S-CRUD-001", "upsert_node", "crud", 104.0),
                self._scenario("S-CRUD-002", "upsert_edge", "crud", 206.0),
            ],
        )

        proc = self._run_compare("--report-json", str(report))

        self.assertEqual(proc.returncode, 0, msg=proc.stderr)
        payload = json.loads(report.read_text(encoding="utf-8"))
        self.assertEqual(payload["summary"]["failure_count"], 0)
        self.assertEqual(payload["summary"]["warning_count"], 0)

    def test_compare_fails_when_regression_exceeds_fail_threshold(self) -> None:
        """A +40% regression against a 20% fail threshold exits with code 2."""
        report_md = self.tmpdir / "report.md"
        self._write_run(
            self.baseline,
            language="node",
            profile_name="small",
            harness_stage="connector-benchmark-v2",
            scenarios=[self._scenario("S-MAIN-001", "flush", "maintenance", 1000.0)],
        )
        self._write_run(
            self.candidate,
            language="node",
            profile_name="small",
            harness_stage="connector-benchmark-v2",
            scenarios=[self._scenario("S-MAIN-001", "flush", "maintenance", 1400.0)],
        )

        proc = self._run_compare(
            "--warn-threshold-pct",
            "10",
            "--fail-threshold-pct",
            "20",
            "--report-md",
            str(report_md),
        )

        self.assertEqual(proc.returncode, 2, msg=proc.stderr)
        report_text = report_md.read_text(encoding="utf-8")
        self.assertIn("fail", report_text)
        self.assertIn("+40.00%", report_text)

    def test_allowlist_suppresses_failure(self) -> None:
        """An allowlist entry permitting up to 100% absorbs a +80% regression."""
        allowlist = self.tmpdir / "allowlist.json"
        report = self.tmpdir / "report.json"
        stage = "criterion-quick-core-advanced-maintenance-recovery"
        self._write_run(
            self.baseline,
            language="rust",
            profile_name="small",
            harness_stage=stage,
            scenarios=[self._scenario("S-MAIN-001", "flush", "maintenance", 100.0)],
        )
        self._write_run(
            self.candidate,
            language="rust",
            profile_name="small",
            harness_stage=stage,
            scenarios=[self._scenario("S-MAIN-001", "flush", "maintenance", 180.0)],
        )
        allowlist.write_text(
            json.dumps(
                {
                    "schema_version": 1,
                    "entries": [
                        {
                            "scenario_id": "S-MAIN-001",
                            "language": "rust",
                            "profile_name": "small",
                            "harness_stage": stage,
                            "max_regression_pct": 100,
                            "reason": "expected maintenance variance in shared runner",
                        }
                    ],
                }
            ),
            encoding="utf-8",
        )

        proc = self._run_compare(
            "--allowlist",
            str(allowlist),
            "--report-json",
            str(report),
        )

        self.assertEqual(proc.returncode, 0, msg=proc.stderr)
        payload = json.loads(report.read_text(encoding="utf-8"))
        self.assertEqual(payload["summary"]["failure_count"], 0)
        self.assertEqual(payload["summary"]["allowlisted_count"], 1)

    def test_profile_mismatch_fails_fast(self) -> None:
        """Runs recorded with different profile names are rejected."""
        self._write_run(
            self.baseline,
            language="rust",
            profile_name="small",
            harness_stage="core-benchmark-v1-parity",
            scenarios=[self._scenario("S-CRUD-001", "upsert_node", "crud", 100.0)],
        )
        self._write_run(
            self.candidate,
            language="rust",
            profile_name="large",
            harness_stage="core-benchmark-v1-parity",
            scenarios=[self._scenario("S-CRUD-001", "upsert_node", "crud", 101.0)],
        )

        proc = self._run_compare()

        # The message may land on either stream, so search both.
        output = proc.stderr + proc.stdout
        self.assertNotEqual(proc.returncode, 0, msg=output)
        self.assertIn("Profile mismatch", output)

    def test_harness_stage_mismatch_fails_fast(self) -> None:
        """Runs recorded with different harness stages are rejected."""
        self._write_run(
            self.baseline,
            language="node",
            profile_name="small",
            harness_stage="connector-benchmark-v2",
            scenarios=[self._scenario("S-CRUD-001", "upsert_node", "crud", 100.0)],
        )
        self._write_run(
            self.candidate,
            language="node",
            profile_name="small",
            harness_stage="connector-benchmark-v3-parity",
            scenarios=[self._scenario("S-CRUD-001", "upsert_node", "crud", 101.0)],
        )

        proc = self._run_compare()

        # The message may land on either stream, so search both.
        output = proc.stderr + proc.stdout
        self.assertNotEqual(proc.returncode, 0, msg=output)
        self.assertIn("Harness stage mismatch", output)

    def test_no_overlapping_scenarios_fails_fast(self) -> None:
        """Runs that share no scenario ID are rejected."""
        self._write_run(
            self.baseline,
            language="python",
            profile_name="small",
            harness_stage="connector-benchmark-v2-parity",
            scenarios=[self._scenario("S-CRUD-001", "upsert_node", "crud", 100.0)],
        )
        self._write_run(
            self.candidate,
            language="python",
            profile_name="small",
            harness_stage="connector-benchmark-v2-parity",
            scenarios=[
                self._scenario("S-ADV-005", "export_adjacency", "advanced", 101.0)
            ],
        )

        proc = self._run_compare()

        # The message may land on either stream, so search both.
        output = proc.stderr + proc.stdout
        self.assertNotEqual(proc.returncode, 0, msg=output)
        self.assertIn("No overlapping scenario IDs", output)

    def test_fail_on_warning_returns_exit_code_one(self) -> None:
        """A +12% delta between the 10%/20% thresholds exits 1 with the flag set."""
        self._write_run(
            self.baseline,
            language="rust",
            profile_name="small",
            harness_stage="core-benchmark-v1-parity",
            scenarios=[self._scenario("S-CRUD-001", "upsert_node", "crud", 100.0)],
        )
        self._write_run(
            self.candidate,
            language="rust",
            profile_name="small",
            harness_stage="core-benchmark-v1-parity",
            scenarios=[self._scenario("S-CRUD-001", "upsert_node", "crud", 112.0)],
        )

        proc = self._run_compare(
            "--warn-threshold-pct",
            "10",
            "--fail-threshold-pct",
            "20",
            "--fail-on-warning",
        )

        self.assertEqual(proc.returncode, 1, msg=proc.stderr)
        self.assertIn("warnings=1", proc.stdout)


if __name__ == "__main__":
    unittest.main()