matcher_rs 0.15.0

#!/usr/bin/env python3
# /// script
# requires-python = ">=3.10"
# ///

from __future__ import annotations

import argparse
import pathlib

from bench_utils import (
    METRIC_CHOICES,
    compare_result_maps,
    load_aggregate_input,
    print_change_section,
    print_path_section,
)


def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the benchmark comparison script.

    The parser takes two positional paths (``baseline`` then ``candidate``)
    followed by optional flags for the aggregation metric, the change and
    noise thresholds (both floats, in percent), and two boolean switches
    controlling whether noisy and one-sided rows are reported.

    Returns:
        A configured ``argparse.ArgumentParser``; nothing is parsed here.
    """
    description = (
        "Compare two benchmark inputs. Accepts a run directory, "
        "an aggregate.json file, or a single raw benchmark output."
    )
    cli = argparse.ArgumentParser(description=description)

    # Positional inputs, registered in the order they appear on the command line.
    for positional in ("baseline", "candidate"):
        cli.add_argument(positional, type=pathlib.Path)

    cli.add_argument(
        "--metric",
        default="median",
        choices=METRIC_CHOICES,
        help="Metric to aggregate when raw benchmark files are provided.",
    )

    # Float-valued percentage thresholds: (flag, default, help text).
    percent_options = (
        (
            "--min-change-pct",
            3.0,
            "Ignore rows whose absolute percentage change is smaller than this threshold.",
        ),
        (
            "--noisy-threshold-pct",
            5.0,
            "Mark rows noisy when spread exceeds this threshold. Default: 5.",
        ),
    )
    for flag, default_value, help_text in percent_options:
        cli.add_argument(flag, type=float, default=default_value, help=help_text)

    # Opt-in boolean switches: (flag, help text). Both default to False.
    boolean_switches = (
        ("--show-noisy", "Include noisy rows in comparison output."),
        ("--show-missing", "Also print benchmarks that exist in only one input."),
    )
    for flag, help_text in boolean_switches:
        cli.add_argument(flag, action="store_true", help=help_text)

    return cli


def format_metadata(label: str, metadata: dict[str, str]) -> str:
    """Render a metadata mapping as one labelled, human-readable line.

    Args:
        label: Text placed before the word ``metadata`` (e.g. ``"Baseline"``).
        metadata: Key/value pairs to display; a falsy value means "no metadata".

    Returns:
        ``"<label> metadata: none"`` when ``metadata`` is falsy, otherwise
        ``"<label> metadata: k1=v1, k2=v2, ..."`` with entries ordered by key.
    """
    prefix = f"{label} metadata: "
    if metadata:
        # Sort by key so the output is stable regardless of insertion order.
        pairs = [f"{name}={metadata[name]}" for name in sorted(metadata)]
        return prefix + ", ".join(pairs)
    return prefix + "none"


def main() -> int:
    """Load two benchmark inputs, compare them, and print a report.

    Both inputs are loaded via ``load_aggregate_input`` with the same metric
    and noise threshold. Paths present in both inputs and flagged noisy in
    either are excluded from the comparison unless ``--show-noisy`` is given,
    in which case nothing is suppressed. The report prints a header, both
    metadata lines, the regression and improvement sections, then (when
    applicable) the suppressed noisy rows and the one-sided benchmarks.

    Returns:
        Always ``0``.
    """
    opts = build_parser().parse_args()

    # Both inputs are loaded with identical aggregation settings.
    load_options = {
        "metric": opts.metric,
        "noisy_threshold_pct": opts.noisy_threshold_pct,
    }
    baseline = load_aggregate_input(opts.baseline, **load_options)
    candidate = load_aggregate_input(opts.candidate, **load_options)

    # A shared path counts as noisy when either side flags it.
    noisy_rows = set()
    for path in sorted(set(baseline.rows).intersection(candidate.rows)):
        if baseline.rows[path].noisy or candidate.rows[path].noisy:
            noisy_rows.add(path)

    # With --show-noisy nothing is hidden; otherwise every noisy shared path is.
    hidden = set() if opts.show_noisy else noisy_rows

    baseline_values = {}
    for path, row in baseline.rows.items():
        if path not in hidden:
            baseline_values[path] = row.value_s
    candidate_values = {}
    for path, row in candidate.rows.items():
        if path not in hidden:
            candidate_values[path] = row.value_s

    comparison = compare_result_maps(
        baseline_values,
        candidate_values,
        min_change_pct=opts.min_change_pct,
    )
    regressions, improvements, baseline_only, candidate_only = comparison

    header_fields = [
        f"Baseline: {baseline.path}",
        f"Candidate: {candidate.path}",
        f"Metric: {baseline.metric}",
    ]
    print(" | ".join(header_fields))
    print(format_metadata("Baseline", baseline.metadata))
    print(format_metadata("Candidate", candidate.metadata))
    print()

    print_change_section("Regression", regressions)
    print()
    print_change_section("Improvement", improvements)

    suppressed_noisy = sorted(hidden)
    if suppressed_noisy:
        print()
        print_path_section("Suppressed noisy rows", suppressed_noisy)

    # One-sided benchmarks are reported only on request, and only if any exist.
    if not opts.show_missing or not (baseline_only or candidate_only):
        return 0

    print()
    print_path_section("Only in baseline", baseline_only)
    print()
    print_path_section("Only in candidate", candidate_only)
    return 0


# Script entry point: run main() and use its return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())