from __future__ import annotations
import json
import re
from typing import TYPE_CHECKING
import pytest
import bench_compare
if TYPE_CHECKING:
from pathlib import Path
def _write_estimates(path: Path, stat: str, median: float) -> None:
    """Write a minimal Criterion-style estimates file to *path*.

    The JSON document has a single top-level key, *stat*, holding a
    ``point_estimate`` equal to *median* and a ``confidence_interval`` whose
    bounds are 0.9x and 1.1x of *median*.  Missing parent directories are
    created first.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    interval = {"lower_bound": median * 0.9, "upper_bound": median * 1.1}
    document = {stat: {"point_estimate": median, "confidence_interval": interval}}
    path.write_text(json.dumps(document), encoding="utf-8")
def _build_criterion_tree(criterion_dir: Path, stat: str = "median") -> None:
    """Populate *criterion_dir* with ``new`` sample estimates for the exact benches.

    Four groups are written: ``exact_d2`` and ``exact_d3`` (six benches each),
    ``exact_near_singular_3x3`` and ``exact_random_percentile_d3`` (three
    benches each).  Every file is produced via ``_write_estimates`` using
    *stat* as the statistic key.
    """

    def emit(group_dir: Path, bench: str, value: float) -> None:
        _write_estimates(group_dir / bench / "new" / "estimates.json", stat, value)

    # Per-dimension groups: the exact variants are scaled off the det_exact value.
    for dim, det_ns, exact_ns in ((2, 1.0, 4000.0), (3, 5.0, 21000.0)):
        dim_dir = criterion_dir / f"exact_d{dim}"
        per_bench = {
            "det": det_ns,
            "det_exact": exact_ns,
            "det_exact_f64_result": exact_ns * 1.1,
            "det_exact_rounded_f64": exact_ns * 1.2,
            "solve_exact_f64_result": exact_ns * 1.3,
            "solve_exact_rounded_f64": exact_ns * 1.4,
        }
        for bench, value in per_bench.items():
            emit(dim_dir, bench, value)

    # Groups with fixed values, written in the listed order.
    fixed_groups = {
        "exact_near_singular_3x3": (
            ("det_sign_exact", 12000.0),
            ("solve_exact_f64_result", 51000.0),
            ("solve_exact_rounded_f64", 52000.0),
        ),
        "exact_random_percentile_d3": (
            ("det_exact_p95", 33000.0),
            ("solve_exact_f64_result_p95", 54000.0),
            ("solve_exact_rounded_f64_p95", 55000.0),
        ),
    }
    for group_name, benches in fixed_groups.items():
        for bench, value in benches:
            emit(criterion_dir / group_name, bench, value)
def _build_vs_linalg_tree(criterion_dir: Path, stat: str = "median") -> None:
    """Populate *criterion_dir* with a ``d2`` group of solver benchmarks.

    la_stack benches get both a ``new`` and a ``last`` sample; the nalgebra and
    faer peers get ``last`` samples, and ``nalgebra_lu_solve`` additionally
    gets a ``new`` sample.  Files are written in the order listed below.
    """
    d2_dir = criterion_dir / "d2"
    samples = (
        ("la_stack_lu_solve", "new", 10.0),
        ("la_stack_lu_solve", "last", 20.0),
        ("nalgebra_lu_solve", "last", 30.0),
        ("faer_lu_solve", "last", 40.0),
        ("la_stack_ldlt_solve", "new", 12.0),
        ("la_stack_ldlt_solve", "last", 24.0),
        ("nalgebra_cholesky_solve", "last", 36.0),
        ("faer_ldlt_solve", "last", 48.0),
        ("nalgebra_lu_solve", "new", 31.0),
    )
    for bench, sample, value in samples:
        _write_estimates(d2_dir / bench / sample / "estimates.json", stat, value)
class TestFormatTime:
    """Checks the strings ``bench_compare._format_time`` renders per magnitude."""

    def test_nanoseconds(self) -> None:
        """A value of 42.5 is rendered with the ``ns`` unit."""
        rendered = bench_compare._format_time(42.5)
        assert rendered == "42.5 ns"

    def test_microseconds(self) -> None:
        """A value of 1500 is rendered with the ``µs`` unit."""
        rendered = bench_compare._format_time(1_500.0)
        assert rendered == "1.50 µs"

    def test_milliseconds(self) -> None:
        """A value of 2.5 million is rendered with the ``ms`` unit."""
        rendered = bench_compare._format_time(2_500_000.0)
        assert rendered == "2.50 ms"
class TestFormatPct:
    """Checks the strings ``bench_compare._format_pct`` renders for changes."""

    def test_improvement_is_bold(self) -> None:
        """A -10% change is wrapped in Markdown bold markers."""
        rendered = bench_compare._format_pct(-10.0)
        assert rendered == "**-10.0%**"

    def test_regression_not_bold(self) -> None:
        """A +10% change carries an explicit sign and no bold markers."""
        rendered = bench_compare._format_pct(10.0)
        assert rendered == "+10.0%"

    def test_small_change(self) -> None:
        """A +0.5% change is rendered plainly with one decimal."""
        rendered = bench_compare._format_pct(0.5)
        assert rendered == "+0.5%"
class TestGroupHeading:
    """Checks how ``bench_compare._group_heading`` maps group names to headings."""

    def test_dimension_group(self) -> None:
        """``exact_d3`` becomes ``D=3``."""
        heading = bench_compare._group_heading("exact_d3")
        assert heading == "D=3"

    def test_near_singular(self) -> None:
        """The near-singular group gets a readable heading."""
        heading = bench_compare._group_heading("exact_near_singular_3x3")
        assert heading == "Near-singular 3x3"

    def test_random_percentile(self) -> None:
        """The random-percentile group heading includes its dimension."""
        heading = bench_compare._group_heading("exact_random_percentile_d4")
        assert heading == "Random percentile D=4"

    def test_large_entries(self) -> None:
        """The large-entries group gets a readable heading."""
        heading = bench_compare._group_heading("exact_large_entries_3x3")
        assert heading == "Large entries 3x3"

    def test_hilbert_4x4(self) -> None:
        """The 4x4 Hilbert group gets a readable heading."""
        heading = bench_compare._group_heading("exact_hilbert_4x4")
        assert heading == "Hilbert 4x4"

    def test_hilbert_5x5(self) -> None:
        """The 5x5 Hilbert group gets a readable heading."""
        heading = bench_compare._group_heading("exact_hilbert_5x5")
        assert heading == "Hilbert 5x5"

    def test_unknown_passthrough(self) -> None:
        """An unrecognised group name is returned unchanged."""
        heading = bench_compare._group_heading("something_else")
        assert heading == "something_else"
def test_read_estimate_success(tmp_path: Path) -> None:
    """A complete estimates file yields the point estimate and both CI bounds."""
    estimates_file = tmp_path / "estimates.json"
    _write_estimates(estimates_file, "median", 42.0)

    value, ci_low, ci_high = bench_compare._read_estimate(estimates_file, "median")

    assert value == 42.0
    # The fixture writes the bounds as 0.9x / 1.1x of the median.
    assert ci_low == pytest.approx(42.0 * 0.9)
    assert ci_high == pytest.approx(42.0 * 1.1)
def test_read_estimate_no_ci(tmp_path: Path) -> None:
    """Without a confidence interval, both bounds equal the point estimate."""
    estimates_file = tmp_path / "estimates.json"
    estimates_file.parent.mkdir(parents=True, exist_ok=True)
    document = {"median": {"point_estimate": 99.0}}
    estimates_file.write_text(json.dumps(document), encoding="utf-8")

    value, ci_low, ci_high = bench_compare._read_estimate(estimates_file, "median")

    assert (value, ci_low, ci_high) == (99.0, 99.0, 99.0)
def test_read_estimate_missing_stat(tmp_path: Path) -> None:
    """Requesting a stat absent from the file raises a descriptive KeyError."""
    estimates_file = tmp_path / "estimates.json"
    _write_estimates(estimates_file, "median", 1.0)

    expected_message = "stat 'mean' not found"
    with pytest.raises(KeyError, match=expected_message):
        bench_compare._read_estimate(estimates_file, "mean")
def test_read_estimate_malformed_json_names_file(tmp_path: Path) -> None:
    """Unparseable JSON raises a ValueError whose message names the file."""
    estimates_file = tmp_path / "estimates.json"
    estimates_file.write_text("{not json", encoding="utf-8")

    # Escape the path so regex metacharacters in it are matched literally.
    expected_pattern = re.escape(f"malformed Criterion estimates JSON in {estimates_file}")
    with pytest.raises(ValueError, match=expected_pattern):
        bench_compare._read_estimate(estimates_file, "median")
def test_read_estimate_missing_point_estimate_names_field(tmp_path: Path) -> None:
    """A stat entry lacking ``point_estimate`` raises a KeyError naming the field."""
    estimates_file = tmp_path / "estimates.json"
    empty_stat = {"median": {}}
    estimates_file.write_text(json.dumps(empty_stat), encoding="utf-8")

    expected_message = "field 'point_estimate' for stat 'median' not found"
    with pytest.raises(KeyError, match=expected_message):
        bench_compare._read_estimate(estimates_file, "median")
def test_read_estimate_non_numeric_ci_bound_names_field(tmp_path: Path) -> None:
    """A string-valued CI bound raises a ValueError naming the offending field."""
    estimates_file = tmp_path / "estimates.json"
    bad_interval = {"lower_bound": "fast", "upper_bound": 2.0}
    document = {"median": {"point_estimate": 1.0, "confidence_interval": bad_interval}}
    estimates_file.write_text(json.dumps(document), encoding="utf-8")

    expected_pattern = r"field 'lower_bound' for stat 'median'.*not numeric"
    with pytest.raises(ValueError, match=expected_pattern):
        bench_compare._read_estimate(estimates_file, "median")
def test_collect_results(tmp_path: Path) -> None:
    """Collecting the ``new`` sample finds every bench written by the fixture.

    ``_build_criterion_tree`` writes 18 estimate files across four groups, so
    18 results are expected and each group name must be represented.
    """
    _build_criterion_tree(tmp_path)
    results = bench_compare._collect_results(tmp_path, "new", "median")

    # These were fused onto one line (a SyntaxError); they are two statements.
    assert len(results) == 18
    groups = {r.group for r in results}

    assert "exact_d2" in groups
    assert "exact_d3" in groups
    assert "exact_random_percentile_d3" in groups
    assert "exact_near_singular_3x3" in groups
def test_collect_results_empty_dir(tmp_path: Path) -> None:
    """A directory containing no estimates yields an empty result list."""
    collected = bench_compare._collect_results(tmp_path, "new", "median")
    assert collected == []
def test_collect_comparisons(tmp_path: Path) -> None:
    """Baseline-vs-latest comparisons pair up benches, including renamed ones.

    The ``v0.3.0`` baseline uses the older bench names ``det_exact_f64`` and
    ``solve_exact_f64`` (and their ``_p95`` forms); the assertions check that
    the current ``*_f64_result`` / ``*_rounded_f64`` benches are compared
    against those names.  The default scope is expected to cover only the
    per-dimension groups, while ``scope="all-benches"`` additionally includes
    the random-percentile group.
    """
    _build_criterion_tree(tmp_path)

    # Baseline samples for the per-dimension groups (twice the "new" values).
    for d, det, det_exact in [(2, 2.0, 8000.0), (3, 10.0, 42000.0)]:
        group = tmp_path / f"exact_d{d}"
        _write_estimates(group / "det" / "v0.3.0" / "estimates.json", "median", det)
        _write_estimates(group / "det_exact" / "v0.3.0" / "estimates.json", "median", det_exact)
        _write_estimates(group / "det_exact_f64" / "v0.3.0" / "estimates.json", "median", det_exact * 1.1)
        _write_estimates(group / "solve_exact_f64" / "v0.3.0" / "estimates.json", "median", det_exact * 1.3)

    # Baseline samples for the random-percentile group.
    random_group = tmp_path / "exact_random_percentile_d3"
    _write_estimates(random_group / "det_exact_p95" / "v0.3.0" / "estimates.json", "median", 66000.0)
    _write_estimates(random_group / "solve_exact_f64_p95" / "v0.3.0" / "estimates.json", "median", 108000.0)

    comparisons = bench_compare._collect_comparisons(tmp_path, "v0.3.0", "median")

    # These two asserts were fused onto one line (a SyntaxError); they are
    # separate statements.
    assert len(comparisons) == 12
    assert {c.group for c in comparisons} == {"exact_d2", "exact_d3"}
    for c in comparisons:
        assert c.speedup == pytest.approx(c.baseline_ns / c.current_ns)
    assert {(c.bench, c.baseline_bench) for c in comparisons if c.baseline_bench is not None} == {
        ("det_exact_f64_result", "det_exact_f64"),
        ("det_exact_rounded_f64", "det_exact_f64"),
        ("solve_exact_f64_result", "solve_exact_f64"),
        ("solve_exact_rounded_f64", "solve_exact_f64"),
    }

    all_comparisons = bench_compare._collect_comparisons(
        tmp_path,
        "v0.3.0",
        "median",
        scope="all-benches",
    )
    assert len(all_comparisons) == 15
    random_comparisons = [c for c in all_comparisons if c.group == "exact_random_percentile_d3"]
    assert {(c.bench, c.baseline_bench) for c in random_comparisons} == {
        ("det_exact_p95", None),
        ("solve_exact_f64_result_p95", "solve_exact_f64_p95"),
        ("solve_exact_rounded_f64_p95", "solve_exact_f64_p95"),
    }
def test_collect_comparisons_zero_current(tmp_path: Path) -> None:
    """A zero latest measurement gives infinite speedup and a -100% change."""
    bench_dir = tmp_path / "exact_d2" / "det"
    for sample, median in (("new", 0.0), ("v0.3.0", 5.0)):
        _write_estimates(bench_dir / sample / "estimates.json", "median", median)

    comparisons = bench_compare._collect_comparisons(tmp_path, "v0.3.0", "median")

    assert len(comparisons) == 1
    only = comparisons[0]
    assert only.speedup == float("inf")
    assert only.pct_change == pytest.approx(-100.0)
def test_collect_comparisons_missing_baseline(tmp_path: Path) -> None:
    """With no ``v0.3.0`` samples on disk, no comparisons are produced."""
    _build_criterion_tree(tmp_path)
    found = bench_compare._collect_comparisons(tmp_path, "v0.3.0", "median")
    assert found == []
def test_collect_vs_linalg_release_signal_uses_baseline_peer_context(tmp_path: Path) -> None:
    """Default scope compares only la_stack benches, with peer timings attached.

    The nalgebra / faer figures on each row are expected to come from the
    ``last`` (baseline) samples written by ``_build_vs_linalg_tree``.
    """
    _build_vs_linalg_tree(tmp_path)
    comparisons = bench_compare._collect_comparisons(tmp_path, "last", "median")

    bench_names = [row.bench for row in comparisons]
    assert bench_names == ["la_stack_lu_solve", "la_stack_ldlt_solve"]

    lu_row = comparisons[0]
    lu_observed = (
        lu_row.baseline_ns,
        lu_row.current_ns,
        lu_row.baseline_nalgebra_ns,
        lu_row.baseline_faer_ns,
    )
    assert lu_observed == (20.0, 10.0, 30.0, 40.0)

    ldlt_row = comparisons[1]
    ldlt_observed = (ldlt_row.baseline_nalgebra_ns, ldlt_row.baseline_faer_ns)
    assert ldlt_observed == (36.0, 48.0)
def test_collect_vs_linalg_all_benches_includes_latest_peer_rows(tmp_path: Path) -> None:
    """``scope="all-benches"`` also reports the peer bench that has a ``new`` sample."""
    _build_vs_linalg_tree(tmp_path)

    rows = bench_compare._collect_comparisons(tmp_path, "last", "median", scope="all-benches")

    expected_order = ["la_stack_lu_solve", "nalgebra_lu_solve", "la_stack_ldlt_solve"]
    assert [row.bench for row in rows] == expected_order
def test_snapshot_tables_per_dimension(tmp_path: Path) -> None:
    """Snapshot output has one heading per group plus the table header row."""
    _build_criterion_tree(tmp_path)
    collected = bench_compare._collect_results(tmp_path, "new", "median")
    tables = bench_compare._snapshot_tables(collected, "median")

    expected_fragments = (
        "### D=2",
        "### D=3",
        "### Random percentile D=3",
        "### Near-singular 3x3",
        "| Benchmark | Median | 95% CI |",
    )
    for fragment in expected_fragments:
        assert fragment in tables
def test_snapshot_tables_uses_stat_label(tmp_path: Path) -> None:
    """The table header is labelled with the requested stat, not ``Median``."""
    _build_criterion_tree(tmp_path, stat="mean")
    collected = bench_compare._collect_results(tmp_path, "new", "mean")
    tables = bench_compare._snapshot_tables(collected, "mean")

    mean_header = "| Benchmark | Mean | 95% CI |"
    assert mean_header in tables
    assert "Median" not in tables
def test_comparison_tables_per_dimension(tmp_path: Path) -> None:
    """Comparison output has per-dimension headings and annotates renamed benches."""
    _build_criterion_tree(tmp_path)

    # Baseline samples use the older bench names for the f64 variants.
    for dim, det_ns, exact_ns in ((2, 2.0, 8000.0), (3, 10.0, 42000.0)):
        dim_dir = tmp_path / f"exact_d{dim}"
        baseline_medians = {
            "det": det_ns,
            "det_exact": exact_ns,
            "det_exact_f64": exact_ns * 1.1,
            "solve_exact_f64": exact_ns * 1.3,
        }
        for bench, value in baseline_medians.items():
            _write_estimates(dim_dir / bench / "v0.3.0" / "estimates.json", "median", value)

    rows = bench_compare._collect_comparisons(tmp_path, "v0.3.0", "median")
    tables = bench_compare._comparison_tables(rows, "v0.3.0")

    expected_fragments = (
        "### D=2",
        "### D=3",
        "| Benchmark | v0.3.0 | Latest | Change | Speedup |",
        "det_exact_rounded_f64 (vs det_exact_f64)",
        "solve_exact_f64_result (vs solve_exact_f64)",
    )
    for fragment in expected_fragments:
        assert fragment in tables
def test_comparison_tables_include_vs_linalg_peer_context(tmp_path: Path) -> None:
    """Comparison tables for the linalg tree carry nalgebra and faer columns."""
    _build_vs_linalg_tree(tmp_path)
    rows = bench_compare._collect_comparisons(tmp_path, "last", "median")
    tables = bench_compare._comparison_tables(rows, "last")

    expected_lines = (
        "| Benchmark | last | Latest | Change | Speedup | last nalgebra | last faer |",
        "| la_stack_lu_solve | 20.0 ns | 10.0 ns | **-50.0%** | 2.00x | 30.0 ns | 40.0 ns |",
        "| la_stack_ldlt_solve | 24.0 ns | 12.0 ns | **-50.0%** | 2.00x | 36.0 ns | 48.0 ns |",
    )
    for line in expected_lines:
        assert line in tables
def test_main_snapshot_writes_output(tmp_path: Path) -> None:
    """``--snapshot`` exits 0 and writes a report with the expected content."""
    criterion_dir = tmp_path / "criterion"
    _build_criterion_tree(criterion_dir)
    report_path = tmp_path / "PERFORMANCE.md"

    argv = ["--snapshot", "--criterion-dir", str(criterion_dir), "--output", str(report_path)]
    exit_code = bench_compare.main(argv)

    assert exit_code == 0
    assert report_path.exists()
    report = report_path.read_text(encoding="utf-8")

    required_fragments = (
        "### D=2",
        "### Random percentile D=3",
        "### Near-singular 3x3",
        "just performance-local",
        "just performance-release",
        "just performance-github-assets",
        "just performance-release <current-tag> <previous-tag>",
        "Older curated release-to-release reports are archived in `docs/archive/performance/`.",
    )
    for fragment in required_fragments:
        assert fragment in report
    # The report must not contain raw git checkout instructions.
    assert "git checkout" not in report
def test_main_no_criterion_dir(tmp_path: Path, capsys: pytest.CaptureFixture[str]) -> None:
    """A nonexistent Criterion directory exits with 2 and an error on stderr."""
    missing_dir = tmp_path / "nonexistent"
    report_path = tmp_path / "out.md"

    exit_code = bench_compare.main(["--criterion-dir", str(missing_dir), "--output", str(report_path)])

    assert exit_code == 2
    captured = capsys.readouterr()
    assert "No Criterion results" in captured.err
def test_main_comparison_no_baseline(tmp_path: Path, capsys: pytest.CaptureFixture[str]) -> None:
    """Comparing against a baseline with no samples exits with 2 and an error."""
    criterion_dir = tmp_path / "criterion"
    _build_criterion_tree(criterion_dir)
    report_path = tmp_path / "out.md"

    argv = ["v0.3.0", "--criterion-dir", str(criterion_dir), "--output", str(report_path)]
    exit_code = bench_compare.main(argv)

    assert exit_code == 2
    captured = capsys.readouterr()
    assert "No comparison data" in captured.err