import json
import math
import os
import re
import subprocess
import sys
import tempfile
import time
from io import StringIO
from pathlib import Path
from unittest.mock import Mock, patch
import pytest
from benchmark_models import (
BenchmarkData,
CircumspherePerformanceData,
CircumsphereTestCase,
)
from benchmark_utils import (
DEFAULT_REGRESSION_THRESHOLD,
DEV_MODE_BENCH_ARGS,
BaselineGenerator,
BenchmarkRegressionHelper,
CriterionParser,
PerformanceComparator,
PerformanceSummaryGenerator,
ProjectRootNotFoundError,
WorkflowHelper,
create_argument_parser,
find_project_root,
main,
)
THRESHOLD_PERCENT = f"{DEFAULT_REGRESSION_THRESHOLD:.1f}%"
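# Human-readable threshold string used in expected output lines, e.g. "5.0%" when
# DEFAULT_REGRESSION_THRESHOLD is 5.0 (the exact value comes from benchmark_utils).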
def compute_average_time_change(current_results, baseline_results):
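"""Geometric-mean average of per-benchmark time changes, in percent.

Each current/baseline pair contributes a ratio (1 + change/100); pairs with a
missing or non-positive baseline mean, or a non-positive ratio, are skipped.
Used below to build the expected "Average time change" summary line.
"""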
time_changes = []
for current in current_results:
key = f"{current.points}_{current.dimension}"
baseline = baseline_results.get(key)
if not baseline or baseline.time_mean <= 0:
continue
time_change = ((current.time_mean - baseline.time_mean) / baseline.time_mean) * 100.0
time_changes.append(time_change)
if not time_changes:
return 0.0
ratios = [1.0 + (tc / 100.0) for tc in time_changes if (1.0 + (tc / 100.0)) > 0.0]
if not ratios:
return 0.0
avg_log = sum(math.log(ratio) for ratio in ratios) / len(ratios)
avg_ratio = math.exp(avg_log)
return (avg_ratio - 1.0) * 100.0
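# Example: changes of +10% and -10% give ratios 1.1 and 0.9, so the geometric mean is
# sqrt(1.1 * 0.9) ≈ 0.995, i.e. about -0.5% rather than the arithmetic-mean 0.0%.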
@pytest.fixture
def sample_estimates_data():
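"""Minimal Criterion estimates.json payload (values in nanoseconds).

110000.0 ns is expected to parse as 110.0 µs with a [100.0, 120.0] µs confidence
interval; see the assertions in TestCriterionParser.
"""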
return {
"mean": {
"point_estimate": 110000.0, "confidence_interval": {"lower_bound": 100000.0, "upper_bound": 120000.0},
},
}
@pytest.fixture
def sample_benchmark_data():
return {
"2d_1000": BenchmarkData(1000, "2D").with_timing(100.0, 110.0, 120.0, "µs"),
"2d_2000": BenchmarkData(2000, "2D").with_timing(190.0, 200.0, 210.0, "µs"),
"3d_1000": BenchmarkData(1000, "3D").with_timing(200.0, 220.0, 240.0, "µs"),
}
class TestCriterionParser:
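"""Tests for CriterionParser: parsing estimates.json (valid, zero-mean, very fast,
missing, malformed) and locating/sorting Criterion benchmark results."""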
def test_parse_estimates_json_valid_data(self, sample_estimates_data):
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
json.dump(sample_estimates_data, f)
f.flush()
estimates_path = Path(f.name)
try:
result = CriterionParser.parse_estimates_json(estimates_path, 1000, "2D")
assert result is not None
assert result.points == 1000
assert result.dimension == "2D"
assert result.time_mean == 110.0
assert result.time_low == 100.0
assert result.time_high == 120.0
assert result.time_unit == "µs"
assert result.throughput_mean is not None
assert result.throughput_mean == pytest.approx(9090.909, abs=0.001)
finally:
estimates_path.unlink()
def test_parse_estimates_json_zero_mean(self):
estimates_data = {"mean": {"point_estimate": 0.0, "confidence_interval": {"lower_bound": 0.0, "upper_bound": 0.0}}}
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
json.dump(estimates_data, f)
f.flush()
estimates_path = Path(f.name)
try:
result = CriterionParser.parse_estimates_json(estimates_path, 1000, "2D")
assert result is None
finally:
estimates_path.unlink()
def test_parse_estimates_json_very_fast_benchmark_division_by_zero_protection(self):
estimates_data = {
"mean": {
"point_estimate": 1000.0, "confidence_interval": {
"lower_bound": 0.0, "upper_bound": 2000.0,
},
},
}
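# A 0.0 ns lower bound yields time_low == 0.0, so the derived throughput_high
# (presumably points divided by the fastest time) must be guarded against division
# by zero; the test only requires a very large value (> 1e12) rather than an exception.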
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
json.dump(estimates_data, f)
f.flush()
estimates_path = Path(f.name)
try:
result = CriterionParser.parse_estimates_json(estimates_path, 1000, "2D")
assert result is not None
assert result.points == 1000
assert result.dimension == "2D"
assert result.time_mean == 1.0
assert result.time_low == 0.0
assert result.time_high == 2.0
assert result.throughput_high is not None
assert result.throughput_high > 1e12
assert result.throughput_mean is not None
assert result.throughput_low is not None
finally:
estimates_path.unlink()
def test_parse_estimates_json_invalid_file(self):
result = CriterionParser.parse_estimates_json(Path("nonexistent.json"), 1000, "2D")
assert result is None
def test_parse_estimates_json_malformed_json(self):
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
f.write("{ invalid json")
f.flush()
estimates_path = Path(f.name)
try:
result = CriterionParser.parse_estimates_json(estimates_path, 1000, "2D")
assert result is None
finally:
estimates_path.unlink()
@patch("benchmark_utils.Path.exists")
@patch("benchmark_utils.Path.iterdir")
def test_find_criterion_results_no_criterion_dir(self, mock_iterdir, mock_exists):
mock_exists.return_value = False
target_dir = Path("/fake/target")
results = CriterionParser.find_criterion_results(target_dir)
assert results == []
def test_find_criterion_results_sorting(self):
test_results = [
BenchmarkData(5000, "3D").with_timing(200.0, 220.0, 240.0, "µs"),
BenchmarkData(1000, "2D").with_timing(100.0, 110.0, 120.0, "µs"),
BenchmarkData(1000, "4D").with_timing(300.0, 320.0, 340.0, "µs"),
BenchmarkData(2000, "2D").with_timing(150.0, 160.0, 170.0, "µs"),
]
test_results.sort(key=lambda x: (int(x.dimension.rstrip("D")), x.points))
assert test_results[0].dimension == "2D"
assert test_results[0].points == 1000
assert test_results[1].dimension == "2D"
assert test_results[1].points == 2000
assert test_results[2].dimension == "3D"
assert test_results[2].points == 5000
assert test_results[3].dimension == "4D"
assert test_results[3].points == 1000
def test_ci_performance_suite_patterns(self):
ci_suite_results = [
BenchmarkData(10, "2D").with_timing(18.0, 20.0, 22.0, "µs"),
BenchmarkData(25, "2D").with_timing(38.0, 40.0, 42.0, "µs"),
BenchmarkData(50, "2D").with_timing(78.0, 80.0, 82.0, "µs"),
BenchmarkData(10, "3D").with_timing(48.0, 50.0, 52.0, "µs"),
BenchmarkData(25, "3D").with_timing(118.0, 125.0, 132.0, "µs"),
BenchmarkData(50, "3D").with_timing(245.0, 250.0, 255.0, "µs"),
BenchmarkData(10, "4D").with_timing(58.0, 60.0, 62.0, "µs"),
BenchmarkData(25, "4D").with_timing(118.0, 120.0, 122.0, "µs"),
BenchmarkData(50, "4D").with_timing(290.0, 300.0, 310.0, "µs"),
BenchmarkData(10, "5D").with_timing(78.0, 80.0, 82.0, "µs"),
BenchmarkData(25, "5D").with_timing(145.0, 150.0, 155.0, "µs"),
BenchmarkData(50, "5D").with_timing(290.0, 300.0, 310.0, "µs"),
]
ci_suite_results.sort(key=lambda x: (int(x.dimension.rstrip("D")), x.points))
expected_order = [(d, p) for d in ("2D", "3D", "4D", "5D") for p in (10, 25, 50)]
actual_order = [(b.dimension, b.points) for b in ci_suite_results]
assert actual_order == expected_order
class TestPerformanceComparator:
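"""Tests for PerformanceComparator: baseline-file parsing, per-benchmark time
comparisons, overall regression summaries, comparison metadata, and error-file output."""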
@pytest.fixture
def comparator(self):
project_root = Path("/fake/project")
return PerformanceComparator(project_root)
@pytest.fixture
def sample_baseline_content(self):
return """Date: 2023-06-15 10:30:00 PDT
Git commit: abc123def456
Hardware Information:
OS: macOS
CPU: Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz
CPU Cores: 6
CPU Threads: 12
Memory: 16.0 GB
Rust: rustc 1.70.0 (90c541806 2023-05-31)
Target: x86_64-apple-darwin
=== 1000 Points (2D) ===
Time: [100.0, 110.0, 120.0] µs
Throughput: [8.333, 9.091, 10.0] Kelem/s
=== 2000 Points (2D) ===
Time: [190.0, 200.0, 210.0] µs
Throughput: [9.524, 10.0, 10.526] Kelem/s
=== 1000 Points (3D) ===
Time: [200.0, 220.0, 240.0] µs
Throughput: [4.167, 4.545, 5.0] Kelem/s
"""
def test_parse_baseline_file(self, comparator, sample_baseline_content):
results = comparator._parse_baseline_file(sample_baseline_content)
assert len(results) == 3
assert "1000_2D" in results
assert "2000_2D" in results
assert "1000_3D" in results
bench_2d_1000 = results["1000_2D"]
assert bench_2d_1000.points == 1000
assert bench_2d_1000.dimension == "2D"
assert bench_2d_1000.time_mean == 110.0
assert bench_2d_1000.throughput_mean == 9.091
def test_write_time_comparison_no_regression(self, comparator):
current = BenchmarkData(1000, "2D").with_timing(100.0, 110.0, 120.0, "µs")
baseline = BenchmarkData(1000, "2D").with_timing(95.0, 105.0, 115.0, "µs")
output = StringIO()
time_change, is_regression = comparator._write_time_comparison(output, current, baseline)
assert time_change == pytest.approx(4.76, abs=0.01)
assert not is_regression
result = output.getvalue()
assert "4.8%" in result
assert "✅ OK: Time change +4.8% within acceptable range" in result
def test_write_time_comparison_with_regression(self, comparator):
current = BenchmarkData(1000, "2D").with_timing(100.0, 115.0, 130.0, "µs")
baseline = BenchmarkData(1000, "2D").with_timing(95.0, 100.0, 105.0, "µs")
output = StringIO()
time_change, is_regression = comparator._write_time_comparison(output, current, baseline)
assert time_change == pytest.approx(15.0, abs=1e-9)
assert is_regression
result = output.getvalue()
assert "15.0%" in result
assert "⚠️ REGRESSION" in result
def test_write_time_comparison_with_improvement(self, comparator):
current = BenchmarkData(1000, "2D").with_timing(80.0, 90.0, 100.0, "µs")
baseline = BenchmarkData(1000, "2D").with_timing(95.0, 100.0, 105.0, "µs")
output = StringIO()
time_change, is_regression = comparator._write_time_comparison(output, current, baseline)
assert time_change == pytest.approx(-10.0, abs=1e-9)
assert not is_regression
result = output.getvalue()
assert "10.0%" in result
assert "✅ IMPROVEMENT: Time decreased by 10.0% (faster performance)" in result
def test_write_time_comparison_zero_baseline(self, comparator):
current = BenchmarkData(1000, "2D").with_timing(100.0, 110.0, 120.0, "µs")
baseline = BenchmarkData(1000, "2D").with_timing(0.0, 0.0, 0.0, "µs")
output = StringIO()
time_change, is_regression = comparator._write_time_comparison(output, current, baseline)
assert time_change is None
assert not is_regression
result = output.getvalue()
assert "N/A (baseline mean is 0)" in result
@pytest.mark.parametrize("dev_mode", [False, True])
@patch("benchmark_utils.run_cargo_command")
def test_compare_omits_quiet_flag(self, mock_cargo, dev_mode):
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
baseline_file = temp_path / "baseline.txt"
baseline_content = """Date: 2023-12-15 10:30:00 UTC
Git commit: abc123
=== 10 Points (2D) ===
Time: [1.0, 1.0, 1.0] µs
"""
baseline_file.write_text(baseline_content)
mock_result = Mock()
mock_result.returncode = 0
mock_result.stdout = ""
mock_cargo.return_value = mock_result
comparator = PerformanceComparator(temp_path)
comparator.compare_with_baseline(baseline_file, dev_mode=dev_mode)
assert mock_cargo.call_count >= 1
args = mock_cargo.call_args[0][0]
assert "--quiet" not in args
if dev_mode:
for arg in DEV_MODE_BENCH_ARGS:
assert arg in args
assert mock_cargo.call_args.kwargs.get("capture_output") is True
def test_write_performance_comparison_no_average_regression(self, comparator):
current_results = [
BenchmarkData(1000, "2D").with_timing(108.0, 120.0, 132.0, "µs"),
BenchmarkData(2000, "2D").with_timing(186.0, 196.0, 206.0, "µs"),
BenchmarkData(1000, "3D").with_timing(170.0, 187.0, 204.0, "µs"),
]
baseline_results = {
"1000_2D": BenchmarkData(1000, "2D").with_timing(95.0, 100.0, 105.0, "µs"),
"2000_2D": BenchmarkData(2000, "2D").with_timing(190.0, 200.0, 210.0, "µs"),
"1000_3D": BenchmarkData(1000, "3D").with_timing(200.0, 220.0, 240.0, "µs"),
}
output = StringIO()
regression_found = comparator._write_performance_comparison(output, current_results, baseline_results)
assert not regression_found
result = output.getvalue()
assert "SUMMARY" in result
assert "Total benchmarks compared: 3" in result
assert f"Individual regressions (>{THRESHOLD_PERCENT}): 1" in result
assert re.search(r"Average time change:\s*-?0\.0%", result)
assert "✅ OVERALL OK" in result
def test_write_performance_comparison_with_average_regression(self, comparator):
current_results = [
BenchmarkData(1000, "2D").with_timing(118.0, 120.0, 122.0, "µs"),
BenchmarkData(2000, "2D").with_timing(222.0, 230.0, 238.0, "µs"),
BenchmarkData(1000, "3D").with_timing(209.0, 217.8, 226.6, "µs"),
]
baseline_results = {
"1000_2D": BenchmarkData(1000, "2D").with_timing(95.0, 100.0, 105.0, "µs"),
"2000_2D": BenchmarkData(2000, "2D").with_timing(190.0, 200.0, 210.0, "µs"),
"1000_3D": BenchmarkData(1000, "3D").with_timing(200.0, 220.0, 240.0, "µs"),
}
output = StringIO()
regression_found = comparator._write_performance_comparison(output, current_results, baseline_results)
assert regression_found
result = output.getvalue()
assert "SUMMARY" in result
assert "Total benchmarks compared: 3" in result
assert f"Individual regressions (>{THRESHOLD_PERCENT}): 2" in result
assert "Average time change: 11.0%" in result
assert "🚨 OVERALL REGRESSION" in result
def test_write_performance_comparison_with_average_improvement(self, comparator):
current_results = [
BenchmarkData(1000, "2D").with_timing(81.0, 90.0, 99.0, "µs"),
BenchmarkData(2000, "2D").with_timing(175.2, 184.0, 192.8, "µs"),
BenchmarkData(1000, "3D").with_timing(209.0, 224.4, 239.8, "µs"),
]
baseline_results = {
"1000_2D": BenchmarkData(1000, "2D").with_timing(95.0, 100.0, 105.0, "µs"),
"2000_2D": BenchmarkData(2000, "2D").with_timing(190.0, 200.0, 210.0, "µs"),
"1000_3D": BenchmarkData(1000, "3D").with_timing(200.0, 220.0, 240.0, "µs"),
}
output = StringIO()
regression_found = comparator._write_performance_comparison(output, current_results, baseline_results)
assert not regression_found
result = output.getvalue()
assert "SUMMARY" in result
assert "Total benchmarks compared: 3" in result
assert f"Individual regressions (>{THRESHOLD_PERCENT}): 0" in result
expected_average_change = compute_average_time_change(current_results, baseline_results)
expected_average_line = f"Average time change: {expected_average_change:.1f}%"
assert expected_average_line in result
assert "✅ OVERALL OK" in result
def test_write_performance_comparison_missing_baseline(self, comparator):
current_results = [
BenchmarkData(1000, "2D").with_timing(105.0, 110.0, 115.0, "µs"),
BenchmarkData(3000, "2D").with_timing(300.0, 310.0, 320.0, "µs"),
]
baseline_results = {
"1000_2D": BenchmarkData(1000, "2D").with_timing(95.0, 100.0, 105.0, "µs"),
}
output = StringIO()
regression_found = comparator._write_performance_comparison(output, current_results, baseline_results)
assert regression_found
result = output.getvalue()
assert "Total benchmarks compared: 1" in result
assert "3000 Points (2D)" in result
def test_write_performance_comparison_no_benchmarks(self, comparator):
output = StringIO()
regression_found = comparator._write_performance_comparison(output, [], {})
assert not regression_found
@patch("benchmark_utils.get_git_commit_hash")
@patch("benchmark_utils.datetime")
def test_prepare_comparison_metadata(self, mock_datetime, mock_git, comparator, sample_baseline_content):
mock_now = Mock()
mock_now.strftime.return_value = "Thu Jun 15 14:30:00 PDT 2023"
mock_datetime.now.return_value.astimezone.return_value = mock_now
mock_git.return_value = "def456abc789"
metadata = comparator._prepare_comparison_metadata(sample_baseline_content)
assert metadata["current_date"] == "Thu Jun 15 14:30:00 PDT 2023"
assert metadata["current_commit"] == "def456abc789"
assert metadata["baseline_date"] == "2023-06-15 10:30:00 PDT"
assert metadata["baseline_commit"] == "abc123def456"
@patch("benchmark_utils.get_git_commit_hash")
def test_prepare_comparison_metadata_git_failure(self, mock_git, comparator, sample_baseline_content):
mock_git.side_effect = Exception("Git not available")
metadata = comparator._prepare_comparison_metadata(sample_baseline_content)
assert metadata["current_commit"] == "unknown"
def test_regression_threshold_configuration(self, comparator):
assert comparator.regression_threshold == DEFAULT_REGRESSION_THRESHOLD
comparator.regression_threshold = 10.0
current = BenchmarkData(1000, "2D").with_timing(100.0, 107.0, 114.0, "µs")
baseline = BenchmarkData(1000, "2D").with_timing(95.0, 100.0, 105.0, "µs")
output = StringIO()
time_change, is_regression = comparator._write_time_comparison(output, current, baseline)
assert time_change == pytest.approx(7.0, abs=0.001)
assert not is_regression
def test_write_error_file_baseline_not_found(self, comparator):
with tempfile.TemporaryDirectory() as temp_dir:
output_file = Path(temp_dir) / "error_results.txt"
baseline_file = Path(temp_dir) / "nonexistent_baseline.txt"
comparator._write_error_file(output_file, "Baseline file not found", baseline_file)
assert output_file.exists()
content = output_file.read_text()
assert "Comparison Results" in content
assert "❌ Error: Baseline file not found" in content
assert str(baseline_file) in content
assert "This error prevented the benchmark comparison from completing successfully" in content
def test_write_error_file_benchmark_error(self, comparator):
with tempfile.TemporaryDirectory() as temp_dir:
output_file = Path(temp_dir) / "error_results.txt"
error_message = "Failed to compile benchmarks: error[E0277]: trait bound not satisfied"
comparator._write_error_file(output_file, "Benchmark execution error", error_message)
assert output_file.exists()
content = output_file.read_text()
assert "❌ Error: Benchmark execution error" in content
assert error_message in content
assert "Please check the CI logs for more information" in content
def test_write_error_file_creates_parent_directory(self, comparator):
with tempfile.TemporaryDirectory() as temp_dir:
output_file = Path(temp_dir) / "nested" / "path" / "error_results.txt"
comparator._write_error_file(output_file, "Test error", "Test details")
assert output_file.exists()
assert output_file.parent.exists()
content = output_file.read_text()
assert "❌ Error: Test error" in content
def test_write_error_file_handles_write_failure(self, comparator):
with tempfile.TemporaryDirectory() as temp_dir:
output_file = Path(temp_dir) / "error_results.txt"
with patch.object(Path, "open", side_effect=OSError("Permission denied")):
comparator._write_error_file(output_file, "Test error", "Test details")
assert not output_file.exists()
class TestIntegrationScenarios:
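"""End-to-end comparison scenarios: mixed results, gradual degradation, and noisy
benchmarks evaluated against a common baseline."""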
@pytest.fixture
def comparator(self):
project_root = Path("/fake/project")
return PerformanceComparator(project_root)
def test_realistic_mixed_performance_scenario(self, comparator):
current_results = [
BenchmarkData(1000, "2D").with_timing(98.0, 103.0, 108.0, "µs"),
BenchmarkData(5000, "2D").with_timing(432.0, 540.0, 648.0, "µs"),
BenchmarkData(10000, "2D").with_timing(931.2, 980.0, 1028.8, "µs"),
BenchmarkData(1000, "3D").with_timing(176.0, 220.0, 264.0, "µs"),
BenchmarkData(5000, "3D").with_timing(1040.0, 1300.0, 1560.0, "µs"),
]
baseline_results = {
"1000_2D": BenchmarkData(1000, "2D").with_timing(95.0, 100.0, 105.0, "µs"),
"5000_2D": BenchmarkData(5000, "2D").with_timing(450.0, 500.0, 550.0, "µs"),
"10000_2D": BenchmarkData(10000, "2D").with_timing(950.0, 1000.0, 1050.0, "µs"),
"1000_3D": BenchmarkData(1000, "3D").with_timing(225.0, 250.0, 275.0, "µs"),
"5000_3D": BenchmarkData(5000, "3D").with_timing(1200.0, 1250.0, 1300.0, "µs"),
}
output = StringIO()
regression_found = comparator._write_performance_comparison(output, current_results, baseline_results)
assert not regression_found
result = output.getvalue()
assert "Total benchmarks compared: 5" in result
assert f"Individual regressions (>{THRESHOLD_PERCENT}): 1" in result
expected_average_change = compute_average_time_change(current_results, baseline_results)
expected_average_line = f"Average time change: {expected_average_change:.1f}%"
assert expected_average_line in result
assert "✅ OVERALL OK" in result
def test_gradual_performance_degradation_scenario(self, comparator):
current_results = [
BenchmarkData(1000, "2D").with_timing(104.0, 109.0, 114.0, "µs"),
BenchmarkData(5000, "2D").with_timing(520.0, 545.0, 570.0, "µs"),
BenchmarkData(10000, "2D").with_timing(1050.0, 1090.0, 1130.0, "µs"),
BenchmarkData(1000, "3D").with_timing(240.0, 272.5, 305.0, "µs"),
BenchmarkData(5000, "3D").with_timing(1335.0, 1362.5, 1390.0, "µs"),
]
baseline_results = {
"1000_2D": BenchmarkData(1000, "2D").with_timing(95.0, 100.0, 105.0, "µs"),
"5000_2D": BenchmarkData(5000, "2D").with_timing(450.0, 500.0, 550.0, "µs"),
"10000_2D": BenchmarkData(10000, "2D").with_timing(950.0, 1000.0, 1050.0, "µs"),
"1000_3D": BenchmarkData(1000, "3D").with_timing(225.0, 250.0, 275.0, "µs"),
"5000_3D": BenchmarkData(5000, "3D").with_timing(1200.0, 1250.0, 1300.0, "µs"),
}
output = StringIO()
regression_found = comparator._write_performance_comparison(output, current_results, baseline_results)
assert regression_found
result = output.getvalue()
assert "Total benchmarks compared: 5" in result
assert f"Individual regressions (>{THRESHOLD_PERCENT}): 5" in result
assert "Average time change: 9.0%" in result
assert "🚨 OVERALL REGRESSION" in result
def test_noisy_benchmarks_scenario(self, comparator):
current_results = [
BenchmarkData(1000, "2D").with_timing(75.0, 102.0, 140.0, "µs"),
BenchmarkData(5000, "2D").with_timing(350.0, 480.0, 650.0, "µs"),
BenchmarkData(10000, "2D").with_timing(800.0, 1030.0, 1350.0, "µs"),
BenchmarkData(1000, "3D").with_timing(280.0, 350.0, 420.0, "µs"),
BenchmarkData(5000, "3D").with_timing(950.0, 1125.0, 1300.0, "µs"),
]
baseline_results = {
"1000_2D": BenchmarkData(1000, "2D").with_timing(95.0, 100.0, 105.0, "µs"),
"5000_2D": BenchmarkData(5000, "2D").with_timing(450.0, 500.0, 550.0, "µs"),
"10000_2D": BenchmarkData(10000, "2D").with_timing(950.0, 1000.0, 1050.0, "µs"),
"1000_3D": BenchmarkData(1000, "3D").with_timing(225.0, 250.0, 275.0, "µs"),
"5000_3D": BenchmarkData(5000, "3D").with_timing(1200.0, 1250.0, 1300.0, "µs"),
}
output = StringIO()
regression_found = comparator._write_performance_comparison(output, current_results, baseline_results)
assert not regression_found
result = output.getvalue()
assert "Total benchmarks compared: 5" in result
assert f"Individual regressions (>{THRESHOLD_PERCENT}): 1" in result
assert "Average time change: 4.9%" in result
assert "✅ OVERALL OK" in result
class TestEdgeCases:
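"""Edge cases for _write_performance_comparison: empty inputs and zero or partially
invalid baseline timings."""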
@pytest.fixture
def comparator(self):
project_root = Path("/fake/project")
return PerformanceComparator(project_root)
def test_empty_current_results(self, comparator):
baseline_results = {
"1000_2D": BenchmarkData(1000, "2D").with_timing(95.0, 100.0, 105.0, "µs"),
}
output = StringIO()
regression_found = comparator._write_performance_comparison(output, [], baseline_results)
assert not regression_found
assert "SUMMARY" not in output.getvalue()
def test_empty_baseline_results(self, comparator):
current_results = [
BenchmarkData(1000, "2D").with_timing(105.0, 110.0, 115.0, "µs"),
]
output = StringIO()
regression_found = comparator._write_performance_comparison(output, current_results, {})
assert not regression_found
result = output.getvalue()
assert "1000 Points (2D)" in result
assert "SUMMARY" not in result
def test_all_zero_baseline_times(self, comparator):
current_results = [
BenchmarkData(1000, "2D").with_timing(105.0, 110.0, 115.0, "µs"),
BenchmarkData(2000, "2D").with_timing(205.0, 220.0, 235.0, "µs"),
]
baseline_results = {
"1000_2D": BenchmarkData(1000, "2D").with_timing(0.0, 0.0, 0.0, "µs"),
"2000_2D": BenchmarkData(2000, "2D").with_timing(0.0, 0.0, 0.0, "µs"),
}
output = StringIO()
regression_found = comparator._write_performance_comparison(output, current_results, baseline_results)
assert not regression_found
result = output.getvalue()
assert "N/A (baseline mean is 0)" in result
assert "SUMMARY" not in result
def test_mixed_valid_invalid_baselines(self, comparator):
current_results = [
BenchmarkData(1000, "2D").with_timing(105.0, 110.0, 115.0, "µs"),
BenchmarkData(2000, "2D").with_timing(205.0, 220.0, 235.0, "µs"),
]
baseline_results = {
"1000_2D": BenchmarkData(1000, "2D").with_timing(95.0, 100.0, 105.0, "µs"),
"2000_2D": BenchmarkData(2000, "2D").with_timing(0.0, 0.0, 0.0, "µs"),
}
output = StringIO()
regression_found = comparator._write_performance_comparison(output, current_results, baseline_results)
assert regression_found
result = output.getvalue()
assert "Total benchmarks compared: 1" in result
assert "N/A (baseline mean is 0)" in result
assert "10.0%" in result
class TestWorkflowHelper:
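"""Tests for WorkflowHelper: tag-name determination, metadata creation, baseline
summaries, and artifact-name sanitization (including GITHUB_OUTPUT handling)."""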
@patch.dict(os.environ, {"GITHUB_REF": "refs/tags/v1.2.3"}, clear=False)
def test_determine_tag_name_from_github_ref(self):
tag_name = WorkflowHelper.determine_tag_name()
assert tag_name == "v1.2.3"
@patch.dict(os.environ, {"GITHUB_REF": "refs/heads/main"}, clear=False)
@patch("benchmark_utils.datetime")
def test_determine_tag_name_generated(self, mock_datetime):
mock_now = Mock()
mock_now.strftime.return_value = "20231215-143000"
mock_datetime.now.return_value = mock_now
tag_name = WorkflowHelper.determine_tag_name()
assert tag_name == "manual-20231215-143000"
@patch.dict(os.environ, {"GITHUB_REF": "refs/tags/v2.0.0"}, clear=False)
def test_determine_tag_name_with_github_output(self):
with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
output_file = f.name
try:
with patch.dict(os.environ, {"GITHUB_OUTPUT": output_file}):
tag_name = WorkflowHelper.determine_tag_name()
assert tag_name == "v2.0.0"
with open(output_file, encoding="utf-8") as f:
content = f.read()
assert "tag_name=v2.0.0\n" in content
finally:
Path(output_file).unlink(missing_ok=True)
def test_create_metadata_success(self):
with tempfile.TemporaryDirectory() as temp_dir:
output_dir = Path(temp_dir)
with patch.dict(
os.environ,
{
"GITHUB_SHA": "abc123def456",
"GITHUB_RUN_ID": "123456789",
"RUNNER_OS": "macOS",
"RUNNER_ARCH": "ARM64",
},
):
success = WorkflowHelper.create_metadata("v1.0.0", output_dir)
assert success
metadata_file = output_dir / "metadata.json"
assert metadata_file.exists()
with metadata_file.open("r", encoding="utf-8") as f:
metadata = json.load(f)
assert metadata["tag"] == "v1.0.0"
assert metadata["commit"] == "abc123def456"
assert metadata["workflow_run_id"] == "123456789"
assert metadata["runner_os"] == "macOS"
assert metadata["runner_arch"] == "ARM64"
assert "generated_at" in metadata
assert metadata["generated_at"].endswith("Z")
def test_create_metadata_with_safe_env_vars(self):
with tempfile.TemporaryDirectory() as temp_dir:
output_dir = Path(temp_dir)
with patch.dict(
os.environ,
{
"SAFE_COMMIT_SHA": "def456abc789",
"SAFE_RUN_ID": "987654321",
"RUNNER_OS": "Linux",
"RUNNER_ARCH": "X64",
},
clear=True,
):
success = WorkflowHelper.create_metadata("v2.0.0", output_dir)
assert success
metadata_file = output_dir / "metadata.json"
assert metadata_file.exists()
with metadata_file.open("r", encoding="utf-8") as f:
metadata = json.load(f)
assert metadata["commit"] == "def456abc789"
assert metadata["workflow_run_id"] == "987654321"
def test_create_metadata_missing_env_vars(self):
with tempfile.TemporaryDirectory() as temp_dir:
output_dir = Path(temp_dir)
with patch.dict(os.environ, {}, clear=True):
success = WorkflowHelper.create_metadata("v1.0.0", output_dir)
assert success
metadata_file = output_dir / "metadata.json"
with metadata_file.open("r", encoding="utf-8") as f:
metadata = json.load(f)
assert metadata["tag"] == "v1.0.0"
assert metadata["commit"] == "unknown"
assert metadata["workflow_run_id"] == "unknown"
assert metadata["runner_os"] == "unknown"
assert metadata["runner_arch"] == "unknown"
def test_create_metadata_directory_creation(self):
with tempfile.TemporaryDirectory() as temp_dir:
output_dir = Path(temp_dir) / "nested" / "path"
success = WorkflowHelper.create_metadata("v1.0.0", output_dir)
assert success
assert output_dir.exists()
assert (output_dir / "metadata.json").exists()
def test_display_baseline_summary_success(self, capsys):
baseline_content = """Date: 2023-12-15 14:30:00 UTC
Git commit: abc123def456
Hardware Information:
OS: macOS
CPU: Apple M4 Max
Memory: 64.0 GB
=== 1000 Points (2D) ===
Time: [95.0, 100.0, 105.0] µs
Throughput: [9.524, 10.0, 10.526] Kelem/s
=== 2000 Points (2D) ===
Time: [190.0, 200.0, 210.0] µs
=== 1000 Points (3D) ===
Time: [220.0, 250.0, 280.0] µs
"""
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False, encoding="utf-8") as f:
f.write(baseline_content)
f.flush()
baseline_file = Path(f.name)
try:
success = WorkflowHelper.display_baseline_summary(baseline_file)
assert success
captured = capsys.readouterr()
assert "📊 Baseline summary:" in captured.out
assert "Total benchmarks: 3" in captured.out
assert "Date: 2023-12-15 14:30:00 UTC" in captured.out
finally:
baseline_file.unlink()
def test_display_baseline_summary_nonexistent_file(self, capsys):
baseline_file = Path("/nonexistent/file.txt")
success = WorkflowHelper.display_baseline_summary(baseline_file)
assert not success
captured = capsys.readouterr()
assert "❌ Baseline file not found" in captured.err
def test_display_baseline_summary_long_file(self, capsys):
baseline_content = "\n".join([f"Line {i}" for i in range(20)])
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False, encoding="utf-8") as f:
f.write(baseline_content)
f.flush()
baseline_file = Path(f.name)
try:
success = WorkflowHelper.display_baseline_summary(baseline_file)
assert success
captured = capsys.readouterr()
assert "..." in captured.out
finally:
baseline_file.unlink()
def test_sanitize_artifact_name_basic(self):
artifact_name = WorkflowHelper.sanitize_artifact_name("v1.2.3")
assert artifact_name == "performance-baseline-v1_2_3"
def test_sanitize_artifact_name_with_special_chars(self):
artifact_name = WorkflowHelper.sanitize_artifact_name("manual-2023/12/15-14:30:00")
assert artifact_name == "performance-baseline-manual-2023_12_15-14_30_00"
def test_sanitize_artifact_name_with_github_output(self):
with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
output_file = f.name
try:
with patch.dict(os.environ, {"GITHUB_OUTPUT": output_file}):
artifact_name = WorkflowHelper.sanitize_artifact_name("v2.0.0-beta.1")
assert artifact_name == "performance-baseline-v2_0_0-beta_1"
with open(output_file, encoding="utf-8") as f:
content = f.read()
assert "artifact_name=performance-baseline-v2_0_0-beta_1\n" in content
finally:
Path(output_file).unlink(missing_ok=True)
@pytest.mark.parametrize(
("input_tag", "expected_output"),
[
("v1.0.0-alpha.1", "performance-baseline-v1_0_0-alpha_1"),
("tag with spaces", "performance-baseline-tag_with_spaces"),
("v1.0.0+build.123", "performance-baseline-v1_0_0_build_123"),
],
)
def test_sanitize_artifact_name_edge_cases(self, input_tag, expected_output):
result = WorkflowHelper.sanitize_artifact_name(input_tag)
assert result == expected_output
def test_sanitize_artifact_name_special_characters(self):
special_chars_input = "@#$%^&*()[]{}|\\<>?"
result = WorkflowHelper.sanitize_artifact_name(special_chars_input)
assert re.fullmatch(r"performance-baseline-[A-Za-z0-9._-]+", result)
assert "_" in result
class TestBenchmarkRegressionHelper:
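"""Tests for BenchmarkRegressionHelper: baseline preparation from artifacts, baseline
commit extraction, skip decisions, regression-test runs, and CI summary generation."""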
def test_prepare_baseline_success(self, capsys):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
baseline_file = baseline_dir / "baseline_results.txt"
baseline_content = """Date: 2023-12-15 10:30:00 UTC
Git commit: abc123def456
Hardware Information:
OS: macOS
CPU: Apple M4 Max
=== 1000 Points (2D) ===
Time: [95.0, 100.0, 105.0] µs
"""
baseline_file.write_text(baseline_content)
with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
env_path = env_file.name
try:
with patch.dict(os.environ, {"GITHUB_ENV": env_path}):
success = BenchmarkRegressionHelper.prepare_baseline(baseline_dir)
assert success
with open(env_path, encoding="utf-8") as f:
env_content = f.read()
assert "BASELINE_EXISTS=true" in env_content
assert "BASELINE_SOURCE=artifact" in env_content
assert "BASELINE_ORIGIN=artifact" in env_content
captured = capsys.readouterr()
assert "📦 Prepared baseline from artifact" in captured.out
assert "=== Baseline Information" in captured.out
finally:
Path(env_path).unlink(missing_ok=True)
def test_prepare_baseline_copy_error_handling(self, capsys):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
baseline_file = baseline_dir / "baseline-v1.0.0.txt"
baseline_content = """Date: 2023-12-15 10:30:00 UTC
Git commit: abc123def456
Tag: v1.0.0
"""
baseline_file.write_text(baseline_content)
with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
env_path = env_file.name
try:
with patch.dict(os.environ, {"GITHUB_ENV": env_path}), patch("benchmark_utils.copyfile", side_effect=OSError("Permission denied")):
success = BenchmarkRegressionHelper.prepare_baseline(baseline_dir)
assert not success
with open(env_path, encoding="utf-8") as f:
env_content = f.read()
assert "BASELINE_EXISTS=false" in env_content
assert "BASELINE_SOURCE=artifact" in env_content
assert "BASELINE_ORIGIN=artifact" in env_content
captured = capsys.readouterr()
assert "❌ Failed to prepare baseline: Permission denied" in captured.err
standard_file = baseline_dir / "baseline_results.txt"
assert not standard_file.exists()
finally:
Path(env_path).unlink(missing_ok=True)
def test_prepare_baseline_read_summary_error_handling(self, capsys):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
baseline_file = baseline_dir / "baseline_results.txt"
baseline_content = """Date: 2023-12-15 10:30:00 UTC
Git commit: abc123def456
Tag: v1.0.0
Hardware Information:
OS: macOS
CPU: Apple M4 Max
=== 1000 Points (2D) ===
Time: [95.0, 100.0, 105.0] µs
"""
baseline_file.write_text(baseline_content)
with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
env_path = env_file.name
try:
original_path_open = Path.open
def mock_path_open(self, mode="r", *args, **kwargs):
if self.name == "baseline_results.txt" and "r" in mode:
msg = "Read permission denied"
raise OSError(msg)
return original_path_open(self, mode, *args, **kwargs)
with patch.dict(os.environ, {"GITHUB_ENV": env_path}), patch.object(Path, "open", mock_path_open):
success = BenchmarkRegressionHelper.prepare_baseline(baseline_dir)
assert success
with open(env_path, encoding="utf-8") as f:
env_content = f.read()
assert "BASELINE_EXISTS=true" in env_content
assert "BASELINE_SOURCE=artifact" in env_content
assert "BASELINE_ORIGIN=artifact" in env_content
assert "BASELINE_SOURCE_FILE=baseline_results.txt" in env_content
assert "BASELINE_TAG=" not in env_content
captured = capsys.readouterr()
assert "⚠️ Failed to read baseline summary: Read permission denied" in captured.err
assert "=== Baseline Information (from artifact) ===" in captured.out
finally:
Path(env_path).unlink(missing_ok=True)
def test_prepare_baseline_missing_file(self, capsys):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
env_path = env_file.name
try:
with patch.dict(os.environ, {"GITHUB_ENV": env_path}):
success = BenchmarkRegressionHelper.prepare_baseline(baseline_dir)
assert not success
with open(env_path, encoding="utf-8") as f:
env_content = f.read()
assert "BASELINE_EXISTS=false" in env_content
assert "BASELINE_SOURCE=missing" in env_content
captured = capsys.readouterr()
assert "❌ Downloaded artifact but no baseline*.txt files found" in captured.err
finally:
Path(env_path).unlink(missing_ok=True)
def test_set_no_baseline_status(self, capsys):
with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
env_path = env_file.name
try:
with patch.dict(os.environ, {"GITHUB_ENV": env_path}):
BenchmarkRegressionHelper.set_no_baseline_status()
with open(env_path, encoding="utf-8") as f:
env_content = f.read()
assert "BASELINE_EXISTS=false" in env_content
assert "BASELINE_SOURCE=none" in env_content
assert "BASELINE_ORIGIN=none" in env_content
captured = capsys.readouterr()
assert "📈 No baseline artifact found" in captured.out
finally:
Path(env_path).unlink(missing_ok=True)
def test_extract_baseline_commit_from_baseline_file(self):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
baseline_file = baseline_dir / "baseline_results.txt"
baseline_content = """Date: 2023-12-15 10:30:00 UTC
Git commit: abc123def456
Hardware Information:
OS: macOS
"""
baseline_file.write_text(baseline_content)
with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
env_path = env_file.name
try:
with patch.dict(os.environ, {"GITHUB_ENV": env_path}):
commit_sha = BenchmarkRegressionHelper.extract_baseline_commit(baseline_dir)
assert commit_sha == "abc123def456"
with open(env_path, encoding="utf-8") as f:
env_content = f.read()
assert "BASELINE_COMMIT=abc123def456" in env_content
assert "BASELINE_COMMIT_SOURCE=baseline" in env_content
finally:
Path(env_path).unlink(missing_ok=True)
def test_extract_baseline_commit_from_metadata(self):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
metadata_file = baseline_dir / "metadata.json"
metadata = {"tag": "v1.0.0", "commit": "def456abc789", "generated_at": "2023-12-15T10:30:00Z"}
with metadata_file.open("w", encoding="utf-8") as f:
json.dump(metadata, f)
with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
env_path = env_file.name
try:
with patch.dict(os.environ, {"GITHUB_ENV": env_path}):
commit_sha = BenchmarkRegressionHelper.extract_baseline_commit(baseline_dir)
assert commit_sha == "def456abc789"
with open(env_path, encoding="utf-8") as f:
env_content = f.read()
assert "BASELINE_COMMIT=def456abc789" in env_content
assert "BASELINE_COMMIT_SOURCE=metadata" in env_content
finally:
Path(env_path).unlink(missing_ok=True)
def test_extract_baseline_commit_unknown(self):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
env_path = env_file.name
try:
with patch.dict(os.environ, {"GITHUB_ENV": env_path}):
commit_sha = BenchmarkRegressionHelper.extract_baseline_commit(baseline_dir)
assert commit_sha == "unknown"
with open(env_path, encoding="utf-8") as f:
env_content = f.read()
assert "BASELINE_COMMIT=unknown" in env_content
assert "BASELINE_COMMIT_SOURCE=unknown" in env_content
finally:
Path(env_path).unlink(missing_ok=True)
def test_determine_benchmark_skip_unknown_baseline(self):
should_skip, reason = BenchmarkRegressionHelper.determine_benchmark_skip("unknown", "def4567")
assert not should_skip
assert reason == "unknown_baseline"
def test_determine_benchmark_skip_same_commit(self):
should_skip, reason = BenchmarkRegressionHelper.determine_benchmark_skip("abc1234", "abc1234")
assert should_skip
assert reason == "same_commit"
@patch("benchmark_utils.run_git_command")
def test_determine_benchmark_skip_baseline_not_found(self, mock_git):
mock_git.side_effect = subprocess.CalledProcessError(1, "git")
should_skip, reason = BenchmarkRegressionHelper.determine_benchmark_skip("abc1234", "def4567")
assert not should_skip
assert reason == "baseline_commit_not_found"
@patch("benchmark_utils.run_git_command")
def test_determine_benchmark_skip_no_changes(self, mock_git):
mock_git.side_effect = [
Mock(returncode=0),
Mock(returncode=0, stdout="docs/README.md\n.github/workflows/other.yml\n", stderr=""),
]
should_skip, reason = BenchmarkRegressionHelper.determine_benchmark_skip("abc1234", "def4567")
assert should_skip
assert reason == "no_relevant_changes"
@patch("benchmark_utils.run_git_command")
def test_determine_benchmark_skip_changes_detected(self, mock_git):
mock_git.side_effect = [
Mock(returncode=0),
Mock(returncode=0, stdout="src/core/mod.rs\nbenches/performance.rs\n", stderr=""),
]
should_skip, reason = BenchmarkRegressionHelper.determine_benchmark_skip("abc1234", "def4567")
assert not should_skip
assert reason == "changes_detected"
def test_display_skip_message(self, capsys):
BenchmarkRegressionHelper.display_skip_message("same_commit", "abc1234")
captured = capsys.readouterr()
assert "🔍 Current commit matches baseline (abc1234)" in captured.out
def test_display_no_baseline_message(self, capsys):
BenchmarkRegressionHelper.display_no_baseline_message()
captured = capsys.readouterr()
assert "⚠️ No performance baseline available" in captured.out
assert "💡 To enable performance regression testing:" in captured.out
def test_run_regression_test_success(self, capsys):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_file = Path(temp_dir) / "baseline.txt"
baseline_file.write_text("mock baseline content")
with patch("benchmark_utils.PerformanceComparator") as mock_comparator_class:
mock_comparator = Mock()
mock_comparator.compare_with_baseline.return_value = (True, False)
mock_comparator_class.return_value = mock_comparator
success = BenchmarkRegressionHelper.run_regression_test(baseline_file)
assert success
mock_comparator.compare_with_baseline.assert_called_once_with(baseline_file, dev_mode=False, bench_timeout=1800)
captured = capsys.readouterr()
assert "🚀 Running performance regression test" in captured.out
def test_run_regression_test_dev_mode(self, capsys):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_file = Path(temp_dir) / "baseline.txt"
baseline_file.write_text("mock baseline content")
with patch("benchmark_utils.PerformanceComparator") as mock_comparator_class:
mock_comparator = Mock()
mock_comparator.compare_with_baseline.return_value = (True, False)
mock_comparator_class.return_value = mock_comparator
success = BenchmarkRegressionHelper.run_regression_test(baseline_file, dev_mode=True)
assert success
mock_comparator.compare_with_baseline.assert_called_once_with(baseline_file, dev_mode=True, bench_timeout=1800)
captured = capsys.readouterr()
assert "dev mode (10x faster)" in captured.out
def test_run_regression_test_failure(self):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_file = Path(temp_dir) / "baseline.txt"
baseline_file.write_text("mock baseline content")
with patch("benchmark_utils.PerformanceComparator") as mock_comparator_class:
mock_comparator = Mock()
mock_comparator.compare_with_baseline.return_value = (False, False)
mock_comparator_class.return_value = mock_comparator
success = BenchmarkRegressionHelper.run_regression_test(baseline_file)
assert not success
def test_run_regression_test_custom_timeout(self, capsys):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_file = Path(temp_dir) / "baseline.txt"
baseline_file.write_text("mock baseline content")
with patch("benchmark_utils.PerformanceComparator") as mock_comparator_class:
mock_comparator = Mock()
mock_comparator.compare_with_baseline.return_value = (True, False)
mock_comparator_class.return_value = mock_comparator
success = BenchmarkRegressionHelper.run_regression_test(baseline_file, bench_timeout=3600)
assert success
mock_comparator.compare_with_baseline.assert_called_once_with(baseline_file, dev_mode=False, bench_timeout=3600)
captured = capsys.readouterr()
assert "🚀 Running performance regression test" in captured.out
def test_display_results_file_exists(self, capsys):
with tempfile.TemporaryDirectory() as temp_dir:
results_file = Path(temp_dir) / "results.txt"
results_content = "=== Performance Test Results ===\nAll tests passed\n"
results_file.write_text(results_content)
BenchmarkRegressionHelper.display_results(results_file)
captured = capsys.readouterr()
assert "=== Performance Regression Test Results ===" in captured.out
assert "All tests passed" in captured.out
def test_display_results_file_missing(self, capsys):
missing_file = Path("/nonexistent/results.txt")
BenchmarkRegressionHelper.display_results(missing_file)
captured = capsys.readouterr()
assert "⚠️ No comparison results file found" in captured.out
def test_generate_summary_with_regression(self, temp_chdir, capsys):
with tempfile.TemporaryDirectory() as temp_dir:
results_file = Path(temp_dir) / "benches" / "compare_results.txt"
results_file.parent.mkdir(parents=True)
results_file.write_text("REGRESSION detected in benchmark xyz")
env_vars = {
"BASELINE_SOURCE": "artifact",
"BASELINE_ORIGIN": "release",
"BASELINE_TAG": "v1.0.0",
"BASELINE_EXISTS": "true",
"SKIP_BENCHMARKS": "false",
"SKIP_REASON": "changes_detected",
}
with patch.dict(os.environ, env_vars), temp_chdir(temp_dir):
BenchmarkRegressionHelper.generate_summary()
captured = capsys.readouterr()
assert "📊 Performance Regression Testing Summary" in captured.out
assert "Baseline source: artifact" in captured.out
assert "Result: ⚠️ Performance regressions detected" in captured.out
def test_generate_summary_skip_same_commit(self, capsys):
env_vars = {
"BASELINE_SOURCE": "artifact",
"BASELINE_ORIGIN": "manual",
"BASELINE_EXISTS": "true",
"SKIP_BENCHMARKS": "true",
"SKIP_REASON": "same_commit",
}
with patch.dict(os.environ, env_vars):
BenchmarkRegressionHelper.generate_summary()
captured = capsys.readouterr()
assert "Result: ⏭️ Benchmarks skipped (same commit as baseline)" in captured.out
def test_generate_summary_no_baseline(self, capsys):
env_vars = {
"BASELINE_EXISTS": "false",
"SKIP_BENCHMARKS": "unknown",
}
with patch.dict(os.environ, env_vars, clear=True):
BenchmarkRegressionHelper.generate_summary()
captured = capsys.readouterr()
assert "Result: ⏭️ Benchmarks skipped (no baseline available)" in captured.out
def test_generate_summary_sets_regression_environment_variable(self, temp_chdir, capsys):
with tempfile.TemporaryDirectory() as temp_dir:
results_file = Path(temp_dir) / "benches" / "compare_results.txt"
results_file.parent.mkdir(parents=True)
results_file.write_text("REGRESSION detected in benchmark xyz")
env_vars = {
"BASELINE_EXISTS": "true",
"SKIP_BENCHMARKS": "false",
}
if "BENCHMARK_REGRESSION_DETECTED" in os.environ:
del os.environ["BENCHMARK_REGRESSION_DETECTED"]
with patch.dict(os.environ, env_vars, clear=True), temp_chdir(temp_dir):
BenchmarkRegressionHelper.generate_summary()
assert os.environ.get("BENCHMARK_REGRESSION_DETECTED") == "true"
captured = capsys.readouterr()
assert "Exported BENCHMARK_REGRESSION_DETECTED=true for downstream CI steps" in captured.out
def test_generate_summary_github_env_export(self, temp_chdir):
with tempfile.TemporaryDirectory() as temp_dir:
results_file = Path(temp_dir) / "benches" / "compare_results.txt"
results_file.parent.mkdir(parents=True)
results_file.write_text("REGRESSION detected in benchmark xyz")
github_env_file = Path(temp_dir) / "github_env"
env_vars = {
"BASELINE_EXISTS": "true",
"SKIP_BENCHMARKS": "false",
"GITHUB_ENV": str(github_env_file),
}
with patch.dict(os.environ, env_vars, clear=True), temp_chdir(temp_dir):
BenchmarkRegressionHelper.generate_summary()
assert github_env_file.exists()
github_env_content = github_env_file.read_text()
assert "BENCHMARK_REGRESSION_DETECTED=true" in github_env_content
def test_generate_summary_with_error_file(self, temp_chdir, capsys):
with tempfile.TemporaryDirectory() as temp_dir:
results_file = Path(temp_dir) / "benches" / "compare_results.txt"
results_file.parent.mkdir(parents=True)
results_file.write_text(
"Comparison Results\n"
"==================\n\n"
"❌ Error: Benchmark execution timeout\n\n"
"Details: Command timed out after 1800 seconds\n\n"
"This error prevented the benchmark comparison from completing successfully.\n"
"Please check the CI logs for more information.\n"
)
env_vars = {
"BASELINE_SOURCE": "artifact",
"BASELINE_ORIGIN": "release",
"BASELINE_TAG": "v1.0.0",
"BASELINE_EXISTS": "true",
"SKIP_BENCHMARKS": "false",
"SKIP_REASON": "n/a",
}
with patch.dict(os.environ, env_vars), temp_chdir(temp_dir):
BenchmarkRegressionHelper.generate_summary()
captured = capsys.readouterr()
assert "📊 Performance Regression Testing Summary" in captured.out
assert "Baseline source: artifact" in captured.out
assert "Result: ❌ Benchmark comparison failed" in captured.out
assert "(see benches/compare_results.txt for details)" in captured.out
assert "✅ No significant performance regressions" not in captured.out
class TestProjectRootHandling:
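"""Tests for find_project_root, which locates the nearest directory containing
Cargo.toml and raises ProjectRootNotFoundError otherwise."""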
def test_find_project_root_success(self, temp_chdir):
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
cargo_toml = temp_path / "Cargo.toml"
cargo_toml.write_text('[package]\nname = "test"\n')
sub_dir = temp_path / "subdir"
sub_dir.mkdir()
with temp_chdir(sub_dir):
result = find_project_root()
assert result.resolve() == temp_path.resolve()
def test_find_project_root_not_found(self, temp_chdir):
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
with temp_chdir(temp_path), pytest.raises(ProjectRootNotFoundError, match=r"Could not locate Cargo\.toml"):
find_project_root()
class TestTimeoutHandling:
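"""Tests that bench_timeout is forwarded to cargo and that timeouts are reported
cleanly by BaselineGenerator and PerformanceComparator, plus CLI timeout validation."""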
@pytest.mark.parametrize(
("component_class", "method_name", "setup_func"),
[
(
"BaselineGenerator",
"generate_baseline",
lambda _: None,
),
(
"PerformanceComparator",
"compare_with_baseline",
lambda temp_dir: (Path(temp_dir) / "baseline.txt").write_text("mock baseline"),
),
],
)
def test_timeout_parameter_passed(self, component_class, method_name, setup_func):
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
setup_func(temp_dir)
if component_class == "BaselineGenerator":
component = BaselineGenerator(project_root)
method_args = ()
else:
component = PerformanceComparator(project_root)
method_args = (Path(temp_dir) / "baseline.txt",)
with patch("benchmark_utils.run_cargo_command") as mock_cargo:
mock_cargo.side_effect = subprocess.TimeoutExpired("cargo", 120)
method = getattr(component, method_name)
result = method(*method_args, bench_timeout=120)
if component_class == "BaselineGenerator":
assert result is False
else:
success, regression = result
assert not success
assert not regression
assert mock_cargo.call_count >= 1
assert any(call.kwargs.get("timeout") == 120 for call in mock_cargo.call_args_list)
def test_timeout_error_handling_baseline_generator(self, capsys):
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = BaselineGenerator(project_root)
with patch("benchmark_utils.run_cargo_command") as mock_cargo:
mock_cargo.side_effect = subprocess.TimeoutExpired("cargo bench", 1800)
success = generator.generate_baseline(bench_timeout=1800)
assert not success
captured = capsys.readouterr()
assert "timed out after 1800 seconds" in captured.err
assert "Consider increasing --bench-timeout" in captured.err
def test_timeout_error_handling_performance_comparator(self, capsys):
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
baseline_file = Path(temp_dir) / "baseline.txt"
baseline_file.write_text("mock baseline")
comparator = PerformanceComparator(project_root)
with patch("benchmark_utils.run_cargo_command") as mock_cargo:
mock_cargo.side_effect = subprocess.TimeoutExpired("cargo bench", 1800)
success, regression = comparator.compare_with_baseline(baseline_file, bench_timeout=1800)
assert not success
assert not regression
captured = capsys.readouterr()
assert "timed out after 1800 seconds" in captured.err
assert "Consider increasing --bench-timeout" in captured.err
error_file = project_root / "benches" / "compare_results.txt"
assert error_file.exists()
error_content = error_file.read_text()
assert "❌ Error: Benchmark execution timeout" in error_content
assert "cargo bench" in error_content
assert "timeout after 1800 seconds" in error_content
def test_cli_bench_timeout_validation(self, monkeypatch, temp_chdir):
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
(temp_path / "Cargo.toml").write_text('[package]\nname = "test"\n')
with temp_chdir(temp_path):
monkeypatch.setattr(sys, "argv", ["benchmark_utils.py", "generate-baseline", "--bench-timeout", "0"])
with pytest.raises(SystemExit) as exc_info:
main()
assert exc_info.value.code == 2
baseline_file = temp_path / "baseline.txt"
baseline_file.write_text("mock baseline")
monkeypatch.setattr(sys, "argv", ["benchmark_utils.py", "compare", "--baseline", str(baseline_file), "--bench-timeout", "-100"])
with pytest.raises(SystemExit) as exc_info:
main()
assert exc_info.value.code == 2
parser = create_argument_parser()
args = parser.parse_args(["run-regression-test", "--baseline", str(baseline_file), "--bench-timeout", "3600"])
assert args.bench_timeout == 3600
assert hasattr(args, "validate_bench_timeout")
assert args.validate_bench_timeout
class TestPerformanceSummaryGenerator:
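"""Tests for PerformanceSummaryGenerator: path wiring, version/date detection,
baseline and comparison parsing, markdown generation, and numerical-accuracy parsing."""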
def test_init(self):
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = PerformanceSummaryGenerator(project_root)
assert generator.project_root == project_root
assert generator.baseline_file == project_root / "baseline-artifact" / "baseline_results.txt"
assert generator._baseline_fallback == project_root / "benches" / "baseline_results.txt"
assert generator.comparison_file == project_root / "benches" / "compare_results.txt"
assert generator.circumsphere_results_dir == project_root / "target" / "criterion"
assert isinstance(generator.current_version, str)
assert isinstance(generator.current_date, str)
@patch("benchmark_utils.run_git_command")
def test_get_current_version_with_tag(self, mock_git_command):
mock_result = Mock()
mock_result.stdout.strip.return_value = "v1.2.3"
mock_git_command.return_value = mock_result
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = PerformanceSummaryGenerator(project_root)
version = generator._get_current_version()
assert version == "1.2.3"
mock_git_command.assert_called_with(["describe", "--tags", "--abbrev=0", "--match=v*"], cwd=project_root)
@patch("benchmark_utils.run_git_command")
def test_get_current_version_fallback(self, mock_git_command):
mock_result = Mock()
mock_result.stdout.strip.return_value = "v0.1.0\nv0.2.0"
def side_effect(*args, **kwargs):
if "describe" in args[0]:
raise subprocess.CalledProcessError(1, "git describe", "describe failed")
return mock_result
mock_git_command.side_effect = side_effect
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = PerformanceSummaryGenerator(project_root)
version = generator._get_current_version()
assert version == "0.1.0"
@patch("benchmark_utils.run_git_command")
def test_get_current_version_no_tags(self, mock_git_command):
mock_git_command.side_effect = Exception("No tags found")
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = PerformanceSummaryGenerator(project_root)
version = generator._get_current_version()
assert version == "unknown"
@patch("benchmark_utils.run_git_command")
@patch("benchmark_utils.datetime")
def test_get_version_date_with_tag(self, mock_datetime, mock_git_command):
mock_result = Mock()
mock_result.stdout.strip.return_value = "2024-01-15"
mock_git_command.return_value = mock_result
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = PerformanceSummaryGenerator(project_root)
generator.current_version = "1.2.3"
date = generator._get_version_date()
assert date == "2024-01-15"
mock_git_command.assert_called_with(["log", "-1", "--format=%cd", "--date=format:%Y-%m-%d", "v1.2.3"], cwd=project_root)
@patch("benchmark_utils.run_git_command")
@patch("benchmark_utils.datetime")
def test_get_version_date_fallback(self, mock_datetime, mock_git_command):
mock_git_command.side_effect = Exception("Git command failed")
mock_now = Mock()
mock_now.strftime.return_value = "2024-01-15"
mock_datetime.now.return_value = mock_now
mock_datetime.UTC = Mock()
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = PerformanceSummaryGenerator(project_root)
date = generator._get_version_date()
assert date == "2024-01-15"
mock_now.strftime.assert_called_with("%Y-%m-%d")
def test_parse_baseline_results_nonexistent_file(self):
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = PerformanceSummaryGenerator(project_root)
lines = generator._parse_baseline_results()
content = "\n".join(lines)
assert "### Baseline Results" in content
assert "Error parsing baseline results" in content
def test_parse_baseline_results_with_data(self):
baseline_content = """Date: 2024-01-15 10:30:00 UTC
Git commit: abc123def456
Hardware: Apple M2 Pro (10 cores)
Memory: 32 GB
=== 1000 Points (2D) ===
Time: [100.0, 110.0, 120.0] µs
Throughput: [8000.0, 9090.9, 10000.0] Kelem/s
=== 5000 Points (3D) ===
Time: [500.0, 550.0, 600.0] µs
Throughput: [8333.3, 9090.9, 10000.0] Kelem/s
"""
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
baseline_dir = project_root / "baseline-artifact"
baseline_dir.mkdir(parents=True)
baseline_file = baseline_dir / "baseline_results.txt"
baseline_file.write_text(baseline_content)
generator = PerformanceSummaryGenerator(project_root)
lines = generator._parse_baseline_results()
markdown_content = "\n".join(lines)
assert "### Current Baseline Information" in markdown_content
assert "Git commit: abc123def456" in markdown_content
assert "Hardware: Apple M2 Pro" in markdown_content
assert "### 2D Triangulation Performance" in markdown_content
assert "### 3D Triangulation Performance" in markdown_content
assert "| Points | Time (mean) | Throughput (mean) | Scaling |" in markdown_content
def test_parse_comparison_results_with_regression(self):
comparison_content = """Performance Comparison Results
⚠️ REGRESSION: Time increased by 15.2% (slower performance)
✅ OK: Time change +2.1% within acceptable range
✅ IMPROVEMENT: Time decreased by 8.5% (faster performance)
"""
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
benches_dir = project_root / "benches"
benches_dir.mkdir(parents=True)
comparison_file = benches_dir / "compare_results.txt"
comparison_file.write_text(comparison_content)
generator = PerformanceSummaryGenerator(project_root)
lines = generator._parse_comparison_results()
markdown_content = "\n".join(lines)
assert "### ⚠️ Performance Regression Detected" in markdown_content
assert "REGRESSION: Time increased by 15.2%" in markdown_content
assert "IMPROVEMENT: Time decreased by 8.5%" in markdown_content
def test_parse_comparison_results_no_regression(self):
comparison_content = """Performance Comparison Results
✅ OK: Time change +2.1% within acceptable range
✅ IMPROVEMENT: Time decreased by 3.2% (faster performance)
✅ OK: Time change -1.8% within acceptable range
"""
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
benches_dir = project_root / "benches"
benches_dir.mkdir(parents=True)
comparison_file = benches_dir / "compare_results.txt"
comparison_file.write_text(comparison_content)
generator = PerformanceSummaryGenerator(project_root)
lines = generator._parse_comparison_results()
markdown_content = "\n".join(lines)
assert "### ✅ Performance Status: Good" in markdown_content
assert "no significant performance regressions" in markdown_content
@patch("benchmark_utils.get_git_commit_hash")
@patch("benchmark_utils.run_git_command")
@patch("benchmark_utils.datetime")
def test_generate_markdown_content(self, mock_datetime, mock_run_git, mock_git_commit):
mock_run_git.side_effect = Exception("git unavailable in test")
mock_git_commit.return_value = "abc123def456"
mock_now = Mock()
mock_now.strftime.return_value = "2024-01-15 10:30:00 UTC"
mock_datetime.now.return_value = mock_now
mock_datetime.UTC = Mock()
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = PerformanceSummaryGenerator(project_root)
content = generator._generate_markdown_content()
assert "# Delaunay Library Performance Results" in content
assert "**Last Updated**: 2024-01-15 10:30:00 UTC" in content
assert "**Generated By**: benchmark_utils.py" in content
assert "**Git Commit**: abc123def456" in content
assert "## Performance Results Summary" in content
assert "## Key Findings" in content
assert "### Performance Ranking" in content
assert "## Recommendations" in content
assert "## Performance Data Updates" in content
def test_get_circumsphere_performance_results(self):
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = PerformanceSummaryGenerator(project_root)
lines = generator._get_circumsphere_performance_results()
content = "\n".join(lines)
assert "### Circumsphere Performance Results" in content
assert "Basic 3D" in content or "Version unknown" in content
def test_get_update_instructions(self):
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = PerformanceSummaryGenerator(project_root)
lines = generator._get_update_instructions()
content = "\n".join(lines)
assert "## Performance Data Updates" in content
assert "uv run benchmark-utils generate-baseline" in content
assert "uv run benchmark-utils generate-summary" in content
assert "PerformanceSummaryGenerator" in content
def test_parse_numerical_accuracy_output_success(self):
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = PerformanceSummaryGenerator(project_root)
stdout_content = """Running benchmarks...
Method Comparisons (1000 total tests):
insphere vs insphere_distance: 845/1000 (84.5%)
insphere vs insphere_lifted: 12/1000 (1.2%)
insphere_distance vs insphere_lifted: 203/1000 (20.3%)
All three methods agree: 8/1000 (0.8%)
Benchmark completed."""
result = generator._parse_numerical_accuracy_output(stdout_content)
assert result is not None
assert isinstance(result, dict)
assert result["insphere_distance"] == "84.5%"
assert result["insphere_lifted"] == "1.2%"
assert result["distance_lifted"] == "20.3%"
assert result["all_agree"] == "0.8%"
def test_parse_numerical_accuracy_output_no_data(self):
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = PerformanceSummaryGenerator(project_root)
stdout_content = """Running benchmarks...
No method comparisons found.
Benchmark completed."""
result = generator._parse_numerical_accuracy_output(stdout_content)
assert result is None
def test_parse_numerical_accuracy_output_malformed(self):
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = PerformanceSummaryGenerator(project_root)
stdout_content = """Running benchmarks...
Method Comparisons (invalid format):
Benchmark completed."""
result = generator._parse_numerical_accuracy_output(stdout_content)
assert result is None
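# _run_circumsphere_benchmarks is exercised with the cargo runner mocked out: empty stdout,
# stdout containing parseable method-comparison data, and an exception raised by the runner.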
@patch("benchmark_utils.run_cargo_command")
def test_run_circumsphere_benchmarks_success(self, mock_cargo):
mock_cargo.return_value = Mock(stdout="")
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = PerformanceSummaryGenerator(project_root)
success, numerical_data = generator._run_circumsphere_benchmarks()
assert success is True
assert numerical_data is None or isinstance(numerical_data, dict)
mock_cargo.assert_called_once()
@patch("benchmark_utils.run_cargo_command")
def test_run_circumsphere_benchmarks_with_numerical_data(self, mock_cargo):
mock_result = Mock()
mock_result.stdout = """Running benchmarks...
Method Comparisons (1000 total tests):
insphere vs insphere_distance: 820/1000 (82.0%)
insphere vs insphere_lifted: 5/1000 (0.5%)
insphere_distance vs insphere_lifted: 180/1000 (18.0%)
All three methods agree: 2/1000 (0.2%)
Benchmark completed."""
mock_cargo.return_value = mock_result
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = PerformanceSummaryGenerator(project_root)
success, numerical_data = generator._run_circumsphere_benchmarks()
assert success is True
assert numerical_data is not None
assert isinstance(numerical_data, dict)
assert numerical_data["insphere_distance"] == "82.0%"
assert numerical_data["insphere_lifted"] == "0.5%"
assert numerical_data["distance_lifted"] == "18.0%"
assert numerical_data["all_agree"] == "0.2%"
mock_cargo.assert_called_once()
@patch("benchmark_utils.run_cargo_command")
def test_run_circumsphere_benchmarks_failure(self, mock_cargo, capsys):
mock_cargo.side_effect = Exception("Benchmark failed")
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = PerformanceSummaryGenerator(project_root)
success, numerical_data = generator._run_circumsphere_benchmarks()
assert success is False
assert numerical_data is None
captured = capsys.readouterr()
assert "Error running circumsphere benchmarks" in captured.out
@patch("benchmark_utils.run_git_command")
def test_generate_summary_success(self, mock_git, capsys):
mock_git.side_effect = Exception("git unavailable in test")
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = PerformanceSummaryGenerator(project_root)
output_file = Path(temp_dir) / "test_summary.md"
success = generator.generate_summary(output_path=output_file)
assert success is True
assert output_file.exists()
content = output_file.read_text()
assert "# Delaunay Library Performance Results" in content
assert "## Performance Results Summary" in content
captured = capsys.readouterr()
assert "Generated performance summary" in captured.out
@patch("benchmark_utils.PerformanceSummaryGenerator._run_circumsphere_benchmarks")
def test_generate_summary_with_benchmarks(self, mock_run_benchmarks):
mock_run_benchmarks.return_value = (True, None)
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = PerformanceSummaryGenerator(project_root)
output_file = Path(temp_dir) / "test_summary.md"
success = generator.generate_summary(output_path=output_file, run_benchmarks=True)
assert success is True
mock_run_benchmarks.assert_called_once()
assert output_file.exists()
@patch("benchmark_utils.PerformanceSummaryGenerator._run_circumsphere_benchmarks")
def test_generate_summary_benchmark_failure_continues(self, mock_run_benchmarks, capsys):
mock_run_benchmarks.return_value = (False, None)
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = PerformanceSummaryGenerator(project_root)
output_file = Path(temp_dir) / "test_summary.md"
success = generator.generate_summary(output_path=output_file, run_benchmarks=True)
assert success is True
assert output_file.exists()
captured = capsys.readouterr()
assert "Benchmark run failed" in captured.out
def test_generate_summary_exception_handling(self, capsys):
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = PerformanceSummaryGenerator(project_root)
output_file = Path(temp_dir) / "readonly" / "summary.md"
with patch.object(Path, "open", side_effect=OSError("permission denied")):
success = generator.generate_summary(output_path=output_file)
assert success is False
captured = capsys.readouterr()
assert "Failed to generate performance summary" in captured.err
def test_get_static_content(self):
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = PerformanceSummaryGenerator(project_root)
lines = generator._get_static_sections()
content = "\n".join(lines)
assert "## Historical Version Comparison" in content
assert "## Implementation Notes" in content
assert "## Benchmark Structure" in content
def test_empty_benchmark_results_edge_case(self):
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = PerformanceSummaryGenerator(project_root)
results = generator._parse_circumsphere_benchmark_results()
assert len(results) > 0
def test_malformed_estimates_json_edge_case(self):
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
criterion_dir = project_root / "target" / "criterion" / "basic-insphere" / "base"
criterion_dir.mkdir(parents=True)
estimates_file = criterion_dir / "estimates.json"
estimates_file.write_text("{ invalid json")
generator = PerformanceSummaryGenerator(project_root)
results = generator._parse_circumsphere_benchmark_results()
assert len(results) > 0
def test_missing_git_info_edge_case(self):
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
output_file = Path(temp_dir) / "test_output.md"
with (
patch("benchmark_utils.run_git_command") as mock_git,
patch("benchmark_utils.get_git_commit_hash") as mock_commit,
):
mock_git.side_effect = Exception("Git not available")
mock_commit.side_effect = Exception("Git not available")
generator = PerformanceSummaryGenerator(project_root)
success = generator.generate_summary(output_file)
assert success
content = output_file.read_text()
assert "Version unknown" in content
def test_baseline_fallback_behavior_edge_case(self):
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
output_file = Path(temp_dir) / "baseline_fallback_test.md"
benches_dir = project_root / "benches"
benches_dir.mkdir()
fallback_baseline = benches_dir / "baseline_results.txt"
fallback_baseline.write_text(
"Generated at: 2025-01-15 10:00:00\n"
"Git commit: abc123\n"
"=== 1000 Points (3D) ===\n"
"Time: [805.0, 810.0, 815.0] µs\n"
"Throughput: [1200.0, 1235.0, 1245.0] Kelem/s\n",
)
with (
patch("benchmark_utils.get_git_commit_hash") as mock_commit,
):
mock_commit.return_value = "abc123def456"
generator = PerformanceSummaryGenerator(project_root)
assert not generator.baseline_file.exists()
assert generator._baseline_fallback.exists()
success = generator.generate_summary(output_file)
assert success
content = output_file.read_text()
assert "Triangulation Data Structure Performance" in content
assert "Generated at: 2025-01-15 10:00:00" in content assert "Git commit: abc123" in content
def test_full_generation_workflow_integration(self):
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
output_file = Path(temp_dir) / "full_test.md"
baseline_dir = project_root / "benches"
baseline_dir.mkdir()
baseline_file = baseline_dir / "baseline_results.txt"
baseline_file.write_text(
"Generated at: 2025-01-15 10:00:00\n"
"Git commit: abc123\n"
"=== 10 Points (2D) ===\n"
"Time: [100.0, 110.0, 120.0] µs\n"
"Throughput: [8000.0, 9000.0, 10000.0] Kelem/s\n",
)
comparison_file = baseline_dir / "compare_results.txt"
comparison_file.write_text("✅ OK: All benchmarks within acceptable range\n")
with (
patch("benchmark_utils.get_git_commit_hash") as mock_commit,
):
mock_commit.return_value = "abc123def456"
generator = PerformanceSummaryGenerator(project_root)
success = generator.generate_summary(output_file)
assert success
content = output_file.read_text()
assert "# Delaunay Library Performance Results" in content
assert "Single Query Performance (3D)" in content
assert "Triangulation Data Structure Performance" in content
assert "Performance Status: Good" in content
assert "Key Findings" in content
assert "Performance Ranking" in content
assert "Recommendations" in content
assert "Performance Data Updates" in content
def test_dimension_sorting_numeric_order(self):
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = PerformanceSummaryGenerator(project_root)
test_cases = [
CircumsphereTestCase("Test10", "10D", {"insphere": CircumspherePerformanceData("insphere", 1000)}),
CircumsphereTestCase("Test2", "2D", {"insphere": CircumspherePerformanceData("insphere", 1000)}),
CircumsphereTestCase("Test3", "3D", {"insphere": CircumspherePerformanceData("insphere", 1000)}),
CircumsphereTestCase("Test1", "1D", {"insphere": CircumspherePerformanceData("insphere", 1000)}),
CircumsphereTestCase("Test9", "9D", {"insphere": CircumspherePerformanceData("insphere", 1000)}),
]
with patch.object(generator, "_parse_circumsphere_benchmark_results", return_value=test_cases):
result_lines = generator._get_circumsphere_performance_results()
content = "\n".join(result_lines)
dimension_headers = re.findall(r"#### Single Query Performance \((\d+D)\)", content)
expected_order = ["1D", "2D", "3D", "9D", "10D"]
assert dimension_headers == expected_order, f"Expected {expected_order}, got {dimension_headers}"
assert "Test1" in content assert "Test2" in content assert "Test3" in content assert "Test9" in content assert "Test10" in content
def test_hardware_metadata_parsing_with_cores(self):
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
baseline_dir = project_root / "baseline-artifact"
baseline_dir.mkdir()
baseline_file = baseline_dir / "baseline_results.txt"
baseline_content = """Date: 2023-12-15 10:30:00 UTC
Git commit: abc123def456
Hardware Information:
OS: macOS
CPU: Apple M4 Max
CPU Cores: 14
Memory: 64.0 GB
=== 1000 Points (2D) ===
Time: [95.0, 100.0, 105.0] µs
"""
baseline_file.write_text(baseline_content)
generator = PerformanceSummaryGenerator(project_root)
lines = generator._parse_baseline_results()
content = "\n".join(lines)
assert "Apple M4 Max (14 cores)" in content
baseline_content_short = """Date: 2023-12-15 10:30:00 UTC
Git commit: abc123def456
Hardware Information:
OS: macOS
CPU: Apple M4 Max
"""
baseline_file.write_text(baseline_content_short)
lines = generator._parse_baseline_results()
content = "\n".join(lines)
assert "Apple M4 Max" in content
assert "(" not in content.split("Apple M4 Max")[1].split("\n")[0] if "Apple M4 Max" in content else True
def test_dev_mode_args_consistency(self):
assert isinstance(DEV_MODE_BENCH_ARGS, list)
assert "--sample-size" in DEV_MODE_BENCH_ARGS
assert "--measurement-time" in DEV_MODE_BENCH_ARGS
assert "--warm-up-time" in DEV_MODE_BENCH_ARGS
assert len(DEV_MODE_BENCH_ARGS) >= 6
def test_numerical_accuracy_phrasing_flexibility(self):
with tempfile.TemporaryDirectory() as temp_dir:
project_root = Path(temp_dir)
generator = PerformanceSummaryGenerator(project_root)
lines = generator._get_numerical_accuracy_analysis()
content = "\n".join(lines)
assert "Based on random test cases:" in content
assert "Based on 1000 random test cases:" not in content
class TestTagSpecificBaselineHandling:
def test_prepare_baseline_with_tag_specific_file(self, capsys):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
tag_baseline_file = baseline_dir / "baseline-v0.4.3.txt"
baseline_content = """Date: 2025-09-13 00:00:36 UTC
Git commit: 1062551a9152a53e938ddbf94c4152ff6ae4254d
Tag: v0.4.3
Hardware Information:
OS: macOS
CPU: Apple M1 (Virtual)
=== 10 Points (2D) ===
Time: [160.1, 168.18, 177.67] µs
"""
tag_baseline_file.write_text(baseline_content)
with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
env_path = env_file.name
try:
with patch.dict(os.environ, {"GITHUB_ENV": env_path}):
success = BenchmarkRegressionHelper.prepare_baseline(baseline_dir)
assert success
with open(env_path, encoding="utf-8") as f:
env_content = f.read()
assert "BASELINE_EXISTS=true" in env_content
assert "BASELINE_SOURCE=artifact" in env_content
assert "BASELINE_ORIGIN=artifact" in env_content
assert "BASELINE_TAG=v0.4.3" in env_content
assert "BASELINE_SOURCE_FILE=baseline-v0.4.3.txt" in env_content
captured = capsys.readouterr()
assert "Prepared baseline from artifact: baseline-v0.4.3.txt" in captured.out
assert " → baseline_results.txt" in captured.out
assert "=== Baseline Information" in captured.out
assert "Tag: v0.4.3" in captured.out
standard_file = baseline_dir / "baseline_results.txt"
assert standard_file.exists()
assert "Tag: v0.4.3" in standard_file.read_text(encoding="utf-8")
finally:
Path(env_path).unlink(missing_ok=True)
def test_prepare_baseline_with_generic_baseline_file(self, capsys):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
generic_baseline_file = baseline_dir / "baseline-manual-test.txt"
baseline_content = """Date: 2025-09-13 10:30:00 UTC
Git commit: abcdef123456
Hardware Information:
OS: macOS
CPU: Test CPU
=== 100 Points (2D) ===
Time: [95.0, 100.0, 105.0] µs
"""
generic_baseline_file.write_text(baseline_content)
with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
env_path = env_file.name
try:
with patch.dict(os.environ, {"GITHUB_ENV": env_path}):
success = BenchmarkRegressionHelper.prepare_baseline(baseline_dir)
assert success
with open(env_path, encoding="utf-8") as f:
env_content = f.read()
assert "BASELINE_EXISTS=true" in env_content
assert "BASELINE_SOURCE=artifact" in env_content
assert "BASELINE_ORIGIN=artifact" in env_content
captured = capsys.readouterr()
assert "Prepared baseline from artifact: baseline-manual-test.txt" in captured.out
assert " → baseline_results.txt" in captured.out
standard_file = baseline_dir / "baseline_results.txt"
assert standard_file.exists()
assert "Test CPU" in standard_file.read_text(encoding="utf-8")
finally:
Path(env_path).unlink(missing_ok=True)
def test_prepare_baseline_prefers_standard_name(self, capsys):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
standard_file = baseline_dir / "baseline_results.txt"
tag_file = baseline_dir / "baseline-v1.0.0.txt"
standard_content = "Standard file content"
tag_content = "Tag-specific file content"
standard_file.write_text(standard_content)
tag_file.write_text(tag_content)
with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
env_path = env_file.name
try:
with patch.dict(os.environ, {"GITHUB_ENV": env_path}):
success = BenchmarkRegressionHelper.prepare_baseline(baseline_dir)
assert success
with open(env_path, encoding="utf-8") as f:
env_content = f.read()
assert "BASELINE_EXISTS=true" in env_content
assert "BASELINE_SOURCE=artifact" in env_content
assert "BASELINE_ORIGIN=artifact" in env_content
captured = capsys.readouterr()
assert "Prepared baseline from artifact" in captured.out
assert " → " not in captured.out
assert standard_file.read_text(encoding="utf-8") == standard_content
finally:
Path(env_path).unlink(missing_ok=True)
def test_prepare_baseline_no_matching_files(self, capsys):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
(baseline_dir / "metadata.json").write_text("{}")
(baseline_dir / "random.txt").write_text("Not a baseline")
(baseline_dir / "results.log").write_text("Log data")
with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
env_path = env_file.name
try:
with patch.dict(os.environ, {"GITHUB_ENV": env_path}):
success = BenchmarkRegressionHelper.prepare_baseline(baseline_dir)
assert not success
with open(env_path, encoding="utf-8") as f:
env_content = f.read()
assert "BASELINE_EXISTS=false" in env_content
assert "BASELINE_SOURCE=missing" in env_content
assert "BASELINE_ORIGIN=unknown" in env_content
captured = capsys.readouterr()
assert "❌ Downloaded artifact but no baseline*.txt files found" in captured.err
finally:
Path(env_path).unlink(missing_ok=True)
def test_extract_baseline_commit_from_tag_file(self):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
tag_baseline_file = baseline_dir / "baseline-v0.4.3.txt"
baseline_content = """Date: 2025-09-13 00:00:36 UTC
Git commit: 1062551a9152a53e938ddbf94c4152ff6ae4254d
Tag: v0.4.3
Hardware Information:
OS: macOS
"""
tag_baseline_file.write_text(baseline_content)
with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
env_path = env_file.name
try:
with patch.dict(os.environ, {"GITHUB_ENV": env_path}):
commit_sha = BenchmarkRegressionHelper.extract_baseline_commit(baseline_dir)
assert commit_sha == "1062551a9152a53e938ddbf94c4152ff6ae4254d"
with open(env_path, encoding="utf-8") as f:
env_content = f.read()
assert "BASELINE_COMMIT=1062551a9152a53e938ddbf94c4152ff6ae4254d" in env_content
assert "BASELINE_COMMIT_SOURCE=baseline" in env_content
finally:
Path(env_path).unlink(missing_ok=True)
def test_extract_baseline_commit_fallback_to_metadata(self):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
tag_baseline_file = baseline_dir / "baseline-v0.4.3.txt"
metadata_file = baseline_dir / "metadata.json"
baseline_content = """Date: 2025-09-13 00:00:36 UTC
Tag: v0.4.3
Hardware Information:
OS: macOS
"""
tag_baseline_file.write_text(baseline_content)
metadata = {"tag": "v0.4.3", "commit": "fedcba987654321", "generated_at": "2025-09-13T00:00:36Z"}
with metadata_file.open("w", encoding="utf-8") as f:
json.dump(metadata, f)
with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
env_path = env_file.name
try:
with patch.dict(os.environ, {"GITHUB_ENV": env_path}):
commit_sha = BenchmarkRegressionHelper.extract_baseline_commit(baseline_dir)
assert commit_sha == "fedcba987654321"
with open(env_path, encoding="utf-8") as f:
env_content = f.read()
assert "BASELINE_COMMIT=fedcba987654321" in env_content
assert "BASELINE_COMMIT_SOURCE=metadata" in env_content
finally:
Path(env_path).unlink(missing_ok=True)
def test_extract_baseline_commit_handles_multiple_tag_files(self):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
tag_file_1 = baseline_dir / "baseline-v0.4.1.txt"
tag_file_2 = baseline_dir / "baseline-v0.4.3.txt"
tag_content_1 = """Date: 2025-09-13 00:00:36 UTC
Git commit: abc123def456
Tag: v0.4.1
"""
tag_content_2 = """Date: 2025-09-13 00:00:36 UTC
Git commit: def456abc789
Tag: v0.4.3
"""
tag_file_1.write_text(tag_content_1)
tag_file_2.write_text(tag_content_2)
with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
env_path = env_file.name
try:
with patch.dict(os.environ, {"GITHUB_ENV": env_path}):
commit_sha = BenchmarkRegressionHelper.extract_baseline_commit(baseline_dir)
assert commit_sha == "def456abc789"
finally:
Path(env_path).unlink(missing_ok=True)
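# _find_baseline_file selection rules exercised next: a stable release baseline beats a
# pre-release of the same version, and when only pre-releases exist, the highest one wins.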
def test_semver_prefers_stable_over_prerelease(self):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
stable_file = baseline_dir / "baseline-v1.2.3.txt"
prerelease_file = baseline_dir / "baseline-v1.2.3-beta.1.txt"
older_stable = baseline_dir / "baseline-v1.2.2.txt"
stable_file.write_text("Stable v1.2.3")
prerelease_file.write_text("Pre-release v1.2.3-beta.1")
older_stable.write_text("Older stable v1.2.2")
selected = BenchmarkRegressionHelper._find_baseline_file(baseline_dir)
assert selected is not None
assert selected.name == "baseline-v1.2.3.txt"
def test_semver_v043_vs_v043_beta1_preference(self):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
stable_file = baseline_dir / "baseline-v0.4.3.txt"
prerelease_file = baseline_dir / "baseline-v0.4.3-beta.1.txt"
stable_file.write_text("Date: 2023-12-15\nGit commit: stable043\nTag: v0.4.3\n")
prerelease_file.write_text("Date: 2023-12-15\nGit commit: beta043\nTag: v0.4.3-beta.1\n")
selected = BenchmarkRegressionHelper._find_baseline_file(baseline_dir)
assert selected is not None
assert selected.name == "baseline-v0.4.3.txt"
content = selected.read_text()
assert "stable043" in content
assert "Tag: v0.4.3" in content
def test_semver_prefers_higher_prerelease_when_no_stable(self):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
beta1_file = baseline_dir / "baseline-v1.2.3-beta.1.txt"
beta2_file = baseline_dir / "baseline-v1.2.3-beta.2.txt"
alpha_file = baseline_dir / "baseline-v1.2.3-alpha.1.txt"
beta1_file.write_text("Beta 1")
beta2_file.write_text("Beta 2")
alpha_file.write_text("Alpha 1")
selected = BenchmarkRegressionHelper._find_baseline_file(baseline_dir)
assert selected is not None
assert selected.name == "baseline-v1.2.3-beta.2.txt"
def test_baseline_commit_source_from_baseline_file(self):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
baseline_file = baseline_dir / "baseline_results.txt"
baseline_content = """Date: 2023-12-15 10:30:00 UTC
Git commit: abc123def456
Hardware Information:
OS: macOS
"""
baseline_file.write_text(baseline_content)
with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
env_path = env_file.name
try:
with patch.dict(os.environ, {"GITHUB_ENV": env_path}):
commit_sha = BenchmarkRegressionHelper.extract_baseline_commit(baseline_dir)
assert commit_sha == "abc123def456"
with open(env_path, encoding="utf-8") as f:
env_content = f.read()
assert "BASELINE_COMMIT=abc123def456" in env_content
assert "BASELINE_COMMIT_SOURCE=baseline" in env_content
assert "BASELINE_SOURCE_FILE=baseline_results.txt" in env_content
finally:
Path(env_path).unlink(missing_ok=True)
def test_baseline_commit_source_from_metadata_file(self):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
baseline_file = baseline_dir / "baseline_results.txt"
baseline_file.write_text("Date: 2023-12-15\nHardware: Test\n")
metadata_file = baseline_dir / "metadata.json"
metadata = {"commit": "def456abc789", "tag": "v1.0.0"}
with metadata_file.open("w", encoding="utf-8") as f:
json.dump(metadata, f)
with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
env_path = env_file.name
try:
with patch.dict(os.environ, {"GITHUB_ENV": env_path}):
commit_sha = BenchmarkRegressionHelper.extract_baseline_commit(baseline_dir)
assert commit_sha == "def456abc789"
with open(env_path, encoding="utf-8") as f:
env_content = f.read()
assert "BASELINE_COMMIT=def456abc789" in env_content
assert "BASELINE_COMMIT_SOURCE=metadata" in env_content
finally:
Path(env_path).unlink(missing_ok=True)
def test_baseline_commit_source_unknown_when_no_commit_found(self):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
baseline_file = baseline_dir / "baseline_results.txt"
baseline_file.write_text("Date: 2023-12-15\n")
with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
env_path = env_file.name
try:
with patch.dict(os.environ, {"GITHUB_ENV": env_path}):
commit_sha = BenchmarkRegressionHelper.extract_baseline_commit(baseline_dir)
assert commit_sha == "unknown"
with open(env_path, encoding="utf-8") as f:
env_content = f.read()
assert "BASELINE_COMMIT=unknown" in env_content
assert "BASELINE_COMMIT_SOURCE=unknown" in env_content
finally:
Path(env_path).unlink(missing_ok=True)
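# write_github_env_vars appends KEY=value lines to the file named by $GITHUB_ENV and mirrors the
# same values into os.environ so the current process sees them without re-reading the file.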
def test_env_vars_mirrored_to_current_process(self):
with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
env_path = env_file.name
try:
for key in ["TEST_BASELINE_EXISTS", "TEST_BASELINE_SOURCE"]:
os.environ.pop(key, None)
test_vars = {
"TEST_BASELINE_EXISTS": "true",
"TEST_BASELINE_SOURCE": "artifact",
}
with patch.dict(os.environ, {"GITHUB_ENV": env_path}):
BenchmarkRegressionHelper.write_github_env_vars(test_vars)
with open(env_path, encoding="utf-8") as f:
content = f.read()
assert "TEST_BASELINE_EXISTS=true" in content
assert "TEST_BASELINE_SOURCE=artifact" in content
assert os.environ["TEST_BASELINE_EXISTS"] == "true"
assert os.environ["TEST_BASELINE_SOURCE"] == "artifact"
finally:
Path(env_path).unlink(missing_ok=True)
for key in ["TEST_BASELINE_EXISTS", "TEST_BASELINE_SOURCE"]:
os.environ.pop(key, None)
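# Multi-line values cannot use the plain KEY=value form; GitHub Actions expects a heredoc-style
# block (KEY<<DELIMITER ... DELIMITER), hence the "TEST_MULTILINE<<EOF_" assertion. CRLF input
# is also expected to be normalized to LF before writing.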
def test_env_vars_multiline_handling(self):
with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
env_path = env_file.name
try:
for key in ["TEST_MULTILINE", "TEST_SINGLE_LINE", "TEST_WITH_CR"]:
os.environ.pop(key, None)
multiline_value = "Line 1\nLine 2\nLine 3"
cr_value = "Line 1\r\nLine 2\r\nLine 3"
test_vars = {
"TEST_MULTILINE": multiline_value,
"TEST_SINGLE_LINE": "single",
"TEST_WITH_CR": cr_value,
}
with patch.dict(os.environ, {"GITHUB_ENV": env_path}):
BenchmarkRegressionHelper.write_github_env_vars(test_vars)
with open(env_path, encoding="utf-8") as f:
content = f.read()
assert "TEST_SINGLE_LINE=single" in content
assert "TEST_MULTILINE<<EOF_" in content
assert "Line 1\nLine 2\nLine 3" in content
assert "Line 1\nLine 2\nLine 3" in content
assert "\r" not in content
assert os.environ["TEST_MULTILINE"] == multiline_value
assert os.environ["TEST_SINGLE_LINE"] == "single"
assert os.environ["TEST_WITH_CR"] == "Line 1\nLine 2\nLine 3"
finally:
Path(env_path).unlink(missing_ok=True)
for key in ["TEST_MULTILINE", "TEST_SINGLE_LINE", "TEST_WITH_CR"]:
os.environ.pop(key, None)
def test_env_vars_none_value_handling(self):
with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
env_path = env_file.name
try:
for key in ["TEST_NONE", "TEST_NORMAL"]:
os.environ.pop(key, None)
test_vars = {
"TEST_NONE": None,
"TEST_NORMAL": "normal_value",
}
with patch.dict(os.environ, {"GITHUB_ENV": env_path}):
BenchmarkRegressionHelper.write_github_env_vars(test_vars)
with open(env_path, encoding="utf-8") as f:
content = f.read()
assert "TEST_NONE=" in content assert "TEST_NORMAL=normal_value" in content
assert os.environ["TEST_NONE"] == "" assert os.environ["TEST_NORMAL"] == "normal_value"
finally:
Path(env_path).unlink(missing_ok=True)
for key in ["TEST_NONE", "TEST_NORMAL"]:
os.environ.pop(key, None)
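# Tag values end up in GITHUB_ENV, so shell metacharacters must be neutralized and the length
# capped at 64 characters. The observed output is consistent with something like
# re.sub(r"[^A-Za-z0-9._-]", "_", tag)[:64], though that exact expression is an assumption.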
def test_baseline_tag_sanitization(self):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
baseline_file = baseline_dir / "baseline_results.txt"
baseline_content = """Date: 2023-12-15 10:30:00 UTC
Git commit: abc123def456
Tag: v1.0.0; echo "injected"; rm -rf /tmp/test
Hardware Information:
OS: macOS
"""
baseline_file.write_text(baseline_content)
with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
env_path = env_file.name
try:
with patch.dict(os.environ, {"GITHUB_ENV": env_path}):
success = BenchmarkRegressionHelper.prepare_baseline(baseline_dir)
assert success
with open(env_path, encoding="utf-8") as f:
env_content = f.read()
assert "BASELINE_TAG=v1.0.0__echo__injected___rm_-rf__tmp_test" in env_content
assert "; echo " not in env_content
assert "rm -rf" not in env_content
assert "BASELINE_EXISTS=true" in env_content
assert "BASELINE_SOURCE=artifact" in env_content
finally:
Path(env_path).unlink(missing_ok=True)
def test_baseline_tag_length_capping(self):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
baseline_file = baseline_dir / "baseline_results.txt"
long_tag = "v1.0.0-" + "a" * 100 baseline_content = f"""Date: 2023-12-15 10:30:00 UTC
Git commit: abc123def456
Tag: {long_tag}
Hardware Information:
OS: macOS
"""
baseline_file.write_text(baseline_content)
with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
env_path = env_file.name
try:
with patch.dict(os.environ, {"GITHUB_ENV": env_path}):
success = BenchmarkRegressionHelper.prepare_baseline(baseline_dir)
assert success
with open(env_path, encoding="utf-8") as f:
env_content = f.read()
tag_line = next((line for line in env_content.split("\n") if line.startswith("BASELINE_TAG=")), None)
assert tag_line is not None
tag_value = tag_line.split("=", 1)[1]
assert len(tag_value) <= 64
assert tag_value.startswith("v1.0.0-")
finally:
Path(env_path).unlink(missing_ok=True)
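# The filenames below use PEP 440-style pre-release tags (a1 < b1 < b2 < rc1 < final release),
# so the final v2.0.0 baseline should be chosen over every 2.0.0 pre-release and all 1.9.x files.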
def test_packaging_version_complex_comparisons(self):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
files_and_expected_order = [
"baseline-v2.0.0.txt", "baseline-v2.0.0rc1.txt", "baseline-v2.0.0b2.txt", "baseline-v2.0.0b1.txt", "baseline-v2.0.0a1.txt", "baseline-v1.9.0.txt", "baseline-v1.9.0rc1.txt", ]
for filename in reversed(files_and_expected_order):
file = baseline_dir / filename
file.write_text(f"Content of {filename}")
selected = BenchmarkRegressionHelper._find_baseline_file(baseline_dir)
assert selected is not None
assert selected.name == "baseline-v2.0.0.txt"
def test_packaging_version_invalid_versions(self):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
valid_file1 = baseline_dir / "baseline-v1.0.0.txt"
valid_file2 = baseline_dir / "baseline-v1.2.txt" invalid_file1 = baseline_dir / "baseline-vInvalid.txt"
generic_file = baseline_dir / "baseline-generic.txt"
valid_file1.write_text("Valid 1.0.0 content")
valid_file2.write_text("Valid 1.2.0 content")
invalid_file1.write_text("Invalid version content")
generic_file.write_text("Generic content")
selected = BenchmarkRegressionHelper._find_baseline_file(baseline_dir)
assert selected is not None
assert selected.name == "baseline-v1.2.txt"
assert "Valid 1.2.0 content" in selected.read_text()
def test_packaging_version_truly_invalid_versions(self):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
invalid_file1 = baseline_dir / "baseline-vInvalid.txt"
invalid_file2 = baseline_dir / "baseline-v1.2.3.4.5.txt" invalid_file3 = baseline_dir / "baseline-vNot-A-Version.txt"
generic_file = baseline_dir / "baseline_results.txt"
invalid_file1.write_text("Invalid content 1")
invalid_file2.write_text("Invalid content 2")
invalid_file3.write_text("Invalid content 3")
generic_file.write_text("Generic baseline content")
selected = BenchmarkRegressionHelper._find_baseline_file(baseline_dir)
assert selected is not None
assert selected.name == "baseline_results.txt"
assert "Generic baseline content" in selected.read_text()
def test_generic_baseline_prefers_newest_mtime(self):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
older_file = baseline_dir / "baseline-older.txt"
newer_file = baseline_dir / "baseline-newer.txt"
older_file.write_text("Older baseline content")
older_mtime = time.time() - 100
os.utime(older_file, (older_mtime, older_mtime))
newer_file.write_text("Newer baseline content")
newer_mtime = time.time() - 50
os.utime(newer_file, (newer_mtime, newer_mtime))
selected = BenchmarkRegressionHelper._find_baseline_file(baseline_dir)
assert selected is not None
assert selected.name == "baseline-newer.txt"
assert "Newer baseline content" in selected.read_text()
def test_prerelease_detection_fix_validation(self):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
stable_file = baseline_dir / "baseline-v1.0.0.txt"
prerelease_file = baseline_dir / "baseline-v1.0.0-rc.1.txt"
stable_file.write_text("Stable content")
prerelease_file.write_text("Prerelease content")
selected = BenchmarkRegressionHelper._find_baseline_file(baseline_dir)
assert selected is not None
assert selected.name == "baseline-v1.0.0.txt"
assert "Stable content" in selected.read_text()
def test_prepare_baseline_and_extract_commit_integration(self):
with tempfile.TemporaryDirectory() as temp_dir:
baseline_dir = Path(temp_dir)
tag_baseline_file = baseline_dir / "baseline-v0.4.3.txt"
baseline_content = """Date: 2025-09-13 00:00:36 UTC
Git commit: 1234567890abcdef
Tag: v0.4.3
Hardware Information:
OS: macOS
CPU: Apple M1 (Virtual)
=== 10 Points (2D) ===
Time: [160.1, 168.18, 177.67] µs
"""
tag_baseline_file.write_text(baseline_content)
with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file:
env_path = env_file.name
try:
with patch.dict(os.environ, {"GITHUB_ENV": env_path}):
prepare_success = BenchmarkRegressionHelper.prepare_baseline(baseline_dir)
assert prepare_success
standard_file = baseline_dir / "baseline_results.txt"
assert standard_file.exists()
commit_sha = BenchmarkRegressionHelper.extract_baseline_commit(baseline_dir)
assert commit_sha == "1234567890abcdef"
with open(env_path, encoding="utf-8") as f:
env_content = f.read()
assert "BASELINE_TAG=v0.4.3" in env_content
assert "BASELINE_SOURCE_FILE=baseline-v0.4.3.txt" in env_content
finally:
Path(env_path).unlink(missing_ok=True)