import argparse
import logging
import re
import shutil
import sys
from datetime import UTC, datetime
from json import loads
from pathlib import Path
try:
    from subprocess_utils import find_project_root, run_cargo_command
except ModuleNotFoundError:
    from scripts.subprocess_utils import find_project_root, run_cargo_command
logger = logging.getLogger(__name__)
class StorageBackendComparator:
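    """Runs a Criterion benchmark under both storage backends and reports the results."""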
def __init__(self, project_root: Path):
self.project_root = project_root
self.criterion_dir = project_root / "target" / "criterion"
def run_comparison(
self,
benchmark_name: str = "large_scale_performance",
dev_mode: bool = False,
output_path: Path | None = None,
extra_args: list[str] | None = None,
) -> bool:
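        """Run the benchmark with each backend and write a Markdown comparison report.

        Returns True on success, False if either benchmark run or report generation fails.
        """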
try:
if output_path is None:
output_path = self.project_root / "artifacts" / "storage_comparison.md"
output_path.parent.mkdir(parents=True, exist_ok=True)
print("🔍 Running storage backend comparison...")
print(f" Benchmark: {benchmark_name}")
print(f" Mode: {'Development (reduced scale)' if dev_mode else 'Production (full scale)'}")
print()
print("📊 Running benchmarks with SlotMap backend...")
logger.debug("Running SlotMap benchmarks with extra_args=%s", extra_args)
slotmap_results = self._run_benchmark(benchmark_name, use_dense_slotmap=False, dev_mode=dev_mode, extra_args=extra_args)
if slotmap_results is None:
print("❌ SlotMap benchmark failed", file=sys.stderr)
return False
print("\n📊 Running benchmarks with DenseSlotMap backend...")
logger.debug("Running DenseSlotMap benchmarks with extra_args=%s", extra_args)
denseslotmap_results = self._run_benchmark(benchmark_name, use_dense_slotmap=True, dev_mode=dev_mode, extra_args=extra_args)
if denseslotmap_results is None:
print("❌ DenseSlotMap benchmark failed", file=sys.stderr)
return False
print("\n📝 Generating comparison report...")
logger.debug(
"Generating comparison report with %d SlotMap and %d DenseSlotMap benchmarks",
len(slotmap_results.get("benchmarks", [])),
len(denseslotmap_results.get("benchmarks", [])),
)
report = self._generate_comparison_report(slotmap_results, denseslotmap_results, benchmark_name, dev_mode)
with output_path.open("w", encoding="utf-8") as f:
f.write(report)
print(f"\n✅ Comparison report saved: {output_path}")
return True
except Exception as e:
print(f"❌ Comparison failed: {e}", file=sys.stderr)
            logger.exception("Comparison failed")
return False
def _run_benchmark(self, benchmark_name: str, use_dense_slotmap: bool, dev_mode: bool, extra_args: list[str] | None = None) -> dict | None:
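        """Run a single Criterion benchmark and return parsed results, or None on failure."""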
try:
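            # Clear stale Criterion output so only this run's results are parsed.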
shutil.rmtree(self.criterion_dir, ignore_errors=True)
args = ["bench", "--bench", benchmark_name]
args.insert(1, "--no-default-features")
if use_dense_slotmap:
args.extend(["--features", "dense-slotmap"])
if dev_mode or extra_args:
if "--" not in args:
args.append("--")
if dev_mode:
args.extend(
[
"--sample-size",
"10",
"--measurement-time",
"2",
"--warm-up-time",
"1",
"--noplot",
]
)
if extra_args:
args.extend(extra_args)
result = run_cargo_command(
args,
cwd=self.project_root,
check=False,
)
success = result.returncode == 0
stdout = result.stdout or ""
stderr = result.stderr or ""
if not success:
print(f" ⚠️ Benchmark failed for {'DenseSlotMap' if use_dense_slotmap else 'SlotMap'}", file=sys.stderr)
if stderr:
print(f" Error: {stderr}", file=sys.stderr)
return None
results = self._parse_criterion_output(stdout)
results["backend"] = "DenseSlotMap" if use_dense_slotmap else "SlotMap"
results["features"] = ["dense-slotmap"] if use_dense_slotmap else []
return results
except Exception:
            logger.exception("Benchmark execution failed")
return None
def _parse_criterion_output(self, output: str) -> dict:
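        """Parse benchmark results, preferring Criterion's JSON files over stdout scraping."""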
benchmarks_list: list[dict[str, float | str]] = []
results = {
"benchmarks": benchmarks_list,
"raw_output": output,
}
json_found = False
try:
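            # Criterion writes per-benchmark statistics to
            # target/criterion/**/new/estimates.json; the mean point estimate
            # and its confidence interval are reported in nanoseconds.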
criterion_path = self.criterion_dir
if criterion_path.exists():
for path in criterion_path.rglob("new/estimates.json"):
try:
data = loads(path.read_text(encoding="utf-8"))
estimate = data["mean"]["point_estimate"]
name = path.parent.parent.name
lower_bound = float(data["mean"]["confidence_interval"]["lower_bound"])
upper_bound = float(data["mean"]["confidence_interval"]["upper_bound"])
benchmarks_list.append(
{
"name": name,
"estimate": float(estimate),
"unit": "ns", "lower": lower_bound,
"upper": upper_bound,
}
)
json_found = True
logger.debug("Parsed JSON for benchmark: %s", name)
except Exception as e:
logger.debug("Failed to parse JSON from %s: %s", path, e)
continue
except Exception:
logger.debug("JSON parsing failed, falling back to regex")
if not json_found:
logger.debug("Using regex fallback for Criterion output parsing")
pattern = r"([A-Za-z0-9_:\-./]+)\s+time:\s+\[([0-9.]+)\s+(\w+)\s+([0-9.]+)\s+(\w+)\s+([0-9.]+)\s+(\w+)\]"
for match in re.finditer(pattern, output):
name = match.group(1)
lower_value = float(match.group(2))
estimate = float(match.group(4))
estimate_unit = match.group(5)
upper_value = float(match.group(6))
benchmarks_list.append(
{
"name": name,
"estimate": estimate,
"unit": estimate_unit,
"lower": lower_value,
"upper": upper_value,
}
)
return results
def _build_comparison_table(self, slotmap_by_name: dict, denseslotmap_by_name: dict, all_names: list) -> tuple[list[str], list[float]]:
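        """Build Markdown table rows and collect per-benchmark percentage differences."""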
lines = []
diffs = []
for name in all_names:
slotmap_bench = slotmap_by_name.get(name)
denseslotmap_bench = denseslotmap_by_name.get(name)
if slotmap_bench and denseslotmap_bench:
slotmap_time = slotmap_bench["estimate"]
denseslotmap_time = denseslotmap_bench["estimate"]
unit = slotmap_bench["unit"]
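                # Negative diff => DenseSlotMap is faster; positive => SlotMap is faster.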
diff_pct = ((denseslotmap_time - slotmap_time) / slotmap_time) * 100
diffs.append(diff_pct)
if abs(diff_pct) < 2.0:
winner, emoji = "~Same", "🟡"
elif diff_pct < 0:
winner, emoji = "✅ DenseSlotMap", "🟢"
else:
winner, emoji = "✅ SlotMap", "🟢"
lines.append(f"| {name} | {slotmap_time:.2f} {unit} | {denseslotmap_time:.2f} {unit} | {diff_pct:+.1f}% {emoji} | {winner} |")
elif slotmap_bench:
lines.append(f"| {name} | {slotmap_bench['estimate']:.2f} {slotmap_bench['unit']} | N/A | - | - |")
elif denseslotmap_bench:
lines.append(f"| {name} | N/A | {denseslotmap_bench['estimate']:.2f} {denseslotmap_bench['unit']} | - | - |")
return lines, diffs
def _generate_comparison_report(
self,
slotmap_results: dict,
denseslotmap_results: dict,
benchmark_name: str,
dev_mode: bool,
) -> str:
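        """Render the full Markdown comparison report."""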
lines = [
"# Storage Backend Comparison Report",
"",
f"**Benchmark**: `{benchmark_name}`",
f"**Mode**: {'Development (reduced scale)' if dev_mode else 'Production (full scale)'}",
f"**Generated**: {datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%S UTC')}",
"",
"## Executive Summary",
"",
"This report compares the performance of DenseSlotMap (default) vs SlotMap storage backends",
"for the Delaunay triangulation data structure.",
"",
"### Key Metrics",
"",
"- **Construction**: Time to build triangulation",
"- **Iteration**: Speed of vertex/cell/neighbor traversals",
"- **Queries**: Lookup and contains-key performance",
"",
"## Detailed Results",
"",
"### Performance Comparison",
"",
"| Benchmark | SlotMap | DenseSlotMap | Difference | Winner |",
"|-----------|---------|--------------|------------|--------|",
]
slotmap_by_name = {b["name"]: b for b in slotmap_results["benchmarks"]}
denseslotmap_by_name = {b["name"]: b for b in denseslotmap_results["benchmarks"]}
all_names = sorted(set(slotmap_by_name.keys()) | set(denseslotmap_by_name.keys()))
table_lines, diffs = self._build_comparison_table(slotmap_by_name, denseslotmap_by_name, all_names)
lines.extend(table_lines)
lines.extend(["", "## Summary Statistics", ""])
if diffs:
avg_diff = sum(diffs) / len(diffs)
lines.extend(
[
f"- **Average Performance Difference**: {avg_diff:+.1f}%",
f"- **Best Case (DenseSlotMap)**: {min(diffs):+.1f}%",
f"- **Worst Case (DenseSlotMap)**: {max(diffs):+.1f}%",
f"- **Benchmarks Compared**: {len(diffs)}",
"",
]
)
if avg_diff < -5:
lines.append("**Interpretation**: DenseSlotMap shows significant performance improvement (>5% faster)")
elif avg_diff > 5:
lines.append("**Interpretation**: SlotMap shows better performance (DenseSlotMap >5% slower)")
else:
lines.append("**Interpretation**: Performance is comparable between backends (within 5%)")
lines.extend(["", "## Recommendations", ""])
if avg_diff < -5:
lines.extend(
[
"✅ **Recommend DenseSlotMap (default)** for this workload:",
"- Significant iteration performance improvement",
"- Better cache locality for traversal patterns",
"- No flags needed (it is the default backend)",
]
)
elif avg_diff > 5:
lines.extend(
[
"✅ **Recommend SlotMap** for this workload:",
"- Better overall performance",
"- Use `--no-default-features` to enable SlotMap",
]
)
else:
lines.extend(
[
"🟡 **Either backend is suitable** for this workload:",
"- Performance is comparable",
"- Choose based on use case:",
" - SlotMap: Better for dynamic insertions/removals",
" - DenseSlotMap: Better for iteration-heavy workloads",
]
)
else:
lines.append("*No matching benchmarks found for comparison*")
lines.extend(
[
"",
"## Reproduction",
"",
"To reproduce these results:",
"",
"```bash",
"# DenseSlotMap",
f"cargo bench --no-default-features --features dense-slotmap --bench {benchmark_name}",
"",
"# SlotMap",
f"cargo bench --no-default-features --bench {benchmark_name}",
"```",
"",
"---",
"",
"*Generated by compare_storage_backends.py*",
]
)
return "\n".join(lines)
def main():
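    """Parse CLI arguments and run the storage backend comparison."""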
parser = argparse.ArgumentParser(
description="Compare SlotMap vs DenseSlotMap storage backend performance",
formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument(
"--bench",
default="large_scale_performance",
help="Benchmark to run (default: large_scale_performance)",
)
parser.add_argument(
"--dev",
action="store_true",
help="Use development mode (reduced scale for faster iteration)",
)
parser.add_argument(
"--output",
type=Path,
help="Output file path (default: artifacts/storage_comparison.md)",
)
parser.add_argument(
"--verbose",
"-v",
action="store_true",
help="Enable verbose logging",
)
parser.add_argument(
"--filter",
help="Pass filter to Criterion benchmarks (e.g., 'construction')",
)
args = parser.parse_args()
logging.basicConfig(
level=logging.DEBUG if args.verbose else logging.INFO,
format="%(levelname)s: %(message)s",
)
try:
project_root = find_project_root()
comparator = StorageBackendComparator(project_root)
extra_args = [args.filter] if args.filter else None
success = comparator.run_comparison(
benchmark_name=args.bench,
dev_mode=args.dev,
output_path=args.output,
extra_args=extra_args,
)
sys.exit(0 if success else 1)
except Exception as e:
print(f"❌ Error: {e}", file=sys.stderr)
        logger.exception("Fatal error")
sys.exit(1)
if __name__ == "__main__":
main()