import argparse
import json
import os
import sys
import glob
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
def _select_estimate(data: Any) -> Any:
    """Return the preferred estimate record (``slope``, else ``mean``), or None."""
    if not isinstance(data, dict):
        return None
    return data.get("slope") or data.get("mean") or None


def parse_criterion_dir(criterion_dir: str) -> dict[str, dict[str, Any]]:
    """Collect the latest estimates for every benchmark under a Criterion directory.

    Every ``new/estimates.json`` file below *criterion_dir* is read. The
    benchmark name is the file's path relative to *criterion_dir* with the
    trailing ``new/estimates.json`` removed, joined with ``/``.

    The ``slope`` estimate is used when present and non-empty, otherwise the
    ``mean`` estimate. Files that cannot be read, decoded, or that do not have
    the expected structure are reported on stderr and skipped, so one bad file
    never aborts the whole scan.

    Args:
        criterion_dir: Root of the Criterion output tree.

    Returns:
        Mapping of benchmark name to a dict with the keys
        ``point_estimate_ns``, ``ci_lower_ns``, ``ci_upper_ns`` and
        ``std_dev_ns`` (``None`` when no standard deviation is available).
        Empty when the directory does not exist or holds no usable results.
    """
    results: dict[str, dict[str, Any]] = {}
    base = Path(criterion_dir)
    if not base.exists():
        print(f"[chronos] WARNING: Criterion directory not found: {criterion_dir}", file=sys.stderr)
        return results

    # Sorted so that the resulting dict order does not depend on filesystem order.
    for estimates_path in sorted(base.rglob("new/estimates.json")):
        try:
            with open(estimates_path, encoding="utf-8") as f:
                data = json.load(f)
        except (ValueError, OSError) as e:
            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
            print(f"[chronos] WARNING: Could not read {estimates_path}: {e}", file=sys.stderr)
            continue

        # Drop the trailing "new" and "estimates.json" path components.
        bench_name = "/".join(estimates_path.relative_to(base).parts[:-2])

        estimate = _select_estimate(data)
        if estimate is None:
            print(f"[chronos] WARNING: No slope/mean in {estimates_path}", file=sys.stderr)
            continue

        try:
            interval = estimate["confidence_interval"]
            summary = {
                "point_estimate_ns": estimate["point_estimate"],
                "ci_lower_ns": interval["lower_bound"],
                "ci_upper_ns": interval["upper_bound"],
            }
        except (KeyError, TypeError) as e:
            # A structurally broken file must not abort the scan of the others.
            print(f"[chronos] WARNING: Malformed estimate in {estimates_path}: {e!r}", file=sys.stderr)
            continue

        # The standard deviation is optional; tolerate it being absent or malformed.
        std_dev_record = data.get("std_dev")
        summary["std_dev_ns"] = (
            std_dev_record.get("point_estimate") if isinstance(std_dev_record, dict) else None
        )
        results[bench_name] = summary

    return results
def load_history(history_file: str) -> list[dict]:
    """Read the benchmark history ledger from disk.

    Args:
        history_file: Path to the JSON ledger.

    Returns:
        The decoded JSON content of the file, or an empty list when the file
        does not exist yet.
    """
    ledger = Path(history_file)
    if not ledger.exists():
        # First run: there is no ledger to read.
        return []
    with ledger.open() as handle:
        return json.load(handle)
def save_history(history: list[dict], history_file: str):
    """Write the history ledger to disk as indented JSON.

    Missing parent directories are created. A confirmation line with the
    number of entries is printed to stdout.

    Args:
        history: All recorded runs, oldest first.
        history_file: Destination path of the JSON ledger.
    """
    ledger = Path(history_file)
    ledger.parent.mkdir(parents=True, exist_ok=True)
    with ledger.open("w") as handle:
        json.dump(history, handle, indent=2)
    entry_count = len(history)
    print(f"[chronos] History saved: {history_file} ({entry_count} entries)")
def append_to_history(
    history: list[dict],
    benchmarks: dict[str, dict],
    commit: str,
    branch: str,
) -> dict:
    """Record the current run at the end of the history list.

    The list is modified in place. The new entry is stamped with the current
    UTC time in ISO 8601 format.

    Args:
        history: Existing runs, oldest first; receives the new entry.
        benchmarks: Benchmark results of the current run, keyed by name.
        commit: Commit identifier of the current run.
        branch: Branch name of the current run.

    Returns:
        The entry that was appended.
    """
    recorded_at = datetime.now(timezone.utc).isoformat()
    run = dict(
        timestamp=recorded_at,
        commit=commit,
        branch=branch,
        benchmarks=benchmarks,
    )
    history.append(run)
    return run
def compare_to_previous(
    history: list[dict],
    current: dict[str, dict],
    threshold_improved: float = -0.02,
    threshold_regressed: float = 0.02,
    threshold_critical: float = 0.10,
) -> list[dict]:
    """Compare the current results with the most recent earlier run.

    The last element of *history* is ignored when looking for a baseline
    (main() appends the current run before calling this). The baseline is the
    newest remaining entry whose ``benchmarks`` mapping is non-empty.

    Args:
        history: Recorded runs, oldest first.
        current: Current results keyed by benchmark name; each value carries
            ``point_estimate_ns``, ``ci_lower_ns`` and ``ci_upper_ns``.
        threshold_improved: Relative change at or below which a benchmark
            counts as improved (a negative fraction).
        threshold_regressed: Relative change at or above which a benchmark
            counts as regressed.
        threshold_critical: Relative change at or above which a benchmark
            counts as a critical regression.

    Returns:
        One dict per current benchmark, sorted by name, with the keys
        ``name``, ``current_ns``, ``current_ci_lower``, ``current_ci_upper``,
        ``previous_ns``, ``change_pct`` and ``status``. ``status`` stays
        ``"new"`` when there is no baseline value or the baseline is not
        positive.
    """

    def classify(delta):
        # Improvement is tested first, then the stricter regression bound.
        if delta <= threshold_improved:
            return "improved"
        if delta >= threshold_critical:
            return "critical_regression"
        if delta >= threshold_regressed:
            return "regressed"
        return "stable"

    earlier_runs = reversed(history[:-1])
    baseline_entry = next((run for run in earlier_runs if run.get("benchmarks")), None)
    baseline = baseline_entry["benchmarks"] if baseline_entry else {}

    report = []
    for name in sorted(current):
        measured = current[name]
        row = {
            "name": name,
            "current_ns": measured["point_estimate_ns"],
            "current_ci_lower": measured["ci_lower_ns"],
            "current_ci_upper": measured["ci_upper_ns"],
            "previous_ns": None,
            "change_pct": None,
            "status": "new",
        }
        if name in baseline:
            before = baseline[name]["point_estimate_ns"]
            row["previous_ns"] = before
            if before > 0:
                delta = (row["current_ns"] - before) / before
                row["change_pct"] = delta
                row["status"] = classify(delta)
        report.append(row)
    return report
def format_duration(ns: float | None) -> str:
if ns is None:
return "—"
if ns < 1_000:
return f"{ns:.1f} ns"
elif ns < 1_000_000:
return f"{ns / 1_000:.2f} µs"
elif ns < 1_000_000_000:
return f"{ns / 1_000_000:.2f} ms"
else:
return f"{ns / 1_000_000_000:.3f} s"
def format_change(pct: float | None) -> str:
if pct is None:
return "NEW"
sign = "+" if pct >= 0 else ""
return f"{sign}{pct * 100:.1f}%"
def build_timeseries_data(history: list[dict]) -> dict[str, list[dict]]:
    """Regroup the run history into one series of points per benchmark.

    Args:
        history: Recorded runs in the order they should appear in each
            series. Every entry must have a ``timestamp``; ``commit``
            defaults to ``"?"`` and ``benchmarks`` to an empty mapping.

    Returns:
        Mapping of benchmark name to a list of points, each with the keys
        ``timestamp``, ``commit`` and ``value_ns``.
    """
    by_benchmark: dict[str, list[dict]] = {}
    for run in history:
        # Read the timestamp up front so a missing one fails even for empty runs.
        recorded_at = run["timestamp"]
        revision = run.get("commit", "?")
        for name, measurement in run.get("benchmarks", {}).items():
            point = {
                "timestamp": recorded_at,
                "commit": revision,
                "value_ns": measurement["point_estimate_ns"],
            }
            by_benchmark.setdefault(name, []).append(point)
    return by_benchmark
def generate_html_report(
    comparisons: list[dict],
    history: list[dict],
    commit: str,
    branch: str,
) -> str:
    """Render the benchmark report as a self-contained HTML document.

    The page shows a verdict banner, per-status counters, a table with one
    row per comparison and a Chart.js time-series chart (values in µs) built
    from *history*. Chart.js, its date adapter and the fonts are loaded from
    public CDNs by the generated page.

    Benchmark names, the commit and the branch are HTML-escaped, and the chart
    data is made safe for embedding inside ``<script>``, because these values
    originate outside this program (directory names, CLI arguments).

    Args:
        comparisons: Rows as produced by ``compare_to_previous``; each needs
            ``name``, ``status``, ``previous_ns``, ``current_ns`` and
            ``change_pct``.
        history: Recorded runs used for the chart and the run counter.
        commit: Commit identifier shown in the header.
        branch: Branch name shown in the header.

    Returns:
        The complete HTML document as a string.
    """
    # Imported locally under its own name so the block stays self-contained.
    from html import escape

    timeseries = build_timeseries_data(history)

    n_improved = sum(1 for c in comparisons if c["status"] == "improved")
    n_regressed = sum(1 for c in comparisons if c["status"] in ("regressed", "critical_regression"))
    n_stable = sum(1 for c in comparisons if c["status"] == "stable")
    n_new = sum(1 for c in comparisons if c["status"] == "new")
    total = len(comparisons)
    has_critical = any(c["status"] == "critical_regression" for c in comparisons)

    # Verdict priority: critical > any regression > any improvement > neutral.
    if has_critical:
        verdict_class = "verdict-critical"
        verdict_text = "🚨 CRITICAL REGRESSIONS DETECTED"
        verdict_sub = "Merge should be blocked until these are addressed."
    elif n_regressed > 0:
        verdict_class = "verdict-warn"
        verdict_text = "⚠️ Regressions Detected"
        verdict_sub = f"{n_regressed} benchmark{'s' if n_regressed != 1 else ''} regressed."
    elif n_improved > 0:
        verdict_class = "verdict-good"
        verdict_text = "✅ Performance Improved"
        verdict_sub = f"{n_improved} benchmark{'s' if n_improved != 1 else ''} got faster."
    else:
        verdict_class = "verdict-neutral"
        verdict_text = "➖ No Significant Changes"
        verdict_sub = "All benchmarks within noise threshold."

    status_icon = {
        "improved": "🟢",
        "stable": "🟡",
        "regressed": "🔴",
        "critical_regression": "🚨",
        "new": "🆕",
    }
    status_class = {
        "improved": "row-improved",
        "stable": "row-stable",
        "regressed": "row-regressed",
        "critical_regression": "row-critical",
        "new": "row-new",
    }

    # Build the rows as a list and join once instead of repeated string +=.
    row_chunks = []
    for comp in comparisons:
        status = comp["status"]
        safe_name = escape(str(comp["name"]))
        row_chunks.append(f"""
<tr class="{status_class.get(status, '')}">
<td class="bench-name">{safe_name}</td>
<td class="numeric">{format_duration(comp['previous_ns'])}</td>
<td class="numeric">{format_duration(comp['current_ns'])}</td>
<td class="change">{format_change(comp['change_pct'])}</td>
<td class="status-cell">{status_icon.get(status, '?')}</td>
</tr>""")
    table_rows = "".join(row_chunks)

    palette = [
        "#22d3ee", "#a78bfa", "#f472b6", "#34d399", "#fbbf24",
        "#f87171", "#60a5fa", "#c084fc", "#fb923c", "#4ade80",
        "#e879f9", "#38bdf8", "#a3e635", "#f43f5e", "#14b8a6",
    ]
    chart_datasets = []
    for i, bench_name in enumerate(sorted(timeseries)):
        color = palette[i % len(palette)]  # colors repeat after the palette is exhausted
        data_points = [
            {
                "x": pt["timestamp"],
                "y": round(pt["value_ns"] / 1_000, 3),  # ns -> µs for the chart axis
                "commit": pt["commit"],
            }
            for pt in timeseries[bench_name]
        ]
        chart_datasets.append({
            "label": bench_name,
            "data": data_points,
            "borderColor": color,
            "backgroundColor": color + "33",
            "fill": False,
            "tension": 0.3,
            "pointRadius": 4,
            "pointHoverRadius": 7,
        })

    # json.dumps does not escape "<", ">" or "&"; a value containing
    # "</script>" would otherwise terminate the script element. These
    # characters can only occur inside JSON strings, where the \uXXXX
    # escapes decode back to the same characters in JavaScript.
    chart_data_json = (
        json.dumps(chart_datasets)
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
        .replace("&", "\\u0026")
    )

    safe_commit = escape(str(commit))
    safe_branch = escape(str(branch))
    now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")

    document = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Chronos — Benchmark Report</title>
<script src="https://cdnjs.cloudflare.com/ajax/libs/Chart.js/4.4.1/chart.umd.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/chartjs-adapter-date-fns/3.0.0/chartjs-adapter-date-fns.bundle.min.js"></script>
<style>
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@300;400;500;600;700&family=DM+Sans:wght@400;500;600;700&display=swap');
:root {{
--bg-primary: #0a0e17;
--bg-secondary: #111827;
--bg-tertiary: #1a2235;
--bg-card: #151d2e;
--border: #1e293b;
--border-bright: #334155;
--text-primary: #e2e8f0;
--text-secondary: #94a3b8;
--text-muted: #64748b;
--accent-cyan: #22d3ee;
--accent-green: #34d399;
--accent-red: #f87171;
--accent-yellow: #fbbf24;
--accent-purple: #a78bfa;
--accent-orange: #fb923c;
}}
* {{ margin: 0; padding: 0; box-sizing: border-box; }}
body {{
font-family: 'DM Sans', sans-serif;
background: var(--bg-primary);
color: var(--text-primary);
min-height: 100vh;
line-height: 1.6;
}}
.container {{
max-width: 1200px;
margin: 0 auto;
padding: 2rem;
}}
/* ─── Header ─── */
header {{
border-bottom: 1px solid var(--border);
padding-bottom: 2rem;
margin-bottom: 2rem;
}}
.header-top {{
display: flex;
align-items: baseline;
gap: 1rem;
margin-bottom: 0.5rem;
}}
h1 {{
font-family: 'JetBrains Mono', monospace;
font-size: 1.75rem;
font-weight: 700;
color: var(--accent-cyan);
letter-spacing: -0.02em;
}}
.header-icon {{
font-size: 1.5rem;
}}
.meta {{
font-family: 'JetBrains Mono', monospace;
font-size: 0.8rem;
color: var(--text-muted);
display: flex;
gap: 2rem;
flex-wrap: wrap;
}}
.meta span {{
display: flex;
align-items: center;
gap: 0.4rem;
}}
.meta-label {{
color: var(--text-secondary);
}}
/* ─── Verdict Banner ─── */
.verdict {{
border-radius: 8px;
padding: 1.25rem 1.5rem;
margin-bottom: 2rem;
border: 1px solid;
}}
.verdict-critical {{
background: rgba(248, 113, 113, 0.08);
border-color: rgba(248, 113, 113, 0.3);
}}
.verdict-warn {{
background: rgba(251, 191, 36, 0.08);
border-color: rgba(251, 191, 36, 0.3);
}}
.verdict-good {{
background: rgba(52, 211, 153, 0.08);
border-color: rgba(52, 211, 153, 0.3);
}}
.verdict-neutral {{
background: rgba(148, 163, 184, 0.06);
border-color: rgba(148, 163, 184, 0.2);
}}
.verdict h2 {{
font-size: 1.2rem;
font-weight: 600;
margin-bottom: 0.25rem;
}}
.verdict p {{
color: var(--text-secondary);
font-size: 0.9rem;
}}
/* ─── Stats Row ─── */
.stats-row {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(140px, 1fr));
gap: 1rem;
margin-bottom: 2rem;
}}
.stat-card {{
background: var(--bg-card);
border: 1px solid var(--border);
border-radius: 8px;
padding: 1rem;
text-align: center;
}}
.stat-value {{
font-family: 'JetBrains Mono', monospace;
font-size: 1.75rem;
font-weight: 700;
}}
.stat-label {{
font-size: 0.75rem;
color: var(--text-muted);
text-transform: uppercase;
letter-spacing: 0.08em;
margin-top: 0.25rem;
}}
.stat-improved .stat-value {{ color: var(--accent-green); }}
.stat-regressed .stat-value {{ color: var(--accent-red); }}
.stat-stable .stat-value {{ color: var(--accent-yellow); }}
.stat-new .stat-value {{ color: var(--accent-purple); }}
.stat-total .stat-value {{ color: var(--accent-cyan); }}
/* ─── Section Headers ─── */
.section-header {{
font-family: 'JetBrains Mono', monospace;
font-size: 1rem;
font-weight: 600;
color: var(--text-secondary);
margin-bottom: 1rem;
padding-bottom: 0.5rem;
border-bottom: 1px solid var(--border);
text-transform: uppercase;
letter-spacing: 0.05em;
}}
/* ─── Table ─── */
.table-wrapper {{
overflow-x: auto;
margin-bottom: 3rem;
border-radius: 8px;
border: 1px solid var(--border);
}}
table {{
width: 100%;
border-collapse: collapse;
font-size: 0.9rem;
}}
thead th {{
font-family: 'JetBrains Mono', monospace;
font-size: 0.7rem;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.08em;
color: var(--text-muted);
padding: 0.75rem 1rem;
text-align: left;
background: var(--bg-tertiary);
border-bottom: 1px solid var(--border-bright);
position: sticky;
top: 0;
}}
thead th.numeric,
thead th.change,
thead th.status-cell {{
text-align: right;
}}
td {{
padding: 0.65rem 1rem;
border-bottom: 1px solid var(--border);
transition: background 0.15s;
}}
tr:hover td {{
background: rgba(255, 255, 255, 0.02);
}}
.bench-name {{
font-family: 'JetBrains Mono', monospace;
font-weight: 500;
color: var(--text-primary);
font-size: 0.85rem;
}}
.numeric {{
font-family: 'JetBrains Mono', monospace;
text-align: right;
color: var(--text-secondary);
font-size: 0.85rem;
}}
.change {{
font-family: 'JetBrains Mono', monospace;
text-align: right;
font-weight: 600;
font-size: 0.85rem;
}}
.status-cell {{
text-align: right;
font-size: 1rem;
}}
.row-improved .change {{ color: var(--accent-green); }}
.row-regressed .change {{ color: var(--accent-red); }}
.row-critical .change {{ color: var(--accent-red); font-weight: 700; }}
.row-stable .change {{ color: var(--text-muted); }}
.row-new .change {{ color: var(--accent-purple); }}
.row-critical {{
background: rgba(248, 113, 113, 0.05);
}}
.row-improved {{
background: rgba(52, 211, 153, 0.03);
}}
/* ─── Chart ─── */
.chart-container {{
background: var(--bg-card);
border: 1px solid var(--border);
border-radius: 8px;
padding: 1.5rem;
margin-bottom: 2rem;
}}
.chart-canvas-wrap {{
position: relative;
height: 400px;
}}
/* ─── Footer ─── */
footer {{
margin-top: 3rem;
padding-top: 1.5rem;
border-top: 1px solid var(--border);
text-align: center;
color: var(--text-muted);
font-size: 0.75rem;
font-family: 'JetBrains Mono', monospace;
}}
footer a {{
color: var(--accent-cyan);
text-decoration: none;
}}
/* ─── Responsive ─── */
@media (max-width: 640px) {{
.container {{ padding: 1rem; }}
h1 {{ font-size: 1.25rem; }}
.meta {{ flex-direction: column; gap: 0.5rem; }}
.chart-canvas-wrap {{ height: 280px; }}
}}
</style>
</head>
<body>
<div class="container">
<header>
<div class="header-top">
<span class="header-icon">⏱</span>
<h1>CHRONOS</h1>
</div>
<div class="meta">
<span><span class="meta-label">commit</span> {safe_commit}</span>
<span><span class="meta-label">branch</span> {safe_branch}</span>
<span><span class="meta-label">generated</span> {now}</span>
<span><span class="meta-label">history</span> {len(history)} runs</span>
</div>
</header>
<div class="verdict {verdict_class}">
<h2>{verdict_text}</h2>
<p>{verdict_sub}</p>
</div>
<div class="stats-row">
<div class="stat-card stat-total">
<div class="stat-value">{total}</div>
<div class="stat-label">Total</div>
</div>
<div class="stat-card stat-improved">
<div class="stat-value">{n_improved}</div>
<div class="stat-label">Improved</div>
</div>
<div class="stat-card stat-stable">
<div class="stat-value">{n_stable}</div>
<div class="stat-label">Stable</div>
</div>
<div class="stat-card stat-regressed">
<div class="stat-value">{n_regressed}</div>
<div class="stat-label">Regressed</div>
</div>
<div class="stat-card stat-new">
<div class="stat-value">{n_new}</div>
<div class="stat-label">New</div>
</div>
</div>
<div class="section-header">Benchmark Results</div>
<div class="table-wrapper">
<table>
<thead>
<tr>
<th>Benchmark</th>
<th class="numeric">Previous</th>
<th class="numeric">Current</th>
<th class="change">Change</th>
<th class="status-cell">Status</th>
</tr>
</thead>
<tbody>
{table_rows}
</tbody>
</table>
</div>
<div class="section-header">Performance Over Time (µs)</div>
<div class="chart-container">
<div class="chart-canvas-wrap">
<canvas id="timeseriesChart"></canvas>
</div>
</div>
<footer>
Generated by <strong>Chronos</strong> — The Temporal Benchkeeper | Powered by Criterion
</footer>
</div>
<script>
const datasets = {chart_data_json};
const ctx = document.getElementById('timeseriesChart').getContext('2d');
new Chart(ctx, {{
type: 'line',
data: {{ datasets }},
options: {{
responsive: true,
maintainAspectRatio: false,
interaction: {{
mode: 'index',
intersect: false,
}},
plugins: {{
legend: {{
position: 'bottom',
labels: {{
color: '#94a3b8',
font: {{ family: "'JetBrains Mono', monospace", size: 11 }},
boxWidth: 12,
padding: 16,
}},
}},
tooltip: {{
backgroundColor: '#1a2235',
titleColor: '#e2e8f0',
bodyColor: '#94a3b8',
borderColor: '#334155',
borderWidth: 1,
titleFont: {{ family: "'JetBrains Mono', monospace" }},
bodyFont: {{ family: "'JetBrains Mono', monospace", size: 12 }},
callbacks: {{
afterTitle: function(items) {{
const pt = items[0];
const ds = datasets[pt.datasetIndex];
if (ds && ds.data[pt.dataIndex]) {{
return 'commit: ' + ds.data[pt.dataIndex].commit;
}}
return '';
}},
label: function(ctx) {{
return ctx.dataset.label + ': ' + ctx.parsed.y.toFixed(2) + ' µs';
}},
}},
}},
}},
scales: {{
x: {{
type: 'time',
time: {{
tooltipFormat: 'yyyy-MM-dd HH:mm',
displayFormats: {{
hour: 'MMM d HH:mm',
day: 'MMM d',
week: 'MMM d',
month: 'MMM yyyy',
}},
}},
ticks: {{
color: '#64748b',
font: {{ family: "'JetBrains Mono', monospace", size: 10 }},
maxRotation: 45,
}},
grid: {{
color: 'rgba(30, 41, 59, 0.5)',
}},
}},
y: {{
beginAtZero: false,
ticks: {{
color: '#64748b',
font: {{ family: "'JetBrains Mono', monospace", size: 10 }},
callback: function(val) {{ return val.toFixed(1) + ' µs'; }},
}},
grid: {{
color: 'rgba(30, 41, 59, 0.5)',
}},
}},
}},
}},
}});
</script>
</body>
</html>"""
    return document
def main():
    """Command-line entry point: parse results, update history, write the report.

    Steps: parse the Criterion directory, append the run to the history
    ledger (saved unless ``--dry-run``), compare against the previous run,
    print a summary and write the HTML report.

    The process always terminates through ``sys.exit``:
        0 — no regressions;
        1 — no benchmark results were found, or at least one non-critical
            regression was detected;
        2 — at least one critical regression was detected.
    """
    parser = argparse.ArgumentParser(
        description="Chronos — Benchmark Sentinel. Parse Criterion results and generate HTML reports.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--criterion-dir",
        default="target/criterion",
        help="Path to Criterion output directory (default: target/criterion)",
    )
    parser.add_argument(
        "--history-file",
        default="benchmarks/history.json",
        help="Path to the JSON history ledger (default: benchmarks/history.json)",
    )
    parser.add_argument(
        "--output",
        default="benchmarks/report.html",
        help="Path for the generated HTML report (default: benchmarks/report.html)",
    )
    parser.add_argument(
        "--commit",
        default="unknown",
        help="Git commit hash for this run",
    )
    parser.add_argument(
        "--branch",
        default="unknown",
        help="Git branch name",
    )
    parser.add_argument(
        "--threshold-improved",
        type=float,
        default=0.02,
        help="Improvement threshold as fraction (default: 0.02 = 2%%)",
    )
    parser.add_argument(
        "--threshold-regressed",
        type=float,
        default=0.02,
        help="Regression threshold as fraction (default: 0.02 = 2%%)",
    )
    parser.add_argument(
        "--threshold-critical",
        type=float,
        default=0.10,
        help="Critical regression threshold as fraction (default: 0.10 = 10%%)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Parse and report but don't save history",
    )
    args = parser.parse_args()

    print(f"[chronos] Parsing benchmarks from {args.criterion_dir}...")
    benchmarks = parse_criterion_dir(args.criterion_dir)
    if not benchmarks:
        print("[chronos] ERROR: No benchmark results found.", file=sys.stderr)
        sys.exit(1)

    print(f"[chronos] Found {len(benchmarks)} benchmarks:")
    for name in sorted(benchmarks):
        ns = benchmarks[name]["point_estimate_ns"]
        print(f" • {name}: {format_duration(ns)}")

    history = load_history(args.history_file)
    # The current run must be in the history before comparing:
    # compare_to_previous() skips the last entry when picking the baseline.
    append_to_history(history, benchmarks, args.commit, args.branch)
    if not args.dry_run:
        save_history(history, args.history_file)

    comparisons = compare_to_previous(
        history,
        benchmarks,
        # The flag is documented as a positive fraction. -abs() also accepts a
        # value that was already given as negative instead of flipping it into
        # a positive cut-off that would label regressions as improvements.
        threshold_improved=-abs(args.threshold_improved),
        threshold_regressed=args.threshold_regressed,
        threshold_critical=args.threshold_critical,
    )

    print(f"\n[chronos] ═══ Results for {args.commit} on {args.branch} ═══")
    icons = {"improved": "🟢", "stable": "🟡", "regressed": "🔴",
             "critical_regression": "🚨", "new": "🆕"}
    for comp in comparisons:
        icon = icons.get(comp["status"], "?")
        print(f" {icon} {comp['name']}: {format_duration(comp['current_ns'])} "
              f"({format_change(comp['change_pct'])})")

    html = generate_html_report(comparisons, history, args.commit, args.branch)
    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # The document declares charset UTF-8 and contains emoji and "µ"; the
    # locale default encoding could fail on them or contradict the meta tag.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(html)
    print(f"\n[chronos] HTML report written to {args.output}")

    has_critical = any(c["status"] == "critical_regression" for c in comparisons)
    has_regression = any(c["status"] in ("regressed", "critical_regression") for c in comparisons)
    if has_critical:
        print("\n[chronos] 🚨 CRITICAL REGRESSIONS — recommend blocking merge.")
        sys.exit(2)
    elif has_regression:
        print("\n[chronos] ⚠️ Regressions detected — review recommended.")
        sys.exit(1)
    else:
        print("\n[chronos] ✅ All clear.")
        sys.exit(0)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()