#![warn(missing_docs)]
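//! Command-line interface for FluxBench: argument parsing, benchmark
//! execution (in-process or in isolated workers), and report generation.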
mod config;
mod executor;
mod planner;
mod supervisor;
pub use config::*;
pub use executor::{
ExecutionConfig, Executor, IsolatedExecutor, build_report, compute_statistics,
execute_verifications, format_human_output,
};
pub use supervisor::*;
use clap::{Parser, Subcommand};
use fluxbench_core::{BenchmarkDef, WorkerMain};
use fluxbench_logic::aggregate_verifications;
use fluxbench_report::{
OutputFormat, format_duration, generate_csv_report, generate_github_action_benchmark,
generate_github_summary, generate_html_report, generate_json_report,
};
use rayon::ThreadPoolBuilder;
use regex::Regex;
use std::io::Write;
use std::path::PathBuf;
use std::time::Instant;
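/// Command-line arguments for the `fluxbench` binary.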
#[derive(Parser, Debug)]
#[command(name = "fluxbench")]
#[command(author, version, about = "FluxBench - benchmarking framework for Rust")]
pub struct Cli {
/// Subcommand to execute; running benchmarks is the default.
#[command(subcommand)]
pub command: Option<Commands>,
/// Regex used to select which benchmarks run.
#[arg(default_value = ".*")]
pub filter: String,
/// Report output format (defaults to human-readable text).
#[arg(long, default_value = "human")]
pub format: String,
/// Write the report to this file instead of stdout.
#[arg(short, long)]
pub output: Option<PathBuf>,
/// Compare against a baseline report; a bare `--baseline` uses the
/// configured or default path, `--baseline <PATH>` uses that file.
#[arg(long)]
pub baseline: Option<Option<PathBuf>>,
/// List the benchmarks that would run without executing them.
#[arg(long)]
pub dry_run: bool,
/// Regression threshold in percent; overrides the configured value.
#[arg(long)]
pub threshold: Option<f64>,
/// Only run benchmarks in this group.
#[arg(long)]
pub group: Option<String>,
/// Only run benchmarks carrying this tag.
#[arg(long)]
pub tag: Option<String>,
/// Skip benchmarks carrying this tag.
#[arg(long)]
pub skip_tag: Option<String>,
/// Warmup time in seconds.
#[arg(long, default_value = "3")]
pub warmup: u64,
/// Measurement time in seconds.
#[arg(long, default_value = "5")]
pub measurement: u64,
/// Collect exactly this many samples, skipping time-based calibration.
#[arg(long, short = 'n')]
pub samples: Option<u64>,
/// Lower bound on measurement iterations.
#[arg(long)]
pub min_iterations: Option<u64>,
/// Upper bound on measurement iterations.
#[arg(long)]
pub max_iterations: Option<u64>,
/// Enable debug-level logging.
#[arg(short, long)]
pub verbose: bool,
/// Run benchmarks in isolated worker processes (`--isolated false` for in-process).
#[arg(long, default_value = "true", action = clap::ArgAction::Set)]
pub isolated: bool,
/// Use a fresh worker process per benchmark instead of reusing workers.
#[arg(long)]
pub one_shot: bool,
/// Worker timeout in seconds (isolated mode).
#[arg(long, default_value = "60")]
pub worker_timeout: u64,
/// Number of parallel workers (isolated mode).
#[arg(long, default_value = "1")]
pub jobs: usize,
/// Global Rayon thread-pool size; 0 keeps the default (all cores).
#[arg(long, short = 'j', default_value = "0")]
pub threads: usize,
/// Internal: run as a benchmark worker (hidden).
#[arg(long, hide = true)]
pub flux_worker: bool,
/// Save this run as a baseline; an optional value overrides the destination path.
#[arg(long)]
pub save_baseline: Option<Option<PathBuf>>,
/// Ignored; accepted so the binary can run under `cargo bench` (hidden).
#[arg(long, hide = true)]
pub bench: bool,
}
/// Top-level subcommands.
#[derive(Subcommand, Debug)]
pub enum Commands {
/// List the benchmarks matching the current filters without running them.
List,
/// Run the matching benchmarks (the default when no subcommand is given).
Run,
/// Re-run benchmarks and compare against a baseline captured at a git ref.
Compare {
/// Git ref the baseline corresponds to (resolved via `git rev-parse`).
#[arg(value_name = "REF")]
git_ref: String,
},
}
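/// Parse command-line arguments from the environment and run the CLI.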
pub fn run() -> anyhow::Result<()> {
let cli = Cli::parse();
run_with_cli(cli)
}
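/// Run the CLI with pre-parsed arguments; useful for tests and embedding.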
pub fn run_with_cli(cli: Cli) -> anyhow::Result<()> {
if cli.flux_worker {
return run_worker_mode();
}
if cli.verbose {
tracing_subscriber::fmt()
.with_env_filter("fluxbench=debug")
.init();
} else {
tracing_subscriber::fmt()
.with_env_filter("fluxbench=info")
.init();
}
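// A missing or unreadable config file falls back to built-in defaults.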
let config = FluxConfig::discover().unwrap_or_default();
let format: OutputFormat = cli.format.parse().unwrap_or_else(|_| {
eprintln!("Warning: unknown format '{}', using human output", cli.format);
OutputFormat::Human
});
let jobs = if cli.jobs != 1 {
cli.jobs
} else {
config.runner.jobs.unwrap_or(1)
};
match cli.command {
Some(Commands::List) => {
list_benchmarks(&cli)?;
}
Some(Commands::Run) => {
run_benchmarks(&cli, &config, format, jobs)?;
}
Some(Commands::Compare { ref git_ref }) => {
compare_benchmarks(&cli, &config, git_ref, format)?;
}
None => {
if cli.dry_run {
list_benchmarks(&cli)?;
} else {
run_benchmarks(&cli, &config, format, jobs)?;
}
}
}
Ok(())
}
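/// Worker mode: hand control to fluxbench-core's `WorkerMain` loop, which
/// executes benchmarks on behalf of a supervising fluxbench process.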
fn run_worker_mode() -> anyhow::Result<()> {
let mut worker = WorkerMain::new();
worker
.run()
.map_err(|e| anyhow::anyhow!("Worker error: {}", e))
}
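/// Apply the CLI's regex, group, and tag filters through the planner.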
fn filter_benchmarks(
cli: &Cli,
benchmarks: &[&'static BenchmarkDef],
) -> Vec<&'static BenchmarkDef> {
let filter_re = Regex::new(&cli.filter)
.map_err(|e| eprintln!("Warning: invalid filter regex '{}': {}", cli.filter, e))
.ok();
let plan = planner::build_plan(
benchmarks.iter().copied(),
filter_re.as_ref(),
cli.group.as_deref(),
cli.tag.as_deref(),
cli.skip_tag.as_deref(),
);
plan.benchmarks
}
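/// Print the execution plan grouped by benchmark group, plus a tag summary.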
fn list_benchmarks(cli: &Cli) -> anyhow::Result<()> {
println!("FluxBench Plan:");
let all_benchmarks: Vec<_> = inventory::iter::<BenchmarkDef>.into_iter().collect();
let benchmarks = filter_benchmarks(cli, &all_benchmarks);
let mut groups: std::collections::BTreeMap<&str, Vec<&BenchmarkDef>> =
std::collections::BTreeMap::new();
for bench in &benchmarks {
groups.entry(bench.group).or_default().push(bench);
}
let mut total = 0;
for (group, benches) in &groups {
println!("├── group: {}", group);
for bench in benches {
let tags = if bench.tags.is_empty() {
String::new()
} else {
format!(" [{}]", bench.tags.join(", "))
};
println!(
"│ ├── {}{} ({}:{})",
bench.id, tags, bench.file, bench.line
);
total += 1;
}
}
println!("{} benchmarks found.", total);
let mut tag_counts: std::collections::BTreeMap<&str, usize> = std::collections::BTreeMap::new();
for bench in &all_benchmarks {
for tag in bench.tags {
*tag_counts.entry(tag).or_default() += 1;
}
}
if !tag_counts.is_empty() {
let tags_display: Vec<String> = tag_counts
.iter()
.map(|(tag, count)| format!("{} ({})", tag, count))
.collect();
println!("Tags: {}", tags_display.join(", "));
}
Ok(())
}
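/// Merge CLI flags with config-file settings into an `ExecutionConfig`.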
fn build_execution_config(cli: &Cli, config: &FluxConfig) -> ExecutionConfig {
let warmup_ns = FluxConfig::parse_duration(&config.runner.warmup_time).unwrap_or(3_000_000_000);
let measurement_ns =
FluxConfig::parse_duration(&config.runner.measurement_time).unwrap_or(5_000_000_000);
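// CLI durations take precedence only when they differ from their clap
// defaults (3 s warmup, 5 s measurement); otherwise config values apply.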
let warmup_time_ns = if cli.warmup != 3 {
cli.warmup * 1_000_000_000
} else {
warmup_ns
};
let measurement_time_ns = if cli.measurement != 5 {
cli.measurement * 1_000_000_000
} else {
measurement_ns
};
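// An explicit sample count pins min == max iterations and skips the
// time-based warmup and measurement phases entirely.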
if let Some(n) = cli.samples.or(config.runner.samples) {
return ExecutionConfig {
warmup_time_ns: 0,
measurement_time_ns: 0,
min_iterations: Some(n),
max_iterations: Some(n),
track_allocations: config.allocator.track,
bootstrap_iterations: config.runner.bootstrap_iterations,
confidence_level: config.runner.confidence_level,
};
}
let min_iterations = cli.min_iterations.or(config.runner.min_iterations);
let max_iterations = cli.max_iterations.or(config.runner.max_iterations);
ExecutionConfig {
warmup_time_ns,
measurement_time_ns,
min_iterations,
max_iterations,
track_allocations: config.allocator.track,
bootstrap_iterations: config.runner.bootstrap_iterations,
confidence_level: config.runner.confidence_level,
}
}
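/// Run the filtered benchmarks, apply verifications and any baseline
/// comparison, emit the report, and exit non-zero on crashes or critical
/// verification failures.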
fn run_benchmarks(
cli: &Cli,
config: &FluxConfig,
format: OutputFormat,
jobs: usize,
) -> anyhow::Result<()> {
let jobs = jobs.max(1);
if cli.threads > 0 {
ThreadPoolBuilder::new()
.num_threads(cli.threads)
.build_global()
.ok();
}
let all_benchmarks: Vec<_> = inventory::iter::<BenchmarkDef>.into_iter().collect();
let benchmarks = filter_benchmarks(cli, &all_benchmarks);
if benchmarks.is_empty() {
if let Some(ref tag) = cli.tag {
let all_tags: std::collections::BTreeSet<&str> = all_benchmarks
.iter()
.flat_map(|b| b.tags.iter().copied())
.collect();
if !all_tags.contains(tag.as_str()) {
let available: Vec<&str> = all_tags.into_iter().collect();
eprintln!(
"Warning: tag '{}' not found. Available tags: {}",
tag,
available.join(", ")
);
}
}
println!("No benchmarks found.");
return Ok(());
}
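// The config can veto isolation outright; when it allows isolation,
// --isolated (default true) makes the final call.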
let isolated = if config.runner.isolation.is_isolated() {
cli.isolated
} else {
false
};
let threads_str = if cli.threads == 0 {
"all".to_string()
} else {
cli.threads.to_string()
};
let mode_str = if isolated {
if cli.one_shot {
" (isolated, one-shot)"
} else {
" (isolated, persistent)"
}
} else {
" (in-process)"
};
println!(
"Running {} benchmarks{}, {} threads, {} worker(s)...\n",
benchmarks.len(),
mode_str,
threads_str,
jobs
);
let start_time = Instant::now();
let exec_config = build_execution_config(cli, config);
if exec_config.bootstrap_iterations > 0 && exec_config.bootstrap_iterations < 100 {
eprintln!(
"Warning: bootstrap_iterations={} is very low; confidence intervals will be unreliable. \
Use >= 1000 for meaningful results, or 0 to skip bootstrap.",
exec_config.bootstrap_iterations
);
}
let results = if isolated {
let timeout = std::time::Duration::from_secs(cli.worker_timeout);
let reuse_workers = !cli.one_shot;
let isolated_executor =
IsolatedExecutor::new(exec_config.clone(), timeout, reuse_workers, jobs);
isolated_executor.execute(&benchmarks)
} else {
if jobs > 1 {
eprintln!(
"Warning: --jobs currently applies only to isolated mode; running in-process serially."
);
}
let mut executor = Executor::new(exec_config.clone());
executor.execute(&benchmarks)
};
let stats = compute_statistics(&results, &exec_config);
if exec_config.track_allocations
&& !results.is_empty()
&& results
.iter()
.all(|r| r.alloc_bytes == 0 && r.alloc_count == 0)
{
eprintln!(
"Warning: allocation tracking enabled but all benchmarks reported 0 bytes allocated.\n\
Ensure TrackingAllocator is set as #[global_allocator] in your benchmark binary."
);
}
let total_duration_ms = start_time.elapsed().as_secs_f64() * 1000.0;
let mut report = build_report(&results, &stats, &exec_config, total_duration_ms);
if let Some(baseline_path) = resolve_baseline_path(&cli.baseline, config) {
if baseline_path.exists() {
match std::fs::read_to_string(&baseline_path).and_then(|json| {
serde_json::from_str::<fluxbench_report::Report>(&json)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
}) {
Ok(baseline) => {
let threshold = cli.threshold.unwrap_or(config.ci.regression_threshold);
apply_baseline_comparison(&mut report, &baseline, threshold);
}
Err(e) => {
eprintln!(
"Warning: failed to load baseline {}: {}",
baseline_path.display(),
e
);
}
}
} else {
eprintln!(
"Warning: baseline file not found: {}",
baseline_path.display()
);
}
}
let (comparison_results, comparison_series, synthetic_results, verification_results) =
execute_verifications(&results, &stats);
let verification_summary = aggregate_verifications(&verification_results);
report.comparisons = comparison_results;
report.comparison_series = comparison_series;
report.synthetics = synthetic_results;
report.verifications = verification_results;
report.summary.critical_failures = verification_summary.critical_failures;
report.summary.warnings = verification_summary.failed - verification_summary.critical_failures;
if config.ci.github_annotations {
emit_github_annotations(&report);
}
let output = match format {
OutputFormat::Json => generate_json_report(&report)?,
OutputFormat::GithubSummary => generate_github_summary(&report),
OutputFormat::GithubActionBenchmark => generate_github_action_benchmark(&report),
OutputFormat::Html => generate_html_report(&report),
OutputFormat::Csv => generate_csv_report(&report),
OutputFormat::Human => format_human_output(&report),
};
if let Some(ref path) = cli.output {
let mut file = std::fs::File::create(path)?;
file.write_all(output.as_bytes())?;
println!("Report written to: {}", path.display());
} else {
print!("{}", output);
}
save_baseline_if_needed(cli, config, &report)?;
let has_crashes = report
.results
.iter()
.any(|r| matches!(r.status, fluxbench_report::BenchmarkStatus::Crashed));
if verification_summary.should_fail_ci() || has_crashes {
if has_crashes {
eprintln!("\nBenchmark(s) crashed during execution");
}
if verification_summary.should_fail_ci() {
eprintln!(
"\n{} critical verification failure(s)",
verification_summary.critical_failures + verification_summary.critical_errors
);
}
std::process::exit(1);
}
Ok(())
}
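/// Re-run the filtered benchmarks and compare against the baseline captured
/// at `git_ref`, failing CI on regressions above the threshold.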
fn compare_benchmarks(
cli: &Cli,
config: &FluxConfig,
git_ref: &str,
format: OutputFormat,
) -> anyhow::Result<()> {
let baseline_path = resolve_baseline_path(&cli.baseline, config).ok_or_else(|| {
anyhow::anyhow!(
"compare requires a baseline report: pass --baseline [PATH] or configure output.baseline_path"
)
})?;
if !baseline_path.exists() {
return Err(anyhow::anyhow!(
"Baseline file not found: {}",
baseline_path.display()
));
}
let baseline_json = std::fs::read_to_string(&baseline_path)?;
let baseline: fluxbench_report::Report = serde_json::from_str(&baseline_json)?;
let resolved_git_ref = resolve_git_ref(git_ref)?;
if let Some(baseline_commit) = baseline.meta.git_commit.as_deref() {
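// Accept prefix matches in either direction so abbreviated commit hashes
// still line up with full ones.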
let matches_ref = baseline_commit == resolved_git_ref
|| baseline_commit.starts_with(&resolved_git_ref)
|| resolved_git_ref.starts_with(baseline_commit);
if !matches_ref {
return Err(anyhow::anyhow!(
"Baseline commit {} does not match git ref {} ({})",
baseline_commit,
git_ref,
resolved_git_ref
));
}
} else {
eprintln!(
"Warning: baseline report has no commit metadata; git ref consistency cannot be verified."
);
}
println!("Comparing against baseline: {}", baseline_path.display());
println!("Git ref: {} ({})\n", git_ref, resolved_git_ref);
let all_benchmarks: Vec<_> = inventory::iter::<BenchmarkDef>.into_iter().collect();
let benchmarks = filter_benchmarks(cli, &all_benchmarks);
if benchmarks.is_empty() {
println!("No benchmarks found.");
return Ok(());
}
let start_time = Instant::now();
let exec_config = build_execution_config(cli, config);
let mut executor = Executor::new(exec_config.clone());
let results = executor.execute(&benchmarks);
let stats = compute_statistics(&results, &exec_config);
let total_duration_ms = start_time.elapsed().as_secs_f64() * 1000.0;
let mut report = build_report(&results, &stats, &exec_config, total_duration_ms);
let regression_threshold = cli.threshold.unwrap_or(config.ci.regression_threshold);
apply_baseline_comparison(&mut report, &baseline, regression_threshold);
let (comparison_results, comparison_series, synthetic_results, verification_results) =
execute_verifications(&results, &stats);
let verification_summary = aggregate_verifications(&verification_results);
report.comparisons = comparison_results;
report.comparison_series = comparison_series;
report.synthetics = synthetic_results;
report.verifications = verification_results;
report.summary.critical_failures = verification_summary.critical_failures;
report.summary.warnings = verification_summary.failed - verification_summary.critical_failures;
if config.ci.github_annotations {
emit_github_annotations(&report);
}
let output = match format {
OutputFormat::Json => generate_json_report(&report)?,
OutputFormat::GithubSummary => generate_github_summary(&report),
OutputFormat::GithubActionBenchmark => generate_github_action_benchmark(&report),
OutputFormat::Html => generate_html_report(&report),
OutputFormat::Csv => generate_csv_report(&report),
OutputFormat::Human => format_comparison_output(&report, &baseline),
};
if let Some(ref path) = cli.output {
let mut file = std::fs::File::create(path)?;
file.write_all(output.as_bytes())?;
println!("Report written to: {}", path.display());
} else {
print!("{}", output);
}
save_baseline_if_needed(cli, config, &report)?;
let should_fail = report.summary.regressions > 0 || verification_summary.should_fail_ci();
if should_fail {
if report.summary.regressions > 0 {
eprintln!(
"\n{} regression(s) detected above {}% threshold",
report.summary.regressions, regression_threshold
);
}
if verification_summary.should_fail_ci() {
eprintln!(
"\n{} critical verification failure(s)",
verification_summary.critical_failures + verification_summary.critical_errors
);
}
std::process::exit(1);
}
Ok(())
}
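/// Persist the report as a baseline when requested by --save-baseline or the
/// config; an explicit CLI path wins, then the configured path, then the default.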
fn save_baseline_if_needed(
cli: &Cli,
config: &FluxConfig,
report: &fluxbench_report::Report,
) -> anyhow::Result<()> {
let should_save = cli.save_baseline.is_some() || config.output.save_baseline;
if !should_save {
return Ok(());
}
let path = cli
.save_baseline
.as_ref()
.and_then(|opt| opt.clone())
.or_else(|| config.output.baseline_path.as_ref().map(PathBuf::from))
.unwrap_or_else(|| PathBuf::from("target/fluxbench/baseline.json"));
if let Some(parent) = path.parent() {
std::fs::create_dir_all(parent)?;
}
let json = generate_json_report(report)?;
std::fs::write(&path, json)?;
eprintln!("Baseline saved to: {}", path.display());
Ok(())
}
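/// Attach per-benchmark comparisons against `baseline` and update the
/// report's regression/improvement counters.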
fn apply_baseline_comparison(
report: &mut fluxbench_report::Report,
baseline: &fluxbench_report::Report,
regression_threshold: f64,
) {
report.baseline_meta = Some(baseline.meta.clone());
let baseline_map: std::collections::HashMap<_, _> = baseline
.results
.iter()
.filter_map(|r| r.metrics.as_ref().map(|m| (r.id.clone(), m.clone())))
.collect();
for result in &mut report.results {
if let (Some(metrics), Some(baseline_metrics)) =
(&result.metrics, baseline_map.get(&result.id))
{
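// A positive per-benchmark threshold overrides the global one.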
let effective_threshold = if result.threshold > 0.0 {
result.threshold
} else {
regression_threshold
};
let baseline_mean = baseline_metrics.mean_ns;
let absolute_change = metrics.mean_ns - baseline_mean;
let relative_change = if baseline_mean > 0.0 {
(absolute_change / baseline_mean) * 100.0
} else {
0.0
};
let ci_non_overlap = metrics.ci_upper_ns < baseline_metrics.ci_lower_ns
|| metrics.ci_lower_ns > baseline_metrics.ci_upper_ns;
let is_significant = relative_change.abs() > effective_threshold && ci_non_overlap;
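// Regression/improvement tallies gate on the threshold alone;
// significance additionally requires non-overlapping confidence intervals.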
if relative_change > effective_threshold {
report.summary.regressions += 1;
} else if relative_change < -effective_threshold {
report.summary.improvements += 1;
}
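// Standardized effect size: the change expressed in units of the
// current run's standard deviation.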
let mut effect_size = if metrics.std_dev_ns > f64::EPSILON {
absolute_change / metrics.std_dev_ns
} else {
0.0
};
if !effect_size.is_finite() {
effect_size = 0.0;
}
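// Coarse heuristic rather than a true posterior: CI separation pins the
// probability near 0 or 1; otherwise lean toward the sign of the change.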
let probability_regression = if ci_non_overlap {
if relative_change > 0.0 { 0.99 } else { 0.01 }
} else if relative_change > 0.0 {
0.60
} else {
0.40
};
result.comparison = Some(fluxbench_report::Comparison {
baseline_mean_ns: baseline_mean,
absolute_change_ns: absolute_change,
relative_change,
probability_regression,
is_significant,
effect_size,
});
}
}
}
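/// Map the double-optional `--baseline` flag to a concrete path: an explicit
/// value wins, a bare flag falls back to the configured path or the default,
/// and an absent flag yields `None`.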
fn resolve_baseline_path(
cli_baseline: &Option<Option<PathBuf>>,
config: &FluxConfig,
) -> Option<PathBuf> {
match cli_baseline {
Some(Some(path)) => Some(path.clone()),
Some(None) => Some(
config
.output
.baseline_path
.as_ref()
.map(PathBuf::from)
.unwrap_or_else(|| PathBuf::from("target/fluxbench/baseline.json")),
),
None => None,
}
}
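/// Emit GitHub Actions workflow commands (`::error`, `::warning`) so crashes,
/// failures, regressions, and verification problems annotate the run.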
fn emit_github_annotations(report: &fluxbench_report::Report) {
for result in &report.results {
match result.status {
fluxbench_report::BenchmarkStatus::Crashed => {
let msg = result
.failure
.as_ref()
.map(|f| f.message.as_str())
.unwrap_or("benchmark crashed");
println!(
"::error file={},line={}::{}: {}",
result.file, result.line, result.id, msg
);
}
fluxbench_report::BenchmarkStatus::Failed => {
let msg = result
.failure
.as_ref()
.map(|f| f.message.as_str())
.unwrap_or("benchmark failed");
println!(
"::error file={},line={}::{}: {}",
result.file, result.line, result.id, msg
);
}
_ => {}
}
if let Some(cmp) = &result.comparison {
if cmp.is_significant && cmp.relative_change > 0.0 {
println!(
"::error file={},line={}::{}: regression {:+.1}% ({} → {})",
result.file,
result.line,
result.id,
cmp.relative_change,
format_duration(cmp.baseline_mean_ns),
result
.metrics
.as_ref()
.map(|m| format_duration(m.mean_ns))
.unwrap_or_default(),
);
}
}
}
for v in &report.verifications {
match &v.status {
fluxbench_logic::VerificationStatus::Failed => {
let level = match v.severity {
fluxbench_core::Severity::Critical => "error",
_ => "warning",
};
println!("::{}::{}: {}", level, v.id, v.message);
}
fluxbench_logic::VerificationStatus::Error { message } => {
println!("::error::{}: evaluation error: {}", v.id, message);
}
_ => {}
}
}
}
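/// Resolve `git_ref` to a full commit hash via `git rev-parse --verify`.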
fn resolve_git_ref(git_ref: &str) -> anyhow::Result<String> {
let output = std::process::Command::new("git")
.args(["rev-parse", "--verify", git_ref])
.output()
.map_err(|e| anyhow::anyhow!("Failed to resolve git ref '{}': {}", git_ref, e))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(anyhow::anyhow!(
"Invalid git ref '{}': {}",
git_ref,
stderr.trim()
));
}
let resolved = String::from_utf8(output.stdout)?.trim().to_string();
if resolved.is_empty() {
return Err(anyhow::anyhow!(
"Git ref '{}' resolved to an empty commit hash",
git_ref
));
}
Ok(resolved)
}
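/// Render the human-readable comparison view used by the `compare` command.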
fn format_comparison_output(
report: &fluxbench_report::Report,
baseline: &fluxbench_report::Report,
) -> String {
let mut output = String::new();
output.push('\n');
output.push_str("FluxBench Comparison Results\n");
output.push_str(&"=".repeat(60));
output.push_str("\n\n");
output.push_str(&format!(
"Baseline: {} ({})\n",
baseline.meta.git_commit.as_deref().unwrap_or("unknown"),
baseline.meta.timestamp.format("%Y-%m-%d %H:%M:%S")
));
output.push_str(&format!(
"Current: {} ({})\n\n",
report.meta.git_commit.as_deref().unwrap_or("unknown"),
report.meta.timestamp.format("%Y-%m-%d %H:%M:%S")
));
for result in &report.results {
let status_icon = match result.status {
fluxbench_report::BenchmarkStatus::Passed => "✓",
fluxbench_report::BenchmarkStatus::Failed => "✗",
fluxbench_report::BenchmarkStatus::Crashed => "💥",
fluxbench_report::BenchmarkStatus::Skipped => "⊘",
};
output.push_str(&format!("{} {}\n", status_icon, result.id));
if let (Some(metrics), Some(comparison)) = (&result.metrics, &result.comparison) {
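// Display-only bands: the ±5% labels here are independent of the
// regression threshold used for CI gating.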
let change_icon = if comparison.relative_change > 5.0 {
"📈 REGRESSION"
} else if comparison.relative_change < -5.0 {
"📉 improvement"
} else {
"≈ no change"
};
output.push_str(&format!(
" baseline: {} → current: {}\n",
format_duration(comparison.baseline_mean_ns),
format_duration(metrics.mean_ns),
));
output.push_str(&format!(
" change: {:+.2}% ({}) {}\n",
comparison.relative_change,
format_duration(comparison.absolute_change_ns.abs()),
change_icon,
));
}
output.push('\n');
}
output.push_str("Summary\n");
output.push_str(&"-".repeat(60));
output.push('\n');
output.push_str(&format!(
" Regressions: {} Improvements: {} No Change: {}\n",
report.summary.regressions,
report.summary.improvements,
report.summary.total_benchmarks - report.summary.regressions - report.summary.improvements
));
output
}
#[cfg(test)]
mod tests {
use super::*;
use fluxbench_report::{
BenchmarkMetrics, BenchmarkReportResult, BenchmarkStatus, Report, ReportConfig, ReportMeta,
ReportSummary, SystemInfo,
};
fn dummy_meta() -> ReportMeta {
ReportMeta {
schema_version: 1,
version: "0.1.0".to_string(),
timestamp: chrono::Utc::now(),
git_commit: None,
git_branch: None,
system: SystemInfo {
os: "linux".to_string(),
os_version: "6.0".to_string(),
cpu: "test".to_string(),
cpu_cores: 1,
memory_gb: 1.0,
},
config: ReportConfig {
warmup_time_ns: 0,
measurement_time_ns: 0,
min_iterations: None,
max_iterations: None,
bootstrap_iterations: 0,
confidence_level: 0.95,
track_allocations: false,
},
}
}
fn dummy_metrics(mean: f64) -> BenchmarkMetrics {
BenchmarkMetrics {
samples: 100,
mean_ns: mean,
median_ns: mean,
std_dev_ns: mean * 0.01,
min_ns: mean * 0.9,
max_ns: mean * 1.1,
p50_ns: mean,
p90_ns: mean * 1.05,
p95_ns: mean * 1.07,
p99_ns: mean * 1.09,
p999_ns: mean * 1.1,
skewness: 0.0,
kurtosis: 3.0,
ci_lower_ns: mean * 0.98,
ci_upper_ns: mean * 1.02,
ci_level: 0.95,
throughput_ops_sec: None,
alloc_bytes: 0,
alloc_count: 0,
mean_cycles: 0.0,
median_cycles: 0.0,
min_cycles: 0,
max_cycles: 0,
cycles_per_ns: 0.0,
}
}
fn dummy_result(id: &str, mean: f64, threshold: f64) -> BenchmarkReportResult {
BenchmarkReportResult {
id: id.to_string(),
name: id.to_string(),
group: "test".to_string(),
status: BenchmarkStatus::Passed,
severity: fluxbench_core::Severity::Warning,
file: "test.rs".to_string(),
line: 1,
metrics: Some(dummy_metrics(mean)),
threshold,
comparison: None,
failure: None,
}
}
fn dummy_report(results: Vec<BenchmarkReportResult>) -> Report {
let total = results.len();
Report {
meta: dummy_meta(),
results,
comparisons: vec![],
comparison_series: vec![],
synthetics: vec![],
verifications: vec![],
summary: ReportSummary {
total_benchmarks: total,
passed: total,
..Default::default()
},
baseline_meta: None,
}
}
#[test]
fn per_bench_threshold_overrides_global() {
let mut report = dummy_report(vec![dummy_result("fast_bench", 108.0, 5.0)]);
let baseline = dummy_report(vec![dummy_result("fast_bench", 100.0, 5.0)]);
apply_baseline_comparison(&mut report, &baseline, 25.0);
assert_eq!(
report.summary.regressions, 1,
"per-bench 5% should catch 8% regression"
);
let cmp = report.results[0].comparison.as_ref().unwrap();
assert!(cmp.is_significant);
}
#[test]
fn zero_threshold_falls_back_to_global() {
let mut report = dummy_report(vec![dummy_result("normal_bench", 108.0, 0.0)]);
let baseline = dummy_report(vec![dummy_result("normal_bench", 100.0, 0.0)]);
apply_baseline_comparison(&mut report, &baseline, 25.0);
assert_eq!(
report.summary.regressions, 0,
"8% under 25% global should not regress"
);
let cmp = report.results[0].comparison.as_ref().unwrap();
assert!(!cmp.is_significant);
}
#[test]
fn mixed_thresholds_independent() {
let mut report = dummy_report(vec![
dummy_result("tight", 108.0, 5.0),
dummy_result("loose", 108.0, 0.0),
]);
let baseline = dummy_report(vec![
dummy_result("tight", 100.0, 5.0),
dummy_result("loose", 100.0, 0.0),
]);
apply_baseline_comparison(&mut report, &baseline, 25.0);
assert_eq!(report.summary.regressions, 1);
assert!(
report.results[0]
.comparison
.as_ref()
.unwrap()
.is_significant
);
assert!(
!report.results[1]
.comparison
.as_ref()
.unwrap()
.is_significant
);
}
#[test]
fn per_bench_threshold_detects_improvement() {
let mut report = dummy_report(vec![dummy_result("improving", 90.0, 5.0)]);
let baseline = dummy_report(vec![dummy_result("improving", 100.0, 5.0)]);
apply_baseline_comparison(&mut report, &baseline, 25.0);
assert_eq!(report.summary.improvements, 1);
assert_eq!(report.summary.regressions, 0);
}
}