#![warn(missing_docs)]
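//! Command-line interface for the FluxBench benchmarking framework:
//! argument parsing, benchmark planning, execution, and report generation.
//!
//! A typical invocation (the flags shown are defined on [`Cli`] below):
//!
//! ```text
//! fluxbench 'hash.*' --format json --output report.json
//! ```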
mod config;
mod executor;
mod planner;
mod supervisor;

pub use config::*;
pub use executor::{
    ExecutionConfig, Executor, IsolatedExecutor, build_report, compute_statistics,
    execute_verifications, format_human_output,
};
pub use supervisor::*;

use clap::{Parser, Subcommand};
use fluxbench_core::{BenchmarkDef, WorkerMain};
use fluxbench_logic::aggregate_verifications;
use fluxbench_report::{
    OutputFormat, generate_csv_report, generate_github_summary, generate_html_report,
    generate_json_report,
};
use rayon::ThreadPoolBuilder;
use regex::Regex;
use std::io::Write;
use std::path::PathBuf;
use std::time::Instant;

#[derive(Parser, Debug)]
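/// Command-line arguments for the FluxBench benchmark runner.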
#[command(name = "fluxbench")]
#[command(author, version, about = "FluxBench - benchmarking framework for Rust")]
pub struct Cli {
    #[command(subcommand)]
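    /// Subcommand to execute; with none given, benchmarks are run (or listed under --dry-run).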
    pub command: Option<Commands>,

    #[arg(default_value = ".*")]
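    /// Regex selecting which benchmarks to run (default: all).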
    pub filter: String,

    #[arg(long, default_value = "human")]
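    /// Report output format; unrecognized values fall back to human.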
    pub format: String,

    #[arg(short, long)]
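    /// Write the report to this file instead of stdout.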
    pub output: Option<PathBuf>,

    #[arg(long)]
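    /// Baseline report (JSON) to compare against.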
    pub baseline: Option<PathBuf>,

    #[arg(long)]
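    /// List the benchmarks that would run without executing them.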
    pub dry_run: bool,

    #[arg(long)]
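    /// Regression threshold in percent; overrides the configured CI threshold.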
    pub threshold: Option<f64>,

    #[arg(long)]
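    /// Only run benchmarks in this group.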
    pub group: Option<String>,

    #[arg(long)]
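    /// Only run benchmarks carrying this tag.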
    pub tag: Option<String>,

    #[arg(long)]
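    /// Skip benchmarks carrying this tag.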
    pub skip_tag: Option<String>,

    #[arg(long, default_value = "3")]
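    /// Warmup time in seconds.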
    pub warmup: u64,

    #[arg(long, default_value = "5")]
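    /// Measurement time in seconds.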
    pub measurement: u64,

    #[arg(long, short = 'n')]
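    /// Collect exactly this many samples, disabling the timed warmup and
    /// measurement phases.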
    pub samples: Option<u64>,

    #[arg(long)]
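    /// Lower bound on iterations per benchmark.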
    pub min_iterations: Option<u64>,

    #[arg(long)]
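    /// Upper bound on iterations per benchmark.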
    pub max_iterations: Option<u64>,

    #[arg(short, long)]
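    /// Enable debug-level logging.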
    pub verbose: bool,

    #[arg(long, default_value = "true", action = clap::ArgAction::Set)]
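    /// Run each benchmark in an isolated worker process
    /// (use --isolated=false to run in-process).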
    pub isolated: bool,

    #[arg(long)]
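    /// Spawn a fresh worker process for every benchmark instead of reusing
    /// workers (isolated mode only).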
    pub one_shot: bool,

    #[arg(long, default_value = "60")]
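    /// Worker timeout in seconds (isolated mode).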
    pub worker_timeout: u64,

    #[arg(long, default_value = "1")]
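    /// Number of parallel worker processes (isolated mode only).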
    pub jobs: usize,

    #[arg(long, short = 'j', default_value = "0")]
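    /// Thread count for the global Rayon pool used by in-process benchmarks;
    /// 0 uses all available cores.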
    pub threads: usize,

    #[arg(long, hide = true)]
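    /// Internal: run this process as a benchmark worker.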
    pub flux_worker: bool,

    #[arg(long)]
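    /// Save the report as a baseline, optionally to the given path
    /// (default: target/fluxbench/baseline.json).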
    pub save_baseline: Option<Option<PathBuf>>,

    #[arg(long, hide = true)]
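    /// Compatibility flag passed by `cargo bench`; accepted and ignored.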
    pub bench: bool,
}

#[derive(Subcommand, Debug)]
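/// Top-level subcommands.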
pub enum Commands {
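    /// List benchmarks matching the current filters.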
    List,
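    /// Run the selected benchmarks (also the default with no subcommand).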
    Run,
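    /// Run benchmarks and compare the results against a baseline report.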
    Compare {
        #[arg(name = "REF")]
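        /// Git ref at which the baseline was captured.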
        git_ref: String,
    },
}

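/// Parses command-line arguments and runs the FluxBench CLI.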
pub fn run() -> anyhow::Result<()> {
    let cli = Cli::parse();
    run_with_cli(cli)
}

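/// Runs the CLI with pre-parsed arguments, which is useful when embedding
/// FluxBench in a custom harness. A minimal sketch (the crate name
/// `fluxbench` here is illustrative, not confirmed by this file):
///
/// ```ignore
/// use clap::Parser;
///
/// // Build arguments exactly as the binary would parse them.
/// let cli = fluxbench::Cli::parse_from(["fluxbench", "--format", "json"]);
/// fluxbench::run_with_cli(cli)?;
/// ```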
pub fn run_with_cli(cli: Cli) -> anyhow::Result<()> {
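    // Hidden flag set when this binary is re-executed as a worker process.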
    if cli.flux_worker {
        return run_worker_mode();
    }

    if cli.verbose {
        tracing_subscriber::fmt()
            .with_env_filter("fluxbench=debug")
            .init();
    } else {
        tracing_subscriber::fmt()
            .with_env_filter("fluxbench=info")
            .init();
    }

    let config = FluxConfig::discover().unwrap_or_default();

    let format: OutputFormat = cli.format.parse().unwrap_or(OutputFormat::Human);

    let jobs = if cli.jobs != 1 {
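        // An explicit (non-default) --jobs wins over the config file.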
        cli.jobs
    } else {
        config.runner.jobs.unwrap_or(1)
    };

    match cli.command {
        Some(Commands::List) => {
            list_benchmarks(&cli)?;
        }
        Some(Commands::Run) => {
            run_benchmarks(&cli, &config, format, jobs)?;
        }
        Some(Commands::Compare { ref git_ref }) => {
            compare_benchmarks(&cli, &config, git_ref, format)?;
        }
        None => {
            if cli.dry_run {
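                // With no subcommand, --dry-run behaves like `list`.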
                list_benchmarks(&cli)?;
            } else {
                run_benchmarks(&cli, &config, format, jobs)?;
            }
        }
    }

    Ok(())
}

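/// Entry point for the hidden --flux-worker mode.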
fn run_worker_mode() -> anyhow::Result<()> {
    let mut worker = WorkerMain::new();
    worker
        .run()
        .map_err(|e| anyhow::anyhow!("Worker error: {}", e))
}

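/// Applies the CLI regex, group, and tag filters to the registered benchmarks.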
fn filter_benchmarks(
    cli: &Cli,
    benchmarks: &[&'static BenchmarkDef],
) -> Vec<&'static BenchmarkDef> {
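    // An invalid --filter pattern is silently dropped (no regex filtering).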
    let filter_re = Regex::new(&cli.filter).ok();

    let plan = planner::build_plan(
        benchmarks.iter().copied(),
        filter_re.as_ref(),
        cli.group.as_deref(),
        cli.tag.as_deref(),
        cli.skip_tag.as_deref(),
    );

    plan.benchmarks
}

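/// Prints the benchmark plan as a tree grouped by group, plus a tag summary.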
fn list_benchmarks(cli: &Cli) -> anyhow::Result<()> {
    println!("FluxBench Plan:");

    let all_benchmarks: Vec<_> = inventory::iter::<BenchmarkDef>.into_iter().collect();
    let benchmarks = filter_benchmarks(cli, &all_benchmarks);

    let mut groups: std::collections::BTreeMap<&str, Vec<&BenchmarkDef>> =
        std::collections::BTreeMap::new();

    for bench in &benchmarks {
        groups.entry(bench.group).or_default().push(bench);
    }

    let mut total = 0;
    for (group, benches) in &groups {
        println!("├── group: {}", group);
        for bench in benches {
            let tags = if bench.tags.is_empty() {
                String::new()
            } else {
                format!(" [{}]", bench.tags.join(", "))
            };
            println!(
                "│   ├── {}{} ({}:{})",
                bench.id, tags, bench.file, bench.line
            );
            total += 1;
        }
    }

    println!("{} benchmarks found.", total);

    let mut tag_counts: std::collections::BTreeMap<&str, usize> = std::collections::BTreeMap::new();
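    // The tag summary covers all registered benchmarks, not just the filtered set.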
    for bench in &all_benchmarks {
        for tag in bench.tags {
            *tag_counts.entry(tag).or_default() += 1;
        }
    }
    if !tag_counts.is_empty() {
        let tags_display: Vec<String> = tag_counts
            .iter()
            .map(|(tag, count)| format!("{} ({})", tag, count))
            .collect();
        println!("Tags: {}", tags_display.join(", "));
    }

    Ok(())
}

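/// Merges CLI flags with the discovered config file; explicit CLI values win.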
fn build_execution_config(cli: &Cli, config: &FluxConfig) -> ExecutionConfig {
    let warmup_ns = FluxConfig::parse_duration(&config.runner.warmup_time).unwrap_or(3_000_000_000);
    let measurement_ns =
        FluxConfig::parse_duration(&config.runner.measurement_time).unwrap_or(5_000_000_000);

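    // The clap defaults (3 s warmup, 5 s measurement) double as sentinels: any
    // other value is treated as an explicit override of the config file.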
    let warmup_time_ns = if cli.warmup != 3 {
        cli.warmup * 1_000_000_000
    } else {
        warmup_ns
    };
    let measurement_time_ns = if cli.measurement != 5 {
        cli.measurement * 1_000_000_000
    } else {
        measurement_ns
    };

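    // An explicit sample count pins the iteration count exactly and disables
    // the timed warmup/measurement phases.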
    if let Some(n) = cli.samples.or(config.runner.samples) {
        return ExecutionConfig {
            warmup_time_ns: 0,
            measurement_time_ns: 0,
            min_iterations: Some(n),
            max_iterations: Some(n),
            track_allocations: config.allocator.track,
            bootstrap_iterations: config.runner.bootstrap_iterations,
            confidence_level: config.runner.confidence_level,
        };
    }

    let min_iterations = cli.min_iterations.or(config.runner.min_iterations);
    let max_iterations = cli.max_iterations.or(config.runner.max_iterations);

    ExecutionConfig {
        warmup_time_ns,
        measurement_time_ns,
        min_iterations,
        max_iterations,
        track_allocations: config.allocator.track,
        bootstrap_iterations: config.runner.bootstrap_iterations,
        confidence_level: config.runner.confidence_level,
    }
}

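/// Runs the selected benchmarks and emits the report; exits the process with
/// status 1 on crashes or critical verification failures.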
fn run_benchmarks(
    cli: &Cli,
    config: &FluxConfig,
    format: OutputFormat,
    jobs: usize,
) -> anyhow::Result<()> {
    let jobs = jobs.max(1);

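    // A nonzero --threads caps the global Rayon pool; .ok() ignores the error
    // if a global pool was already installed.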
    if cli.threads > 0 {
        ThreadPoolBuilder::new()
            .num_threads(cli.threads)
            .build_global()
            .ok();
    }

    let all_benchmarks: Vec<_> = inventory::iter::<BenchmarkDef>.into_iter().collect();
    let benchmarks = filter_benchmarks(cli, &all_benchmarks);

    if benchmarks.is_empty() {
        if let Some(ref tag) = cli.tag {
            let all_tags: std::collections::BTreeSet<&str> = all_benchmarks
                .iter()
                .flat_map(|b| b.tags.iter().copied())
                .collect();
            if !all_tags.contains(tag.as_str()) {
                let available: Vec<&str> = all_tags.into_iter().collect();
                eprintln!(
                    "Warning: tag '{}' not found. Available tags: {}",
                    tag,
                    available.join(", ")
                );
            }
        }
        println!("No benchmarks found.");
        return Ok(());
    }

    let isolated = if config.runner.isolation.is_isolated() {
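        // Config gates isolation; when permitted, the --isolated flag decides.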
        cli.isolated
    } else {
        false
    };

    let threads_str = if cli.threads == 0 {
        "all".to_string()
    } else {
        cli.threads.to_string()
    };
    let mode_str = if isolated {
        if cli.one_shot {
            " (isolated, one-shot)"
        } else {
            " (isolated, persistent)"
        }
    } else {
        " (in-process)"
    };
    println!(
        "Running {} benchmarks{}, {} threads, {} worker(s)...\n",
        benchmarks.len(),
        mode_str,
        threads_str,
        jobs
    );

    let start_time = Instant::now();

    let exec_config = build_execution_config(cli, config);

    if exec_config.bootstrap_iterations > 0 && exec_config.bootstrap_iterations < 100 {
        eprintln!(
            "Warning: bootstrap_iterations={} is very low; confidence intervals will be unreliable. \
             Use >= 1000 for meaningful results, or 0 to skip bootstrap.",
            exec_config.bootstrap_iterations
        );
    }

    let results = if isolated {
        let timeout = std::time::Duration::from_secs(cli.worker_timeout);
        let reuse_workers = !cli.one_shot;
        let isolated_executor =
            IsolatedExecutor::new(exec_config.clone(), timeout, reuse_workers, jobs);
        isolated_executor.execute(&benchmarks)
    } else {
        if jobs > 1 {
            eprintln!(
                "Warning: --jobs currently applies only to isolated mode; running in-process serially."
            );
        }
        let mut executor = Executor::new(exec_config.clone());
        executor.execute(&benchmarks)
    };

    let stats = compute_statistics(&results, &exec_config);

    if exec_config.track_allocations
        && !results.is_empty()
        && results
            .iter()
            .all(|r| r.alloc_bytes == 0 && r.alloc_count == 0)
    {
        eprintln!(
            "Warning: allocation tracking enabled but all benchmarks reported 0 bytes allocated.\n\
             Ensure TrackingAllocator is set as #[global_allocator] in your benchmark binary."
        );
    }

    let total_duration_ms = start_time.elapsed().as_secs_f64() * 1000.0;
    let mut report = build_report(&results, &stats, &exec_config, total_duration_ms);

    let (comparison_results, comparison_series, synthetic_results, verification_results) =
        execute_verifications(&results, &stats);
    let verification_summary = aggregate_verifications(&verification_results);
    report.comparisons = comparison_results;
    report.comparison_series = comparison_series;
    report.synthetics = synthetic_results;
    report.verifications = verification_results;

    report.summary.critical_failures = verification_summary.critical_failures;
    report.summary.warnings = verification_summary.failed - verification_summary.critical_failures;

    let output = match format {
        OutputFormat::Json => generate_json_report(&report)?,
        OutputFormat::GithubSummary => generate_github_summary(&report),
        OutputFormat::Html => generate_html_report(&report),
        OutputFormat::Csv => generate_csv_report(&report),
        OutputFormat::Human => format_human_output(&report),
    };

    if let Some(ref path) = cli.output {
        let mut file = std::fs::File::create(path)?;
        file.write_all(output.as_bytes())?;
        println!("Report written to: {}", path.display());
    } else {
        print!("{}", output);
    }

    save_baseline_if_needed(cli, config, &report)?;

    let has_crashes = report
        .results
        .iter()
        .any(|r| matches!(r.status, fluxbench_report::BenchmarkStatus::Crashed));

    if verification_summary.should_fail_ci() || has_crashes {
        if has_crashes {
            eprintln!("\nBenchmark(s) crashed during execution");
        }
        if verification_summary.should_fail_ci() {
            eprintln!(
                "\n{} critical verification failure(s)",
                verification_summary.critical_failures + verification_summary.critical_errors
            );
        }
        std::process::exit(1);
    }

    Ok(())
}

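/// Re-runs the selected benchmarks, annotates each result with a comparison
/// against the baseline report, and exits with status 1 on regressions or
/// critical verification failures.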
fn compare_benchmarks(
    cli: &Cli,
    config: &FluxConfig,
    git_ref: &str,
    format: OutputFormat,
) -> anyhow::Result<()> {
    let baseline_path = cli.baseline.as_ref().ok_or_else(|| {
        anyhow::anyhow!(
            "the 'compare' command requires --baseline pointing to a previously saved JSON report"
        )
    })?;

    if !baseline_path.exists() {
        return Err(anyhow::anyhow!(
            "Baseline file not found: {}",
            baseline_path.display()
        ));
    }

    let baseline_json = std::fs::read_to_string(baseline_path)?;
    let baseline: fluxbench_report::Report = serde_json::from_str(&baseline_json)?;
    let resolved_git_ref = resolve_git_ref(git_ref)?;

    if let Some(baseline_commit) = baseline.meta.git_commit.as_deref() {
        let matches_ref = baseline_commit == resolved_git_ref
            || baseline_commit.starts_with(&resolved_git_ref)
            || resolved_git_ref.starts_with(baseline_commit);
        if !matches_ref {
            return Err(anyhow::anyhow!(
                "Baseline commit {} does not match git ref {} ({})",
                baseline_commit,
                git_ref,
                resolved_git_ref
            ));
        }
    } else {
        eprintln!(
            "Warning: baseline report has no commit metadata; git ref consistency cannot be verified."
        );
    }

    println!("Comparing against baseline: {}", baseline_path.display());
    println!("Git ref: {} ({})\n", git_ref, resolved_git_ref);

    let all_benchmarks: Vec<_> = inventory::iter::<BenchmarkDef>.into_iter().collect();
    let benchmarks = filter_benchmarks(cli, &all_benchmarks);

    if benchmarks.is_empty() {
        println!("No benchmarks found.");
        return Ok(());
    }

    let start_time = Instant::now();

    let exec_config = build_execution_config(cli, config);

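    // Comparison runs currently execute in-process only.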
    let mut executor = Executor::new(exec_config.clone());
    let results = executor.execute(&benchmarks);
    let stats = compute_statistics(&results, &exec_config);

    let total_duration_ms = start_time.elapsed().as_secs_f64() * 1000.0;
    let mut report = build_report(&results, &stats, &exec_config, total_duration_ms);

    let regression_threshold = cli.threshold.unwrap_or(config.ci.regression_threshold);
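    // Index baseline metrics by benchmark id for the comparison pass below.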
    let baseline_map: std::collections::HashMap<_, _> = baseline
        .results
        .iter()
        .filter_map(|r| r.metrics.as_ref().map(|m| (r.id.clone(), m.clone())))
        .collect();

    for result in &mut report.results {
        if let (Some(metrics), Some(baseline_metrics)) =
            (&result.metrics, baseline_map.get(&result.id))
        {
            let baseline_mean = baseline_metrics.mean_ns;
            let absolute_change = metrics.mean_ns - baseline_mean;
            let relative_change = if baseline_mean > 0.0 {
                (absolute_change / baseline_mean) * 100.0
            } else {
                0.0
            };

            let ci_non_overlap = metrics.ci_upper_ns < baseline_metrics.ci_lower_ns
                || metrics.ci_lower_ns > baseline_metrics.ci_upper_ns;
            let is_significant = relative_change.abs() > regression_threshold && ci_non_overlap;

            if relative_change > regression_threshold {
                report.summary.regressions += 1;
            } else if relative_change < -regression_threshold {
                report.summary.improvements += 1;
            }

            let mut effect_size = if metrics.std_dev_ns > f64::EPSILON {
                absolute_change / metrics.std_dev_ns
            } else {
                0.0
            };
            if !effect_size.is_finite() {
                effect_size = 0.0;
            }

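            // Coarse placeholder rather than a calibrated posterior: CI
            // separation is treated as near-certainty, overlap as a weak lean.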
            let probability_regression = if ci_non_overlap {
                if relative_change > 0.0 { 0.99 } else { 0.01 }
            } else if relative_change > 0.0 {
                0.60
            } else {
                0.40
            };

            result.comparison = Some(fluxbench_report::Comparison {
                baseline_mean_ns: baseline_mean,
                absolute_change_ns: absolute_change,
                relative_change,
                probability_regression,
                is_significant,
                effect_size,
            });
        }
    }

    let (comparison_results, comparison_series, synthetic_results, verification_results) =
        execute_verifications(&results, &stats);
    let verification_summary = aggregate_verifications(&verification_results);
    report.comparisons = comparison_results;
    report.comparison_series = comparison_series;
    report.synthetics = synthetic_results;
    report.verifications = verification_results;
    report.summary.critical_failures = verification_summary.critical_failures;
    report.summary.warnings = verification_summary.failed - verification_summary.critical_failures;

    let output = match format {
        OutputFormat::Json => generate_json_report(&report)?,
        OutputFormat::GithubSummary => generate_github_summary(&report),
        OutputFormat::Html => generate_html_report(&report),
        OutputFormat::Csv => generate_csv_report(&report),
        OutputFormat::Human => format_comparison_output(&report, &baseline),
    };

    if let Some(ref path) = cli.output {
        let mut file = std::fs::File::create(path)?;
        file.write_all(output.as_bytes())?;
        println!("Report written to: {}", path.display());
    } else {
        print!("{}", output);
    }

    save_baseline_if_needed(cli, config, &report)?;

    let should_fail = report.summary.regressions > 0 || verification_summary.should_fail_ci();
    if should_fail {
        if report.summary.regressions > 0 {
            eprintln!(
                "\n{} regression(s) detected above {}% threshold",
                report.summary.regressions, regression_threshold
            );
        }
        if verification_summary.should_fail_ci() {
            eprintln!(
                "\n{} critical verification failure(s)",
                verification_summary.critical_failures + verification_summary.critical_errors
            );
        }
        std::process::exit(1);
    }

    Ok(())
}

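/// Saves the report as a JSON baseline when requested via --save-baseline or config.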
fn save_baseline_if_needed(
    cli: &Cli,
    config: &FluxConfig,
    report: &fluxbench_report::Report,
) -> anyhow::Result<()> {
    let should_save = cli.save_baseline.is_some() || config.output.save_baseline;
    if !should_save {
        return Ok(());
    }

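    // Path precedence: explicit --save-baseline value, then config, then the default.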
    let path = cli
        .save_baseline
        .as_ref()
        .and_then(|opt| opt.clone())
        .or_else(|| config.output.baseline_path.as_ref().map(PathBuf::from))
        .unwrap_or_else(|| PathBuf::from("target/fluxbench/baseline.json"));

    if let Some(parent) = path.parent() {
        std::fs::create_dir_all(parent)?;
    }

    let json = generate_json_report(report)?;
    std::fs::write(&path, json)?;
    eprintln!("Baseline saved to: {}", path.display());

    Ok(())
}

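/// Resolves a git ref to a full commit hash via `git rev-parse --verify`.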
fn resolve_git_ref(git_ref: &str) -> anyhow::Result<String> {
    let output = std::process::Command::new("git")
        .args(["rev-parse", "--verify", git_ref])
        .output()
        .map_err(|e| anyhow::anyhow!("Failed to resolve git ref '{}': {}", git_ref, e))?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(anyhow::anyhow!(
            "Invalid git ref '{}': {}",
            git_ref,
            stderr.trim()
        ));
    }

    let resolved = String::from_utf8(output.stdout)?.trim().to_string();
    if resolved.is_empty() {
        return Err(anyhow::anyhow!(
            "Git ref '{}' resolved to an empty commit hash",
            git_ref
        ));
    }

    Ok(resolved)
}

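/// Renders the human-readable baseline-vs-current comparison view.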
fn format_comparison_output(
    report: &fluxbench_report::Report,
    baseline: &fluxbench_report::Report,
) -> String {
    let mut output = String::new();

    output.push('\n');
    output.push_str("FluxBench Comparison Results\n");
    output.push_str(&"=".repeat(60));
    output.push_str("\n\n");

    output.push_str(&format!(
        "Baseline: {} ({})\n",
        baseline.meta.git_commit.as_deref().unwrap_or("unknown"),
        baseline.meta.timestamp.format("%Y-%m-%d %H:%M:%S")
    ));
    output.push_str(&format!(
        "Current: {} ({})\n\n",
        report.meta.git_commit.as_deref().unwrap_or("unknown"),
        report.meta.timestamp.format("%Y-%m-%d %H:%M:%S")
    ));

    for result in &report.results {
        let status_icon = match result.status {
            fluxbench_report::BenchmarkStatus::Passed => "✓",
            fluxbench_report::BenchmarkStatus::Failed => "✗",
            fluxbench_report::BenchmarkStatus::Crashed => "💥",
            fluxbench_report::BenchmarkStatus::Skipped => "⊘",
        };

        output.push_str(&format!("{} {}\n", status_icon, result.id));

        if let (Some(metrics), Some(comparison)) = (&result.metrics, &result.comparison) {
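            // Display-only icons use a fixed ±5% cutoff, independent of --threshold.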
            let change_icon = if comparison.relative_change > 5.0 {
                "📈 REGRESSION"
            } else if comparison.relative_change < -5.0 {
                "📉 improvement"
            } else {
                "≈ no change"
            };

            output.push_str(&format!(
                "  baseline: {:.2} ns → current: {:.2} ns\n",
                comparison.baseline_mean_ns, metrics.mean_ns
            ));
            output.push_str(&format!(
                "  change: {:+.2}% ({:+.2} ns) {}\n",
                comparison.relative_change, comparison.absolute_change_ns, change_icon
            ));
        }

        output.push('\n');
    }

    output.push_str("Summary\n");
    output.push_str(&"-".repeat(60));
    output.push('\n');
    output.push_str(&format!(
        "  Regressions: {}  Improvements: {}  No Change: {}\n",
        report.summary.regressions,
        report.summary.improvements,
        report.summary.total_benchmarks - report.summary.regressions - report.summary.improvements
    ));

    output
}