mod config;
mod executor;
mod planner;
mod supervisor;

pub use config::*;
pub use executor::{
    ExecutionConfig, Executor, IsolatedExecutor, build_report, compute_statistics,
    execute_verifications, format_human_output,
};
pub use supervisor::*;

use clap::{Parser, Subcommand};
use fluxbench_core::{BenchmarkDef, WorkerMain};
use fluxbench_logic::aggregate_verifications;
use fluxbench_report::{
    OutputFormat, generate_csv_report, generate_github_summary, generate_html_report,
    generate_json_report,
};
use rayon::ThreadPoolBuilder;
use regex::Regex;
use std::io::Write;
use std::path::PathBuf;
use std::time::Instant;

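/// Command-line arguments for the `fluxbench` runner binary.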
#[derive(Parser, Debug)]
#[command(name = "fluxbench")]
#[command(author, version, about = "FluxBench - benchmarking framework for Rust")]
pub struct Cli {
    #[command(subcommand)]
    pub command: Option<Commands>,

    /// Regex used to select benchmarks by id.
    #[arg(default_value = ".*")]
    pub filter: String,

    /// Output format: human, json, github-summary, html, or csv.
    #[arg(long, default_value = "human")]
    pub format: String,

    /// Write the report to this file instead of stdout.
    #[arg(short, long)]
    pub output: Option<PathBuf>,

    /// Baseline report (JSON) to compare against.
    #[arg(long)]
    pub baseline: Option<PathBuf>,

    /// List the execution plan without running benchmarks.
    #[arg(long)]
    pub dry_run: bool,

    /// Regression threshold in percent; overrides the config value.
    #[arg(long)]
    pub threshold: Option<f64>,

    /// Only run benchmarks in this group.
    #[arg(long)]
    pub group: Option<String>,

    /// Only run benchmarks with this tag.
    #[arg(long)]
    pub tag: Option<String>,

    /// Skip benchmarks with this tag.
    #[arg(long)]
    pub skip_tag: Option<String>,

    /// Warmup time in seconds.
    #[arg(long, default_value = "3")]
    pub warmup: u64,

    /// Measurement time in seconds.
    #[arg(long, default_value = "5")]
    pub measurement: u64,

    /// Collect exactly this many samples, skipping time-based sizing.
    #[arg(long, short = 'n')]
    pub samples: Option<u64>,

    /// Minimum number of iterations per benchmark.
    #[arg(long)]
    pub min_iterations: Option<u64>,

    /// Maximum number of iterations per benchmark.
    #[arg(long)]
    pub max_iterations: Option<u64>,

    /// Enable debug logging.
    #[arg(short, long)]
    pub verbose: bool,

    /// Run each benchmark in an isolated worker process.
    #[arg(long, default_value = "true", action = clap::ArgAction::Set)]
    pub isolated: bool,

    /// Spawn a fresh worker per benchmark instead of reusing workers.
    #[arg(long)]
    pub one_shot: bool,

    /// Per-benchmark timeout for isolated workers, in seconds.
    #[arg(long, default_value = "60")]
    pub worker_timeout: u64,

    /// Number of worker processes to run in parallel (isolated mode only).
    #[arg(long, default_value = "1")]
    pub jobs: usize,

    /// Rayon thread pool size; 0 uses all available cores.
    #[arg(long, short = 'j', default_value = "0")]
    pub threads: usize,

    /// Internal flag: run this process as a benchmark worker.
    #[arg(long, hide = true)]
    pub flux_worker: bool,

    /// Save the report as a baseline, optionally to the given path.
    #[arg(long)]
    pub save_baseline: Option<Option<PathBuf>>,

    /// Compatibility flag passed by `cargo bench`.
    #[arg(long, hide = true)]
    pub bench: bool,
}

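/// Top-level subcommands; omitting one runs all matching benchmarks.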
#[derive(Subcommand, Debug)]
pub enum Commands {
    /// List benchmarks matching the current filters without running them.
    List,
    /// Run all matching benchmarks.
    Run,
    /// Run benchmarks and compare against a baseline from the given git ref.
    Compare {
        #[arg(name = "REF")]
        git_ref: String,
    },
}

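/// Binary entry point: parses CLI arguments and dispatches.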
pub fn run() -> anyhow::Result<()> {
    let cli = Cli::parse();
    run_with_cli(cli)
}

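/// Dispatches pre-parsed arguments: worker mode first, then logging setup,
/// then the requested subcommand.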
pub fn run_with_cli(cli: Cli) -> anyhow::Result<()> {
    if cli.flux_worker {
        return run_worker_mode();
    }

    if cli.verbose {
        tracing_subscriber::fmt()
            .with_env_filter("fluxbench=debug")
            .init();
    } else {
        tracing_subscriber::fmt()
            .with_env_filter("fluxbench=info")
            .init();
    }

    let config = FluxConfig::discover().unwrap_or_default();

    let format: OutputFormat = cli.format.parse().unwrap_or(OutputFormat::Human);

    // A non-default --jobs value takes precedence over the config file.
    let jobs = if cli.jobs != 1 {
        cli.jobs
    } else {
        config.runner.jobs.unwrap_or(1)
    };

    match cli.command {
        Some(Commands::List) => {
            list_benchmarks(&cli)?;
        }
        Some(Commands::Run) => {
            run_benchmarks(&cli, &config, format, jobs)?;
        }
        Some(Commands::Compare { ref git_ref }) => {
            compare_benchmarks(&cli, &config, git_ref, format)?;
        }
        None => {
            if cli.dry_run {
                list_benchmarks(&cli)?;
            } else {
                run_benchmarks(&cli, &config, format, jobs)?;
            }
        }
    }

    Ok(())
}

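/// Runs this process as a benchmark worker driven by a parent supervisor.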
fn run_worker_mode() -> anyhow::Result<()> {
    let mut worker = WorkerMain::new();
    worker
        .run()
        .map_err(|e| anyhow::anyhow!("Worker error: {}", e))
}

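/// Applies the CLI's filter regex, group, tag, and skip-tag selectors.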
fn filter_benchmarks(
    cli: &Cli,
    benchmarks: &[&'static BenchmarkDef],
) -> Vec<&'static BenchmarkDef> {
    // An unparsable filter regex is dropped rather than reported.
    let filter_re = Regex::new(&cli.filter).ok();

    let plan = planner::build_plan(
        benchmarks.iter().copied(),
        filter_re.as_ref(),
        cli.group.as_deref(),
        cli.tag.as_deref(),
        cli.skip_tag.as_deref(),
    );

    plan.benchmarks
}

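/// Prints the execution plan: benchmarks grouped by group, plus tag counts.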
fn list_benchmarks(cli: &Cli) -> anyhow::Result<()> {
    println!("FluxBench Plan:");

    let all_benchmarks: Vec<_> = inventory::iter::<BenchmarkDef>.into_iter().collect();
    let benchmarks = filter_benchmarks(cli, &all_benchmarks);

    let mut groups: std::collections::BTreeMap<&str, Vec<&BenchmarkDef>> =
        std::collections::BTreeMap::new();

    for bench in &benchmarks {
        groups.entry(bench.group).or_default().push(bench);
    }

    let mut total = 0;
    for (group, benches) in &groups {
        println!("├── group: {}", group);
        for bench in benches {
            let tags = if bench.tags.is_empty() {
                String::new()
            } else {
                format!(" [{}]", bench.tags.join(", "))
            };
            println!(
                "│ ├── {}{} ({}:{})",
                bench.id, tags, bench.file, bench.line
            );
            total += 1;
        }
    }

    println!("{} benchmarks found.", total);

    // Tag counts cover all registered benchmarks, not just the filtered set.
    let mut tag_counts: std::collections::BTreeMap<&str, usize> = std::collections::BTreeMap::new();
    for bench in &all_benchmarks {
        for tag in bench.tags {
            *tag_counts.entry(tag).or_default() += 1;
        }
    }
    if !tag_counts.is_empty() {
        let tags_display: Vec<String> = tag_counts
            .iter()
            .map(|(tag, count)| format!("{} ({})", tag, count))
            .collect();
        println!("Tags: {}", tags_display.join(", "));
    }

    Ok(())
}

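/// Merges CLI flags with config-file settings into an [`ExecutionConfig`].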
fn build_execution_config(cli: &Cli, config: &FluxConfig) -> ExecutionConfig {
    let warmup_ns = FluxConfig::parse_duration(&config.runner.warmup_time).unwrap_or(3_000_000_000);
    let measurement_ns =
        FluxConfig::parse_duration(&config.runner.measurement_time).unwrap_or(5_000_000_000);

    // Non-default CLI values override the config file.
    let warmup_time_ns = if cli.warmup != 3 {
        cli.warmup * 1_000_000_000
    } else {
        warmup_ns
    };
    let measurement_time_ns = if cli.measurement != 5 {
        cli.measurement * 1_000_000_000
    } else {
        measurement_ns
    };

    // An explicit sample count pins the iteration count and disables
    // time-based sizing entirely.
    if let Some(n) = cli.samples.or(config.runner.samples) {
        return ExecutionConfig {
            warmup_time_ns: 0,
            measurement_time_ns: 0,
            min_iterations: Some(n),
            max_iterations: Some(n),
            track_allocations: config.allocator.track,
            bootstrap_iterations: config.runner.bootstrap_iterations,
            confidence_level: config.runner.confidence_level,
        };
    }

    let min_iterations = cli.min_iterations.or(config.runner.min_iterations);
    let max_iterations = cli.max_iterations.or(config.runner.max_iterations);

    ExecutionConfig {
        warmup_time_ns,
        measurement_time_ns,
        min_iterations,
        max_iterations,
        track_allocations: config.allocator.track,
        bootstrap_iterations: config.runner.bootstrap_iterations,
        confidence_level: config.runner.confidence_level,
    }
}

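/// Runs the filtered benchmarks, prints or writes the report, and exits
/// non-zero on crashes or critical verification failures.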
fn run_benchmarks(
    cli: &Cli,
    config: &FluxConfig,
    format: OutputFormat,
    jobs: usize,
) -> anyhow::Result<()> {
    let jobs = jobs.max(1);

    if cli.threads > 0 {
        ThreadPoolBuilder::new()
            .num_threads(cli.threads)
            .build_global()
            .ok();
    }

    let all_benchmarks: Vec<_> = inventory::iter::<BenchmarkDef>.into_iter().collect();
    let benchmarks = filter_benchmarks(cli, &all_benchmarks);

    if benchmarks.is_empty() {
        if let Some(ref tag) = cli.tag {
            let all_tags: std::collections::BTreeSet<&str> = all_benchmarks
                .iter()
                .flat_map(|b| b.tags.iter().copied())
                .collect();
            if !all_tags.contains(tag.as_str()) {
                let available: Vec<&str> = all_tags.into_iter().collect();
                eprintln!(
                    "Warning: tag '{}' not found. Available tags: {}",
                    tag,
                    available.join(", ")
                );
            }
        }
        println!("No benchmarks found.");
        return Ok(());
    }

    // The config file can force in-process execution regardless of --isolated.
    let isolated = if config.runner.isolation.is_isolated() {
        cli.isolated
    } else {
        false
    };

    let threads_str = if cli.threads == 0 {
        "all".to_string()
    } else {
        cli.threads.to_string()
    };
    let mode_str = if isolated {
        if cli.one_shot {
            " (isolated, one-shot)"
        } else {
            " (isolated, persistent)"
        }
    } else {
        " (in-process)"
    };
    println!(
        "Running {} benchmarks{}, {} threads, {} worker(s)...\n",
        benchmarks.len(),
        mode_str,
        threads_str,
        jobs
    );

    let start_time = Instant::now();

    let exec_config = build_execution_config(cli, config);

    if exec_config.bootstrap_iterations > 0 && exec_config.bootstrap_iterations < 100 {
        eprintln!(
            "Warning: bootstrap_iterations={} is very low; confidence intervals will be unreliable. \
             Use >= 1000 for meaningful results, or 0 to skip bootstrap.",
            exec_config.bootstrap_iterations
        );
    }

    let results = if isolated {
        let timeout = std::time::Duration::from_secs(cli.worker_timeout);
        let reuse_workers = !cli.one_shot;
        let isolated_executor =
            IsolatedExecutor::new(exec_config.clone(), timeout, reuse_workers, jobs);
        isolated_executor.execute(&benchmarks)
    } else {
        if jobs > 1 {
            eprintln!(
                "Warning: --jobs currently applies only to isolated mode; running in-process serially."
            );
        }
        let mut executor = Executor::new(exec_config.clone());
        executor.execute(&benchmarks)
    };

    let stats = compute_statistics(&results, &exec_config);

    if exec_config.track_allocations
        && !results.is_empty()
        && results
            .iter()
            .all(|r| r.alloc_bytes == 0 && r.alloc_count == 0)
    {
        eprintln!(
            "Warning: allocation tracking enabled but all benchmarks reported 0 bytes allocated.\n\
             Ensure TrackingAllocator is set as #[global_allocator] in your benchmark binary."
        );
    }

    let total_duration_ms = start_time.elapsed().as_secs_f64() * 1000.0;
    let mut report = build_report(&results, &stats, &exec_config, total_duration_ms);

    let (comparison_results, comparison_series, synthetic_results, verification_results) =
        execute_verifications(&results, &stats);
    let verification_summary = aggregate_verifications(&verification_results);
    report.comparisons = comparison_results;
    report.comparison_series = comparison_series;
    report.synthetics = synthetic_results;
    report.verifications = verification_results;

    report.summary.critical_failures = verification_summary.critical_failures;
    report.summary.warnings = verification_summary.failed - verification_summary.critical_failures;

    let output = match format {
        OutputFormat::Json => generate_json_report(&report)?,
        OutputFormat::GithubSummary => generate_github_summary(&report),
        OutputFormat::Html => generate_html_report(&report),
        OutputFormat::Csv => generate_csv_report(&report),
        OutputFormat::Human => format_human_output(&report),
    };

    if let Some(ref path) = cli.output {
        let mut file = std::fs::File::create(path)?;
        file.write_all(output.as_bytes())?;
        println!("Report written to: {}", path.display());
    } else {
        print!("{}", output);
    }

    save_baseline_if_needed(cli, config, &report)?;

    let has_crashes = report
        .results
        .iter()
        .any(|r| matches!(r.status, fluxbench_report::BenchmarkStatus::Crashed));

    if verification_summary.should_fail_ci() || has_crashes {
        if has_crashes {
            eprintln!("\nBenchmark(s) crashed during execution");
        }
        if verification_summary.should_fail_ci() {
            eprintln!(
                "\n{} critical verification failure(s)",
                verification_summary.critical_failures + verification_summary.critical_errors
            );
        }
        std::process::exit(1);
    }

    Ok(())
}

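/// Re-runs the benchmarks and annotates each result with a comparison
/// against a saved baseline report, failing CI on regressions.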
fn compare_benchmarks(
    cli: &Cli,
    config: &FluxConfig,
    git_ref: &str,
    format: OutputFormat,
) -> anyhow::Result<()> {
    let baseline_path = cli.baseline.as_ref().ok_or_else(|| {
        anyhow::anyhow!(
            "--baseline required for comparison, or use 'compare' command with a git ref"
        )
    })?;

    if !baseline_path.exists() {
        return Err(anyhow::anyhow!(
            "Baseline file not found: {}",
            baseline_path.display()
        ));
    }

    let baseline_json = std::fs::read_to_string(baseline_path)?;
    let baseline: fluxbench_report::Report = serde_json::from_str(&baseline_json)?;
    let resolved_git_ref = resolve_git_ref(git_ref)?;

    // Accept prefix matches so short and full commit hashes compare equal.
    if let Some(baseline_commit) = baseline.meta.git_commit.as_deref() {
        let matches_ref = baseline_commit == resolved_git_ref
            || baseline_commit.starts_with(&resolved_git_ref)
            || resolved_git_ref.starts_with(baseline_commit);
        if !matches_ref {
            return Err(anyhow::anyhow!(
                "Baseline commit {} does not match git ref {} ({})",
                baseline_commit,
                git_ref,
                resolved_git_ref
            ));
        }
    } else {
        eprintln!(
            "Warning: baseline report has no commit metadata; git ref consistency cannot be verified."
        );
    }

    println!("Comparing against baseline: {}", baseline_path.display());
    println!("Git ref: {} ({})\n", git_ref, resolved_git_ref);

    let all_benchmarks: Vec<_> = inventory::iter::<BenchmarkDef>.into_iter().collect();
    let benchmarks = filter_benchmarks(cli, &all_benchmarks);

    if benchmarks.is_empty() {
        println!("No benchmarks found.");
        return Ok(());
    }

    let start_time = Instant::now();

    let exec_config = build_execution_config(cli, config);

    let mut executor = Executor::new(exec_config.clone());
    let results = executor.execute(&benchmarks);
    let stats = compute_statistics(&results, &exec_config);

    let total_duration_ms = start_time.elapsed().as_secs_f64() * 1000.0;
    let mut report = build_report(&results, &stats, &exec_config, total_duration_ms);

    let regression_threshold = cli.threshold.unwrap_or(config.ci.regression_threshold);
    let baseline_map: std::collections::HashMap<_, _> = baseline
        .results
        .iter()
        .filter_map(|r| r.metrics.as_ref().map(|m| (r.id.clone(), m.clone())))
        .collect();

    for result in &mut report.results {
        if let (Some(metrics), Some(baseline_metrics)) =
            (&result.metrics, baseline_map.get(&result.id))
        {
            let baseline_mean = baseline_metrics.mean_ns;
            let absolute_change = metrics.mean_ns - baseline_mean;
            let relative_change = if baseline_mean > 0.0 {
                (absolute_change / baseline_mean) * 100.0
            } else {
                0.0
            };

            // A change is significant only if it exceeds the threshold and
            // the confidence intervals do not overlap.
            let ci_non_overlap = metrics.ci_upper_ns < baseline_metrics.ci_lower_ns
                || metrics.ci_lower_ns > baseline_metrics.ci_upper_ns;
            let is_significant = relative_change.abs() > regression_threshold && ci_non_overlap;

            if relative_change > regression_threshold {
                report.summary.regressions += 1;
            } else if relative_change < -regression_threshold {
                report.summary.improvements += 1;
            }

            let mut effect_size = if metrics.std_dev_ns > f64::EPSILON {
                absolute_change / metrics.std_dev_ns
            } else {
                0.0
            };
            if !effect_size.is_finite() {
                effect_size = 0.0;
            }

            // Coarse heuristic rather than a true posterior probability.
            let probability_regression = if ci_non_overlap {
                if relative_change > 0.0 { 0.99 } else { 0.01 }
            } else if relative_change > 0.0 {
                0.60
            } else {
                0.40
            };

            result.comparison = Some(fluxbench_report::Comparison {
                baseline_mean_ns: baseline_mean,
                absolute_change_ns: absolute_change,
                relative_change,
                probability_regression,
                is_significant,
                effect_size,
            });
        }
    }

    let (comparison_results, comparison_series, synthetic_results, verification_results) =
        execute_verifications(&results, &stats);
    let verification_summary = aggregate_verifications(&verification_results);
    report.comparisons = comparison_results;
    report.comparison_series = comparison_series;
    report.synthetics = synthetic_results;
    report.verifications = verification_results;
    report.summary.critical_failures = verification_summary.critical_failures;
    report.summary.warnings = verification_summary.failed - verification_summary.critical_failures;

    let output = match format {
        OutputFormat::Json => generate_json_report(&report)?,
        OutputFormat::GithubSummary => generate_github_summary(&report),
        OutputFormat::Html => generate_html_report(&report),
        OutputFormat::Csv => generate_csv_report(&report),
        OutputFormat::Human => format_comparison_output(&report, &baseline),
    };

    if let Some(ref path) = cli.output {
        let mut file = std::fs::File::create(path)?;
        file.write_all(output.as_bytes())?;
        println!("Report written to: {}", path.display());
    } else {
        print!("{}", output);
    }

    save_baseline_if_needed(cli, config, &report)?;

    let should_fail = report.summary.regressions > 0 || verification_summary.should_fail_ci();
    if should_fail {
        if report.summary.regressions > 0 {
            eprintln!(
                "\n{} regression(s) detected above {}% threshold",
                report.summary.regressions, regression_threshold
            );
        }
        if verification_summary.should_fail_ci() {
            eprintln!(
                "\n{} critical verification failure(s)",
                verification_summary.critical_failures + verification_summary.critical_errors
            );
        }
        std::process::exit(1);
    }

    Ok(())
}

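/// Writes the report JSON to the configured baseline path when requested
/// via --save-baseline or the config file.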
fn save_baseline_if_needed(
    cli: &Cli,
    config: &FluxConfig,
    report: &fluxbench_report::Report,
) -> anyhow::Result<()> {
    let should_save = cli.save_baseline.is_some() || config.output.save_baseline;
    if !should_save {
        return Ok(());
    }

    // Precedence: explicit CLI path, then config path, then the default.
    let path = cli
        .save_baseline
        .as_ref()
        .and_then(|opt| opt.clone())
        .or_else(|| config.output.baseline_path.as_ref().map(PathBuf::from))
        .unwrap_or_else(|| PathBuf::from("target/fluxbench/baseline.json"));

    if let Some(parent) = path.parent() {
        std::fs::create_dir_all(parent)?;
    }

    let json = generate_json_report(report)?;
    std::fs::write(&path, json)?;
    eprintln!("Baseline saved to: {}", path.display());

    Ok(())
}

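/// Resolves a git ref to a full commit hash via `git rev-parse --verify`.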
fn resolve_git_ref(git_ref: &str) -> anyhow::Result<String> {
    let output = std::process::Command::new("git")
        .args(["rev-parse", "--verify", git_ref])
        .output()
        .map_err(|e| anyhow::anyhow!("Failed to resolve git ref '{}': {}", git_ref, e))?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(anyhow::anyhow!(
            "Invalid git ref '{}': {}",
            git_ref,
            stderr.trim()
        ));
    }

    let resolved = String::from_utf8(output.stdout)?.trim().to_string();
    if resolved.is_empty() {
        return Err(anyhow::anyhow!(
            "Git ref '{}' resolved to an empty commit hash",
            git_ref
        ));
    }

    Ok(resolved)
}

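/// Renders a human-readable comparison of the current report against the
/// baseline, one entry per benchmark plus a summary footer.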
fn format_comparison_output(
    report: &fluxbench_report::Report,
    baseline: &fluxbench_report::Report,
) -> String {
    let mut output = String::new();

    output.push('\n');
    output.push_str("FluxBench Comparison Results\n");
    output.push_str(&"=".repeat(60));
    output.push_str("\n\n");

    output.push_str(&format!(
        "Baseline: {} ({})\n",
        baseline.meta.git_commit.as_deref().unwrap_or("unknown"),
        baseline.meta.timestamp.format("%Y-%m-%d %H:%M:%S")
    ));
    output.push_str(&format!(
        "Current: {} ({})\n\n",
        report.meta.git_commit.as_deref().unwrap_or("unknown"),
        report.meta.timestamp.format("%Y-%m-%d %H:%M:%S")
    ));

    for result in &report.results {
        let status_icon = match result.status {
            fluxbench_report::BenchmarkStatus::Passed => "✓",
            fluxbench_report::BenchmarkStatus::Failed => "✗",
            fluxbench_report::BenchmarkStatus::Crashed => "💥",
            fluxbench_report::BenchmarkStatus::Skipped => "⊘",
        };

        output.push_str(&format!("{} {}\n", status_icon, result.id));

        if let (Some(metrics), Some(comparison)) = (&result.metrics, &result.comparison) {
            let change_icon = if comparison.relative_change > 5.0 {
                "📈 REGRESSION"
            } else if comparison.relative_change < -5.0 {
                "📉 improvement"
            } else {
                "≈ no change"
            };

            output.push_str(&format!(
                " baseline: {:.2} ns → current: {:.2} ns\n",
                comparison.baseline_mean_ns, metrics.mean_ns
            ));
            output.push_str(&format!(
                " change: {:+.2}% ({:+.2} ns) {}\n",
                comparison.relative_change, comparison.absolute_change_ns, change_icon
            ));
        }

        output.push('\n');
    }

    output.push_str("Summary\n");
    output.push_str(&"-".repeat(60));
    output.push('\n');
    output.push_str(&format!(
        " Regressions: {} Improvements: {} No Change: {}\n",
        report.summary.regressions,
        report.summary.improvements,
        report.summary.total_benchmarks - report.summary.regressions - report.summary.improvements
    ));

    output
}