#![warn(missing_docs)]
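//! Command-line interface for FluxBench.
//!
//! [`run`] parses arguments from the process environment; [`run_with_cli`]
//! accepts a pre-built [`Cli`] so the runner can be driven programmatically.
//!
//! A minimal sketch of programmatic use (the `fluxbench_cli` crate name is
//! assumed here for illustration):
//!
//! ```ignore
//! use clap::Parser;
//! use fluxbench_cli::{Cli, run_with_cli};
//!
//! // Equivalent to invoking `fluxbench --format json --dry-run`.
//! let cli = Cli::parse_from(["fluxbench", "--format", "json", "--dry-run"]);
//! run_with_cli(cli).unwrap();
//! ```
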
mod config;
mod executor;
mod planner;
mod supervisor;

pub use config::*;
pub use executor::{
    ExecutionConfig, Executor, IsolatedExecutor, build_report, compute_statistics,
    execute_verifications, format_human_output,
};
pub use supervisor::*;

use clap::{Parser, Subcommand};
use fluxbench_core::{BenchmarkDef, WorkerMain};
use fluxbench_logic::aggregate_verifications;
use fluxbench_report::{
    OutputFormat, generate_csv_report, generate_github_summary, generate_html_report,
    generate_json_report,
};
use rayon::ThreadPoolBuilder;
use regex::Regex;
use std::io::Write;
use std::path::PathBuf;
use std::time::Instant;

/// Command-line arguments for the FluxBench runner.
#[derive(Parser, Debug)]
#[command(name = "fluxbench")]
#[command(author, version, about = "FluxBench - benchmarking framework for Rust")]
pub struct Cli {
    /// Subcommand to execute; defaults to running all matching benchmarks.
    #[command(subcommand)]
    pub command: Option<Commands>,

    /// Regex used to select benchmarks by id.
    #[arg(default_value = ".*")]
    pub filter: String,

    /// Output format for the report (defaults to human-readable text).
    #[arg(long, default_value = "human")]
    pub format: String,

    /// Write the report to this file instead of stdout.
    #[arg(short, long)]
    pub output: Option<PathBuf>,

    /// Path to a baseline JSON report to compare against.
    #[arg(long)]
    pub baseline: Option<PathBuf>,

    /// List the benchmarks that would run without executing them.
    #[arg(long)]
    pub dry_run: bool,

    /// Regression threshold in percent; overrides the config file value.
    #[arg(long)]
    pub threshold: Option<f64>,

    /// Only run benchmarks in this group.
    #[arg(long)]
    pub group: Option<String>,

    /// Only run benchmarks carrying this tag.
    #[arg(long)]
    pub tag: Option<String>,

    /// Skip benchmarks carrying this tag.
    #[arg(long)]
    pub skip_tag: Option<String>,

    /// Warmup time in seconds.
    #[arg(long, default_value = "3")]
    pub warmup: u64,

    /// Measurement time in seconds.
    #[arg(long, default_value = "5")]
    pub measurement: u64,

    /// Collect exactly this many samples, disabling time-based sampling.
    #[arg(long, short = 'n')]
    pub samples: Option<u64>,

    /// Lower bound on iterations per benchmark.
    #[arg(long)]
    pub min_iterations: Option<u64>,

    /// Upper bound on iterations per benchmark.
    #[arg(long)]
    pub max_iterations: Option<u64>,

    /// Enable debug-level logging.
    #[arg(short, long)]
    pub verbose: bool,

    /// Run benchmarks in isolated worker processes.
    #[arg(long, default_value = "true", action = clap::ArgAction::Set)]
    pub isolated: bool,

    /// Spawn a fresh worker per benchmark instead of reusing workers.
    #[arg(long)]
    pub one_shot: bool,

    /// Per-benchmark worker timeout in seconds (isolated mode).
    #[arg(long, default_value = "60")]
    pub worker_timeout: u64,

    /// Number of parallel worker processes (isolated mode only).
    #[arg(long, default_value = "1")]
    pub jobs: usize,

    /// Rayon thread count; 0 uses all available cores.
    #[arg(long, short = 'j', default_value = "0")]
    pub threads: usize,

    /// Internal flag: run this process as a benchmark worker.
    #[arg(long, hide = true)]
    pub flux_worker: bool,

    /// Save results as a baseline, optionally to the given path.
    #[arg(long)]
    pub save_baseline: Option<Option<PathBuf>>,

    /// Accepted for `cargo bench` compatibility; otherwise ignored.
    #[arg(long, hide = true)]
    pub bench: bool,
}

/// Subcommands accepted by the `fluxbench` binary.
#[derive(Subcommand, Debug)]
pub enum Commands {
    /// List benchmarks matching the current filters.
    List,
    /// Run benchmarks (the default when no subcommand is given).
    Run,
    /// Run benchmarks and compare against a baseline taken at a git ref.
    Compare {
        /// Git ref the baseline was recorded at.
        #[arg(name = "REF")]
        git_ref: String,
    },
}

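/// Parse CLI arguments from the process environment and run the requested
/// command.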
pub fn run() -> anyhow::Result<()> {
    let cli = Cli::parse();
    run_with_cli(cli)
}

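/// Run the CLI with pre-parsed arguments. Useful for tests or embedding; note
/// that crashes, regressions, and critical verification failures terminate
/// the process via `std::process::exit(1)`.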
pub fn run_with_cli(cli: Cli) -> anyhow::Result<()> {
    // Hidden worker mode, used when the supervisor spawns this binary as a worker.
    if cli.flux_worker {
        return run_worker_mode();
    }

    if cli.verbose {
        tracing_subscriber::fmt()
            .with_env_filter("fluxbench=debug")
            .init();
    } else {
        tracing_subscriber::fmt()
            .with_env_filter("fluxbench=info")
            .init();
    }

    let config = FluxConfig::discover().unwrap_or_default();

    let format: OutputFormat = cli.format.parse().unwrap_or(OutputFormat::Human);

    // A --jobs value other than the default (1) wins over the config file.
    let jobs = if cli.jobs != 1 {
        cli.jobs
    } else {
        config.runner.jobs.unwrap_or(1)
    };

    match cli.command {
        Some(Commands::List) => {
            list_benchmarks(&cli)?;
        }
        Some(Commands::Run) => {
            run_benchmarks(&cli, &config, format, jobs)?;
        }
        Some(Commands::Compare { ref git_ref }) => {
            compare_benchmarks(&cli, &config, git_ref, format)?;
        }
        None => {
            if cli.dry_run {
                list_benchmarks(&cli)?;
            } else {
                run_benchmarks(&cli, &config, format, jobs)?;
            }
        }
    }

    Ok(())
}

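/// Hand control to `WorkerMain`, which services benchmark requests from the
/// supervising process.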
fn run_worker_mode() -> anyhow::Result<()> {
    let mut worker = WorkerMain::new();
    worker
        .run()
        .map_err(|e| anyhow::anyhow!("Worker error: {}", e))
}

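/// Apply the CLI filter regex, group, and tag selectors to the registered
/// benchmarks. An invalid regex is silently ignored (no name filtering).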
fn filter_benchmarks(
    cli: &Cli,
    benchmarks: &[&'static BenchmarkDef],
) -> Vec<&'static BenchmarkDef> {
    let filter_re = Regex::new(&cli.filter).ok();

    let plan = planner::build_plan(
        benchmarks.iter().copied(),
        filter_re.as_ref(),
        cli.group.as_deref(),
        cli.tag.as_deref(),
        cli.skip_tag.as_deref(),
    );

    plan.benchmarks
}

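/// Print the execution plan: benchmarks grouped by group name, plus a tag
/// census across all registered benchmarks.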
fn list_benchmarks(cli: &Cli) -> anyhow::Result<()> {
    println!("FluxBench Plan:");

    let all_benchmarks: Vec<_> = inventory::iter::<BenchmarkDef>.into_iter().collect();
    let benchmarks = filter_benchmarks(cli, &all_benchmarks);

    let mut groups: std::collections::BTreeMap<&str, Vec<&BenchmarkDef>> =
        std::collections::BTreeMap::new();

    for bench in &benchmarks {
        groups.entry(bench.group).or_default().push(bench);
    }

    let mut total = 0;
    for (group, benches) in &groups {
        println!("├── group: {}", group);
        for bench in benches {
            let tags = if bench.tags.is_empty() {
                String::new()
            } else {
                format!(" [{}]", bench.tags.join(", "))
            };
            println!(
                "│   ├── {}{} ({}:{})",
                bench.id, tags, bench.file, bench.line
            );
            total += 1;
        }
    }

    println!("{} benchmarks found.", total);

    // The tag census covers all registered benchmarks, not just the filtered set.
    let mut tag_counts: std::collections::BTreeMap<&str, usize> = std::collections::BTreeMap::new();
    for bench in &all_benchmarks {
        for tag in bench.tags {
            *tag_counts.entry(tag).or_default() += 1;
        }
    }
    if !tag_counts.is_empty() {
        let tags_display: Vec<String> = tag_counts
            .iter()
            .map(|(tag, count)| format!("{} ({})", tag, count))
            .collect();
        println!("Tags: {}", tags_display.join(", "));
    }

    Ok(())
}

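/// Merge CLI flags and the config file into an `ExecutionConfig`. CLI values
/// that differ from their clap defaults take precedence over the config file.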
fn build_execution_config(cli: &Cli, config: &FluxConfig) -> ExecutionConfig {
    let warmup_ns = FluxConfig::parse_duration(&config.runner.warmup_time).unwrap_or(3_000_000_000);
    let measurement_ns =
        FluxConfig::parse_duration(&config.runner.measurement_time).unwrap_or(5_000_000_000);

    // A value different from the clap default means the user set the flag
    // explicitly; otherwise the config file (or built-in default) wins.
    let warmup_time_ns = if cli.warmup != 3 {
        cli.warmup * 1_000_000_000
    } else {
        warmup_ns
    };
    let measurement_time_ns = if cli.measurement != 5 {
        cli.measurement * 1_000_000_000
    } else {
        measurement_ns
    };

    // A fixed sample count disables time-based warmup and measurement.
    if let Some(n) = cli.samples.or(config.runner.samples) {
        return ExecutionConfig {
            warmup_time_ns: 0,
            measurement_time_ns: 0,
            min_iterations: Some(n),
            max_iterations: Some(n),
            track_allocations: config.allocator.track,
            bootstrap_iterations: config.runner.bootstrap_iterations,
            confidence_level: config.runner.confidence_level,
        };
    }

    let min_iterations = cli.min_iterations.or(config.runner.min_iterations);
    let max_iterations = cli.max_iterations.or(config.runner.max_iterations);

    ExecutionConfig {
        warmup_time_ns,
        measurement_time_ns,
        min_iterations,
        max_iterations,
        track_allocations: config.allocator.track,
        bootstrap_iterations: config.runner.bootstrap_iterations,
        confidence_level: config.runner.confidence_level,
    }
}

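/// Execute the filtered benchmarks, print or write the report, and exit the
/// process with status 1 on crashes or critical verification failures.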
fn run_benchmarks(
    cli: &Cli,
    config: &FluxConfig,
    format: OutputFormat,
    jobs: usize,
) -> anyhow::Result<()> {
    let jobs = jobs.max(1);

    if cli.threads > 0 {
        ThreadPoolBuilder::new()
            .num_threads(cli.threads)
            .build_global()
            .ok();
    }

    let all_benchmarks: Vec<_> = inventory::iter::<BenchmarkDef>.into_iter().collect();
    let benchmarks = filter_benchmarks(cli, &all_benchmarks);

    if benchmarks.is_empty() {
        if let Some(ref tag) = cli.tag {
            let all_tags: std::collections::BTreeSet<&str> = all_benchmarks
                .iter()
                .flat_map(|b| b.tags.iter().copied())
                .collect();
            if !all_tags.contains(tag.as_str()) {
                let available: Vec<&str> = all_tags.into_iter().collect();
                eprintln!(
                    "Warning: tag '{}' not found. Available tags: {}",
                    tag,
                    available.join(", ")
                );
            }
        }
        println!("No benchmarks found.");
        return Ok(());
    }

    // --isolated only takes effect when the config file allows isolation.
    let isolated = if config.runner.isolation.is_isolated() {
        cli.isolated
    } else {
        false
    };

    let threads_str = if cli.threads == 0 {
        "all".to_string()
    } else {
        cli.threads.to_string()
    };
    let mode_str = if isolated {
        if cli.one_shot {
            " (isolated, one-shot)"
        } else {
            " (isolated, persistent)"
        }
    } else {
        " (in-process)"
    };
    println!(
        "Running {} benchmarks{}, {} threads, {} worker(s)...\n",
        benchmarks.len(),
        mode_str,
        threads_str,
        jobs
    );

    let start_time = Instant::now();

    let exec_config = build_execution_config(cli, config);

    if exec_config.bootstrap_iterations > 0 && exec_config.bootstrap_iterations < 100 {
        eprintln!(
            "Warning: bootstrap_iterations={} is very low; confidence intervals will be unreliable. \
             Use >= 1000 for meaningful results, or 0 to skip bootstrap.",
            exec_config.bootstrap_iterations
        );
    }

    let results = if isolated {
        let timeout = std::time::Duration::from_secs(cli.worker_timeout);
        let reuse_workers = !cli.one_shot;
        let isolated_executor =
            IsolatedExecutor::new(exec_config.clone(), timeout, reuse_workers, jobs);
        isolated_executor.execute(&benchmarks)
    } else {
        if jobs > 1 {
            eprintln!(
                "Warning: --jobs currently applies only to isolated mode; running in-process serially."
            );
        }
        let mut executor = Executor::new(exec_config.clone());
        executor.execute(&benchmarks)
    };

    let stats = compute_statistics(&results, &exec_config);

    if exec_config.track_allocations
        && !results.is_empty()
        && results
            .iter()
            .all(|r| r.alloc_bytes == 0 && r.alloc_count == 0)
    {
        eprintln!(
            "Warning: allocation tracking enabled but all benchmarks reported 0 bytes allocated.\n\
             Ensure TrackingAllocator is set as #[global_allocator] in your benchmark binary."
        );
    }

    let total_duration_ms = start_time.elapsed().as_secs_f64() * 1000.0;
    let mut report = build_report(&results, &stats, &exec_config, total_duration_ms);

    let (comparison_results, comparison_series, synthetic_results, verification_results) =
        execute_verifications(&results, &stats);
    let verification_summary = aggregate_verifications(&verification_results);
    report.comparisons = comparison_results;
    report.comparison_series = comparison_series;
    report.synthetics = synthetic_results;
    report.verifications = verification_results;

    report.summary.critical_failures = verification_summary.critical_failures;
    report.summary.warnings = verification_summary.failed - verification_summary.critical_failures;

    let output = match format {
        OutputFormat::Json => generate_json_report(&report)?,
        OutputFormat::GithubSummary => generate_github_summary(&report),
        OutputFormat::Html => generate_html_report(&report),
        OutputFormat::Csv => generate_csv_report(&report),
        OutputFormat::Human => format_human_output(&report),
    };

    if let Some(ref path) = cli.output {
        let mut file = std::fs::File::create(path)?;
        file.write_all(output.as_bytes())?;
        println!("Report written to: {}", path.display());
    } else {
        print!("{}", output);
    }

    save_baseline_if_needed(cli, config, &report)?;

    let has_crashes = report
        .results
        .iter()
        .any(|r| matches!(r.status, fluxbench_report::BenchmarkStatus::Crashed));

    if verification_summary.should_fail_ci() || has_crashes {
        if has_crashes {
            eprintln!("\nBenchmark(s) crashed during execution");
        }
        if verification_summary.should_fail_ci() {
            eprintln!(
                "\n{} critical verification failure(s)",
                verification_summary.critical_failures + verification_summary.critical_errors
            );
        }
        std::process::exit(1);
    }

    Ok(())
}

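/// Run benchmarks and compare the results against a baseline report recorded
/// at `git_ref`, annotating each result with regression statistics. Exits
/// with status 1 on regressions or critical verification failures.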
fn compare_benchmarks(
    cli: &Cli,
    config: &FluxConfig,
    git_ref: &str,
    format: OutputFormat,
) -> anyhow::Result<()> {
    let baseline_path = cli.baseline.as_ref().ok_or_else(|| {
        anyhow::anyhow!(
            "--baseline required for comparison, or use 'compare' command with a git ref"
        )
    })?;

    if !baseline_path.exists() {
        return Err(anyhow::anyhow!(
            "Baseline file not found: {}",
            baseline_path.display()
        ));
    }

    let baseline_json = std::fs::read_to_string(baseline_path)?;
    let baseline: fluxbench_report::Report = serde_json::from_str(&baseline_json)?;
    let resolved_git_ref = resolve_git_ref(git_ref)?;

    // Accept prefix matches so short and full commit hashes compare equal.
    if let Some(baseline_commit) = baseline.meta.git_commit.as_deref() {
        let matches_ref = baseline_commit == resolved_git_ref
            || baseline_commit.starts_with(&resolved_git_ref)
            || resolved_git_ref.starts_with(baseline_commit);
        if !matches_ref {
            return Err(anyhow::anyhow!(
                "Baseline commit {} does not match git ref {} ({})",
                baseline_commit,
                git_ref,
                resolved_git_ref
            ));
        }
    } else {
        eprintln!(
            "Warning: baseline report has no commit metadata; git ref consistency cannot be verified."
        );
    }

    println!("Comparing against baseline: {}", baseline_path.display());
    println!("Git ref: {} ({})\n", git_ref, resolved_git_ref);

    let all_benchmarks: Vec<_> = inventory::iter::<BenchmarkDef>.into_iter().collect();
    let benchmarks = filter_benchmarks(cli, &all_benchmarks);

    if benchmarks.is_empty() {
        println!("No benchmarks found.");
        return Ok(());
    }

    let start_time = Instant::now();

    let exec_config = build_execution_config(cli, config);

    let mut executor = Executor::new(exec_config.clone());
    let results = executor.execute(&benchmarks);
    let stats = compute_statistics(&results, &exec_config);

    let total_duration_ms = start_time.elapsed().as_secs_f64() * 1000.0;
    let mut report = build_report(&results, &stats, &exec_config, total_duration_ms);

    report.baseline_meta = Some(baseline.meta.clone());

    let regression_threshold = cli.threshold.unwrap_or(config.ci.regression_threshold);
    let baseline_map: std::collections::HashMap<_, _> = baseline
        .results
        .iter()
        .filter_map(|r| r.metrics.as_ref().map(|m| (r.id.clone(), m.clone())))
        .collect();

    for result in &mut report.results {
        if let (Some(metrics), Some(baseline_metrics)) =
            (&result.metrics, baseline_map.get(&result.id))
        {
            let baseline_mean = baseline_metrics.mean_ns;
            let absolute_change = metrics.mean_ns - baseline_mean;
            let relative_change = if baseline_mean > 0.0 {
                (absolute_change / baseline_mean) * 100.0
            } else {
                0.0
            };

            // Treat a change as significant only if it exceeds the threshold
            // and the confidence intervals do not overlap.
            let ci_non_overlap = metrics.ci_upper_ns < baseline_metrics.ci_lower_ns
                || metrics.ci_lower_ns > baseline_metrics.ci_upper_ns;
            let is_significant = relative_change.abs() > regression_threshold && ci_non_overlap;

            if relative_change > regression_threshold {
                report.summary.regressions += 1;
            } else if relative_change < -regression_threshold {
                report.summary.improvements += 1;
            }

            let mut effect_size = if metrics.std_dev_ns > f64::EPSILON {
                absolute_change / metrics.std_dev_ns
            } else {
                0.0
            };
            if !effect_size.is_finite() {
                effect_size = 0.0;
            }

            // Coarse heuristic: non-overlapping CIs give near-certainty;
            // otherwise lean on the sign of the change.
            let probability_regression = if ci_non_overlap {
                if relative_change > 0.0 { 0.99 } else { 0.01 }
            } else if relative_change > 0.0 {
                0.60
            } else {
                0.40
            };

            result.comparison = Some(fluxbench_report::Comparison {
                baseline_mean_ns: baseline_mean,
                absolute_change_ns: absolute_change,
                relative_change,
                probability_regression,
                is_significant,
                effect_size,
            });
        }
    }

    let (comparison_results, comparison_series, synthetic_results, verification_results) =
        execute_verifications(&results, &stats);
    let verification_summary = aggregate_verifications(&verification_results);
    report.comparisons = comparison_results;
    report.comparison_series = comparison_series;
    report.synthetics = synthetic_results;
    report.verifications = verification_results;
    report.summary.critical_failures = verification_summary.critical_failures;
    report.summary.warnings = verification_summary.failed - verification_summary.critical_failures;

    let output = match format {
        OutputFormat::Json => generate_json_report(&report)?,
        OutputFormat::GithubSummary => generate_github_summary(&report),
        OutputFormat::Html => generate_html_report(&report),
        OutputFormat::Csv => generate_csv_report(&report),
        OutputFormat::Human => format_comparison_output(&report, &baseline),
    };

    if let Some(ref path) = cli.output {
        let mut file = std::fs::File::create(path)?;
        file.write_all(output.as_bytes())?;
        println!("Report written to: {}", path.display());
    } else {
        print!("{}", output);
    }

    save_baseline_if_needed(cli, config, &report)?;

    let should_fail = report.summary.regressions > 0 || verification_summary.should_fail_ci();
    if should_fail {
        if report.summary.regressions > 0 {
            eprintln!(
                "\n{} regression(s) detected above {}% threshold",
                report.summary.regressions, regression_threshold
            );
        }
        if verification_summary.should_fail_ci() {
            eprintln!(
                "\n{} critical verification failure(s)",
                verification_summary.critical_failures + verification_summary.critical_errors
            );
        }
        std::process::exit(1);
    }

    Ok(())
}

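/// Persist the report as a baseline JSON file when requested by
/// `--save-baseline` or the config file.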
fn save_baseline_if_needed(
    cli: &Cli,
    config: &FluxConfig,
    report: &fluxbench_report::Report,
) -> anyhow::Result<()> {
    let should_save = cli.save_baseline.is_some() || config.output.save_baseline;
    if !should_save {
        return Ok(());
    }

    // Precedence: explicit CLI path, then config path, then the default.
    let path = cli
        .save_baseline
        .as_ref()
        .and_then(|opt| opt.clone())
        .or_else(|| config.output.baseline_path.as_ref().map(PathBuf::from))
        .unwrap_or_else(|| PathBuf::from("target/fluxbench/baseline.json"));

    if let Some(parent) = path.parent() {
        std::fs::create_dir_all(parent)?;
    }

    let json = generate_json_report(report)?;
    std::fs::write(&path, json)?;
    eprintln!("Baseline saved to: {}", path.display());

    Ok(())
}

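/// Resolve a git ref to a full commit hash via `git rev-parse --verify`.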
fn resolve_git_ref(git_ref: &str) -> anyhow::Result<String> {
    let output = std::process::Command::new("git")
        .args(["rev-parse", "--verify", git_ref])
        .output()
        .map_err(|e| anyhow::anyhow!("Failed to resolve git ref '{}': {}", git_ref, e))?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(anyhow::anyhow!(
            "Invalid git ref '{}': {}",
            git_ref,
            stderr.trim()
        ));
    }

    let resolved = String::from_utf8(output.stdout)?.trim().to_string();
    if resolved.is_empty() {
        return Err(anyhow::anyhow!(
            "Git ref '{}' resolved to an empty commit hash",
            git_ref
        ));
    }

    Ok(resolved)
}

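/// Render a human-readable comparison between the current report and the
/// baseline. The 📈/📉 markers use a fixed ±5% display threshold, independent
/// of `--threshold`.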
fn format_comparison_output(
    report: &fluxbench_report::Report,
    baseline: &fluxbench_report::Report,
) -> String {
    let mut output = String::new();

    output.push('\n');
    output.push_str("FluxBench Comparison Results\n");
    output.push_str(&"=".repeat(60));
    output.push_str("\n\n");

    output.push_str(&format!(
        "Baseline: {} ({})\n",
        baseline.meta.git_commit.as_deref().unwrap_or("unknown"),
        baseline.meta.timestamp.format("%Y-%m-%d %H:%M:%S")
    ));
    output.push_str(&format!(
        "Current: {} ({})\n\n",
        report.meta.git_commit.as_deref().unwrap_or("unknown"),
        report.meta.timestamp.format("%Y-%m-%d %H:%M:%S")
    ));

    for result in &report.results {
        let status_icon = match result.status {
            fluxbench_report::BenchmarkStatus::Passed => "✓",
            fluxbench_report::BenchmarkStatus::Failed => "✗",
            fluxbench_report::BenchmarkStatus::Crashed => "💥",
            fluxbench_report::BenchmarkStatus::Skipped => "⊘",
        };

        output.push_str(&format!("{} {}\n", status_icon, result.id));

        if let (Some(metrics), Some(comparison)) = (&result.metrics, &result.comparison) {
            let change_icon = if comparison.relative_change > 5.0 {
                "📈 REGRESSION"
            } else if comparison.relative_change < -5.0 {
                "📉 improvement"
            } else {
                "≈ no change"
            };

            output.push_str(&format!(
                "  baseline: {:.2} ns → current: {:.2} ns\n",
                comparison.baseline_mean_ns, metrics.mean_ns
            ));
            output.push_str(&format!(
                "  change: {:+.2}% ({:+.2} ns) {}\n",
                comparison.relative_change, comparison.absolute_change_ns, change_icon
            ));
        }

        output.push('\n');
    }

    output.push_str("Summary\n");
    output.push_str(&"-".repeat(60));
    output.push('\n');
    output.push_str(&format!(
        "  Regressions: {}  Improvements: {}  No Change: {}\n",
        report.summary.regressions,
        report.summary.improvements,
        report.summary.total_benchmarks - report.summary.regressions - report.summary.improvements
    ));

    output
}