#![warn(missing_docs)]
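//! Command-line interface for FluxBench: argument parsing, benchmark
//! execution, baseline comparison, and report generation.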
mod config;
mod executor;
mod planner;
mod supervisor;

pub use config::*;
pub use executor::{
    ExecutionConfig, Executor, IsolatedExecutor, build_report, compute_statistics,
    execute_verifications, format_human_output,
};
pub use supervisor::*;

use clap::{Parser, Subcommand};
use fluxbench_core::{BenchmarkDef, WorkerMain};
use fluxbench_logic::aggregate_verifications;
use fluxbench_report::{
    OutputFormat, format_duration, generate_csv_report, generate_github_action_benchmark,
    generate_github_summary, generate_html_report, generate_json_report,
};
use rayon::ThreadPoolBuilder;
use regex::Regex;
use std::io::Write;
use std::path::PathBuf;
use std::time::Instant;

#[derive(Parser, Debug)]
/// Command-line arguments for the `fluxbench` binary.
#[command(name = "fluxbench")]
#[command(author, version, about = "FluxBench - benchmarking framework for Rust")]
pub struct Cli {
    #[command(subcommand)]
    /// Subcommand to execute; benchmarks are run when omitted.
    pub command: Option<Commands>,

    #[arg(default_value = ".*")]
    /// Regex used to select which benchmarks to run (matches everything by default).
    pub filter: String,

    #[arg(long, default_value = "human")]
    /// Report output format.
    pub format: String,

    #[arg(short, long)]
    /// Write the report to this file instead of stdout.
    pub output: Option<PathBuf>,

    #[arg(long)]
    /// Compare results against a baseline report. An optional path overrides
    /// the baseline location from the config file.
    pub baseline: Option<Option<PathBuf>>,

    #[arg(long)]
    /// List the benchmarks that would run without executing them.
    pub dry_run: bool,

    #[arg(long)]
    /// Regression threshold in percent; overrides the configured CI threshold.
    pub threshold: Option<f64>,

    #[arg(long)]
    /// Only run benchmarks in this group.
    pub group: Option<String>,

    #[arg(long)]
    /// Only run benchmarks carrying this tag.
    pub tag: Option<String>,

    #[arg(long)]
    /// Skip benchmarks carrying this tag.
    pub skip_tag: Option<String>,

    #[arg(long, default_value = "3")]
    /// Warmup time per benchmark, in seconds.
    pub warmup: u64,

    #[arg(long, default_value = "5")]
    /// Measurement time per benchmark, in seconds.
    pub measurement: u64,

    #[arg(long, short = 'n')]
    /// Collect exactly this many samples per benchmark, overriding the
    /// time-based warmup and measurement phases.
    pub samples: Option<u64>,

    #[arg(long)]
    /// Minimum number of iterations per benchmark.
    pub min_iterations: Option<u64>,

    #[arg(long)]
    /// Maximum number of iterations per benchmark.
    pub max_iterations: Option<u64>,

    #[arg(short, long)]
    /// Enable verbose (debug-level) logging.
    pub verbose: bool,

    #[arg(long, default_value = "true", action = clap::ArgAction::Set)]
    /// Run each benchmark in an isolated worker process
    /// (pass `--isolated false` to run in-process).
    pub isolated: bool,

    #[arg(long)]
    /// Spawn a fresh worker process for every benchmark instead of
    /// reusing persistent workers.
    pub one_shot: bool,

    #[arg(long, default_value = "60")]
    /// Timeout in seconds for an isolated worker process.
    pub worker_timeout: u64,

    #[arg(long, default_value = "1")]
    /// Number of worker processes to run in parallel (isolated mode only).
    pub jobs: usize,

    #[arg(long, short = 'j', default_value = "0")]
    /// Number of Rayon threads available to benchmarks (0 uses all cores).
    pub threads: usize,

    #[arg(long, hide = true)]
    /// Internal: run this process as a benchmark worker.
    pub flux_worker: bool,

    #[arg(long)]
    /// Save the resulting report as a baseline. An optional path overrides
    /// the baseline location from the config file.
    pub save_baseline: Option<Option<PathBuf>>,

    #[arg(long, hide = true)]
    /// Hidden flag accepted for compatibility with `cargo bench` invocation.
    pub bench: bool,
}

#[derive(Subcommand, Debug)]
/// Subcommands supported by the `fluxbench` CLI.
pub enum Commands {
    /// List benchmarks matching the current filters without running them.
    List,
    /// Run the selected benchmarks and produce a report.
    Run,
    /// Run the selected benchmarks and compare them against a baseline
    /// recorded at the given git ref.
    Compare {
        #[arg(name = "REF")]
        /// Git reference (branch, tag, or commit) the baseline corresponds to.
        git_ref: String,
    },
}

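/// Parse command-line arguments and run the FluxBench CLI.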
pub fn run() -> anyhow::Result<()> {
    let cli = Cli::parse();
    run_with_cli(cli)
}

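/// Run the CLI with already-parsed arguments: dispatches to worker mode,
/// listing, running, or comparing benchmarks.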
pub fn run_with_cli(cli: Cli) -> anyhow::Result<()> {
    if cli.flux_worker {
        return run_worker_mode();
    }

    if cli.verbose {
        tracing_subscriber::fmt()
            .with_env_filter("fluxbench=debug")
            .init();
    } else {
        tracing_subscriber::fmt()
            .with_env_filter("fluxbench=info")
            .init();
    }

    let config = FluxConfig::discover().unwrap_or_default();

    let format: OutputFormat = cli.format.parse().unwrap_or(OutputFormat::Human);

    let jobs = if cli.jobs != 1 {
        cli.jobs
    } else {
        config.runner.jobs.unwrap_or(1)
    };

    match cli.command {
        Some(Commands::List) => {
            list_benchmarks(&cli)?;
        }
        Some(Commands::Run) => {
            run_benchmarks(&cli, &config, format, jobs)?;
        }
        Some(Commands::Compare { ref git_ref }) => {
            compare_benchmarks(&cli, &config, git_ref, format)?;
        }
        None => {
            if cli.dry_run {
                list_benchmarks(&cli)?;
            } else {
                run_benchmarks(&cli, &config, format, jobs)?;
            }
        }
    }

    Ok(())
}

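/// Entry point for `--flux-worker`: hands the process over to the worker loop.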
fn run_worker_mode() -> anyhow::Result<()> {
    let mut worker = WorkerMain::new();
    worker
        .run()
        .map_err(|e| anyhow::anyhow!("Worker error: {}", e))
}

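/// Apply the CLI regex filter, group, tag, and skip-tag selections to the
/// registered benchmarks and return the resulting plan.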
fn filter_benchmarks(
    cli: &Cli,
    benchmarks: &[&'static BenchmarkDef],
) -> Vec<&'static BenchmarkDef> {
    let filter_re = Regex::new(&cli.filter).ok();

    let plan = planner::build_plan(
        benchmarks.iter().copied(),
        filter_re.as_ref(),
        cli.group.as_deref(),
        cli.tag.as_deref(),
        cli.skip_tag.as_deref(),
    );

    plan.benchmarks
}

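/// Print the benchmark plan as a tree grouped by benchmark group, plus a
/// summary of all tags defined across the registered benchmarks.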
fn list_benchmarks(cli: &Cli) -> anyhow::Result<()> {
    println!("FluxBench Plan:");

    let all_benchmarks: Vec<_> = inventory::iter::<BenchmarkDef>.into_iter().collect();
    let benchmarks = filter_benchmarks(cli, &all_benchmarks);

    let mut groups: std::collections::BTreeMap<&str, Vec<&BenchmarkDef>> =
        std::collections::BTreeMap::new();

    for bench in &benchmarks {
        groups.entry(bench.group).or_default().push(bench);
    }

    let mut total = 0;
    for (group, benches) in &groups {
        println!("├── group: {}", group);
        for bench in benches {
            let tags = if bench.tags.is_empty() {
                String::new()
            } else {
                format!(" [{}]", bench.tags.join(", "))
            };
            println!(
                "│   ├── {}{} ({}:{})",
                bench.id, tags, bench.file, bench.line
            );
            total += 1;
        }
    }

    println!("{} benchmarks found.", total);

    let mut tag_counts: std::collections::BTreeMap<&str, usize> = std::collections::BTreeMap::new();
    for bench in &all_benchmarks {
        for tag in bench.tags {
            *tag_counts.entry(tag).or_default() += 1;
        }
    }
    if !tag_counts.is_empty() {
        let tags_display: Vec<String> = tag_counts
            .iter()
            .map(|(tag, count)| format!("{} ({})", tag, count))
            .collect();
        println!("Tags: {}", tags_display.join(", "));
    }

    Ok(())
}

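/// Build the per-benchmark execution settings. Explicit CLI values (detected
/// by differing from the clap defaults) take precedence over the config file;
/// `--samples` pins the iteration count and disables the timed phases.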
fn build_execution_config(cli: &Cli, config: &FluxConfig) -> ExecutionConfig {
    let warmup_ns = FluxConfig::parse_duration(&config.runner.warmup_time).unwrap_or(3_000_000_000);
    let measurement_ns =
        FluxConfig::parse_duration(&config.runner.measurement_time).unwrap_or(5_000_000_000);

    let warmup_time_ns = if cli.warmup != 3 {
        cli.warmup * 1_000_000_000
    } else {
        warmup_ns
    };
    let measurement_time_ns = if cli.measurement != 5 {
        cli.measurement * 1_000_000_000
    } else {
        measurement_ns
    };

    if let Some(n) = cli.samples.or(config.runner.samples) {
        return ExecutionConfig {
            warmup_time_ns: 0,
            measurement_time_ns: 0,
            min_iterations: Some(n),
            max_iterations: Some(n),
            track_allocations: config.allocator.track,
            bootstrap_iterations: config.runner.bootstrap_iterations,
            confidence_level: config.runner.confidence_level,
        };
    }

    let min_iterations = cli.min_iterations.or(config.runner.min_iterations);
    let max_iterations = cli.max_iterations.or(config.runner.max_iterations);

    ExecutionConfig {
        warmup_time_ns,
        measurement_time_ns,
        min_iterations,
        max_iterations,
        track_allocations: config.allocator.track,
        bootstrap_iterations: config.runner.bootstrap_iterations,
        confidence_level: config.runner.confidence_level,
    }
}

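/// Execute the selected benchmarks, apply baseline comparison and
/// verifications, emit the report, and exit non-zero on crashes or critical
/// verification failures.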
fn run_benchmarks(
    cli: &Cli,
    config: &FluxConfig,
    format: OutputFormat,
    jobs: usize,
) -> anyhow::Result<()> {
    let jobs = jobs.max(1);

    if cli.threads > 0 {
        ThreadPoolBuilder::new()
            .num_threads(cli.threads)
            .build_global()
            .ok();
    }

    let all_benchmarks: Vec<_> = inventory::iter::<BenchmarkDef>.into_iter().collect();
    let benchmarks = filter_benchmarks(cli, &all_benchmarks);

    if benchmarks.is_empty() {
        if let Some(ref tag) = cli.tag {
            let all_tags: std::collections::BTreeSet<&str> = all_benchmarks
                .iter()
                .flat_map(|b| b.tags.iter().copied())
                .collect();
            if !all_tags.contains(tag.as_str()) {
                let available: Vec<&str> = all_tags.into_iter().collect();
                eprintln!(
                    "Warning: tag '{}' not found. Available tags: {}",
                    tag,
                    available.join(", ")
                );
            }
        }
        println!("No benchmarks found.");
        return Ok(());
    }

    let isolated = if config.runner.isolation.is_isolated() {
        cli.isolated
    } else {
        false
    };

    let threads_str = if cli.threads == 0 {
        "all".to_string()
    } else {
        cli.threads.to_string()
    };
    let mode_str = if isolated {
        if cli.one_shot {
            " (isolated, one-shot)"
        } else {
            " (isolated, persistent)"
        }
    } else {
        " (in-process)"
    };
    println!(
        "Running {} benchmarks{}, {} threads, {} worker(s)...\n",
        benchmarks.len(),
        mode_str,
        threads_str,
        jobs
    );

    let start_time = Instant::now();

    let exec_config = build_execution_config(cli, config);

    if exec_config.bootstrap_iterations > 0 && exec_config.bootstrap_iterations < 100 {
        eprintln!(
            "Warning: bootstrap_iterations={} is very low; confidence intervals will be unreliable. \
             Use >= 1000 for meaningful results, or 0 to skip bootstrap.",
            exec_config.bootstrap_iterations
        );
    }

    let results = if isolated {
        let timeout = std::time::Duration::from_secs(cli.worker_timeout);
        let reuse_workers = !cli.one_shot;
        let isolated_executor =
            IsolatedExecutor::new(exec_config.clone(), timeout, reuse_workers, jobs);
        isolated_executor.execute(&benchmarks)
    } else {
        if jobs > 1 {
            eprintln!(
                "Warning: --jobs currently applies only to isolated mode; running in-process serially."
            );
        }
        let mut executor = Executor::new(exec_config.clone());
        executor.execute(&benchmarks)
    };

    let stats = compute_statistics(&results, &exec_config);

    if exec_config.track_allocations
        && !results.is_empty()
        && results
            .iter()
            .all(|r| r.alloc_bytes == 0 && r.alloc_count == 0)
    {
        eprintln!(
            "Warning: allocation tracking enabled but all benchmarks reported 0 bytes allocated.\n\
             Ensure TrackingAllocator is set as #[global_allocator] in your benchmark binary."
        );
    }

    let total_duration_ms = start_time.elapsed().as_secs_f64() * 1000.0;
    let mut report = build_report(&results, &stats, &exec_config, total_duration_ms);

    if let Some(baseline_path) = resolve_baseline_path(&cli.baseline, config) {
        if baseline_path.exists() {
            match std::fs::read_to_string(&baseline_path).and_then(|json| {
                serde_json::from_str::<fluxbench_report::Report>(&json)
                    .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
            }) {
                Ok(baseline) => {
                    let threshold = cli.threshold.unwrap_or(config.ci.regression_threshold);
                    apply_baseline_comparison(&mut report, &baseline, threshold);
                }
                Err(e) => {
                    eprintln!(
                        "Warning: failed to load baseline {}: {}",
                        baseline_path.display(),
                        e
                    );
                }
            }
        } else {
            eprintln!(
                "Warning: baseline file not found: {}",
                baseline_path.display()
            );
        }
    }

    let (comparison_results, comparison_series, synthetic_results, verification_results) =
        execute_verifications(&results, &stats);
    let verification_summary = aggregate_verifications(&verification_results);
    report.comparisons = comparison_results;
    report.comparison_series = comparison_series;
    report.synthetics = synthetic_results;
    report.verifications = verification_results;

    report.summary.critical_failures = verification_summary.critical_failures;
    report.summary.warnings = verification_summary.failed - verification_summary.critical_failures;

    if config.ci.github_annotations {
        emit_github_annotations(&report);
    }

    let output = match format {
        OutputFormat::Json => generate_json_report(&report)?,
        OutputFormat::GithubSummary => generate_github_summary(&report),
        OutputFormat::GithubActionBenchmark => generate_github_action_benchmark(&report),
        OutputFormat::Html => generate_html_report(&report),
        OutputFormat::Csv => generate_csv_report(&report),
        OutputFormat::Human => format_human_output(&report),
    };

    if let Some(ref path) = cli.output {
        let mut file = std::fs::File::create(path)?;
        file.write_all(output.as_bytes())?;
        println!("Report written to: {}", path.display());
    } else {
        print!("{}", output);
    }

    save_baseline_if_needed(cli, config, &report)?;

    let has_crashes = report
        .results
        .iter()
        .any(|r| matches!(r.status, fluxbench_report::BenchmarkStatus::Crashed));

    if verification_summary.should_fail_ci() || has_crashes {
        if has_crashes {
            eprintln!("\nBenchmark(s) crashed during execution");
        }
        if verification_summary.should_fail_ci() {
            eprintln!(
                "\n{} critical verification failure(s)",
                verification_summary.critical_failures + verification_summary.critical_errors
            );
        }
        std::process::exit(1);
    }

    Ok(())
}

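/// Run the selected benchmarks in-process and compare the results against a
/// stored baseline report, verifying that the baseline matches `git_ref`.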
fn compare_benchmarks(
    cli: &Cli,
    config: &FluxConfig,
    git_ref: &str,
    format: OutputFormat,
) -> anyhow::Result<()> {
    let baseline_path = resolve_baseline_path(&cli.baseline, config).ok_or_else(|| {
        anyhow::anyhow!(
            "--baseline required for comparison, or use 'compare' command with a git ref"
        )
    })?;

    if !baseline_path.exists() {
        return Err(anyhow::anyhow!(
            "Baseline file not found: {}",
            baseline_path.display()
        ));
    }

    let baseline_json = std::fs::read_to_string(&baseline_path)?;
    let baseline: fluxbench_report::Report = serde_json::from_str(&baseline_json)?;
    let resolved_git_ref = resolve_git_ref(git_ref)?;

    if let Some(baseline_commit) = baseline.meta.git_commit.as_deref() {
        let matches_ref = baseline_commit == resolved_git_ref
            || baseline_commit.starts_with(&resolved_git_ref)
            || resolved_git_ref.starts_with(baseline_commit);
        if !matches_ref {
            return Err(anyhow::anyhow!(
                "Baseline commit {} does not match git ref {} ({})",
                baseline_commit,
                git_ref,
                resolved_git_ref
            ));
        }
    } else {
        eprintln!(
            "Warning: baseline report has no commit metadata; git ref consistency cannot be verified."
        );
    }

    println!("Comparing against baseline: {}", baseline_path.display());
    println!("Git ref: {} ({})\n", git_ref, resolved_git_ref);

    let all_benchmarks: Vec<_> = inventory::iter::<BenchmarkDef>.into_iter().collect();
    let benchmarks = filter_benchmarks(cli, &all_benchmarks);

    if benchmarks.is_empty() {
        println!("No benchmarks found.");
        return Ok(());
    }

    let start_time = Instant::now();

    let exec_config = build_execution_config(cli, config);

    let mut executor = Executor::new(exec_config.clone());
    let results = executor.execute(&benchmarks);
    let stats = compute_statistics(&results, &exec_config);

    let total_duration_ms = start_time.elapsed().as_secs_f64() * 1000.0;
    let mut report = build_report(&results, &stats, &exec_config, total_duration_ms);

    let regression_threshold = cli.threshold.unwrap_or(config.ci.regression_threshold);
    apply_baseline_comparison(&mut report, &baseline, regression_threshold);

    let (comparison_results, comparison_series, synthetic_results, verification_results) =
        execute_verifications(&results, &stats);
    let verification_summary = aggregate_verifications(&verification_results);
    report.comparisons = comparison_results;
    report.comparison_series = comparison_series;
    report.synthetics = synthetic_results;
    report.verifications = verification_results;
    report.summary.critical_failures = verification_summary.critical_failures;
    report.summary.warnings = verification_summary.failed - verification_summary.critical_failures;

    if config.ci.github_annotations {
        emit_github_annotations(&report);
    }

    let output = match format {
        OutputFormat::Json => generate_json_report(&report)?,
        OutputFormat::GithubSummary => generate_github_summary(&report),
        OutputFormat::GithubActionBenchmark => generate_github_action_benchmark(&report),
        OutputFormat::Html => generate_html_report(&report),
        OutputFormat::Csv => generate_csv_report(&report),
        OutputFormat::Human => format_comparison_output(&report, &baseline),
    };

    if let Some(ref path) = cli.output {
        let mut file = std::fs::File::create(path)?;
        file.write_all(output.as_bytes())?;
        println!("Report written to: {}", path.display());
    } else {
        print!("{}", output);
    }

    save_baseline_if_needed(cli, config, &report)?;

    let should_fail = report.summary.regressions > 0 || verification_summary.should_fail_ci();
    if should_fail {
        if report.summary.regressions > 0 {
            eprintln!(
                "\n{} regression(s) detected above {}% threshold",
                report.summary.regressions, regression_threshold
            );
        }
        if verification_summary.should_fail_ci() {
            eprintln!(
                "\n{} critical verification failure(s)",
                verification_summary.critical_failures + verification_summary.critical_errors
            );
        }
        std::process::exit(1);
    }

    Ok(())
}

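/// Persist the report as a baseline JSON file when requested via
/// `--save-baseline` or enabled in the config file.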
fn save_baseline_if_needed(
    cli: &Cli,
    config: &FluxConfig,
    report: &fluxbench_report::Report,
) -> anyhow::Result<()> {
    let should_save = cli.save_baseline.is_some() || config.output.save_baseline;
    if !should_save {
        return Ok(());
    }

    let path = cli
        .save_baseline
        .as_ref()
        .and_then(|opt| opt.clone())
        .or_else(|| config.output.baseline_path.as_ref().map(PathBuf::from))
        .unwrap_or_else(|| PathBuf::from("target/fluxbench/baseline.json"));

    if let Some(parent) = path.parent() {
        std::fs::create_dir_all(parent)?;
    }

    let json = generate_json_report(report)?;
    std::fs::write(&path, json)?;
    eprintln!("Baseline saved to: {}", path.display());

    Ok(())
}

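/// Annotate each benchmark result with a comparison against the matching
/// baseline entry. A per-benchmark threshold (when set) overrides the global
/// regression threshold; a change only counts as significant when it exceeds
/// the threshold and the confidence intervals do not overlap.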
fn apply_baseline_comparison(
    report: &mut fluxbench_report::Report,
    baseline: &fluxbench_report::Report,
    regression_threshold: f64,
) {
    report.baseline_meta = Some(baseline.meta.clone());

    let baseline_map: std::collections::HashMap<_, _> = baseline
        .results
        .iter()
        .filter_map(|r| r.metrics.as_ref().map(|m| (r.id.clone(), m.clone())))
        .collect();

    for result in &mut report.results {
        if let (Some(metrics), Some(baseline_metrics)) =
            (&result.metrics, baseline_map.get(&result.id))
        {
            let effective_threshold = if result.threshold > 0.0 {
                result.threshold
            } else {
                regression_threshold
            };

            let baseline_mean = baseline_metrics.mean_ns;
            let absolute_change = metrics.mean_ns - baseline_mean;
            let relative_change = if baseline_mean > 0.0 {
                (absolute_change / baseline_mean) * 100.0
            } else {
                0.0
            };

            let ci_non_overlap = metrics.ci_upper_ns < baseline_metrics.ci_lower_ns
                || metrics.ci_lower_ns > baseline_metrics.ci_upper_ns;
            let is_significant = relative_change.abs() > effective_threshold && ci_non_overlap;

            if relative_change > effective_threshold {
                report.summary.regressions += 1;
            } else if relative_change < -effective_threshold {
                report.summary.improvements += 1;
            }

            let mut effect_size = if metrics.std_dev_ns > f64::EPSILON {
                absolute_change / metrics.std_dev_ns
            } else {
                0.0
            };
            if !effect_size.is_finite() {
                effect_size = 0.0;
            }

            let probability_regression = if ci_non_overlap {
                if relative_change > 0.0 { 0.99 } else { 0.01 }
            } else if relative_change > 0.0 {
                0.60
            } else {
                0.40
            };

            result.comparison = Some(fluxbench_report::Comparison {
                baseline_mean_ns: baseline_mean,
                absolute_change_ns: absolute_change,
                relative_change,
                probability_regression,
                is_significant,
                effect_size,
            });
        }
    }
}

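/// Resolve the baseline path from the `--baseline` flag: an explicit path is
/// used as-is, a bare `--baseline` falls back to the configured path (or the
/// default under `target/fluxbench/`), and no flag disables the comparison.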
fn resolve_baseline_path(
    cli_baseline: &Option<Option<PathBuf>>,
    config: &FluxConfig,
) -> Option<PathBuf> {
    match cli_baseline {
        Some(Some(path)) => Some(path.clone()),
        Some(None) => {
            Some(
                config
                    .output
                    .baseline_path
                    .as_ref()
                    .map(PathBuf::from)
                    .unwrap_or_else(|| PathBuf::from("target/fluxbench/baseline.json")),
            )
        }
        None => None,
    }
}

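/// Emit GitHub Actions workflow commands (`::error` / `::warning`) for
/// crashed or failed benchmarks, significant regressions, and verification
/// failures.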
fn emit_github_annotations(report: &fluxbench_report::Report) {
    for result in &report.results {
        match result.status {
            fluxbench_report::BenchmarkStatus::Crashed => {
                let msg = result
                    .failure
                    .as_ref()
                    .map(|f| f.message.as_str())
                    .unwrap_or("benchmark crashed");
                println!(
                    "::error file={},line={}::{}: {}",
                    result.file, result.line, result.id, msg
                );
            }
            fluxbench_report::BenchmarkStatus::Failed => {
                let msg = result
                    .failure
                    .as_ref()
                    .map(|f| f.message.as_str())
                    .unwrap_or("benchmark failed");
                println!(
                    "::error file={},line={}::{}: {}",
                    result.file, result.line, result.id, msg
                );
            }
            _ => {}
        }

        if let Some(cmp) = &result.comparison {
            if cmp.is_significant && cmp.relative_change > 0.0 {
                println!(
                    "::error file={},line={}::{}: regression {:+.1}% ({} → {})",
                    result.file,
                    result.line,
                    result.id,
                    cmp.relative_change,
                    format_duration(cmp.baseline_mean_ns),
                    result
                        .metrics
                        .as_ref()
                        .map(|m| format_duration(m.mean_ns))
                        .unwrap_or_default(),
                );
            }
        }
    }

    for v in &report.verifications {
        match &v.status {
            fluxbench_logic::VerificationStatus::Failed => {
                let level = match v.severity {
                    fluxbench_core::Severity::Critical => "error",
                    _ => "warning",
                };
                println!("::{}::{}: {}", level, v.id, v.message);
            }
            fluxbench_logic::VerificationStatus::Error { message } => {
                println!("::error::{}: evaluation error: {}", v.id, message);
            }
            _ => {}
        }
    }
}

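/// Resolve a git ref (branch, tag, or abbreviated commit) to a full commit
/// hash using `git rev-parse --verify`.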
fn resolve_git_ref(git_ref: &str) -> anyhow::Result<String> {
    let output = std::process::Command::new("git")
        .args(["rev-parse", "--verify", git_ref])
        .output()
        .map_err(|e| anyhow::anyhow!("Failed to resolve git ref '{}': {}", git_ref, e))?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(anyhow::anyhow!(
            "Invalid git ref '{}': {}",
            git_ref,
            stderr.trim()
        ));
    }

    let resolved = String::from_utf8(output.stdout)?.trim().to_string();
    if resolved.is_empty() {
        return Err(anyhow::anyhow!(
            "Git ref '{}' resolved to an empty commit hash",
            git_ref
        ));
    }

    Ok(resolved)
}

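/// Render a human-readable comparison between the current report and the
/// baseline, listing per-benchmark changes and an overall summary.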
fn format_comparison_output(
    report: &fluxbench_report::Report,
    baseline: &fluxbench_report::Report,
) -> String {
    let mut output = String::new();

    output.push('\n');
    output.push_str("FluxBench Comparison Results\n");
    output.push_str(&"=".repeat(60));
    output.push_str("\n\n");

    output.push_str(&format!(
        "Baseline: {} ({})\n",
        baseline.meta.git_commit.as_deref().unwrap_or("unknown"),
        baseline.meta.timestamp.format("%Y-%m-%d %H:%M:%S")
    ));
    output.push_str(&format!(
        "Current: {} ({})\n\n",
        report.meta.git_commit.as_deref().unwrap_or("unknown"),
        report.meta.timestamp.format("%Y-%m-%d %H:%M:%S")
    ));

    for result in &report.results {
        let status_icon = match result.status {
            fluxbench_report::BenchmarkStatus::Passed => "✓",
            fluxbench_report::BenchmarkStatus::Failed => "✗",
            fluxbench_report::BenchmarkStatus::Crashed => "💥",
            fluxbench_report::BenchmarkStatus::Skipped => "⊘",
        };

        output.push_str(&format!("{} {}\n", status_icon, result.id));

        if let (Some(metrics), Some(comparison)) = (&result.metrics, &result.comparison) {
            let change_icon = if comparison.relative_change > 5.0 {
                "📈 REGRESSION"
            } else if comparison.relative_change < -5.0 {
                "📉 improvement"
            } else {
                "≈ no change"
            };

            output.push_str(&format!(
                " baseline: {} → current: {}\n",
                format_duration(comparison.baseline_mean_ns),
                format_duration(metrics.mean_ns),
            ));
            output.push_str(&format!(
                " change: {:+.2}% ({}) {}\n",
                comparison.relative_change,
                format_duration(comparison.absolute_change_ns.abs()),
                change_icon,
            ));
        }

        output.push('\n');
    }

    output.push_str("Summary\n");
    output.push_str(&"-".repeat(60));
    output.push('\n');
    output.push_str(&format!(
        " Regressions: {} Improvements: {} No Change: {}\n",
        report.summary.regressions,
        report.summary.improvements,
        report.summary.total_benchmarks - report.summary.regressions - report.summary.improvements
    ));

    output
}

#[cfg(test)]
mod tests {
    use super::*;
    use fluxbench_report::{
        BenchmarkMetrics, BenchmarkReportResult, BenchmarkStatus, Report, ReportConfig, ReportMeta,
        ReportSummary, SystemInfo,
    };

    fn dummy_meta() -> ReportMeta {
        ReportMeta {
            schema_version: 1,
            version: "0.1.0".to_string(),
            timestamp: chrono::Utc::now(),
            git_commit: None,
            git_branch: None,
            system: SystemInfo {
                os: "linux".to_string(),
                os_version: "6.0".to_string(),
                cpu: "test".to_string(),
                cpu_cores: 1,
                memory_gb: 1.0,
            },
            config: ReportConfig {
                warmup_time_ns: 0,
                measurement_time_ns: 0,
                min_iterations: None,
                max_iterations: None,
                bootstrap_iterations: 0,
                confidence_level: 0.95,
                track_allocations: false,
            },
        }
    }

    fn dummy_metrics(mean: f64) -> BenchmarkMetrics {
        BenchmarkMetrics {
            samples: 100,
            mean_ns: mean,
            median_ns: mean,
            std_dev_ns: mean * 0.01,
            min_ns: mean * 0.9,
            max_ns: mean * 1.1,
            p50_ns: mean,
            p90_ns: mean * 1.05,
            p95_ns: mean * 1.07,
            p99_ns: mean * 1.09,
            p999_ns: mean * 1.1,
            skewness: 0.0,
            kurtosis: 3.0,
            ci_lower_ns: mean * 0.98,
            ci_upper_ns: mean * 1.02,
            ci_level: 0.95,
            throughput_ops_sec: None,
            alloc_bytes: 0,
            alloc_count: 0,
            mean_cycles: 0.0,
            median_cycles: 0.0,
            min_cycles: 0,
            max_cycles: 0,
            cycles_per_ns: 0.0,
        }
    }

    fn dummy_result(id: &str, mean: f64, threshold: f64) -> BenchmarkReportResult {
        BenchmarkReportResult {
            id: id.to_string(),
            name: id.to_string(),
            group: "test".to_string(),
            status: BenchmarkStatus::Passed,
            severity: fluxbench_core::Severity::Warning,
            file: "test.rs".to_string(),
            line: 1,
            metrics: Some(dummy_metrics(mean)),
            threshold,
            comparison: None,
            failure: None,
        }
    }

    fn dummy_report(results: Vec<BenchmarkReportResult>) -> Report {
        let total = results.len();
        Report {
            meta: dummy_meta(),
            results,
            comparisons: vec![],
            comparison_series: vec![],
            synthetics: vec![],
            verifications: vec![],
            summary: ReportSummary {
                total_benchmarks: total,
                passed: total,
                ..Default::default()
            },
            baseline_meta: None,
        }
    }

    #[test]
    fn per_bench_threshold_overrides_global() {
        let mut report = dummy_report(vec![dummy_result("fast_bench", 108.0, 5.0)]);
        let baseline = dummy_report(vec![dummy_result("fast_bench", 100.0, 5.0)]);

        apply_baseline_comparison(&mut report, &baseline, 25.0);

        assert_eq!(
            report.summary.regressions, 1,
            "per-bench 5% should catch 8% regression"
        );
        let cmp = report.results[0].comparison.as_ref().unwrap();
        assert!(cmp.is_significant);
    }

    #[test]
    fn zero_threshold_falls_back_to_global() {
        let mut report = dummy_report(vec![dummy_result("normal_bench", 108.0, 0.0)]);
        let baseline = dummy_report(vec![dummy_result("normal_bench", 100.0, 0.0)]);

        apply_baseline_comparison(&mut report, &baseline, 25.0);

        assert_eq!(
            report.summary.regressions, 0,
            "8% under 25% global should not regress"
        );
        let cmp = report.results[0].comparison.as_ref().unwrap();
        assert!(!cmp.is_significant);
    }

    #[test]
    fn mixed_thresholds_independent() {
        let mut report = dummy_report(vec![
            dummy_result("tight", 108.0, 5.0),
            dummy_result("loose", 108.0, 0.0),
        ]);
        let baseline = dummy_report(vec![
            dummy_result("tight", 100.0, 5.0),
            dummy_result("loose", 100.0, 0.0),
        ]);

        apply_baseline_comparison(&mut report, &baseline, 25.0);

        assert_eq!(report.summary.regressions, 1);
        assert!(
            report.results[0]
                .comparison
                .as_ref()
                .unwrap()
                .is_significant
        );
        assert!(
            !report.results[1]
                .comparison
                .as_ref()
                .unwrap()
                .is_significant
        );
    }

    #[test]
    fn per_bench_threshold_detects_improvement() {
        let mut report = dummy_report(vec![dummy_result("improving", 90.0, 5.0)]);
        let baseline = dummy_report(vec![dummy_result("improving", 100.0, 5.0)]);

        apply_baseline_comparison(&mut report, &baseline, 25.0);

        assert_eq!(report.summary.improvements, 1);
        assert_eq!(report.summary.regressions, 0);
    }
}