//! FluxBench command-line interface: argument parsing, benchmark planning,
//! execution, and report generation.

#![warn(missing_docs)]

mod config;
mod executor;
mod planner;
mod supervisor;

pub use config::*;
pub use executor::{
    ExecutionConfig, Executor, IsolatedExecutor, build_report, compute_statistics,
    execute_verifications, format_human_output,
};
pub use supervisor::*;

use clap::{Parser, Subcommand};
use fluxbench_core::{BenchmarkDef, WorkerMain};
use fluxbench_logic::aggregate_verifications;
use fluxbench_report::{
    OutputFormat, format_duration, generate_csv_report, generate_github_summary,
    generate_html_report, generate_json_report,
};
use rayon::ThreadPoolBuilder;
use regex::Regex;
use std::io::Write;
use std::path::PathBuf;
use std::time::Instant;

/// Command-line arguments for the `fluxbench` binary.
#[derive(Parser, Debug)]
#[command(name = "fluxbench")]
#[command(author, version, about = "FluxBench - benchmarking framework for Rust")]
pub struct Cli {
    /// Subcommand; when omitted, benchmarks are run (or listed with --dry-run).
    #[command(subcommand)]
    pub command: Option<Commands>,

    /// Regex used to select benchmarks by name.
    #[arg(default_value = ".*")]
    pub filter: String,

    /// Output format: human, json, csv, html, or github-summary.
    #[arg(long, default_value = "human")]
    pub format: String,

    /// Write the report to this file instead of stdout.
    #[arg(short, long)]
    pub output: Option<PathBuf>,

    /// Compare against a baseline report. With no value, the configured
    /// (or default) baseline path is used; with a value, that file is loaded.
    #[arg(long)]
    pub baseline: Option<Option<PathBuf>>,

    /// List the benchmarks that would run without executing them.
    #[arg(long)]
    pub dry_run: bool,

    /// Regression threshold in percent; overrides the configured value.
    #[arg(long)]
    pub threshold: Option<f64>,

    /// Only run benchmarks in this group.
    #[arg(long)]
    pub group: Option<String>,

    /// Only run benchmarks carrying this tag.
    #[arg(long)]
    pub tag: Option<String>,

    /// Skip benchmarks carrying this tag.
    #[arg(long)]
    pub skip_tag: Option<String>,

    /// Warmup time in seconds.
    #[arg(long, default_value = "3")]
    pub warmup: u64,

    /// Measurement time in seconds.
    #[arg(long, default_value = "5")]
    pub measurement: u64,

    /// Run a fixed number of samples instead of time-based measurement.
    #[arg(long, short = 'n')]
    pub samples: Option<u64>,

    /// Minimum number of iterations.
    #[arg(long)]
    pub min_iterations: Option<u64>,

    /// Maximum number of iterations.
    #[arg(long)]
    pub max_iterations: Option<u64>,

    /// Enable verbose (debug-level) logging.
    #[arg(short, long)]
    pub verbose: bool,

    /// Run benchmarks in isolated worker processes (pass --isolated=false to
    /// run in-process).
    #[arg(long, default_value = "true", action = clap::ArgAction::Set)]
    pub isolated: bool,

    /// Spawn a fresh worker process per benchmark instead of reusing workers.
    #[arg(long)]
    pub one_shot: bool,

    /// Per-benchmark worker timeout in seconds.
    #[arg(long, default_value = "60")]
    pub worker_timeout: u64,

    /// Number of parallel worker processes (isolated mode only).
    #[arg(long, default_value = "1")]
    pub jobs: usize,

    /// Rayon thread-pool size; 0 uses all available cores.
    #[arg(long, short = 'j', default_value = "0")]
    pub threads: usize,

    /// Internal flag: run this process as a benchmark worker.
    #[arg(long, hide = true)]
    pub flux_worker: bool,

    /// Save the resulting report as a baseline. With no value, the configured
    /// (or default) baseline path is used.
    #[arg(long)]
    pub save_baseline: Option<Option<PathBuf>>,

    /// Accepted (and ignored) for `cargo bench` harness compatibility.
    #[arg(long, hide = true)]
    pub bench: bool,
}

/// Top-level subcommands.
#[derive(Subcommand, Debug)]
pub enum Commands {
    /// List benchmarks matching the current filters.
    List,
    /// Run benchmarks (the default when no subcommand is given).
    Run,
    /// Run benchmarks and compare against a baseline recorded at a git ref.
    Compare {
        /// Git ref the baseline was recorded at (branch, tag, or commit hash).
        #[arg(name = "REF")]
        git_ref: String,
    },
}
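
// Example invocations (illustrative; flags correspond to the definitions
// above):
//
//   fluxbench                                  # run everything, human output
//   fluxbench 'parse_.*' --format json -o report.json
//   fluxbench --tag io --skip-tag slow -n 200
//   fluxbench --save-baseline                  # default baseline path
//   fluxbench compare HEAD~1 --baseline target/fluxbench/baseline.json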

/// Entry point: parse CLI arguments and dispatch.
pub fn run() -> anyhow::Result<()> {
    let cli = Cli::parse();
    run_with_cli(cli)
}

/// Run with an already-parsed [`Cli`], so callers can construct arguments
/// programmatically.
pub fn run_with_cli(cli: Cli) -> anyhow::Result<()> {
    if cli.flux_worker {
        return run_worker_mode();
    }

    let env_filter = if cli.verbose {
        "fluxbench=debug"
    } else {
        "fluxbench=info"
    };
    tracing_subscriber::fmt().with_env_filter(env_filter).init();

    let config = FluxConfig::discover().unwrap_or_default();

    // Unknown format strings silently fall back to human-readable output.
    let format: OutputFormat = cli.format.parse().unwrap_or(OutputFormat::Human);

    // A non-default --jobs value wins over the config file; otherwise use
    // the configured value (defaulting to 1).
    let jobs = if cli.jobs != 1 {
        cli.jobs
    } else {
        config.runner.jobs.unwrap_or(1)
    };

    match cli.command {
        Some(Commands::List) => {
            list_benchmarks(&cli)?;
        }
        Some(Commands::Run) => {
            run_benchmarks(&cli, &config, format, jobs)?;
        }
        Some(Commands::Compare { ref git_ref }) => {
            compare_benchmarks(&cli, &config, git_ref, format)?;
        }
        None => {
            if cli.dry_run {
                list_benchmarks(&cli)?;
            } else {
                run_benchmarks(&cli, &config, format, jobs)?;
            }
        }
    }

    Ok(())
}

/// Hidden worker mode: execute benchmarks on behalf of a supervising
/// fluxbench process (see the `--flux-worker` flag).
fn run_worker_mode() -> anyhow::Result<()> {
    let mut worker = WorkerMain::new();
    worker
        .run()
        .map_err(|e| anyhow::anyhow!("Worker error: {}", e))
}

/// Apply the CLI's filter regex, group, and tag selectors to the registered
/// benchmark definitions.
fn filter_benchmarks(
    cli: &Cli,
    benchmarks: &[&'static BenchmarkDef],
) -> Vec<&'static BenchmarkDef> {
    // An invalid regex is reported but treated as "match everything", so a
    // typo does not silently hide benchmarks.
    let filter_re = match Regex::new(&cli.filter) {
        Ok(re) => Some(re),
        Err(e) => {
            eprintln!("Warning: invalid filter regex '{}': {}", cli.filter, e);
            None
        }
    };

    let plan = planner::build_plan(
        benchmarks.iter().copied(),
        filter_re.as_ref(),
        cli.group.as_deref(),
        cli.tag.as_deref(),
        cli.skip_tag.as_deref(),
    );

    plan.benchmarks
}

/// Print the execution plan: groups, benchmarks, source locations, and a
/// summary of all known tags.
fn list_benchmarks(cli: &Cli) -> anyhow::Result<()> {
    println!("FluxBench Plan:");

    let all_benchmarks: Vec<_> = inventory::iter::<BenchmarkDef>.into_iter().collect();
    let benchmarks = filter_benchmarks(cli, &all_benchmarks);

    let mut groups: std::collections::BTreeMap<&str, Vec<&BenchmarkDef>> =
        std::collections::BTreeMap::new();

    for bench in &benchmarks {
        groups.entry(bench.group).or_default().push(bench);
    }

    let mut total = 0;
    for (group, benches) in &groups {
        println!("├── group: {}", group);
        for bench in benches {
            let tags = if bench.tags.is_empty() {
                String::new()
            } else {
                format!(" [{}]", bench.tags.join(", "))
            };
            println!(
                "│   ├── {}{} ({}:{})",
                bench.id, tags, bench.file, bench.line
            );
            total += 1;
        }
    }

    println!("{} benchmarks found.", total);

    // Tag counts are computed over all registered benchmarks, not just the
    // filtered set, so users can discover tags to filter by.
    let mut tag_counts: std::collections::BTreeMap<&str, usize> = std::collections::BTreeMap::new();
    for bench in &all_benchmarks {
        for tag in bench.tags {
            *tag_counts.entry(tag).or_default() += 1;
        }
    }
    if !tag_counts.is_empty() {
        let tags_display: Vec<String> = tag_counts
            .iter()
            .map(|(tag, count)| format!("{} ({})", tag, count))
            .collect();
        println!("Tags: {}", tags_display.join(", "));
    }

    Ok(())
}

/// Merge CLI flags with the discovered config into an [`ExecutionConfig`].
fn build_execution_config(cli: &Cli, config: &FluxConfig) -> ExecutionConfig {
    let warmup_ns = FluxConfig::parse_duration(&config.runner.warmup_time).unwrap_or(3_000_000_000);
    let measurement_ns =
        FluxConfig::parse_duration(&config.runner.measurement_time).unwrap_or(5_000_000_000);

    // A CLI value that differs from the clap default is treated as explicit
    // and overrides the config file.
    let warmup_time_ns = if cli.warmup != 3 {
        cli.warmup * 1_000_000_000
    } else {
        warmup_ns
    };
    let measurement_time_ns = if cli.measurement != 5 {
        cli.measurement * 1_000_000_000
    } else {
        measurement_ns
    };

    // A fixed sample count takes precedence over time-based measurement:
    // both time budgets are zeroed and the iteration bounds are pinned to n.
    if let Some(n) = cli.samples.or(config.runner.samples) {
        return ExecutionConfig {
            warmup_time_ns: 0,
            measurement_time_ns: 0,
            min_iterations: Some(n),
            max_iterations: Some(n),
            track_allocations: config.allocator.track,
            bootstrap_iterations: config.runner.bootstrap_iterations,
            confidence_level: config.runner.confidence_level,
        };
    }

    let min_iterations = cli.min_iterations.or(config.runner.min_iterations);
    let max_iterations = cli.max_iterations.or(config.runner.max_iterations);

    ExecutionConfig {
        warmup_time_ns,
        measurement_time_ns,
        min_iterations,
        max_iterations,
        track_allocations: config.allocator.track,
        bootstrap_iterations: config.runner.bootstrap_iterations,
        confidence_level: config.runner.confidence_level,
    }
}
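
// Worked example: `fluxbench --warmup 1 -n 500` produces warmup_time_ns = 0,
// measurement_time_ns = 0, and min/max_iterations = Some(500): a fixed
// sample count bypasses the time budgets entirely, including the explicit
// --warmup value.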

/// Execute the selected benchmarks and emit a report; exits non-zero on
/// crashes or critical verification failures.
fn run_benchmarks(
    cli: &Cli,
    config: &FluxConfig,
    format: OutputFormat,
    jobs: usize,
) -> anyhow::Result<()> {
    let jobs = jobs.max(1);

    if cli.threads > 0 {
        ThreadPoolBuilder::new()
            .num_threads(cli.threads)
            .build_global()
            .ok();
    }

    let all_benchmarks: Vec<_> = inventory::iter::<BenchmarkDef>.into_iter().collect();
    let benchmarks = filter_benchmarks(cli, &all_benchmarks);

    if benchmarks.is_empty() {
        // If --tag matched nothing, list the tags that do exist.
        if let Some(ref tag) = cli.tag {
            let all_tags: std::collections::BTreeSet<&str> = all_benchmarks
                .iter()
                .flat_map(|b| b.tags.iter().copied())
                .collect();
            if !all_tags.contains(tag.as_str()) {
                let available: Vec<&str> = all_tags.into_iter().collect();
                eprintln!(
                    "Warning: tag '{}' not found. Available tags: {}",
                    tag,
                    available.join(", ")
                );
            }
        }
        println!("No benchmarks found.");
        return Ok(());
    }

    // The config file can force in-process execution; otherwise the CLI
    // --isolated flag decides.
    let isolated = if config.runner.isolation.is_isolated() {
        cli.isolated
    } else {
        false
    };

    let threads_str = if cli.threads == 0 {
        "all".to_string()
    } else {
        cli.threads.to_string()
    };
    let mode_str = if isolated {
        if cli.one_shot {
            " (isolated, one-shot)"
        } else {
            " (isolated, persistent)"
        }
    } else {
        " (in-process)"
    };
    println!(
        "Running {} benchmarks{}, {} threads, {} worker(s)...\n",
        benchmarks.len(),
        mode_str,
        threads_str,
        jobs
    );

    let start_time = Instant::now();

    let exec_config = build_execution_config(cli, config);

    if exec_config.bootstrap_iterations > 0 && exec_config.bootstrap_iterations < 100 {
        eprintln!(
            "Warning: bootstrap_iterations={} is very low; confidence intervals will be unreliable. \
             Use >= 1000 for meaningful results, or 0 to skip bootstrap.",
            exec_config.bootstrap_iterations
        );
    }

    let results = if isolated {
        let timeout = std::time::Duration::from_secs(cli.worker_timeout);
        let reuse_workers = !cli.one_shot;
        let isolated_executor =
            IsolatedExecutor::new(exec_config.clone(), timeout, reuse_workers, jobs);
        isolated_executor.execute(&benchmarks)
    } else {
        if jobs > 1 {
            eprintln!(
                "Warning: --jobs currently applies only to isolated mode; running in-process serially."
            );
        }
        let mut executor = Executor::new(exec_config.clone());
        executor.execute(&benchmarks)
    };

    let stats = compute_statistics(&results, &exec_config);

    if exec_config.track_allocations
        && !results.is_empty()
        && results
            .iter()
            .all(|r| r.alloc_bytes == 0 && r.alloc_count == 0)
    {
        eprintln!(
            "Warning: allocation tracking enabled but all benchmarks reported 0 bytes allocated.\n\
             Ensure TrackingAllocator is set as #[global_allocator] in your benchmark binary."
        );
    }

    let total_duration_ms = start_time.elapsed().as_secs_f64() * 1000.0;
    let mut report = build_report(&results, &stats, &exec_config, total_duration_ms);

    if let Some(baseline_path) = resolve_baseline_path(&cli.baseline, config) {
        if baseline_path.exists() {
            match std::fs::read_to_string(&baseline_path).and_then(|json| {
                serde_json::from_str::<fluxbench_report::Report>(&json)
                    .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
            }) {
                Ok(baseline) => {
                    let threshold = cli.threshold.unwrap_or(config.ci.regression_threshold);
                    apply_baseline_comparison(&mut report, &baseline, threshold);
                }
                Err(e) => {
                    eprintln!(
                        "Warning: failed to load baseline {}: {}",
                        baseline_path.display(),
                        e
                    );
                }
            }
        } else {
            eprintln!(
                "Warning: baseline file not found: {}",
                baseline_path.display()
            );
        }
    }

    let (comparison_results, comparison_series, synthetic_results, verification_results) =
        execute_verifications(&results, &stats);
    let verification_summary = aggregate_verifications(&verification_results);
    report.comparisons = comparison_results;
    report.comparison_series = comparison_series;
    report.synthetics = synthetic_results;
    report.verifications = verification_results;

    // Non-critical verification failures are surfaced as warnings.
    report.summary.critical_failures = verification_summary.critical_failures;
    report.summary.warnings = verification_summary.failed - verification_summary.critical_failures;

    if config.ci.github_annotations {
        emit_github_annotations(&report);
    }

    let output = match format {
        OutputFormat::Json => generate_json_report(&report)?,
        OutputFormat::GithubSummary => generate_github_summary(&report),
        OutputFormat::Html => generate_html_report(&report),
        OutputFormat::Csv => generate_csv_report(&report),
        OutputFormat::Human => format_human_output(&report),
    };

    if let Some(ref path) = cli.output {
        let mut file = std::fs::File::create(path)?;
        file.write_all(output.as_bytes())?;
        println!("Report written to: {}", path.display());
    } else {
        print!("{}", output);
    }

    save_baseline_if_needed(cli, config, &report)?;

    let has_crashes = report
        .results
        .iter()
        .any(|r| matches!(r.status, fluxbench_report::BenchmarkStatus::Crashed));

    if verification_summary.should_fail_ci() || has_crashes {
        if has_crashes {
            eprintln!("\nBenchmark(s) crashed during execution");
        }
        if verification_summary.should_fail_ci() {
            eprintln!(
                "\n{} critical verification failure(s)",
                verification_summary.critical_failures + verification_summary.critical_errors
            );
        }
        std::process::exit(1);
    }

    Ok(())
}

/// Re-run the benchmarks and compare against a baseline report recorded at
/// the given git ref; exits non-zero on regressions or critical failures.
fn compare_benchmarks(
    cli: &Cli,
    config: &FluxConfig,
    git_ref: &str,
    format: OutputFormat,
) -> anyhow::Result<()> {
    let baseline_path = resolve_baseline_path(&cli.baseline, config).ok_or_else(|| {
        anyhow::anyhow!(
            "'compare' requires a baseline: pass --baseline (optionally with a path)"
        )
    })?;

    if !baseline_path.exists() {
        return Err(anyhow::anyhow!(
            "Baseline file not found: {}",
            baseline_path.display()
        ));
    }

    let baseline_json = std::fs::read_to_string(&baseline_path)?;
    let baseline: fluxbench_report::Report = serde_json::from_str(&baseline_json)?;
    let resolved_git_ref = resolve_git_ref(git_ref)?;

    // Accept prefix matches so short and full commit hashes compare equal.
    if let Some(baseline_commit) = baseline.meta.git_commit.as_deref() {
        let matches_ref = baseline_commit == resolved_git_ref
            || baseline_commit.starts_with(&resolved_git_ref)
            || resolved_git_ref.starts_with(baseline_commit);
        if !matches_ref {
            return Err(anyhow::anyhow!(
                "Baseline commit {} does not match git ref {} ({})",
                baseline_commit,
                git_ref,
                resolved_git_ref
            ));
        }
    } else {
        eprintln!(
            "Warning: baseline report has no commit metadata; git ref consistency cannot be verified."
        );
    }

    println!("Comparing against baseline: {}", baseline_path.display());
    println!("Git ref: {} ({})\n", git_ref, resolved_git_ref);

    let all_benchmarks: Vec<_> = inventory::iter::<BenchmarkDef>.into_iter().collect();
    let benchmarks = filter_benchmarks(cli, &all_benchmarks);

    if benchmarks.is_empty() {
        println!("No benchmarks found.");
        return Ok(());
    }

    let start_time = Instant::now();

    let exec_config = build_execution_config(cli, config);

    // The comparison run always executes in-process.
    let mut executor = Executor::new(exec_config.clone());
    let results = executor.execute(&benchmarks);
    let stats = compute_statistics(&results, &exec_config);

    let total_duration_ms = start_time.elapsed().as_secs_f64() * 1000.0;
    let mut report = build_report(&results, &stats, &exec_config, total_duration_ms);

    let regression_threshold = cli.threshold.unwrap_or(config.ci.regression_threshold);
    apply_baseline_comparison(&mut report, &baseline, regression_threshold);

    let (comparison_results, comparison_series, synthetic_results, verification_results) =
        execute_verifications(&results, &stats);
    let verification_summary = aggregate_verifications(&verification_results);
    report.comparisons = comparison_results;
    report.comparison_series = comparison_series;
    report.synthetics = synthetic_results;
    report.verifications = verification_results;
    report.summary.critical_failures = verification_summary.critical_failures;
    report.summary.warnings = verification_summary.failed - verification_summary.critical_failures;

    if config.ci.github_annotations {
        emit_github_annotations(&report);
    }

    let output = match format {
        OutputFormat::Json => generate_json_report(&report)?,
        OutputFormat::GithubSummary => generate_github_summary(&report),
        OutputFormat::Html => generate_html_report(&report),
        OutputFormat::Csv => generate_csv_report(&report),
        OutputFormat::Human => format_comparison_output(&report, &baseline),
    };

    if let Some(ref path) = cli.output {
        let mut file = std::fs::File::create(path)?;
        file.write_all(output.as_bytes())?;
        println!("Report written to: {}", path.display());
    } else {
        print!("{}", output);
    }

    save_baseline_if_needed(cli, config, &report)?;

    let should_fail = report.summary.regressions > 0 || verification_summary.should_fail_ci();
    if should_fail {
        if report.summary.regressions > 0 {
            eprintln!(
                "\n{} regression(s) detected above {}% threshold",
                report.summary.regressions, regression_threshold
            );
        }
        if verification_summary.should_fail_ci() {
            eprintln!(
                "\n{} critical verification failure(s)",
                verification_summary.critical_failures + verification_summary.critical_errors
            );
        }
        std::process::exit(1);
    }

    Ok(())
}

/// Persist the report as a baseline when requested via --save-baseline or
/// the config file.
fn save_baseline_if_needed(
    cli: &Cli,
    config: &FluxConfig,
    report: &fluxbench_report::Report,
) -> anyhow::Result<()> {
    let should_save = cli.save_baseline.is_some() || config.output.save_baseline;
    if !should_save {
        return Ok(());
    }

    // Path precedence: explicit CLI path, then the config, then the default.
    let path = cli
        .save_baseline
        .as_ref()
        .and_then(|opt| opt.clone())
        .or_else(|| config.output.baseline_path.as_ref().map(PathBuf::from))
        .unwrap_or_else(|| PathBuf::from("target/fluxbench/baseline.json"));

    if let Some(parent) = path.parent() {
        std::fs::create_dir_all(parent)?;
    }

    let json = generate_json_report(report)?;
    std::fs::write(&path, json)?;
    eprintln!("Baseline saved to: {}", path.display());

    Ok(())
}

/// Annotate each result with a comparison against the matching baseline
/// entry and tally regressions and improvements. A per-benchmark threshold
/// (when > 0) overrides the global regression threshold.
fn apply_baseline_comparison(
    report: &mut fluxbench_report::Report,
    baseline: &fluxbench_report::Report,
    regression_threshold: f64,
) {
    report.baseline_meta = Some(baseline.meta.clone());

    let baseline_map: std::collections::HashMap<_, _> = baseline
        .results
        .iter()
        .filter_map(|r| r.metrics.as_ref().map(|m| (r.id.clone(), m.clone())))
        .collect();

    for result in &mut report.results {
        if let (Some(metrics), Some(baseline_metrics)) =
            (&result.metrics, baseline_map.get(&result.id))
        {
            let effective_threshold = if result.threshold > 0.0 {
                result.threshold
            } else {
                regression_threshold
            };

            let baseline_mean = baseline_metrics.mean_ns;
            let absolute_change = metrics.mean_ns - baseline_mean;
            // Guard against a degenerate zero-mean baseline.
            let relative_change = if baseline_mean > 0.0 {
                (absolute_change / baseline_mean) * 100.0
            } else {
                0.0
            };

            // Significant only when the change exceeds the threshold AND the
            // confidence intervals do not overlap.
            let ci_non_overlap = metrics.ci_upper_ns < baseline_metrics.ci_lower_ns
                || metrics.ci_lower_ns > baseline_metrics.ci_upper_ns;
            let is_significant = relative_change.abs() > effective_threshold && ci_non_overlap;

            if relative_change > effective_threshold {
                report.summary.regressions += 1;
            } else if relative_change < -effective_threshold {
                report.summary.improvements += 1;
            }

            // Effect size in standard deviations of the current run.
            let mut effect_size = if metrics.std_dev_ns > f64::EPSILON {
                absolute_change / metrics.std_dev_ns
            } else {
                0.0
            };
            if !effect_size.is_finite() {
                effect_size = 0.0;
            }

            // Coarse heuristic, not a calibrated posterior probability.
            let probability_regression = if ci_non_overlap {
                if relative_change > 0.0 { 0.99 } else { 0.01 }
            } else if relative_change > 0.0 {
                0.60
            } else {
                0.40
            };

            result.comparison = Some(fluxbench_report::Comparison {
                baseline_mean_ns: baseline_mean,
                absolute_change_ns: absolute_change,
                relative_change,
                probability_regression,
                is_significant,
                effect_size,
            });
        }
    }
}
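
// Worked example (illustrative numbers): baseline mean 100 ns, current mean
// 108 ns gives absolute_change = 8 ns and relative_change = +8%. With a
// per-bench threshold of 5% that counts as a regression; with a global
// threshold of 25% and no per-bench override it does not. `is_significant`
// additionally requires the two confidence intervals not to overlap.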

/// Resolve the baseline path from the double-Option CLI flag: flag absent
/// gives None; bare `--baseline` gives the configured path or the default;
/// `--baseline PATH` gives that path.
fn resolve_baseline_path(
    cli_baseline: &Option<Option<PathBuf>>,
    config: &FluxConfig,
) -> Option<PathBuf> {
    match cli_baseline {
        Some(Some(path)) => Some(path.clone()),
        Some(None) => Some(
            config
                .output
                .baseline_path
                .as_ref()
                .map(PathBuf::from)
                .unwrap_or_else(|| PathBuf::from("target/fluxbench/baseline.json")),
        ),
        None => None,
    }
}

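/// Emit GitHub Actions workflow commands for crashed/failed benchmarks,
/// significant regressions, and verification problems.
///
/// Sample annotation (illustrative values; the exact duration strings come
/// from `format_duration`):
///
/// ```text
/// ::error file=src/parse.rs,line=42::parse_json: regression +12.3% (1.20 µs → 1.35 µs)
/// ```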
fn emit_github_annotations(report: &fluxbench_report::Report) {
    for result in &report.results {
        match result.status {
            fluxbench_report::BenchmarkStatus::Crashed => {
                let msg = result
                    .failure
                    .as_ref()
                    .map(|f| f.message.as_str())
                    .unwrap_or("benchmark crashed");
                println!(
                    "::error file={},line={}::{}: {}",
                    result.file, result.line, result.id, msg
                );
            }
            fluxbench_report::BenchmarkStatus::Failed => {
                let msg = result
                    .failure
                    .as_ref()
                    .map(|f| f.message.as_str())
                    .unwrap_or("benchmark failed");
                println!(
                    "::error file={},line={}::{}: {}",
                    result.file, result.line, result.id, msg
                );
            }
            _ => {}
        }

        if let Some(cmp) = &result.comparison {
            if cmp.is_significant && cmp.relative_change > 0.0 {
                println!(
                    "::error file={},line={}::{}: regression {:+.1}% ({} → {})",
                    result.file,
                    result.line,
                    result.id,
                    cmp.relative_change,
                    format_duration(cmp.baseline_mean_ns),
                    result
                        .metrics
                        .as_ref()
                        .map(|m| format_duration(m.mean_ns))
                        .unwrap_or_default(),
                );
            }
        }
    }

    for v in &report.verifications {
        match &v.status {
            fluxbench_logic::VerificationStatus::Failed => {
                let level = match v.severity {
                    fluxbench_core::Severity::Critical => "error",
                    _ => "warning",
                };
                println!("::{}::{}: {}", level, v.id, v.message);
            }
            fluxbench_logic::VerificationStatus::Error { message } => {
                println!("::error::{}: evaluation error: {}", v.id, message);
            }
            _ => {}
        }
    }
}

/// Resolve a git ref (branch, tag, or abbreviated hash) to a full commit
/// hash via `git rev-parse --verify`.
fn resolve_git_ref(git_ref: &str) -> anyhow::Result<String> {
    let output = std::process::Command::new("git")
        .args(["rev-parse", "--verify", git_ref])
        .output()
        .map_err(|e| anyhow::anyhow!("Failed to resolve git ref '{}': {}", git_ref, e))?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(anyhow::anyhow!(
            "Invalid git ref '{}': {}",
            git_ref,
            stderr.trim()
        ));
    }

    let resolved = String::from_utf8(output.stdout)?.trim().to_string();
    if resolved.is_empty() {
        return Err(anyhow::anyhow!(
            "Git ref '{}' resolved to an empty commit hash",
            git_ref
        ));
    }

    Ok(resolved)
}

/// Render a human-readable comparison between the current report and a
/// baseline.
fn format_comparison_output(
    report: &fluxbench_report::Report,
    baseline: &fluxbench_report::Report,
) -> String {
    let mut output = String::new();

    output.push('\n');
    output.push_str("FluxBench Comparison Results\n");
    output.push_str(&"=".repeat(60));
    output.push_str("\n\n");

    output.push_str(&format!(
        "Baseline: {} ({})\n",
        baseline.meta.git_commit.as_deref().unwrap_or("unknown"),
        baseline.meta.timestamp.format("%Y-%m-%d %H:%M:%S")
    ));
    output.push_str(&format!(
        "Current:  {} ({})\n\n",
        report.meta.git_commit.as_deref().unwrap_or("unknown"),
        report.meta.timestamp.format("%Y-%m-%d %H:%M:%S")
    ));

    for result in &report.results {
        let status_icon = match result.status {
            fluxbench_report::BenchmarkStatus::Passed => "✓",
            fluxbench_report::BenchmarkStatus::Failed => "✗",
            fluxbench_report::BenchmarkStatus::Crashed => "💥",
            fluxbench_report::BenchmarkStatus::Skipped => "⊘",
        };

        output.push_str(&format!("{} {}\n", status_icon, result.id));

        if let (Some(metrics), Some(comparison)) = (&result.metrics, &result.comparison) {
            // Display-only bands: the ±5% icon cutoffs are fixed and
            // independent of the configured regression threshold.
            let change_icon = if comparison.relative_change > 5.0 {
                "📈 REGRESSION"
            } else if comparison.relative_change < -5.0 {
                "📉 improvement"
            } else {
                "≈ no change"
            };

            output.push_str(&format!(
                "  baseline: {} → current: {}\n",
                format_duration(comparison.baseline_mean_ns),
                format_duration(metrics.mean_ns),
            ));
            output.push_str(&format!(
                "  change: {:+.2}% ({}) {}\n",
                comparison.relative_change,
                format_duration(comparison.absolute_change_ns.abs()),
                change_icon,
            ));
        }

        output.push('\n');
    }

    output.push_str("Summary\n");
    output.push_str(&"-".repeat(60));
    output.push('\n');
    output.push_str(&format!(
        "  Regressions: {}  Improvements: {}  No Change: {}\n",
        report.summary.regressions,
        report.summary.improvements,
        report.summary.total_benchmarks - report.summary.regressions - report.summary.improvements
    ));

    output
}

#[cfg(test)]
mod tests {
    use super::*;
    use fluxbench_report::{
        BenchmarkMetrics, BenchmarkReportResult, BenchmarkStatus, Report, ReportConfig, ReportMeta,
        ReportSummary, SystemInfo,
    };

    fn dummy_meta() -> ReportMeta {
        ReportMeta {
            schema_version: 1,
            version: "0.1.0".to_string(),
            timestamp: chrono::Utc::now(),
            git_commit: None,
            git_branch: None,
            system: SystemInfo {
                os: "linux".to_string(),
                os_version: "6.0".to_string(),
                cpu: "test".to_string(),
                cpu_cores: 1,
                memory_gb: 1.0,
            },
            config: ReportConfig {
                warmup_time_ns: 0,
                measurement_time_ns: 0,
                min_iterations: None,
                max_iterations: None,
                bootstrap_iterations: 0,
                confidence_level: 0.95,
                track_allocations: false,
            },
        }
    }

    fn dummy_metrics(mean: f64) -> BenchmarkMetrics {
        BenchmarkMetrics {
            samples: 100,
            mean_ns: mean,
            median_ns: mean,
            std_dev_ns: mean * 0.01,
            min_ns: mean * 0.9,
            max_ns: mean * 1.1,
            p50_ns: mean,
            p90_ns: mean * 1.05,
            p95_ns: mean * 1.07,
            p99_ns: mean * 1.09,
            p999_ns: mean * 1.1,
            skewness: 0.0,
            kurtosis: 3.0,
            ci_lower_ns: mean * 0.98,
            ci_upper_ns: mean * 1.02,
            ci_level: 0.95,
            throughput_ops_sec: None,
            alloc_bytes: 0,
            alloc_count: 0,
            mean_cycles: 0.0,
            median_cycles: 0.0,
            min_cycles: 0,
            max_cycles: 0,
            cycles_per_ns: 0.0,
        }
    }

    fn dummy_result(id: &str, mean: f64, threshold: f64) -> BenchmarkReportResult {
        BenchmarkReportResult {
            id: id.to_string(),
            name: id.to_string(),
            group: "test".to_string(),
            status: BenchmarkStatus::Passed,
            severity: fluxbench_core::Severity::Warning,
            file: "test.rs".to_string(),
            line: 1,
            metrics: Some(dummy_metrics(mean)),
            threshold,
            comparison: None,
            failure: None,
        }
    }

    fn dummy_report(results: Vec<BenchmarkReportResult>) -> Report {
        let total = results.len();
        Report {
            meta: dummy_meta(),
            results,
            comparisons: vec![],
            comparison_series: vec![],
            synthetics: vec![],
            verifications: vec![],
            summary: ReportSummary {
                total_benchmarks: total,
                passed: total,
                ..Default::default()
            },
            baseline_meta: None,
        }
    }

    #[test]
    fn per_bench_threshold_overrides_global() {
        let mut report = dummy_report(vec![dummy_result("fast_bench", 108.0, 5.0)]);
        let baseline = dummy_report(vec![dummy_result("fast_bench", 100.0, 5.0)]);

        apply_baseline_comparison(&mut report, &baseline, 25.0);

        assert_eq!(
            report.summary.regressions, 1,
            "per-bench 5% should catch 8% regression"
        );
        let cmp = report.results[0].comparison.as_ref().unwrap();
        assert!(cmp.is_significant);
    }

    #[test]
    fn zero_threshold_falls_back_to_global() {
        let mut report = dummy_report(vec![dummy_result("normal_bench", 108.0, 0.0)]);
        let baseline = dummy_report(vec![dummy_result("normal_bench", 100.0, 0.0)]);

        apply_baseline_comparison(&mut report, &baseline, 25.0);

        assert_eq!(
            report.summary.regressions, 0,
            "8% under 25% global should not regress"
        );
        let cmp = report.results[0].comparison.as_ref().unwrap();
        assert!(!cmp.is_significant);
    }

    #[test]
    fn mixed_thresholds_independent() {
        let mut report = dummy_report(vec![
            dummy_result("tight", 108.0, 5.0),
            dummy_result("loose", 108.0, 0.0),
        ]);
        let baseline = dummy_report(vec![
            dummy_result("tight", 100.0, 5.0),
            dummy_result("loose", 100.0, 0.0),
        ]);

        apply_baseline_comparison(&mut report, &baseline, 25.0);

        assert_eq!(report.summary.regressions, 1);
        assert!(
            report.results[0]
                .comparison
                .as_ref()
                .unwrap()
                .is_significant
        );
        assert!(
            !report.results[1]
                .comparison
                .as_ref()
                .unwrap()
                .is_significant
        );
    }

    #[test]
    fn per_bench_threshold_detects_improvement() {
        let mut report = dummy_report(vec![dummy_result("improving", 90.0, 5.0)]);
        let baseline = dummy_report(vec![dummy_result("improving", 100.0, 5.0)]);

        apply_baseline_comparison(&mut report, &baseline, 25.0);

        assert_eq!(report.summary.improvements, 1);
        assert_eq!(report.summary.regressions, 0);
    }
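
    #[test]
    fn zero_baseline_mean_is_guarded() {
        // A 0 ns baseline mean must not divide by zero or be tallied as a
        // regression; the guard clamps relative_change to 0.0.
        let mut report = dummy_report(vec![dummy_result("degenerate", 100.0, 0.0)]);
        let baseline = dummy_report(vec![dummy_result("degenerate", 0.0, 0.0)]);

        apply_baseline_comparison(&mut report, &baseline, 25.0);

        assert_eq!(report.summary.regressions, 0);
        let cmp = report.results[0].comparison.as_ref().unwrap();
        assert_eq!(cmp.relative_change, 0.0);
        assert!(!cmp.is_significant);
    }

    #[test]
    fn missing_baseline_entry_leaves_comparison_none() {
        // Benchmarks absent from the baseline get no comparison and do not
        // affect the regression/improvement tallies.
        let mut report = dummy_report(vec![dummy_result("new_bench", 100.0, 0.0)]);
        let baseline = dummy_report(vec![]);

        apply_baseline_comparison(&mut report, &baseline, 25.0);

        assert!(report.results[0].comparison.is_none());
        assert_eq!(report.summary.regressions, 0);
        assert_eq!(report.summary.improvements, 0);
    }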
}