#![warn(missing_docs)]
//! FluxBench CLI Library
//!
//! This module provides the CLI infrastructure for benchmark binaries.
//! Use `fluxbench::run()` (or `fluxbench_cli::run()`) in your main function to get the full
//! fluxbench CLI experience with your registered benchmarks.
//!
//! # Example
//!
//! ```ignore
//! use fluxbench::prelude::*;
//!
//! #[bench]
//! fn my_benchmark(b: &mut Bencher) {
//!     b.iter(|| expensive_operation());
//! }
//!
//! fn main() -> anyhow::Result<()> {
//!     fluxbench_cli::run()
//! }
//! ```

mod config;
mod executor;
mod planner;
mod supervisor;

pub use config::*;
pub use executor::{
    ExecutionConfig, Executor, IsolatedExecutor, build_report, compute_statistics,
    execute_verifications, format_human_output,
};
pub use supervisor::*;

use clap::{Parser, Subcommand};
use fluxbench_core::{BenchmarkDef, WorkerMain};
use fluxbench_logic::aggregate_verifications;
use fluxbench_report::{
    OutputFormat, generate_csv_report, generate_github_summary, generate_html_report,
    generate_json_report,
};
use rayon::ThreadPoolBuilder;
use regex::Regex;
use std::io::Write;
use std::path::PathBuf;
use std::time::Instant;

/// FluxBench CLI arguments
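///
/// Typical invocations (illustrative; the binary name depends on your bench target):
///
/// ```text
/// my-bench 'parse_.*' --format json -o report.json
/// my-bench --tag fast --skip-tag slow -n 100
/// my-bench compare origin/main --baseline target/fluxbench/baseline.json
/// ```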
#[derive(Parser, Debug)]
#[command(name = "fluxbench")]
#[command(author, version, about = "FluxBench - benchmarking framework for Rust")]
pub struct Cli {
    /// Optional subcommand (List, Run, Compare); defaults to Run
    #[command(subcommand)]
    pub command: Option<Commands>,

    /// Filter benchmarks by regex pattern
    #[arg(default_value = ".*")]
    pub filter: String,

    /// Output format: json, github-summary, csv, html, human
    #[arg(long, default_value = "human")]
    pub format: String,

    /// Output file (stdout if not specified)
    #[arg(short, long)]
    pub output: Option<PathBuf>,

    /// Load baseline for comparison
    #[arg(long)]
    pub baseline: Option<PathBuf>,

    /// Dry run - list benchmarks without executing
    #[arg(long)]
    pub dry_run: bool,

    /// Regression threshold percentage
    #[arg(long)]
    pub threshold: Option<f64>,

    /// Run benchmarks for this group only
    #[arg(long)]
    pub group: Option<String>,

    /// Filter by tag
    #[arg(long)]
    pub tag: Option<String>,

    /// Skip benchmarks with this tag
    #[arg(long)]
    pub skip_tag: Option<String>,

    /// Warmup time in seconds
    #[arg(long, default_value = "3")]
    pub warmup: u64,

    /// Measurement time in seconds
    #[arg(long, default_value = "5")]
    pub measurement: u64,

    /// Fixed sample count mode: skip warmup, run exactly N iterations.
    /// Each iteration becomes one sample. Overrides warmup/measurement/min/max.
    #[arg(long, short = 'n')]
    pub samples: Option<u64>,

    /// Minimum number of iterations
    #[arg(long)]
    pub min_iterations: Option<u64>,

    /// Maximum number of iterations
    #[arg(long)]
    pub max_iterations: Option<u64>,

    /// Verbose output
    #[arg(short, long)]
    pub verbose: bool,

    /// Run benchmarks in isolated worker processes (default: true).
    /// Use --isolated=false to disable and run in-process.
    #[arg(long, default_value = "true", action = clap::ArgAction::Set)]
    pub isolated: bool,

    /// Use a fresh worker process for each benchmark (one-shot mode).
    /// The default is persistent mode: reuse the worker for safe Rust code.
    #[arg(long)]
    pub one_shot: bool,

    /// Worker timeout in seconds
    #[arg(long, default_value = "60")]
    pub worker_timeout: u64,

    /// Number of parallel isolated workers
    #[arg(long, default_value = "1")]
    pub jobs: usize,

    /// Number of threads for parallel statistics computation.
    /// 0 = use all available cores (default), 1 = single-threaded.
    #[arg(long, short = 'j', default_value = "0")]
    pub threads: usize,

    /// Internal: run as a worker process (used by the supervisor)
    #[arg(long, hide = true)]
    pub flux_worker: bool,

    /// Save benchmark results as baseline JSON.
    /// Optionally specify a path; defaults to config or target/fluxbench/baseline.json.
    #[arg(long)]
    pub save_baseline: Option<Option<PathBuf>>,

    /// Internal: absorb cargo bench's --bench flag
    #[arg(long, hide = true)]
    pub bench: bool,
}

/// CLI subcommands
#[derive(Subcommand, Debug)]
pub enum Commands {
    /// List all discovered benchmarks
    List,
    /// Run benchmarks (default)
    Run,
    /// Compare against a git ref
    Compare {
        /// Git ref to compare against (e.g., origin/main)
        #[arg(name = "REF")]
        git_ref: String,
    },
}

/// Run the FluxBench CLI, parsing arguments from the process command line.
/// This is the main entry point for benchmark binaries.
///
/// # Returns
/// Returns `Ok(())` on success, or an error if something goes wrong.
pub fn run() -> anyhow::Result<()> {
    let cli = Cli::parse();
    run_with_cli(cli)
}

/// Run the FluxBench CLI with pre-parsed arguments.
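///
/// # Example
///
/// A minimal sketch using clap's `Parser::parse_from` to build the arguments
/// programmatically (the filter pattern and format here are illustrative):
///
/// ```ignore
/// use clap::Parser;
///
/// let cli = fluxbench_cli::Cli::parse_from(["bench", "io_.*", "--format", "json"]);
/// fluxbench_cli::run_with_cli(cli)?;
/// ```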
pub fn run_with_cli(cli: Cli) -> anyhow::Result<()> {
    // Handle worker mode first (before any other initialization)
    if cli.flux_worker {
        return run_worker_mode();
    }

    // Initialize logging
    if cli.verbose {
        tracing_subscriber::fmt()
            .with_env_filter("fluxbench=debug")
            .init();
    } else {
        tracing_subscriber::fmt()
            .with_env_filter("fluxbench=info")
            .init();
    }

    // Discover flux.toml configuration (CLI flags override)
    let config = FluxConfig::discover().unwrap_or_default();

    // Parse output format
    let format: OutputFormat = cli.format.parse().unwrap_or(OutputFormat::Human);

    // Resolve jobs: CLI wins if explicitly set (not default 1), else flux.toml, else 1
    let jobs = if cli.jobs != 1 {
        cli.jobs
    } else {
        config.runner.jobs.unwrap_or(1)
    };

    match cli.command {
        Some(Commands::List) => {
            list_benchmarks(&cli)?;
        }
        Some(Commands::Run) => {
            run_benchmarks(&cli, &config, format, jobs)?;
        }
        Some(Commands::Compare { ref git_ref }) => {
            compare_benchmarks(&cli, &config, git_ref, format)?;
        }
        None => {
            // Default: run benchmarks
            if cli.dry_run {
                list_benchmarks(&cli)?;
            } else {
                run_benchmarks(&cli, &config, format, jobs)?;
            }
        }
    }

    Ok(())
}

/// Run as a worker process (IPC mode)
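///
/// The supervisor re-invokes the current executable with the hidden
/// `--flux-worker` flag; this function then hands control to `WorkerMain`,
/// which drives the worker side of the IPC session.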
fn run_worker_mode() -> anyhow::Result<()> {
    let mut worker = WorkerMain::new();
    worker
        .run()
        .map_err(|e| anyhow::anyhow!("Worker error: {}", e))
}

/// Filter benchmarks based on CLI options using the planner module.
///
/// Returns benchmarks sorted alphabetically by ID for deterministic execution.
fn filter_benchmarks(
    cli: &Cli,
    benchmarks: &[&'static BenchmarkDef],
) -> Vec<&'static BenchmarkDef> {
    let filter_re = Regex::new(&cli.filter).ok();

    let plan = planner::build_plan(
        benchmarks.iter().copied(),
        filter_re.as_ref(),
        cli.group.as_deref(),
        cli.tag.as_deref(),
        cli.skip_tag.as_deref(),
    );

    plan.benchmarks
}

fn list_benchmarks(cli: &Cli) -> anyhow::Result<()> {
    println!("FluxBench Plan:");

    let all_benchmarks: Vec<_> = inventory::iter::<BenchmarkDef>.into_iter().collect();
    let benchmarks = filter_benchmarks(cli, &all_benchmarks);

    let mut groups: std::collections::BTreeMap<&str, Vec<&BenchmarkDef>> =
        std::collections::BTreeMap::new();

    for bench in &benchmarks {
        groups.entry(bench.group).or_default().push(bench);
    }

    let mut total = 0;
    for (group, benches) in &groups {
        println!("├── group: {}", group);
        for bench in benches {
            let tags = if bench.tags.is_empty() {
                String::new()
            } else {
                format!(" [{}]", bench.tags.join(", "))
            };
            println!(
                "│   ├── {}{} ({}:{})",
                bench.id, tags, bench.file, bench.line
            );
            total += 1;
        }
    }

    println!("{} benchmarks found.", total);

    // Show all available tags across the entire suite (not just filtered results)
    // so users can discover what tags they can filter by.
    let mut tag_counts: std::collections::BTreeMap<&str, usize> = std::collections::BTreeMap::new();
    for bench in &all_benchmarks {
        for tag in bench.tags {
            *tag_counts.entry(tag).or_default() += 1;
        }
    }
    if !tag_counts.is_empty() {
        let tags_display: Vec<String> = tag_counts
            .iter()
            .map(|(tag, count)| format!("{} ({})", tag, count))
            .collect();
        println!("Tags: {}", tags_display.join(", "));
    }

    Ok(())
}

/// Build an ExecutionConfig by layering: flux.toml defaults → CLI overrides.
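///
/// Worked example (assuming flux.toml sets a 1 s warmup): passing `--warmup 10`
/// yields `warmup_time_ns = 10_000_000_000`, while leaving `--warmup` at its
/// clap default of 3 keeps the 1 s value from the file.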
fn build_execution_config(cli: &Cli, config: &FluxConfig) -> ExecutionConfig {
    // Start from flux.toml values (parsed durations fall back to defaults on error)
    let warmup_ns = FluxConfig::parse_duration(&config.runner.warmup_time).unwrap_or(3_000_000_000);
    let measurement_ns =
        FluxConfig::parse_duration(&config.runner.measurement_time).unwrap_or(5_000_000_000);

    // CLI flags override config file values. clap's defaults are warmup=3 and
    // measurement=5, so a CLI value that differs from the clap default was set
    // explicitly and wins. (An explicit `--warmup 3` is indistinguishable from
    // the default and falls back to the config value.)
    let warmup_time_ns = if cli.warmup != 3 {
        cli.warmup * 1_000_000_000
    } else {
        warmup_ns
    };
    let measurement_time_ns = if cli.measurement != 5 {
        cli.measurement * 1_000_000_000
    } else {
        measurement_ns
    };

    // --samples N: fixed-count mode, no warmup, each iteration = one sample
    // CLI wins, then flux.toml
    if let Some(n) = cli.samples.or(config.runner.samples) {
        return ExecutionConfig {
            warmup_time_ns: 0,
            measurement_time_ns: 0,
            min_iterations: Some(n),
            max_iterations: Some(n),
            track_allocations: config.allocator.track,
            bootstrap_iterations: config.runner.bootstrap_iterations,
            confidence_level: config.runner.confidence_level,
        };
    }

    // min/max iterations: CLI wins if set, else config, else default
    let min_iterations = cli.min_iterations.or(config.runner.min_iterations);
    let max_iterations = cli.max_iterations.or(config.runner.max_iterations);

    ExecutionConfig {
        warmup_time_ns,
        measurement_time_ns,
        min_iterations,
        max_iterations,
        track_allocations: config.allocator.track,
        bootstrap_iterations: config.runner.bootstrap_iterations,
        confidence_level: config.runner.confidence_level,
    }
}

fn run_benchmarks(
    cli: &Cli,
    config: &FluxConfig,
    format: OutputFormat,
    jobs: usize,
) -> anyhow::Result<()> {
    let jobs = jobs.max(1);

    // Configure Rayon thread pool for statistics computation
    if cli.threads > 0 {
        ThreadPoolBuilder::new()
            .num_threads(cli.threads)
            .build_global()
            .ok();
    }
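    // Note: --threads sizes this Rayon pool (statistics only); --jobs controls
    // how many isolated worker processes execute benchmarks in parallel.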

    // Discover benchmarks
    let all_benchmarks: Vec<_> = inventory::iter::<BenchmarkDef>.into_iter().collect();
    let benchmarks = filter_benchmarks(cli, &all_benchmarks);

    if benchmarks.is_empty() {
        // If filtering by tag and no matches, check if the tag exists at all
        if let Some(ref tag) = cli.tag {
            let all_tags: std::collections::BTreeSet<&str> = all_benchmarks
                .iter()
                .flat_map(|b| b.tags.iter().copied())
                .collect();
            if !all_tags.contains(tag.as_str()) {
                let available: Vec<&str> = all_tags.into_iter().collect();
                eprintln!(
                    "Warning: tag '{}' not found. Available tags: {}",
                    tag,
                    available.join(", ")
                );
            }
        }
        println!("No benchmarks found.");
        return Ok(());
    }

    // Determine isolation mode: flux.toml can force in-process execution;
    // when it allows isolation, the CLI flag decides.
    let isolated = if config.runner.isolation.is_isolated() {
        cli.isolated
    } else {
        false
    };

    let threads_str = if cli.threads == 0 {
        "all".to_string()
    } else {
        cli.threads.to_string()
    };
    let mode_str = if isolated {
        if cli.one_shot {
            " (isolated, one-shot)"
        } else {
            " (isolated, persistent)"
        }
    } else {
        " (in-process)"
    };
    println!(
        "Running {} benchmarks{}, {} threads, {} worker(s)...\n",
        benchmarks.len(),
        mode_str,
        threads_str,
        jobs
    );

    let start_time = Instant::now();

    // Build execution config from flux.toml + CLI overrides
    let exec_config = build_execution_config(cli, config);

    if exec_config.bootstrap_iterations > 0 && exec_config.bootstrap_iterations < 100 {
        eprintln!(
            "Warning: bootstrap_iterations={} is very low; confidence intervals will be unreliable. \
             Use >= 1000 for meaningful results, or 0 to skip bootstrap.",
            exec_config.bootstrap_iterations
        );
    }

    // Execute benchmarks (isolated by default per TDD)
    let results = if isolated {
        let timeout = std::time::Duration::from_secs(cli.worker_timeout);
        let reuse_workers = !cli.one_shot;
        let isolated_executor =
            IsolatedExecutor::new(exec_config.clone(), timeout, reuse_workers, jobs);
        isolated_executor.execute(&benchmarks)
    } else {
        if jobs > 1 {
            eprintln!(
                "Warning: --jobs currently applies only to isolated mode; running in-process serially."
            );
        }
        let mut executor = Executor::new(exec_config.clone());
        executor.execute(&benchmarks)
    };

    // Compute statistics
    let stats = compute_statistics(&results, &exec_config);

    // Warn if allocation tracking is enabled but nothing was recorded
    if exec_config.track_allocations
        && !results.is_empty()
        && results
            .iter()
            .all(|r| r.alloc_bytes == 0 && r.alloc_count == 0)
    {
        eprintln!(
            "Warning: allocation tracking enabled but all benchmarks reported 0 bytes allocated.\n\
             Ensure TrackingAllocator is set as #[global_allocator] in your benchmark binary."
        );
    }

    // Build report
    let total_duration_ms = start_time.elapsed().as_secs_f64() * 1000.0;
    let mut report = build_report(&results, &stats, &exec_config, total_duration_ms);

    // Run comparisons, synthetics, and verifications
    let (comparison_results, comparison_series, synthetic_results, verification_results) =
        execute_verifications(&results, &stats);
    let verification_summary = aggregate_verifications(&verification_results);
    report.comparisons = comparison_results;
    report.comparison_series = comparison_series;
    report.synthetics = synthetic_results;
    report.verifications = verification_results;

    // Update summary with verification info
    report.summary.critical_failures = verification_summary.critical_failures;
    report.summary.warnings = verification_summary.failed - verification_summary.critical_failures;

    // Generate output
    let output = match format {
        OutputFormat::Json => generate_json_report(&report)?,
        OutputFormat::GithubSummary => generate_github_summary(&report),
        OutputFormat::Html => generate_html_report(&report),
        OutputFormat::Csv => generate_csv_report(&report),
        OutputFormat::Human => format_human_output(&report),
    };

    // Write output
    if let Some(ref path) = cli.output {
        let mut file = std::fs::File::create(path)?;
        file.write_all(output.as_bytes())?;
        println!("Report written to: {}", path.display());
    } else {
        print!("{}", output);
    }

    // Save baseline if requested
    save_baseline_if_needed(cli, config, &report)?;

    // Exit with appropriate code
    let has_crashes = report
        .results
        .iter()
        .any(|r| matches!(r.status, fluxbench_report::BenchmarkStatus::Crashed));

    if verification_summary.should_fail_ci() || has_crashes {
        if has_crashes {
            eprintln!("\nBenchmark(s) crashed during execution");
        }
        if verification_summary.should_fail_ci() {
            eprintln!(
                "\n{} critical verification failure(s)",
                verification_summary.critical_failures + verification_summary.critical_errors
            );
        }
        std::process::exit(1);
    }

    Ok(())
}

fn compare_benchmarks(
    cli: &Cli,
    config: &FluxConfig,
    git_ref: &str,
    format: OutputFormat,
) -> anyhow::Result<()> {
    // Load baseline
    let baseline_path = cli.baseline.as_ref().ok_or_else(|| {
        anyhow::anyhow!(
            "the 'compare' command requires --baseline pointing to a saved report (generate one with --save-baseline)"
        )
    })?;

    if !baseline_path.exists() {
        return Err(anyhow::anyhow!(
            "Baseline file not found: {}",
            baseline_path.display()
        ));
    }

    let baseline_json = std::fs::read_to_string(baseline_path)?;
    let baseline: fluxbench_report::Report = serde_json::from_str(&baseline_json)?;
    let resolved_git_ref = resolve_git_ref(git_ref)?;

    if let Some(baseline_commit) = baseline.meta.git_commit.as_deref() {
        let matches_ref = baseline_commit == resolved_git_ref
            || baseline_commit.starts_with(&resolved_git_ref)
            || resolved_git_ref.starts_with(baseline_commit);
        if !matches_ref {
            return Err(anyhow::anyhow!(
                "Baseline commit {} does not match git ref {} ({})",
                baseline_commit,
                git_ref,
                resolved_git_ref
            ));
        }
    } else {
        eprintln!(
            "Warning: baseline report has no commit metadata; git ref consistency cannot be verified."
        );
    }

    println!("Comparing against baseline: {}", baseline_path.display());
    println!("Git ref: {} ({})\n", git_ref, resolved_git_ref);

    // Run current benchmarks
    let all_benchmarks: Vec<_> = inventory::iter::<BenchmarkDef>.into_iter().collect();
    let benchmarks = filter_benchmarks(cli, &all_benchmarks);

    if benchmarks.is_empty() {
        println!("No benchmarks found.");
        return Ok(());
    }

    let start_time = Instant::now();

    let exec_config = build_execution_config(cli, config);

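    // Note: `compare` currently runs the current benchmarks in-process via
    // `Executor`; the isolation options used by `run` do not apply here.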
    let mut executor = Executor::new(exec_config.clone());
    let results = executor.execute(&benchmarks);
    let stats = compute_statistics(&results, &exec_config);

    let total_duration_ms = start_time.elapsed().as_secs_f64() * 1000.0;
    let mut report = build_report(&results, &stats, &exec_config, total_duration_ms);

    // Add comparison data
    let regression_threshold = cli.threshold.unwrap_or(config.ci.regression_threshold);
    let baseline_map: std::collections::HashMap<_, _> = baseline
        .results
        .iter()
        .filter_map(|r| r.metrics.as_ref().map(|m| (r.id.clone(), m.clone())))
        .collect();

    for result in &mut report.results {
        if let (Some(metrics), Some(baseline_metrics)) =
            (&result.metrics, baseline_map.get(&result.id))
        {
            let baseline_mean = baseline_metrics.mean_ns;
            let absolute_change = metrics.mean_ns - baseline_mean;
            let relative_change = if baseline_mean > 0.0 {
                (absolute_change / baseline_mean) * 100.0
            } else {
                0.0
            };

            // Determine significance via CI non-overlap and threshold crossing.
            let ci_non_overlap = metrics.ci_upper_ns < baseline_metrics.ci_lower_ns
                || metrics.ci_lower_ns > baseline_metrics.ci_upper_ns;
            let is_significant = relative_change.abs() > regression_threshold && ci_non_overlap;
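            // Worked example with illustrative numbers: baseline mean 100 ns,
            // CI [96, 104]; current mean 120 ns, CI [118, 126]. Then
            // relative_change = +20%, the CIs do not overlap, and with a 5%
            // threshold the change is flagged as significant.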

            // Track regressions/improvements
            if relative_change > regression_threshold {
                report.summary.regressions += 1;
            } else if relative_change < -regression_threshold {
                report.summary.improvements += 1;
            }

            let mut effect_size = if metrics.std_dev_ns > f64::EPSILON {
                absolute_change / metrics.std_dev_ns
            } else {
                0.0
            };
            if !effect_size.is_finite() {
                effect_size = 0.0;
            }

            let probability_regression = if ci_non_overlap {
                if relative_change > 0.0 { 0.99 } else { 0.01 }
            } else if relative_change > 0.0 {
                0.60
            } else {
                0.40
            };
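            // These probabilities are a coarse heuristic keyed off CI overlap
            // and the sign of the change, not a computed posterior.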

            result.comparison = Some(fluxbench_report::Comparison {
                baseline_mean_ns: baseline_mean,
                absolute_change_ns: absolute_change,
                relative_change,
                probability_regression,
                is_significant,
                effect_size,
            });
        }
    }

    // Run comparisons, synthetics, and verifications
    let (comparison_results, comparison_series, synthetic_results, verification_results) =
        execute_verifications(&results, &stats);
    let verification_summary = aggregate_verifications(&verification_results);
    report.comparisons = comparison_results;
    report.comparison_series = comparison_series;
    report.synthetics = synthetic_results;
    report.verifications = verification_results;
    report.summary.critical_failures = verification_summary.critical_failures;
    report.summary.warnings = verification_summary.failed - verification_summary.critical_failures;

    // Generate output
    let output = match format {
        OutputFormat::Json => generate_json_report(&report)?,
        OutputFormat::GithubSummary => generate_github_summary(&report),
        OutputFormat::Html => generate_html_report(&report),
        OutputFormat::Csv => generate_csv_report(&report),
        OutputFormat::Human => format_comparison_output(&report, &baseline),
    };

    if let Some(ref path) = cli.output {
        let mut file = std::fs::File::create(path)?;
        file.write_all(output.as_bytes())?;
        println!("Report written to: {}", path.display());
    } else {
        print!("{}", output);
    }

    // Save baseline if requested
    save_baseline_if_needed(cli, config, &report)?;

    // Exit with error if regressions exceed threshold or verifications fail
    let should_fail = report.summary.regressions > 0 || verification_summary.should_fail_ci();
    if should_fail {
        if report.summary.regressions > 0 {
            eprintln!(
                "\n{} regression(s) detected above {}% threshold",
                report.summary.regressions, regression_threshold
            );
        }
        if verification_summary.should_fail_ci() {
            eprintln!(
                "\n{} critical verification failure(s)",
                verification_summary.critical_failures + verification_summary.critical_errors
            );
        }
        std::process::exit(1);
    }

    Ok(())
}

/// Save the report as a baseline JSON file if configured.
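///
/// Path precedence: an explicit `--save-baseline <path>`, then the baseline
/// path from flux.toml, then `target/fluxbench/baseline.json`.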
fn save_baseline_if_needed(
    cli: &Cli,
    config: &FluxConfig,
    report: &fluxbench_report::Report,
) -> anyhow::Result<()> {
    // Determine if we should save: CLI --save-baseline flag or config.output.save_baseline
    let should_save = cli.save_baseline.is_some() || config.output.save_baseline;
    if !should_save {
        return Ok(());
    }

    // Resolve path: CLI value > config value > default
    let path = cli
        .save_baseline
        .as_ref()
        .and_then(|opt| opt.clone())
        .or_else(|| config.output.baseline_path.as_ref().map(PathBuf::from))
        .unwrap_or_else(|| PathBuf::from("target/fluxbench/baseline.json"));

    if let Some(parent) = path.parent() {
        std::fs::create_dir_all(parent)?;
    }

    let json = generate_json_report(report)?;
    std::fs::write(&path, json)?;
    eprintln!("Baseline saved to: {}", path.display());

    Ok(())
}

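/// Resolve a git ref (e.g. `origin/main`) to a full commit hash using
/// `git rev-parse --verify`.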
fn resolve_git_ref(git_ref: &str) -> anyhow::Result<String> {
    let output = std::process::Command::new("git")
        .args(["rev-parse", "--verify", git_ref])
        .output()
        .map_err(|e| anyhow::anyhow!("Failed to resolve git ref '{}': {}", git_ref, e))?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(anyhow::anyhow!(
            "Invalid git ref '{}': {}",
            git_ref,
            stderr.trim()
        ));
    }

    let resolved = String::from_utf8(output.stdout)?.trim().to_string();
    if resolved.is_empty() {
        return Err(anyhow::anyhow!(
            "Git ref '{}' resolved to an empty commit hash",
            git_ref
        ));
    }

    Ok(resolved)
}

/// Format comparison output for human display
fn format_comparison_output(
    report: &fluxbench_report::Report,
    baseline: &fluxbench_report::Report,
) -> String {
    let mut output = String::new();

    output.push('\n');
    output.push_str("FluxBench Comparison Results\n");
    output.push_str(&"=".repeat(60));
    output.push_str("\n\n");

    output.push_str(&format!(
        "Baseline: {} ({})\n",
        baseline.meta.git_commit.as_deref().unwrap_or("unknown"),
        baseline.meta.timestamp.format("%Y-%m-%d %H:%M:%S")
    ));
    output.push_str(&format!(
        "Current:  {} ({})\n\n",
        report.meta.git_commit.as_deref().unwrap_or("unknown"),
        report.meta.timestamp.format("%Y-%m-%d %H:%M:%S")
    ));

    for result in &report.results {
        let status_icon = match result.status {
            fluxbench_report::BenchmarkStatus::Passed => "✓",
            fluxbench_report::BenchmarkStatus::Failed => "✗",
            fluxbench_report::BenchmarkStatus::Crashed => "💥",
            fluxbench_report::BenchmarkStatus::Skipped => "⊘",
        };

        output.push_str(&format!("{} {}\n", status_icon, result.id));

        if let (Some(metrics), Some(comparison)) = (&result.metrics, &result.comparison) {
            let change_icon = if comparison.relative_change > 5.0 {
                "📈 REGRESSION"
            } else if comparison.relative_change < -5.0 {
                "📉 improvement"
            } else {
                "≈ no change"
            };
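            // Display-only cutoff: this ±5% icon threshold is fixed and
            // independent of the --threshold flag used for CI failure.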

            output.push_str(&format!(
                "    baseline: {:.2} ns → current: {:.2} ns\n",
                comparison.baseline_mean_ns, metrics.mean_ns
            ));
            output.push_str(&format!(
                "    change: {:+.2}% ({:+.2} ns) {}\n",
                comparison.relative_change, comparison.absolute_change_ns, change_icon
            ));
        }

        output.push('\n');
    }

    // Summary
    output.push_str("Summary\n");
    output.push_str(&"-".repeat(60));
    output.push('\n');
    output.push_str(&format!(
        "  Regressions: {}  Improvements: {}  No Change: {}\n",
        report.summary.regressions,
        report.summary.improvements,
        report.summary.total_benchmarks - report.summary.regressions - report.summary.improvements
    ));

    output
}