
fluxbench_cli/lib.rs

//! FluxBench CLI Library
//!
//! This module provides the CLI infrastructure for benchmark binaries.
//! Use `fluxbench::run()` (or `fluxbench_cli::run()`) in your main function to get the full
//! fluxbench CLI experience with your registered benchmarks.
//!
//! # Example
//!
//! ```ignore
//! use fluxbench::prelude::*;
//!
//! #[bench]
//! fn my_benchmark(b: &mut Bencher) {
//!     b.iter(|| expensive_operation());
//! }
//!
//! fn main() -> anyhow::Result<()> {
//!     fluxbench_cli::run()
//! }
//! ```
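//!
//! When the benchmark target is declared with `harness = false` in `Cargo.toml`,
//! arguments after `--` on `cargo bench` are forwarded to this CLI. A representative
//! invocation (all flags shown are defined on [`Cli`] below):
//!
//! ```text
//! cargo bench -- --format json -n 100 --tag io
//! ```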

mod config;
mod executor;
mod planner;
mod supervisor;

pub use config::*;
pub use executor::{
    ExecutionConfig, Executor, IsolatedExecutor, build_report, compute_statistics,
    execute_verifications, format_human_output,
};
pub use supervisor::*;

use clap::{Parser, Subcommand};
use fluxbench_core::{BenchmarkDef, WorkerMain};
use fluxbench_logic::aggregate_verifications;
use fluxbench_report::{
    OutputFormat, generate_csv_report, generate_github_summary, generate_html_report,
    generate_json_report,
};
use rayon::ThreadPoolBuilder;
use regex::Regex;
use std::io::Write;
use std::path::PathBuf;
use std::time::Instant;

/// FluxBench CLI arguments
#[derive(Parser, Debug)]
#[command(name = "fluxbench")]
#[command(author, version, about = "FluxBench - benchmarking framework for Rust")]
pub struct Cli {
    #[command(subcommand)]
    pub command: Option<Commands>,

    /// Filter benchmarks by regex pattern
    #[arg(default_value = ".*")]
    pub filter: String,

    /// Output format: json, github-summary, csv, html, human
    #[arg(long, default_value = "human")]
    pub format: String,

    /// Output file (stdout if not specified)
    #[arg(short, long)]
    pub output: Option<PathBuf>,

    /// Load baseline for comparison
    #[arg(long)]
    pub baseline: Option<PathBuf>,

    /// Dry run - list benchmarks without executing
    #[arg(long)]
    pub dry_run: bool,

    /// Regression threshold percentage
    #[arg(long)]
    pub threshold: Option<f64>,

    /// Run benchmarks for this group only
    #[arg(long)]
    pub group: Option<String>,

    /// Filter by tag
    #[arg(long)]
    pub tag: Option<String>,

    /// Skip benchmarks with this tag
    #[arg(long)]
    pub skip_tag: Option<String>,

    /// Warmup time in seconds
    #[arg(long, default_value = "3")]
    pub warmup: u64,

    /// Measurement time in seconds
    #[arg(long, default_value = "5")]
    pub measurement: u64,

    /// Fixed sample count mode: skip warmup, run exactly N iterations
    /// Each iteration becomes one sample. Overrides warmup/measurement/min/max.
    #[arg(long, short = 'n')]
    pub samples: Option<u64>,

    /// Minimum number of iterations
    #[arg(long)]
    pub min_iterations: Option<u64>,

    /// Maximum number of iterations
    #[arg(long)]
    pub max_iterations: Option<u64>,

    /// Verbose output
    #[arg(short, long)]
    pub verbose: bool,

    /// Run benchmarks in isolated worker processes (default: true)
    /// Use --isolated=false to disable and run in-process
    #[arg(long, default_value = "true", action = clap::ArgAction::Set)]
    pub isolated: bool,

    /// Use fresh worker process for each benchmark (One-Shot mode)
    /// Default is Persistent mode: reuse worker for safe Rust code
    #[arg(long)]
    pub one_shot: bool,

    /// Worker timeout in seconds
    #[arg(long, default_value = "60")]
    pub worker_timeout: u64,

    /// Number of parallel isolated workers
    #[arg(long, default_value = "1")]
    pub jobs: usize,

    /// Number of threads for parallel statistics computation
    /// 0 = use all available cores (default), 1 = single-threaded
    #[arg(long, short = 'j', default_value = "0")]
    pub threads: usize,

    /// Internal: Run as worker process (used by supervisor)
    #[arg(long, hide = true)]
    pub flux_worker: bool,

    /// Save benchmark results as baseline JSON
    /// Optionally specify a path; defaults to config or target/fluxbench/baseline.json
    #[arg(long)]
    pub save_baseline: Option<Option<PathBuf>>,

    /// Internal: Absorb cargo bench's --bench flag
    #[arg(long, hide = true)]
    pub bench: bool,
}

/// CLI subcommands
#[derive(Subcommand, Debug)]
pub enum Commands {
    /// List all discovered benchmarks
    List,
    /// Run benchmarks (default)
    Run,
    /// Compare against a git ref
    Compare {
        /// Git ref to compare against (e.g., origin/main)
        #[arg(name = "REF")]
        git_ref: String,
    },
}

/// Run the FluxBench CLI, parsing arguments from the command line.
/// This is the main entry point for benchmark binaries.
///
/// # Returns
/// Returns `Ok(())` on success, or an error if benchmark execution or report generation fails.
pub fn run() -> anyhow::Result<()> {
    let cli = Cli::parse();
    run_with_cli(cli)
}

/// Run the FluxBench CLI with pre-parsed arguments.
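///
/// A minimal sketch (relying only on clap's derived `Parser::parse_from` for the
/// [`Cli`] struct above) of driving the CLI programmatically, e.g. from a custom
/// wrapper binary:
///
/// ```ignore
/// use clap::Parser;
///
/// // First element is the program name; the remaining flags are defined on `Cli`.
/// let cli = fluxbench_cli::Cli::parse_from(["bench", "--format", "json", "--dry-run"]);
/// fluxbench_cli::run_with_cli(cli)?;
/// ```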
pub fn run_with_cli(cli: Cli) -> anyhow::Result<()> {
    // Handle worker mode first (before any other initialization)
    if cli.flux_worker {
        return run_worker_mode();
    }

    // Initialize logging
    if cli.verbose {
        tracing_subscriber::fmt()
            .with_env_filter("fluxbench=debug")
            .init();
    } else {
        tracing_subscriber::fmt()
            .with_env_filter("fluxbench=info")
            .init();
    }

    // Discover flux.toml configuration (CLI flags override)
    let config = FluxConfig::discover().unwrap_or_default();

    // Parse output format
    let format: OutputFormat = cli.format.parse().unwrap_or(OutputFormat::Human);

    // Resolve jobs: CLI wins if explicitly set (not default 1), else flux.toml, else 1
    let jobs = if cli.jobs != 1 {
        cli.jobs
    } else {
        config.runner.jobs.unwrap_or(1)
    };

    match cli.command {
        Some(Commands::List) => {
            list_benchmarks(&cli)?;
        }
        Some(Commands::Run) => {
            run_benchmarks(&cli, &config, format, jobs)?;
        }
        Some(Commands::Compare { ref git_ref }) => {
            compare_benchmarks(&cli, &config, git_ref, format)?;
        }
        None => {
            // Default: run benchmarks
            if cli.dry_run {
                list_benchmarks(&cli)?;
            } else {
                run_benchmarks(&cli, &config, format, jobs)?;
            }
        }
    }

    Ok(())
}

/// Run as a worker process (IPC mode)
fn run_worker_mode() -> anyhow::Result<()> {
    let mut worker = WorkerMain::new();
    worker
        .run()
        .map_err(|e| anyhow::anyhow!("Worker error: {}", e))
}

/// Filter benchmarks based on CLI options using the planner module.
///
/// Returns benchmarks sorted alphabetically by ID for deterministic execution.
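///
/// A sketch of the selection semantics (positional `FILTER`, `--group`, `--tag`,
/// and `--skip-tag` as defined on [`Cli`]; the `cargo bench --` pass-through assumes
/// a `harness = false` bench target):
///
/// ```text
/// cargo bench -- 'parse_.*' --tag io --skip-tag slow
/// # keeps benchmarks whose ID matches `parse_.*`, tagged `io`, and not tagged `slow`
/// ```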
fn filter_benchmarks(
    cli: &Cli,
    benchmarks: &[&'static BenchmarkDef],
) -> Vec<&'static BenchmarkDef> {
    let filter_re = Regex::new(&cli.filter).ok();

    let plan = planner::build_plan(
        benchmarks.iter().copied(),
        filter_re.as_ref(),
        cli.group.as_deref(),
        cli.tag.as_deref(),
        cli.skip_tag.as_deref(),
    );

    plan.benchmarks
}

fn list_benchmarks(cli: &Cli) -> anyhow::Result<()> {
    println!("FluxBench Plan:");

    let all_benchmarks: Vec<_> = inventory::iter::<BenchmarkDef>.into_iter().collect();
    let benchmarks = filter_benchmarks(cli, &all_benchmarks);

    let mut groups: std::collections::BTreeMap<&str, Vec<&BenchmarkDef>> =
        std::collections::BTreeMap::new();

    for bench in &benchmarks {
        groups.entry(bench.group).or_default().push(bench);
    }

    let mut total = 0;
    for (group, benches) in &groups {
        println!("├── group: {}", group);
        for bench in benches {
            let tags = if bench.tags.is_empty() {
                String::new()
            } else {
                format!(" [{}]", bench.tags.join(", "))
            };
            println!(
                "│   ├── {}{} ({}:{})",
                bench.id, tags, bench.file, bench.line
            );
            total += 1;
        }
    }

    println!("{} benchmarks found.", total);

    // Show all available tags across the entire suite (not just filtered results)
    // so users can discover what tags they can filter by.
    let mut tag_counts: std::collections::BTreeMap<&str, usize> = std::collections::BTreeMap::new();
    for bench in &all_benchmarks {
        for tag in bench.tags {
            *tag_counts.entry(tag).or_default() += 1;
        }
    }
    if !tag_counts.is_empty() {
        let tags_display: Vec<String> = tag_counts
            .iter()
            .map(|(tag, count)| format!("{} ({})", tag, count))
            .collect();
        println!("Tags: {}", tags_display.join(", "));
    }

    Ok(())
}

/// Build an ExecutionConfig by layering: flux.toml defaults → CLI overrides.
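///
/// A sketch of the precedence (the duration-string format in flux.toml is assumed;
/// it is whatever `FluxConfig::parse_duration` accepts):
///
/// ```text
/// flux.toml:  [runner] warmup_time = "2s", measurement_time = "10s"
/// CLI:        --warmup 7                  (measurement left at its clap default of 5)
/// result:     warmup_time_ns = 7_000_000_000        (CLI differs from default, wins)
///             measurement_time_ns = 10_000_000_000  (flux.toml value used)
/// ```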
fn build_execution_config(cli: &Cli, config: &FluxConfig) -> ExecutionConfig {
    // Start from flux.toml values (parsed durations fall back to defaults on error)
    let warmup_ns = FluxConfig::parse_duration(&config.runner.warmup_time).unwrap_or(3_000_000_000);
    let measurement_ns =
        FluxConfig::parse_duration(&config.runner.measurement_time).unwrap_or(5_000_000_000);

    // CLI flags override config file values. clap's defaults are warmup=3 and
    // measurement=5, so if the CLI value differs from the clap default, the user
    // set it explicitly and it wins.
    let warmup_time_ns = if cli.warmup != 3 {
        cli.warmup * 1_000_000_000
    } else {
        warmup_ns
    };
    let measurement_time_ns = if cli.measurement != 5 {
        cli.measurement * 1_000_000_000
    } else {
        measurement_ns
    };

    // --samples N: fixed-count mode, no warmup, each iteration = one sample
    // CLI wins, then flux.toml
    if let Some(n) = cli.samples.or(config.runner.samples) {
        return ExecutionConfig {
            warmup_time_ns: 0,
            measurement_time_ns: 0,
            min_iterations: Some(n),
            max_iterations: Some(n),
            track_allocations: config.allocator.track,
            bootstrap_iterations: config.runner.bootstrap_iterations,
            confidence_level: config.runner.confidence_level,
        };
    }

    // min/max iterations: CLI wins if set, else config, else default
    let min_iterations = cli.min_iterations.or(config.runner.min_iterations);
    let max_iterations = cli.max_iterations.or(config.runner.max_iterations);

    ExecutionConfig {
        warmup_time_ns,
        measurement_time_ns,
        min_iterations,
        max_iterations,
        track_allocations: config.allocator.track,
        bootstrap_iterations: config.runner.bootstrap_iterations,
        confidence_level: config.runner.confidence_level,
    }
}

fn run_benchmarks(
    cli: &Cli,
    config: &FluxConfig,
    format: OutputFormat,
    jobs: usize,
) -> anyhow::Result<()> {
    let jobs = jobs.max(1);

    // Configure Rayon thread pool for statistics computation
    if cli.threads > 0 {
        ThreadPoolBuilder::new()
            .num_threads(cli.threads)
            .build_global()
            .ok();
    }

    // Discover benchmarks
    let all_benchmarks: Vec<_> = inventory::iter::<BenchmarkDef>.into_iter().collect();
    let benchmarks = filter_benchmarks(cli, &all_benchmarks);

    if benchmarks.is_empty() {
        // If filtering by tag and no matches, check if the tag exists at all
        if let Some(ref tag) = cli.tag {
            let all_tags: std::collections::BTreeSet<&str> = all_benchmarks
                .iter()
                .flat_map(|b| b.tags.iter().copied())
                .collect();
            if !all_tags.contains(tag.as_str()) {
                let available: Vec<&str> = all_tags.into_iter().collect();
                eprintln!(
                    "Warning: tag '{}' not found. Available tags: {}",
                    tag,
                    available.join(", ")
                );
            }
        }
        println!("No benchmarks found.");
        return Ok(());
    }

    // Determine isolation mode: flux.toml can override CLI default
    let isolated = if config.runner.isolation.is_isolated() {
        cli.isolated
    } else {
        false
    };

    let threads_str = if cli.threads == 0 {
        "all".to_string()
    } else {
        cli.threads.to_string()
    };
    let mode_str = if isolated {
        if cli.one_shot {
            " (isolated, one-shot)"
        } else {
            " (isolated, persistent)"
        }
    } else {
        " (in-process)"
    };
    println!(
        "Running {} benchmarks{}, {} threads, {} worker(s)...\n",
        benchmarks.len(),
        mode_str,
        threads_str,
        jobs
    );

    let start_time = Instant::now();

    // Build execution config from flux.toml + CLI overrides
    let exec_config = build_execution_config(cli, config);

    if exec_config.bootstrap_iterations > 0 && exec_config.bootstrap_iterations < 100 {
        eprintln!(
            "Warning: bootstrap_iterations={} is very low; confidence intervals will be unreliable. \
             Use >= 1000 for meaningful results, or 0 to skip bootstrap.",
            exec_config.bootstrap_iterations
        );
    }

    // Execute benchmarks (isolated by default per TDD)
    let results = if isolated {
        let timeout = std::time::Duration::from_secs(cli.worker_timeout);
        let reuse_workers = !cli.one_shot;
        let isolated_executor =
            IsolatedExecutor::new(exec_config.clone(), timeout, reuse_workers, jobs);
        isolated_executor.execute(&benchmarks)
    } else {
        if jobs > 1 {
            eprintln!(
                "Warning: --jobs currently applies only to isolated mode; running in-process serially."
            );
        }
        let mut executor = Executor::new(exec_config.clone());
        executor.execute(&benchmarks)
    };

    // Compute statistics
    let stats = compute_statistics(&results, &exec_config);

    // Warn if allocation tracking is enabled but nothing was recorded
    if exec_config.track_allocations
        && !results.is_empty()
        && results
            .iter()
            .all(|r| r.alloc_bytes == 0 && r.alloc_count == 0)
    {
        eprintln!(
            "Warning: allocation tracking enabled but all benchmarks reported 0 bytes allocated.\n\
             Ensure TrackingAllocator is set as #[global_allocator] in your benchmark binary."
        );
    }

    // Build report
    let total_duration_ms = start_time.elapsed().as_secs_f64() * 1000.0;
    let mut report = build_report(&results, &stats, &exec_config, total_duration_ms);

    // Run comparisons, synthetics, and verifications
    let (comparison_results, comparison_series, synthetic_results, verification_results) =
        execute_verifications(&results, &stats);
    let verification_summary = aggregate_verifications(&verification_results);
    report.comparisons = comparison_results;
    report.comparison_series = comparison_series;
    report.synthetics = synthetic_results;
    report.verifications = verification_results;

    // Update summary with verification info
    report.summary.critical_failures = verification_summary.critical_failures;
    report.summary.warnings = verification_summary.failed - verification_summary.critical_failures;

    // Generate output
    let output = match format {
        OutputFormat::Json => generate_json_report(&report)?,
        OutputFormat::GithubSummary => generate_github_summary(&report),
        OutputFormat::Html => generate_html_report(&report),
        OutputFormat::Csv => generate_csv_report(&report),
        OutputFormat::Human => format_human_output(&report),
    };

    // Write output
    if let Some(ref path) = cli.output {
        let mut file = std::fs::File::create(path)?;
        file.write_all(output.as_bytes())?;
        println!("Report written to: {}", path.display());
    } else {
        print!("{}", output);
    }

    // Save baseline if requested
    save_baseline_if_needed(cli, config, &report)?;

    // Exit with appropriate code
    let has_crashes = report
        .results
        .iter()
        .any(|r| matches!(r.status, fluxbench_report::BenchmarkStatus::Crashed));

    if verification_summary.should_fail_ci() || has_crashes {
        if has_crashes {
            eprintln!("\nBenchmark(s) crashed during execution");
        }
        if verification_summary.should_fail_ci() {
            eprintln!(
                "\n{} critical verification failure(s)",
                verification_summary.critical_failures + verification_summary.critical_errors
            );
        }
        std::process::exit(1);
    }

    Ok(())
}

fn compare_benchmarks(
    cli: &Cli,
    config: &FluxConfig,
    git_ref: &str,
    format: OutputFormat,
) -> anyhow::Result<()> {
    // Load baseline
    let baseline_path = cli.baseline.as_ref().ok_or_else(|| {
        anyhow::anyhow!(
            "--baseline <PATH> is required for the 'compare' command"
        )
    })?;

    if !baseline_path.exists() {
        return Err(anyhow::anyhow!(
            "Baseline file not found: {}",
            baseline_path.display()
        ));
    }

    let baseline_json = std::fs::read_to_string(baseline_path)?;
    let baseline: fluxbench_report::Report = serde_json::from_str(&baseline_json)?;
    let resolved_git_ref = resolve_git_ref(git_ref)?;

    if let Some(baseline_commit) = baseline.meta.git_commit.as_deref() {
        let matches_ref = baseline_commit == resolved_git_ref
            || baseline_commit.starts_with(&resolved_git_ref)
            || resolved_git_ref.starts_with(baseline_commit);
        if !matches_ref {
            return Err(anyhow::anyhow!(
                "Baseline commit {} does not match git ref {} ({})",
                baseline_commit,
                git_ref,
                resolved_git_ref
            ));
        }
    } else {
        eprintln!(
            "Warning: baseline report has no commit metadata; git ref consistency cannot be verified."
        );
    }

    println!("Comparing against baseline: {}", baseline_path.display());
    println!("Git ref: {} ({})\n", git_ref, resolved_git_ref);

    // Run current benchmarks
    let all_benchmarks: Vec<_> = inventory::iter::<BenchmarkDef>.into_iter().collect();
    let benchmarks = filter_benchmarks(cli, &all_benchmarks);

    if benchmarks.is_empty() {
        println!("No benchmarks found.");
        return Ok(());
    }

    let start_time = Instant::now();

    let exec_config = build_execution_config(cli, config);

    let mut executor = Executor::new(exec_config.clone());
    let results = executor.execute(&benchmarks);
    let stats = compute_statistics(&results, &exec_config);

    let total_duration_ms = start_time.elapsed().as_secs_f64() * 1000.0;
    let mut report = build_report(&results, &stats, &exec_config, total_duration_ms);

    // Add comparison data
    let regression_threshold = cli.threshold.unwrap_or(config.ci.regression_threshold);
    let baseline_map: std::collections::HashMap<_, _> = baseline
        .results
        .iter()
        .filter_map(|r| r.metrics.as_ref().map(|m| (r.id.clone(), m.clone())))
        .collect();

    for result in &mut report.results {
        if let (Some(metrics), Some(baseline_metrics)) =
            (&result.metrics, baseline_map.get(&result.id))
        {
            let baseline_mean = baseline_metrics.mean_ns;
            let absolute_change = metrics.mean_ns - baseline_mean;
            let relative_change = if baseline_mean > 0.0 {
                (absolute_change / baseline_mean) * 100.0
            } else {
                0.0
            };

            // Determine significance via CI non-overlap and threshold crossing.
            let ci_non_overlap = metrics.ci_upper_ns < baseline_metrics.ci_lower_ns
                || metrics.ci_lower_ns > baseline_metrics.ci_upper_ns;
            let is_significant = relative_change.abs() > regression_threshold && ci_non_overlap;

            // Track regressions/improvements
            if relative_change > regression_threshold {
                report.summary.regressions += 1;
            } else if relative_change < -regression_threshold {
                report.summary.improvements += 1;
            }

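            // Effect size: the absolute change expressed in units of the current run's
            // standard deviation (a Cohen's-d-style ratio against a single spread).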
            let mut effect_size = if metrics.std_dev_ns > f64::EPSILON {
                absolute_change / metrics.std_dev_ns
            } else {
                0.0
            };
            if !effect_size.is_finite() {
                effect_size = 0.0;
            }

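            // Coarse heuristic, not a fitted probability: CI non-overlap is treated as
            // near-certainty (0.99 / 0.01); otherwise lean only slightly toward the
            // direction of the observed change.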
            let probability_regression = if ci_non_overlap {
                if relative_change > 0.0 { 0.99 } else { 0.01 }
            } else if relative_change > 0.0 {
                0.60
            } else {
                0.40
            };

            result.comparison = Some(fluxbench_report::Comparison {
                baseline_mean_ns: baseline_mean,
                absolute_change_ns: absolute_change,
                relative_change,
                probability_regression,
                is_significant,
                effect_size,
            });
        }
    }

    // Run comparisons, synthetics, and verifications
    let (comparison_results, comparison_series, synthetic_results, verification_results) =
        execute_verifications(&results, &stats);
    let verification_summary = aggregate_verifications(&verification_results);
    report.comparisons = comparison_results;
    report.comparison_series = comparison_series;
    report.synthetics = synthetic_results;
    report.verifications = verification_results;
    report.summary.critical_failures = verification_summary.critical_failures;
    report.summary.warnings = verification_summary.failed - verification_summary.critical_failures;

    // Generate output
    let output = match format {
        OutputFormat::Json => generate_json_report(&report)?,
        OutputFormat::GithubSummary => generate_github_summary(&report),
        OutputFormat::Html => generate_html_report(&report),
        OutputFormat::Csv => generate_csv_report(&report),
        OutputFormat::Human => format_comparison_output(&report, &baseline),
    };

    if let Some(ref path) = cli.output {
        let mut file = std::fs::File::create(path)?;
        file.write_all(output.as_bytes())?;
        println!("Report written to: {}", path.display());
    } else {
        print!("{}", output);
    }

    // Save baseline if requested
    save_baseline_if_needed(cli, config, &report)?;

    // Exit with error if regressions exceed threshold or verifications fail
    let should_fail = report.summary.regressions > 0 || verification_summary.should_fail_ci();
    if should_fail {
        if report.summary.regressions > 0 {
            eprintln!(
                "\n{} regression(s) detected above {}% threshold",
                report.summary.regressions, regression_threshold
            );
        }
        if verification_summary.should_fail_ci() {
            eprintln!(
                "\n{} critical verification failure(s)",
                verification_summary.critical_failures + verification_summary.critical_errors
            );
        }
        std::process::exit(1);
    }

    Ok(())
}

/// Save the report as a baseline JSON file if configured.
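///
/// Path resolution order (mirroring the code below): an explicit `--save-baseline <PATH>`,
/// then `output.baseline_path` from flux.toml, then `target/fluxbench/baseline.json`.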
fn save_baseline_if_needed(
    cli: &Cli,
    config: &FluxConfig,
    report: &fluxbench_report::Report,
) -> anyhow::Result<()> {
    // Determine if we should save: CLI --save-baseline flag or config.output.save_baseline
    let should_save = cli.save_baseline.is_some() || config.output.save_baseline;
    if !should_save {
        return Ok(());
    }

    // Resolve path: CLI value > config value > default
    let path = cli
        .save_baseline
        .as_ref()
        .and_then(|opt| opt.clone())
        .or_else(|| config.output.baseline_path.as_ref().map(PathBuf::from))
        .unwrap_or_else(|| PathBuf::from("target/fluxbench/baseline.json"));

    if let Some(parent) = path.parent() {
        std::fs::create_dir_all(parent)?;
    }

    let json = generate_json_report(report)?;
    std::fs::write(&path, json)?;
    eprintln!("Baseline saved to: {}", path.display());

    Ok(())
}

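/// Resolve a git ref (branch, tag, or abbreviated commit) to a full commit hash
/// via `git rev-parse --verify`.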
fn resolve_git_ref(git_ref: &str) -> anyhow::Result<String> {
    let output = std::process::Command::new("git")
        .args(["rev-parse", "--verify", git_ref])
        .output()
        .map_err(|e| anyhow::anyhow!("Failed to resolve git ref '{}': {}", git_ref, e))?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(anyhow::anyhow!(
            "Invalid git ref '{}': {}",
            git_ref,
            stderr.trim()
        ));
    }

    let resolved = String::from_utf8(output.stdout)?.trim().to_string();
    if resolved.is_empty() {
        return Err(anyhow::anyhow!(
            "Git ref '{}' resolved to an empty commit hash",
            git_ref
        ));
    }

    Ok(resolved)
}

/// Format comparison output for human display
fn format_comparison_output(
    report: &fluxbench_report::Report,
    baseline: &fluxbench_report::Report,
) -> String {
    let mut output = String::new();

    output.push('\n');
    output.push_str("FluxBench Comparison Results\n");
    output.push_str(&"=".repeat(60));
    output.push_str("\n\n");

    output.push_str(&format!(
        "Baseline: {} ({})\n",
        baseline.meta.git_commit.as_deref().unwrap_or("unknown"),
        baseline.meta.timestamp.format("%Y-%m-%d %H:%M:%S")
    ));
    output.push_str(&format!(
        "Current:  {} ({})\n\n",
        report.meta.git_commit.as_deref().unwrap_or("unknown"),
        report.meta.timestamp.format("%Y-%m-%d %H:%M:%S")
    ));

    for result in &report.results {
        let status_icon = match result.status {
            fluxbench_report::BenchmarkStatus::Passed => "✓",
            fluxbench_report::BenchmarkStatus::Failed => "✗",
            fluxbench_report::BenchmarkStatus::Crashed => "💥",
            fluxbench_report::BenchmarkStatus::Skipped => "⊘",
        };

        output.push_str(&format!("{} {}\n", status_icon, result.id));

        if let (Some(metrics), Some(comparison)) = (&result.metrics, &result.comparison) {
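            // Display-only banding: the ±5% cutoffs below are fixed labels for the
            // human-readable view and are independent of the configurable --threshold
            // used for the CI regression check above.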
            let change_icon = if comparison.relative_change > 5.0 {
                "📈 REGRESSION"
            } else if comparison.relative_change < -5.0 {
                "📉 improvement"
            } else {
                "≈ no change"
            };

            output.push_str(&format!(
                "    baseline: {:.2} ns → current: {:.2} ns\n",
                comparison.baseline_mean_ns, metrics.mean_ns
            ));
            output.push_str(&format!(
                "    change: {:+.2}% ({:+.2} ns) {}\n",
                comparison.relative_change, comparison.absolute_change_ns, change_icon
            ));
        }

        output.push('\n');
    }

    // Summary
    output.push_str("Summary\n");
    output.push_str(&"-".repeat(60));
    output.push('\n');
    output.push_str(&format!(
        "  Regressions: {}  Improvements: {}  No Change: {}\n",
        report.summary.regressions,
        report.summary.improvements,
        report.summary.total_benchmarks - report.summary.regressions - report.summary.improvements
    ));

    output
}