Skip to main content

big_code_analysis_cli/
lib.rs

1//! Library surface for the `bca` CLI.
2//!
3//! Exists so the workspace `xtask` crate can render man pages from the
4//! same `clap::Command` tree that `bca` parses at runtime — the binary
5//! `main` is a one-liner that delegates to [`run`].
6//!
7//! # Embedder contract
8//!
9//! This crate is published to crates.io to support man-page generation
10//! and to keep the binary's `main` trivial; it is **not** a re-entrant
11//! library API. [`run`] and the internal helpers it calls
12//! (`die` / `die_io`, `run_check`, etc.) terminate the calling process
13//! via [`std::process::exit`] on user-input errors (bad threshold
14//! specs, missing paths, parser failures, broken pipes, and so on)
15//! and on the `check` subcommand's "thresholds exceeded" exit-2 path.
16//! Hosting [`run`] inside another process will tear that process down
17//! without unwinding. If you need a re-entrant entry point, drive the
18//! [`big_code_analysis`] library crate directly.
19
20#![allow(
21    clippy::too_many_lines,
22    clippy::struct_excessive_bools,
23    clippy::similar_names,
24    clippy::needless_pass_by_value,
25    // `run` panics on a handful of provably-unreachable invariants
26    // (mutex poisoning where every worker thread has joined, channel
27    // sends after run_walk returns). Each one is documented at the
28    // call site with an `expect` reason — surfacing them in a `# Panics`
29    // section on the entry point adds noise without adding signal.
30    clippy::missing_panics_doc
31)]
32mod baseline;
33mod check_format;
34mod format_util;
35mod formats;
36mod html_report;
37mod markdown_report;
38mod metric_catalog;
39mod thresholds;
40
41use std::collections::{BTreeMap, HashMap, hash_map};
42use std::ffi::OsString;
43use std::fmt::Display;
44use std::io::{ErrorKind, Write};
45use std::path::{Path, PathBuf};
46use std::process;
47use std::sync::atomic::{AtomicUsize, Ordering};
48use std::sync::{Arc, Mutex};
49use std::thread::available_parallelism;
50
51use clap::{Args, Parser, Subcommand, ValueEnum};
52use globset::{Glob, GlobSet, GlobSetBuilder};
53
54use baseline::Baseline;
55use check_format::{AggregatedFormat, violation_to_offender};
56use formats::{CBOR_STDOUT_ERROR, MetricsDispatch, MetricsFormat, ReportFormat, dump_csv};
57use html_report::generate_html_report;
58use markdown_report::{FunctionSummary, extract_summaries, generate_report};
59use metric_catalog::{ListMetricsMode, write_metrics};
60use thresholds::{ThresholdConfig, ThresholdSet, Violation, parse_cli_threshold};
61
62use big_code_analysis::LANG;
63use big_code_analysis::ParserTrait;
64
/// `expect` message used at every `action::<_>` call site below.
///
/// The CLI pins `big-code-analysis` with `features = ["all-languages"]`,
/// so a `LANG` value that reached this point must be enabled at compile
/// time. Any future caller that loosens the feature pin must change
/// this invariant explicitly.
///
/// A panic carrying this text therefore means `action` rejected a
/// language as disabled — i.e. the feature pin described above no longer
/// holds for this build.
const FEATURES_PINNED: &str = "CLI pins big-code-analysis features = [\"all-languages\"]";
72use big_code_analysis::{
73    CommentRm, CommentRmCfg, ConcurrentRunner, Count, CountCfg, Dump, DumpCfg, FilesData, Find,
74    FindCfg, Function, FunctionCfg, Metrics, MetricsCfg, MetricsOptions, OpsCfg, OpsCode,
75    PreprocParser, PreprocResults, SuppressionPolicy,
76};
77// The CLI is the canonical path-based caller: `bca` walks a tree on
78// disk and naturally has a `&Path` for every file it processes, so
79// the deprecated path-positional shims (`get_function_spaces_with_options`)
80// are still the most direct entry point here. Migration to the new
81// `Source` / `analyze` API tracks issue #254's follow-up; for now,
82// scope the deprecation lint to this single import to keep the rest
83// of the file clean.
84#[allow(deprecated)]
85use big_code_analysis::get_function_spaces_with_options;
86use big_code_analysis::{
87    action, fix_includes, get_from_ext, get_ops, guess_language, is_generated, preprocess,
88    read_file, read_file_with_eol, write_file,
89};
90
/// Report a fatal error and terminate the process.
///
/// Prints `Error: <msg>` to stderr, then exits with status 1. This never
/// returns and does not unwind, so destructors of live values are not
/// run.
fn die(msg: impl Display) -> ! {
    eprintln!("Error: {msg}");
    process::exit(1)
}
95
96/// Die with `failed to <verb> <path>: <err>`. Centralizes the most common
97/// I/O error shape: open/read/parse/write of a user-supplied path that
98/// failed with an error implementing `Display`.
99fn die_io(verb: &str, path: &Path, err: impl Display) -> ! {
100    die(format_args!("failed to {verb} {}: {err}", path.display()))
101}
102
103/// Write `bytes` to stdout, tolerating `BrokenPipe` (the typical case when
104/// the consumer is `head`, `less`, etc.) and `die`ing on anything else.
105fn write_stdout_or_die(bytes: &[u8]) {
106    if let Err(e) = std::io::stdout().lock().write_all(bytes)
107        && e.kind() != ErrorKind::BrokenPipe
108    {
109        die(e);
110    }
111}
112
/// Analyze source code.
//
// Single-line doc-comment kept in sync with the `about = "..."` attribute
// below — clap promotes a doc-comment to `long_about`, which clap-mangen
// renders into the manpage DESCRIPTION. The embedder contract for this
// crate (which is why `Cli` is `pub` at all) lives in the crate-level
// `//!` docs above, not here.
//
// Maintainer notes on the attributes (plain `//` so they stay out of the
// generated help text):
// - `subcommand_required` + `arg_required_else_help`: a bare `bca`
//   invocation prints help instead of running anything.
// - `after_help`: points users of the older flag-style CLI at the
//   migration guide.
#[derive(Parser, Debug)]
#[clap(
    name = "bca",
    version,
    author,
    about = "Analyze source code.",
    subcommand_required = true,
    arg_required_else_help = true,
    after_help = "Migrating from the flag-style CLI? See the migration guide:\n  big-code-analysis-book/src/migration.md"
)]
pub struct Cli {
    // Options shared by every subcommand; each one is declared
    // `global = true` on `GlobalOpts`.
    #[clap(flatten)]
    globals: GlobalOpts,
    // The selected subcommand together with its own arguments.
    #[command(subcommand)]
    command: Command,
}
136
// Options shared by all subcommands. The struct is flattened into `Cli`
// and every field is declared `global = true`.
//
// The `///` comments on the fields are the user-facing help text that
// clap renders, so maintainer-only notes belong in plain `//` comments
// like this one.
#[derive(Args, Debug, Default)]
struct GlobalOpts {
    /// Input files or directories to analyze.
    #[clap(long, short, value_parser, global = true)]
    paths: Vec<PathBuf>,
    /// Glob to include files.
    #[clap(long, short = 'I', num_args(0..), global = true)]
    include: Vec<String>,
    /// Glob to exclude files.
    #[clap(long, short = 'X', num_args(0..), global = true)]
    exclude: Vec<String>,
    /// Number of jobs.
    #[clap(long, short = 'j', global = true)]
    num_jobs: Option<usize>,
    /// Force a language type instead of inferring from extension.
    #[clap(long, short = 'l', global = true)]
    language_type: Option<String>,
    /// Line start (used by `dump` and `find`).
    #[clap(long = "ls", global = true)]
    line_start: Option<usize>,
    /// Line end (used by `dump` and `find`).
    #[clap(long = "le", global = true)]
    line_end: Option<usize>,
    /// Print warnings (skipped files, unrecognized languages).
    #[clap(long, short, global = true)]
    warning: bool,
    /// Disable auto-skip of files marked as generated (e.g. `@generated`,
    /// `DO NOT EDIT`, `GENERATED CODE` near the top). By default the CLI
    /// skips such files so generated bindings do not skew metrics.
    #[clap(long, global = true)]
    no_skip_generated: bool,
    /// Log a "skipped (generated): <path>" line to stderr for each file
    /// auto-skipped by the generated-code detector. Useful for auditing
    /// which files were excluded.
    #[clap(long, global = true)]
    report_skipped: bool,
    /// Existing preprocessor-data JSON to consume during C/C++ analysis.
    /// Use `bca preproc` to produce one.
    #[clap(long, value_parser, global = true)]
    preproc_data: Option<PathBuf>,
    /// Read newline-separated input paths from a file. Use `-` to read
    /// from stdin. Combined as a union with any `--paths` values; globs
    /// still apply. Blank lines are skipped; `#` is treated as a path
    /// character (not a comment). To pass a file literally named `-`,
    /// use `./-`.
    #[clap(long = "paths-from", value_parser, global = true)]
    paths_from: Option<PathBuf>,
    /// Disable `.gitignore` / `.ignore` / global gitignore awareness
    /// when expanding directory seeds. Explicit file paths are always
    /// honored regardless of this flag.
    #[clap(long = "no-ignore", global = true)]
    no_ignore: bool,
    /// Exclude inline test code from metric computation. Currently
    /// applies to Rust only (skips `#[test]`, `#[cfg(test)]`,
    /// `#[tokio::test]`, `#[rstest]`, `#![cfg(test)]` items and
    /// their subtrees). Default is off — every node is counted, so
    /// numbers match the pre-#182 behaviour byte-for-byte. Languages
    /// without a `Checker::should_skip_subtree` override ignore this
    /// flag.
    #[clap(long = "exclude-tests", global = true)]
    exclude_tests: bool,
}
199
// The `bca` subcommands. Unit variants take no arguments of their own
// beyond the global options; the others carry a dedicated `Args`
// struct. The `///` comments are the per-subcommand help text shown to
// users.
#[derive(Subcommand, Debug)]
enum Command {
    /// Compute per-file metrics and emit them in a structured format.
    Metrics(StructuredArgs),
    /// Extract per-file operands and operators.
    Ops(StructuredArgs),
    /// Generate an aggregated report across the analyzed source.
    Report(ReportArgs),
    /// Dump the AST to stdout.
    Dump,
    /// Find nodes of one or more types.
    Find(NodesArgs),
    /// Count nodes of one or more types.
    Count(NodesArgs),
    /// List functions/methods and their spans.
    Functions,
    /// Remove comments from source files.
    StripComments(StripCommentsArgs),
    /// Generate preprocessor-data JSON for C/C++ analysis.
    Preproc(PreprocArgs),
    /// List the metrics this tool can compute and exit.
    ListMetrics(ListMetricsArgs),
    /// Check per-function metrics against thresholds. Exits 2 when any
    /// threshold is exceeded; reserve exit 1 for tool errors so CI can
    /// distinguish "metric regression" from "tool crashed".
    Check(CheckArgs),
}
227
/// Shared shape for `metrics` and `ops`: same format set, same output
/// semantics (directory of per-file emissions; stdout if omitted).
#[derive(Args, Debug)]
struct StructuredArgs {
    /// Output format.
    // Optional: when absent, `act_on_file` takes the non-serializing
    // branch of `Action::Metrics` / `Action::Ops` (the library `action`
    // call) instead of the structured dump.
    #[clap(long, short = 'O', value_enum)]
    output_format: Option<MetricsFormat>,
    /// Output directory. Filenames mirror input paths plus the format
    /// extension. Stdout if omitted (CBOR requires this flag).
    #[clap(long, short, value_parser)]
    output: Option<PathBuf>,
    /// Pretty-print JSON / TOML output.
    #[clap(long)]
    pretty: bool,
}
243
// Arguments of `bca report`. `format` is the only positional argument;
// the remaining fields are long options.
#[derive(Args, Debug)]
struct ReportArgs {
    /// Report format.
    #[clap(value_enum)]
    format: ReportFormat,
    /// Output file. Stdout if omitted.
    #[clap(long, short, value_parser)]
    output: Option<PathBuf>,
    /// Maximum number of entries per hotspot table.
    // `range(1..)` makes clap reject `--top 0` at parse time.
    #[clap(long, default_value_t = 20, value_parser = clap::value_parser!(u32).range(1..))]
    top: u32,
    /// Path prefix to strip from displayed file paths.
    // The empty-string default means "strip nothing".
    #[clap(long, default_value = "")]
    strip_prefix: String,
}
259
// Arguments shared by `bca find` and `bca count` (both `Command`
// variants wrap this struct).
#[derive(Args, Debug)]
struct NodesArgs {
    /// Node-type names. Pass one or more, space-separated.
    // Positional and mandatory: at least one name must be supplied.
    #[clap(required = true, num_args = 1..)]
    nodes: Vec<String>,
}
266
// Arguments of `bca strip-comments`.
#[derive(Args, Debug)]
struct StripCommentsArgs {
    /// Rewrite each input file in place instead of writing to stdout.
    // Forwarded to the library as `CommentRmCfg::in_place`.
    #[clap(long)]
    in_place: bool,
}
273
// Arguments of `bca preproc`, the producer of the preprocessor-data JSON
// that `--preproc-data` consumes.
#[derive(Args, Debug)]
struct PreprocArgs {
    /// Output JSON file. Stdout if omitted.
    #[clap(long, short, value_parser)]
    output: Option<PathBuf>,
}
280
// Arguments of `bca check`. Two option pairs are mutually exclusive and
// enforced by clap through the `conflicts_with*` attributes below:
// `--baseline` vs `--write-baseline`, and `--write-baseline` vs
// `--output-format` / `--output`.
#[derive(Args, Debug)]
struct CheckArgs {
    /// Threshold expressed as `<metric>=<limit>`. Repeatable. Metric
    /// names match `bca list-metrics`; sub-metrics use a dotted form
    /// (e.g. `loc.lloc`, `halstead.volume`). CLI flags override values
    /// from `--config`. Limits must be finite and non-negative; `0` is
    /// allowed and means "no value permitted".
    // Each occurrence is parsed by `parse_cli_threshold` into a
    // `(metric name, limit)` pair.
    #[clap(long = "threshold", value_parser = parse_cli_threshold)]
    thresholds: Vec<(String, f64)>,
    /// Path to a TOML config with a `[thresholds]` table:
    ///
    /// ```toml
    /// [thresholds]
    /// cyclomatic = 15
    /// "loc.lloc" = 200
    /// ```
    #[clap(long, value_parser)]
    config: Option<PathBuf>,
    /// Print offenders to stderr but exit 0 even when thresholds are
    /// exceeded. Useful while adopting baselines without flipping CI red.
    /// Default: exit 2 when any threshold is exceeded.
    #[clap(long = "no-fail")]
    no_fail: bool,
    /// Ignore in-source suppression markers (`bca: suppress`,
    /// `#lizard forgives`, etc.). Every threshold violation is
    /// reported regardless of comment-based silencers. CI auditors
    /// pass this to see the raw, un-silenced offender list.
    #[clap(long = "no-suppress")]
    no_suppress: bool,
    /// CI/IDE document format for offender records (Checkstyle 4.3 XML,
    /// SARIF 2.1.0 JSON, clang/GCC warning lines, MSVC warning lines).
    /// When omitted, only the human-readable stderr stream is emitted;
    /// the exit-code contract is unaffected.
    #[clap(long = "output-format", short = 'O', value_enum)]
    output_format: Option<AggregatedFormat>,
    /// File path for the aggregated offender document. Stdout if omitted.
    /// Only meaningful together with `--output-format`. Parent
    /// directories are created on demand.
    #[clap(long, short, value_parser)]
    output: Option<PathBuf>,
    /// Filter known offenders listed in this TOML baseline. A baselined
    /// function whose metric value has not worsened is suppressed; a
    /// worsened value (or any new offender) still fails. See the
    /// "Baselines" recipe in the book for the full adoption flow.
    #[clap(long = "baseline", value_parser, conflicts_with = "write_baseline")]
    baseline: Option<PathBuf>,
    /// Walk the tree and write the current offender set to this path
    /// instead of failing. The resulting file pins today's metric
    /// values as the baseline; subsequent `--baseline <path>` runs
    /// ratchet down from there. Conflicts with `--baseline`,
    /// `--output-format`, and `--output` — the baseline file is the
    /// output.
    #[clap(
        long = "write-baseline",
        value_parser,
        conflicts_with_all = ["baseline", "output_format", "output"],
    )]
    write_baseline: Option<PathBuf>,
}
340
// Arguments of `bca list-metrics`.
#[derive(Args, Debug)]
struct ListMetricsArgs {
    /// What to print: `names` (one per line) or `descriptions`
    /// (name + one-line summary).
    // Positional and optional; defaults to `ListMetricsMode::Names`.
    #[clap(value_enum, default_value_t = ListMetricsMode::Names)]
    mode: ListMetricsMode,
}
348
/// What `act_on_file` should do per file. Drives the inner dispatch and
/// replaces the prior cluster of mutually-exclusive bool flags.
#[derive(Debug)]
enum Action {
    /// Run the library's `Dump` action, restricted to the configured
    /// line range.
    Dump,
    /// Compute metrics for the file. With a `format` the space tree is
    /// serialized through the format's dispatcher (`pretty` is forwarded
    /// to it); without one the library's `Metrics` action runs instead.
    Metrics {
        format: Option<MetricsFormat>,
        pretty: bool,
    },
    /// Extract operands and operators. With a `format` the result is
    /// serialized (`pretty` is forwarded); without one the library's
    /// `OpsCode` action runs instead.
    Ops {
        format: Option<MetricsFormat>,
        pretty: bool,
    },
    /// Remove comments, rewriting the file when `in_place` is set.
    StripComments {
        in_place: bool,
    },
    /// Run the library's `Function` action on the file.
    Functions,
    /// Run the library's `Find` action with these node-type filters,
    /// restricted to the configured line range.
    Find(Arc<[String]>),
    /// Run the library's `Count` action with these node-type filters,
    /// accumulating into the shared `Config::count_lock`.
    Count(Arc<[String]>),
    /// Same walk as `Metrics`, but taps each space tree to stream
    /// `FunctionSummary` records for the post-walk aggregator.
    Report,
    /// Walks source to accumulate preprocessor data (no per-file output).
    PreprocProduce,
    /// Walks source and streams threshold violations to a channel.
    Check,
}
376
/// Per-run configuration handed by reference to `act_on_file` for every
/// dispatched file. Built by [`Config::new`]; command-specific fields are
/// filled in afterwards by the caller.
#[derive(Debug)]
struct Config {
    /// Which per-file operation `act_on_file` dispatches to.
    action: Action,
    /// Output location handed to the structured `Metrics` / `Ops`
    /// dumpers. `None` by default.
    output: Option<PathBuf>,
    /// Language applied to every file. When `None`, `act_on_file`
    /// guesses the language per file.
    language: Option<LANG>,
    /// Optional first line, forwarded to `DumpCfg` and `FindCfg`.
    line_start: Option<usize>,
    /// Optional last line, forwarded to `DumpCfg` and `FindCfg`.
    line_end: Option<usize>,
    /// Accumulator that `Action::PreprocProduce` locks and fills while
    /// walking. `None` by default.
    preproc_lock: Option<Arc<Mutex<PreprocResults>>>,
    /// Previously produced preprocessor data; a clone of this handle is
    /// passed to the library call made for each file.
    preproc: Option<Arc<PreprocResults>>,
    /// Shared counters for `Action::Count`. Must be `Some` before a
    /// `Count` dispatch — `act_on_file` panics otherwise.
    count_lock: Option<Arc<Mutex<Count>>>,
    /// Sender for streaming `FunctionSummary` records when running `report`.
    /// Wrapped in `Mutex` because `mpsc::Sender` is `Send` but not `Sync`.
    markdown_tx: Option<Mutex<std::sync::mpsc::Sender<FunctionSummary>>>,
    /// Path prefix stripped from file paths in the markdown report.
    strip_prefix: String,
    /// Pre-resolved thresholds for `Action::Check`. `None` for every
    /// other action.
    threshold_set: Option<Arc<ThresholdSet>>,
    /// Sender for streaming [`Violation`] records when running `check`.
    /// Wrapped in `Mutex` for the same reason as `markdown_tx`.
    check_tx: Option<Mutex<std::sync::mpsc::Sender<Violation>>>,
    /// Counts how many files survived expansion and glob filtering and
    /// were actually dispatched to `act_on_file`. `Action::Check` reads
    /// this after the walk to distinguish "all clean" (counter > 0,
    /// no violations) from "no files matched" (counter == 0), so a
    /// typo in `--paths` does not silently pass CI.
    files_dispatched: Option<Arc<AtomicUsize>>,
    /// Whether to honor or ignore in-source suppression markers when
    /// emitting threshold violations. Only meaningful for
    /// `Action::Check`; the field is defaulted to `Honor` for every
    /// other action so the new code path is invisible to existing
    /// flows. Flipped to `Ignore` by `--no-suppress`.
    suppression_policy: SuppressionPolicy,
    /// When true, `act_on_file` prints a stderr line for each file it
    /// skips (empty, generated, unrecognized language, …).
    warning: bool,
    /// When true, files whose head matches a generated-code marker are
    /// skipped before parsing. Defaults on; flipped off by
    /// `--no-skip-generated`.
    skip_generated: bool,
    /// When true, log a stderr line for each file auto-skipped by the
    /// generated-code detector. Also enabled by `warning` (which logs
    /// every skip reason); `report_skipped` is the dedicated flag for
    /// users who want the generated-skip audit without the rest of the
    /// warning stream.
    report_skipped: bool,
    /// Copied into [`MetricsOptions`] by [`Config::metrics_options`], so
    /// every `get_function_spaces_with_options` call sees it. When true,
    /// language modules that override `Checker::should_skip_subtree`
    /// (currently only Rust) prune their test subtrees before metric
    /// computation. See `GlobalOpts::exclude_tests` for the user-facing
    /// description.
    exclude_tests: bool,
}
429
430impl Config {
431    /// Build a `Config` for `action`, populating the fields every command
432    /// shares from `globals`. Per-command extras (`output`, `count_lock`,
433    /// `markdown_tx`, `strip_prefix`) are set on the returned value at the
434    /// call site.
435    fn new(action: Action, globals: &GlobalOpts, preproc: Option<Arc<PreprocResults>>) -> Self {
436        let language = resolve_language(globals.language_type.as_deref(), &action);
437        Self {
438            action,
439            output: None,
440            language,
441            line_start: globals.line_start,
442            line_end: globals.line_end,
443            preproc_lock: None,
444            preproc,
445            count_lock: None,
446            markdown_tx: None,
447            strip_prefix: String::new(),
448            threshold_set: None,
449            check_tx: None,
450            files_dispatched: None,
451            suppression_policy: SuppressionPolicy::Honor,
452            warning: globals.warning,
453            skip_generated: !globals.no_skip_generated,
454            report_skipped: globals.report_skipped,
455            exclude_tests: globals.exclude_tests,
456        }
457    }
458
459    /// Project this `Config` onto the library's `MetricsOptions`
460    /// surface. Centralising the projection here means new metric
461    /// options land in one place instead of being duplicated across
462    /// every `act_on_file` arm that drives a metric computation.
463    #[inline]
464    fn metrics_options(&self) -> MetricsOptions {
465        MetricsOptions::default().with_exclude_tests(self.exclude_tests)
466    }
467}
468
469fn mk_globset(elems: Vec<String>) -> Result<GlobSet, String> {
470    if elems.is_empty() {
471        return Ok(GlobSet::empty());
472    }
473
474    let mut globset = GlobSetBuilder::new();
475    for e in &elems {
476        if e.is_empty() {
477            continue;
478        }
479        globset.add(Glob::new(e).map_err(|err| format!("invalid glob pattern {e:?}: {err}"))?);
480    }
481    globset
482        .build()
483        .map_err(|err| format!("failed to build glob set: {err}"))
484}
485
// `act_on_file` is the per-file dispatch hub for the CLI. Every
// Action variant that needs metric data calls
// `get_function_spaces_with_options`, which is now `#[deprecated]`
// in favour of `analyze(Source { ... }, ...)`. The CLI is the
// canonical path-based caller (it always has a `&Path` for the file
// it just read), so the deprecated shim remains the most direct
// entry point here. Migration tracks issue #254's follow-up; the
// function-scope `#[allow(deprecated)]` keeps the surrounding code
// readable without per-call-site attributes.
/// Process one input file according to `cfg.action`.
///
/// Steps, in order:
/// 1. bump `cfg.files_dispatched` when a counter is installed;
/// 2. read the file — a `None` result is reported as an empty file and
///    skipped;
/// 3. skip files detected as generated (unless the action is
///    `PreprocProduce` or the skip is disabled);
/// 4. pick the language: `cfg.language` if set, otherwise a per-file
///    guess — files with no recognizable language are skipped;
/// 5. dispatch on the action.
///
/// Skips return `Ok(())`; they are only logged when `cfg.warning` (or,
/// for generated files, `cfg.report_skipped`) is set.
///
/// # Errors
///
/// Propagates the `std::io::Error` from reading the file, from the
/// structured dump helpers, and from the library `action` calls.
///
/// # Panics
///
/// Panics if a library `action` call rejects the language (see
/// `FEATURES_PINNED`), if `Action::Count` is dispatched without
/// `cfg.count_lock`, or if the preproc mutex is poisoned.
#[allow(deprecated)]
fn act_on_file(path: PathBuf, cfg: &Config) -> std::io::Result<()> {
    if let Some(counter) = &cfg.files_dispatched {
        // Count every dispatched file, including those skipped below for
        // empty content / unrecognized language. The user pointed at
        // these files and the runner walked them — they count as "the
        // input was non-empty" for the zero-files-matched check in
        // `run_check`.
        counter.fetch_add(1, Ordering::Relaxed);
    }

    let Some(source) = read_file_with_eol(&path)? else {
        if cfg.warning {
            eprintln!("warning: skipping empty file: {}", path.display());
        }
        return Ok(());
    };

    // The generated-code skip runs before language detection so we don't
    // pay parse cost for files we'll discard. It's a CLI-level filter
    // (preproc has its own pipeline that genuinely needs every C/C++ file
    // walked), so leave Action::PreprocProduce alone.
    if cfg.skip_generated && !matches!(cfg.action, Action::PreprocProduce) && is_generated(&source)
    {
        if cfg.report_skipped || cfg.warning {
            eprintln!("skipped (generated): {}", path.display());
        }
        return Ok(());
    }

    let Some(language) = cfg.language.or_else(|| guess_language(&source, &path).0) else {
        if cfg.warning {
            eprintln!(
                "warning: skipping file with unrecognized language: {}",
                path.display()
            );
        }
        return Ok(());
    };

    // Clones the `Option<Arc<_>>` handle only, not the preproc data.
    let pr = cfg.preproc.clone();
    match &cfg.action {
        Action::Dump => {
            let dump_cfg = DumpCfg {
                line_start: cfg.line_start,
                line_end: cfg.line_end,
            };
            // The CLI pins the library's `all-languages` feature, so
            // `LanguageDisabled` from `action::<T>` is unreachable; the
            // `expect` documents that invariant.
            action::<Dump>(&language, source, &path, pr, dump_cfg).expect(FEATURES_PINNED)
        }
        Action::Metrics { format, pretty } => {
            if let Some(fmt) = format {
                // An analysis failure is not reported here: the file
                // simply produces no output and the walk continues.
                if let Ok(space) = get_function_spaces_with_options(
                    &language,
                    source,
                    &path,
                    pr,
                    cfg.metrics_options(),
                ) {
                    match fmt.dispatch() {
                        MetricsDispatch::Generic(g) => {
                            g.dump(space, path, cfg.output.as_ref(), *pretty)?;
                        }
                        MetricsDispatch::Csv => {
                            dump_csv(&space, path, cfg.output.as_ref())?;
                        }
                    }
                }
                Ok(())
            } else {
                // `path` is moved into the config, then cloned back out
                // because `action` also takes it by reference.
                let metrics_cfg = MetricsCfg::new(path).with_options(cfg.metrics_options());
                let path = metrics_cfg.path.clone();
                action::<Metrics>(&language, source, &path, pr, metrics_cfg).expect(FEATURES_PINNED)
            }
        }
        Action::Ops { format, pretty } => {
            if let Some(fmt) = format {
                // As with `Metrics`, an extraction failure yields no
                // output for this file rather than an error.
                if let Ok(ops) = get_ops(&language, source, &path, pr) {
                    // CSV is rejected upstream in `run()` for the
                    // Ops command, so the dispatch here is always
                    // Generic. The match is still exhaustive to keep
                    // the compiler honest if that upstream guard ever
                    // drifts.
                    match fmt.dispatch() {
                        MetricsDispatch::Generic(g) => {
                            g.dump(ops, path, cfg.output.as_ref(), *pretty)?;
                        }
                        MetricsDispatch::Csv => {}
                    }
                }
                Ok(())
            } else {
                // Same move-then-clone dance as the `Metrics` arm.
                let ops_cfg = OpsCfg { path };
                let path = ops_cfg.path.clone();
                action::<OpsCode>(&language, source, &path, pr, ops_cfg).expect(FEATURES_PINNED)
            }
        }
        Action::StripComments { in_place } => {
            let comment_cfg = CommentRmCfg {
                in_place: *in_place,
                path,
            };
            let path = comment_cfg.path.clone();
            // C++ comment removal goes through the dedicated Ccomment grammar
            // even when the file's primary language is Cpp.
            let lang = if language == LANG::Cpp {
                LANG::Ccomment
            } else {
                language
            };
            action::<CommentRm>(&lang, source, &path, pr, comment_cfg).expect(FEATURES_PINNED)
        }
        Action::Functions => {
            let fn_cfg = FunctionCfg { path: path.clone() };
            action::<Function>(&language, source, &path, pr, fn_cfg).expect(FEATURES_PINNED)
        }
        Action::Find(filters) => {
            let find_cfg = FindCfg {
                path: path.clone(),
                filters: Arc::clone(filters),
                line_start: cfg.line_start,
                line_end: cfg.line_end,
            };
            action::<Find>(&language, source, &path, pr, find_cfg).expect(FEATURES_PINNED)
        }
        Action::Count(filters) => {
            let stats = cfg
                .count_lock
                .clone()
                .expect("Count handler initializes count_lock before dispatch");
            let count_cfg = CountCfg {
                filters: Arc::clone(filters),
                stats,
            };
            action::<Count>(&language, source, &path, pr, count_cfg).expect(FEATURES_PINNED)
        }
        Action::Report => {
            // Nothing is emitted when analysis fails, when no sender is
            // installed, or for the Preproc / Ccomment pseudo-languages.
            if let Ok(space) = get_function_spaces_with_options(
                &language,
                source,
                &path,
                pr,
                cfg.metrics_options(),
            ) && let Some(ref tx) = cfg.markdown_tx
                && !matches!(language, LANG::Preproc | LANG::Ccomment)
            {
                // Markdown reports are human-readable text and the
                // downstream `FunctionSummary::file: String` is rendered
                // into the report body, so non-UTF-8 paths cannot
                // round-trip through this pipeline regardless of how we
                // carry them upstream. Skip with a warning. The
                // threshold pipeline (Action::Check) carries `&Path`
                // end-to-end because its JSON/SARIF outputs can
                // preserve raw bytes.
                let Some(file_str) = path.to_str() else {
                    if cfg.warning {
                        eprintln!(
                            "warning: skipping non-UTF-8 path in report: {}",
                            path.display()
                        );
                    }
                    return Ok(());
                };
                // Collect first, then take the lock, so the sender mutex
                // is held only while the records are being sent.
                let mut summaries = Vec::new();
                extract_summaries(
                    &space,
                    file_str,
                    language,
                    &cfg.strip_prefix,
                    &mut summaries,
                );
                let Ok(sender) = tx.lock() else {
                    if cfg.warning {
                        eprintln!(
                            "warning: skipping {}: report channel lock poisoned",
                            path.display()
                        );
                    }
                    return Ok(());
                };
                for s in summaries {
                    // A failed send is deliberately ignored.
                    let _ = sender.send(s);
                }
            }
            Ok(())
        }
        Action::Check => {
            // Same gating as `Report`: analysis must succeed, both the
            // threshold set and the sender must be installed, and the
            // pseudo-languages are excluded.
            if let Ok(space) = get_function_spaces_with_options(
                &language,
                source,
                &path,
                pr,
                cfg.metrics_options(),
            ) && let (Some(set), Some(tx)) = (cfg.threshold_set.as_ref(), cfg.check_tx.as_ref())
                && !matches!(language, LANG::Preproc | LANG::Ccomment)
            {
                // Pass the path through as `&Path` so non-UTF-8 bytes
                // are preserved on each emitted `Violation`. Display /
                // offender serialization decide their own lossy
                // strategy at the output boundary; the threshold
                // pipeline itself stays byte-faithful.
                let mut violations = Vec::new();
                set.evaluate_with_policy(&path, &space, cfg.suppression_policy, &mut violations);
                if !violations.is_empty() {
                    let Ok(sender) = tx.lock() else {
                        if cfg.warning {
                            eprintln!(
                                "warning: skipping {}: check channel lock poisoned",
                                path.display()
                            );
                        }
                        return Ok(());
                    };
                    // Receiver lives until `run_check` drains `rx`, which
                    // happens only after `run_walk` joins all worker
                    // threads — so `send` cannot fail here. Use `let _`
                    // rather than `expect` to avoid panicking the worker
                    // pool on the (unreachable) drop path.
                    for v in violations {
                        let _ = sender.send(v);
                    }
                }
            }
            Ok(())
        }
        Action::PreprocProduce => {
            // `cfg.language` is forced to `Preproc` for this action (see
            // `resolve_language`), so the file's real language is guessed
            // again here; only Cpp files are preprocessed.
            if let Some(preproc_lock) = &cfg.preproc_lock
                && let Some(language) = guess_language(&source, &path).0
                && language == LANG::Cpp
            {
                let mut results = preproc_lock.lock().expect("mutex not poisoned");
                preprocess(
                    &PreprocParser::new(source, &path, None),
                    &path,
                    &mut results,
                );
            }
            Ok(())
        }
    }
}
738
739fn process_dir_path(all_files: &mut HashMap<String, Vec<PathBuf>>, path: &Path, cfg: &Config) {
740    if !matches!(cfg.action, Action::PreprocProduce) {
741        return;
742    }
743    let Some(fname) = path.file_name().and_then(|n| n.to_str()) else {
744        return;
745    };
746    let file_name = fname.to_string();
747    match all_files.entry(file_name) {
748        hash_map::Entry::Occupied(l) => {
749            l.into_mut().push(path.to_path_buf());
750        }
751        hash_map::Entry::Vacant(p) => {
752            p.insert(vec![path.to_path_buf()]);
753        }
754    }
755}
756
757fn resolve_language(typ: Option<&str>, action: &Action) -> Option<LANG> {
758    // Force `Preproc` for the producer so `act_on_file`'s "skip
759    // unrecognized" guard never fires — every walked file must reach the
760    // dispatch where the producer runs its own Cpp check.
761    if matches!(action, Action::PreprocProduce) {
762        return Some(LANG::Preproc);
763    }
764    match typ.unwrap_or("") {
765        "" => None,
766        "ccomment" => Some(LANG::Ccomment),
767        "preproc" => Some(LANG::Preproc),
768        other => get_from_ext(other),
769    }
770}
771
772fn resolve_num_jobs(requested: Option<usize>) -> usize {
773    requested.map_or_else(
774        || {
775            std::cmp::max(
776                2,
777                available_parallelism()
778                    .unwrap_or_else(|e| {
779                        die(format_args!("could not get available parallelism: {e}"))
780                    })
781                    .get(),
782            ) - 1
783        },
784        |num_jobs| std::cmp::max(2, num_jobs) - 1,
785    )
786}
787
788/// Load existing preproc JSON for the consumer side. The producer side
789/// (`bca preproc`) builds its own `Mutex<PreprocResults>` directly.
790fn load_preproc_data(path: &Path) -> Arc<PreprocResults> {
791    let data = read_file(path).unwrap_or_else(|e| die_io("read preproc data", path, e));
792    let parsed = serde_json::from_slice::<PreprocResults>(&data)
793        .unwrap_or_else(|e| die_io("parse preproc JSON from", path, e));
794    Arc::new(parsed)
795}
796
797/// Read newline-separated paths from `src` (a path on disk or `-`
798/// for stdin). Skips blank/whitespace-only lines. `die`s on I/O
799/// failure with the failing line number.
800fn read_paths_from(src: &Path) -> Vec<PathBuf> {
801    if src.as_os_str() == "-" {
802        collect_path_lines(std::io::stdin().lock(), "--paths-from -")
803    } else {
804        let label = format!("--paths-from {}", src.display());
805        let f = std::fs::File::open(src).unwrap_or_else(|e| die(format_args!("{label}: {e}")));
806        collect_path_lines(std::io::BufReader::new(f), &label)
807    }
808}
809
810/// Drain `reader` into `PathBuf`s, one per non-blank line. `die`s on
811/// I/O failure, prefixing the message with `label` and the failing
812/// line number so the caller can identify the source.
813fn collect_path_lines<R: std::io::BufRead>(reader: R, label: &str) -> Vec<PathBuf> {
814    reader
815        .lines()
816        .enumerate()
817        .filter_map(|(i, r)| {
818            let line = r.unwrap_or_else(|e| {
819                die(format_args!("{label}: read error on line {}: {e}", i + 1))
820            });
821            let trimmed = line.trim();
822            (!trimmed.is_empty()).then(|| PathBuf::from(trimmed))
823        })
824        .collect()
825}
826
827/// Expand seed paths for the walk: union `--paths` with
828/// `--paths-from`, then for each seed:
829///   - file → keep as-is (explicit override of any ignore rules);
830///   - directory → expand via `ignore::WalkBuilder`, gitignore-aware
831///     unless `no_ignore` is set.
832///
833/// Returns a flat `Vec<PathBuf>` of files. Include/exclude globs are
834/// applied later by `explore()`, matching today's semantics.
835fn expand_seed_paths(
836    paths: Vec<PathBuf>,
837    paths_from: Option<PathBuf>,
838    no_ignore: bool,
839) -> Vec<PathBuf> {
840    use ignore::WalkBuilder;
841    let mut seeds = paths;
842    if let Some(src) = paths_from {
843        seeds.extend(read_paths_from(&src));
844    }
845    let mut out: Vec<PathBuf> = Vec::new();
846    for seed in seeds {
847        if !seed.exists() {
848            // Match today's `explore()` behavior: warn, do not die.
849            eprintln!("Warning: File doesn't exist: {}", seed.display());
850            continue;
851        }
852        if seed.is_file() {
853            out.push(seed);
854            continue;
855        }
856        let mut wb = WalkBuilder::new(&seed);
857        wb.hidden(true)
858            .follow_links(false)
859            .require_git(false)
860            .git_ignore(!no_ignore)
861            .git_exclude(!no_ignore)
862            .git_global(!no_ignore)
863            .ignore(!no_ignore)
864            .parents(!no_ignore);
865        for entry in wb.build() {
866            let entry = entry
867                .unwrap_or_else(|e| die(format_args!("walk error in {}: {e}", seed.display())));
868            if entry.file_type().is_some_and(|t| t.is_file()) {
869                out.push(entry.into_path());
870            }
871        }
872    }
873    out
874}
875
876fn run_walk(globals: GlobalOpts, cfg: Config) -> HashMap<String, Vec<PathBuf>> {
877    let include = mk_globset(globals.include).unwrap_or_else(|e| die(e));
878    let exclude = mk_globset(globals.exclude).unwrap_or_else(|e| die(e));
879    let num_jobs = resolve_num_jobs(globals.num_jobs);
880    let paths = expand_seed_paths(globals.paths, globals.paths_from, globals.no_ignore);
881    let files_data = FilesData {
882        include,
883        exclude,
884        paths,
885    };
886    ConcurrentRunner::new(num_jobs, act_on_file)
887        .set_proc_dir_paths(process_dir_path)
888        .run(cfg, files_data)
889        .unwrap_or_else(|e| die(format_args!("{e:?}")))
890}
891
892/// Load a `[thresholds]` table from `path`, returning the parsed map.
893/// On any I/O or parse error the process dies with exit code 1, keeping
894/// exit 2 reserved for the "thresholds exceeded" case.
895fn load_threshold_config(path: &Path) -> BTreeMap<String, f64> {
896    let bytes = read_file(path).unwrap_or_else(|e| die_io("read threshold config", path, e));
897    let text = std::str::from_utf8(&bytes)
898        .unwrap_or_else(|e| die_io("decode UTF-8 from threshold config", path, e));
899    let cfg: ThresholdConfig =
900        toml::from_str(text).unwrap_or_else(|e| die_io("parse threshold config", path, e));
901    cfg.thresholds
902}
903
904/// Load a baseline file. Same error contract as `load_threshold_config`:
905/// any I/O, UTF-8, or schema error dies with exit code 1.
906fn load_baseline(path: &Path) -> Baseline {
907    let bytes = read_file(path).unwrap_or_else(|e| die_io("read baseline", path, e));
908    let text = std::str::from_utf8(&bytes)
909        .unwrap_or_else(|e| die_io("decode UTF-8 from baseline", path, e));
910    Baseline::from_str(text).unwrap_or_else(|e| die_io("parse baseline", path, e))
911}
912
/// Write `bytes` to `path` atomically: create the parent directory if
/// needed, write to `<path>.bca-tmp`, then rename. Survives a `kill -9`
/// mid-write — the consumer sees either the previous file or the
/// fully-written new file, never a half-written one.
///
/// The suffix is *appended* to the full path rather than replacing the
/// extension, so a user-supplied path like `foo.tmp` does not collide
/// with the temporary file.
///
/// # Errors
///
/// Returns the first I/O error from creating the parent directory, writing
/// the temporary file, or renaming it into place. If the write *or* the
/// rename fails (e.g. disk full, cross-filesystem `EXDEV`, permission
/// denied) the temporary file is removed best-effort before the original
/// error is propagated, so a failed run does not leave a partial
/// `.bca-tmp` file next to the target.
fn write_atomic(path: &Path, bytes: &[u8]) -> std::io::Result<()> {
    if let Some(parent) = path.parent().filter(|p| !p.as_os_str().is_empty()) {
        std::fs::create_dir_all(parent)?;
    }
    let mut tmp = path.as_os_str().to_os_string();
    tmp.push(".bca-tmp");
    let tmp = PathBuf::from(tmp);
    std::fs::write(&tmp, bytes)
        .and_then(|()| std::fs::rename(&tmp, path))
        .inspect_err(|_| {
            // Cleanup is best-effort; the caller already has an error to
            // report, and a failing removal of the leftover .bca-tmp would
            // only obscure it.
            let _ = std::fs::remove_file(&tmp);
        })
}
940
941/// Drive the `check` subcommand: build the threshold set, walk the
942/// source tree, drain violations, and exit 0 / 2 per the contract.
943fn run_check(globals: GlobalOpts, args: CheckArgs, preproc: Option<Arc<PreprocResults>>) {
944    // Validate --output / --output-format pairing before the walk so
945    // a misconfigured invocation fails fast instead of after a full
946    // parse. `--output` without `--output-format` is silently ignored
947    // — only the human stderr stream is emitted, which is the
948    // default contract — to keep the simplest invocation
949    // (`bca check --threshold ... --no-fail > /dev/null`) frictionless.
950    if let Some(fmt) = args.output_format
951        && let Some(ref out) = args.output
952        && out.exists()
953        && out.is_dir()
954    {
955        die(format_args!(
956            "--output must be a file path for `check --output-format {}`",
957            fmt.name()
958        ));
959    }
960
961    let mut merged: BTreeMap<String, f64> = args
962        .config
963        .as_deref()
964        .map(load_threshold_config)
965        .unwrap_or_default();
966    // CLI flags override config values for the same metric name.
967    for (name, limit) in args.thresholds {
968        merged.insert(name, limit);
969    }
970    let set = ThresholdSet::build(&merged).unwrap_or_else(|e| die(e));
971    if set.is_empty() {
972        die("no thresholds configured; pass --threshold or --config");
973    }
974    let set = Arc::new(set);
975
976    let (tx, rx) = std::sync::mpsc::channel();
977    let files_dispatched = Arc::new(AtomicUsize::new(0));
978    let cfg = Config {
979        threshold_set: Some(Arc::clone(&set)),
980        check_tx: Some(Mutex::new(tx)),
981        files_dispatched: Some(Arc::clone(&files_dispatched)),
982        suppression_policy: SuppressionPolicy::from_no_suppress(args.no_suppress),
983        ..Config::new(Action::Check, &globals, preproc)
984    };
985    run_walk(globals, cfg);
986
987    if files_dispatched.load(Ordering::Relaxed) == 0 {
988        // No files survived `--paths` expansion + `--include`/`--exclude`
989        // filtering. Treat this as a tool error (exit 1), not a clean
990        // pass (exit 0): a typo in `--paths` would otherwise silently
991        // green-light CI.
992        die("bca check: no input files matched; check --paths, --include, --exclude");
993    }
994
995    // Workers have all joined by the time `run_walk` returns, so the
996    // sender side is dropped and `rx.into_iter()` terminates cleanly.
997    let mut violations: Vec<Violation> = rx.into_iter().collect();
998    // Stable, deterministic stderr output: by path, then start line, then
999    // metric name. Different runs over the same tree produce identical
1000    // output, which CI diff tooling relies on.
1001    violations.sort_by(|a, b| {
1002        a.path
1003            .cmp(&b.path)
1004            .then(a.start_line.cmp(&b.start_line))
1005            .then(a.metric.cmp(b.metric))
1006    });
1007
1008    if let Some(path) = args.write_baseline {
1009        let file = baseline::from_violations(violations);
1010        let entry_count = file.entries.len();
1011        let text = baseline::render(&file)
1012            .unwrap_or_else(|e| die(format_args!("serialize baseline: {e}")));
1013        write_atomic(&path, text.as_bytes()).unwrap_or_else(|e| die_io("write baseline", &path, e));
1014        eprintln!(
1015            "bca: wrote {entry_count} baseline entries to {}",
1016            path.display()
1017        );
1018        return;
1019    }
1020
1021    let violations: Vec<Violation> = if let Some(path) = args.baseline.as_deref() {
1022        let baseline = load_baseline(path);
1023        let before = violations.len();
1024        let kept: Vec<Violation> = violations
1025            .into_iter()
1026            .filter(|v| !baseline.covers(v))
1027            .collect();
1028        let filtered = before - kept.len();
1029        if filtered > 0 {
1030            eprintln!("bca: filtered {filtered} violations via baseline");
1031        }
1032        kept
1033    } else {
1034        violations
1035    };
1036
1037    // BrokenPipe on stderr (e.g. when piped to `head`) is the only
1038    // realistic write failure here; swallow it rather than die so the
1039    // exit-code contract is honored.
1040    let mut stderr = std::io::stderr().lock();
1041    for v in &violations {
1042        let _ = writeln!(stderr, "{v}");
1043    }
1044
1045    // Emit the aggregated CI/IDE document if requested. Empty input
1046    // produces a well-formed but offender-free document, which CI
1047    // consumers can ingest unchanged on clean runs. The exit-code
1048    // contract below is unaffected by this branch.
1049    let any_violations = !violations.is_empty();
1050    if let Some(fmt) = args.output_format {
1051        let offenders: Vec<_> = violations.into_iter().map(violation_to_offender).collect();
1052        fmt.dump(&offenders, args.output.as_deref())
1053            .unwrap_or_else(|e| die(format_args!("failed to write {}: {e}", fmt.name())));
1054    }
1055
1056    if any_violations && !args.no_fail {
1057        process::exit(2);
1058    }
1059}
1060
1061/// Parse `std::env::args_os()` and execute the selected `bca`
1062/// subcommand. Intended to be called from the `bca` binary's `main`,
1063/// which is a one-liner over this function.
1064///
1065/// # Termination contract
1066///
1067/// This function **may terminate the calling process** rather than
1068/// return. It is not a re-entrant library entry point:
1069///
1070/// - clap argument-parsing failures bubble up through
1071///   [`clap::Error::exit`] (exit 0 on `--help` / `--version`, exit 2
1072///   on usage errors).
1073/// - User-input errors (invalid threshold spec, unreadable preproc
1074///   data, missing `--output` parent directory, walk errors, mutually
1075///   exclusive output-format combinations, broken-pipe writes, etc.)
1076///   call `process::exit(1)` via internal `die` / `die_io` helpers.
1077/// - The `check` subcommand calls `process::exit(2)` when any
1078///   threshold is exceeded, reserving exit 1 for tool errors so CI can
1079///   distinguish "metric regression" from "tool crashed".
1080///
1081/// Hosts that call [`run`] will be torn down on any of those paths
1082/// without unwinding. If you need to drive the same functionality from
1083/// inside another process, use the [`big_code_analysis`] library crate
1084/// directly instead of going through this entry point.
1085pub fn run() {
1086    let cli = match Cli::try_parse() {
1087        Ok(cli) => cli,
1088        Err(err) => {
1089            if matches!(
1090                err.kind(),
1091                clap::error::ErrorKind::UnknownArgument
1092                    | clap::error::ErrorKind::InvalidSubcommand
1093                    | clap::error::ErrorKind::InvalidValue
1094                    | clap::error::ErrorKind::MissingSubcommand
1095                    | clap::error::ErrorKind::DisplayHelpOnMissingArgumentOrSubcommand
1096            ) && let Some(hint) = legacy_hint(std::env::args_os())
1097            {
1098                eprintln!("{hint}");
1099            }
1100            err.exit();
1101        }
1102    };
1103
1104    let preproc = cli
1105        .globals
1106        .preproc_data
1107        .as_ref()
1108        .map(|p| load_preproc_data(p));
1109
1110    match cli.command {
1111        Command::ListMetrics(args) => {
1112            let mut buf = Vec::new();
1113            write_metrics(&mut buf, args.mode).expect("writing to Vec<u8> is infallible");
1114            write_stdout_or_die(&buf);
1115        }
1116        Command::Dump => {
1117            let cfg = Config::new(Action::Dump, &cli.globals, preproc);
1118            run_walk(cli.globals, cfg);
1119        }
1120        Command::Functions => {
1121            let cfg = Config::new(Action::Functions, &cli.globals, preproc);
1122            run_walk(cli.globals, cfg);
1123        }
1124        Command::Metrics(args) => {
1125            if matches!(args.output_format, Some(MetricsFormat::Cbor)) && args.output.is_none() {
1126                die(CBOR_STDOUT_ERROR);
1127            }
1128            if args.output_format.is_some()
1129                && let Some(ref out) = args.output
1130                && out.exists()
1131                && !out.is_dir()
1132            {
1133                die("--output must be a directory for `metrics`");
1134            }
1135            let action = Action::Metrics {
1136                format: args.output_format,
1137                pretty: args.pretty,
1138            };
1139            let cfg = Config {
1140                output: args.output,
1141                ..Config::new(action, &cli.globals, preproc)
1142            };
1143            run_walk(cli.globals, cfg);
1144        }
1145        Command::Ops(args) => {
1146            if matches!(args.output_format, Some(MetricsFormat::Cbor)) && args.output.is_none() {
1147                die(CBOR_STDOUT_ERROR);
1148            }
1149            if let Some(MetricsDispatch::Csv) = args.output_format.map(MetricsFormat::dispatch) {
1150                die(
1151                    "CSV is not supported by `ops` because its column schema is metric-shaped; use `bca metrics --output-format <fmt>`",
1152                );
1153            }
1154            if args.output_format.is_some()
1155                && let Some(ref out) = args.output
1156                && out.exists()
1157                && !out.is_dir()
1158            {
1159                die("--output must be a directory for `ops`");
1160            }
1161            let action = Action::Ops {
1162                format: args.output_format,
1163                pretty: args.pretty,
1164            };
1165            let cfg = Config {
1166                output: args.output,
1167                ..Config::new(action, &cli.globals, preproc)
1168            };
1169            run_walk(cli.globals, cfg);
1170        }
1171        Command::Report(args) => {
1172            if let Some(ref output) = args.output {
1173                if output.exists() && output.is_dir() {
1174                    die("--output must be a file path for `report`");
1175                }
1176                if let Some(parent) = output.parent()
1177                    && !parent.as_os_str().is_empty()
1178                    && !parent.exists()
1179                {
1180                    die(format_args!(
1181                        "parent directory of --output does not exist: {}",
1182                        parent.display()
1183                    ));
1184                }
1185            }
1186            let (tx, rx) = std::sync::mpsc::channel();
1187            let cfg = Config {
1188                markdown_tx: Some(Mutex::new(tx)),
1189                strip_prefix: args.strip_prefix,
1190                ..Config::new(Action::Report, &cli.globals, preproc)
1191            };
1192            run_walk(cli.globals, cfg);
1193
1194            // ConcurrentRunner::run() consumed Config (and thus the Sender).
1195            // All worker threads have joined, so `rx.into_iter()` terminates.
1196            let summaries: Vec<FunctionSummary> = rx.into_iter().collect();
1197            let report = match args.format {
1198                ReportFormat::Markdown => generate_report(&summaries, args.top as usize),
1199                ReportFormat::Html => generate_html_report(&summaries, args.top as usize),
1200            };
1201            if let Some(ref output_path) = args.output {
1202                std::fs::write(output_path, &report)
1203                    .unwrap_or_else(|e| die_io("write report to", output_path, e));
1204            } else {
1205                write_stdout_or_die(report.as_bytes());
1206            }
1207        }
1208        Command::Find(args) => {
1209            let cfg = Config::new(Action::Find(args.nodes.into()), &cli.globals, preproc);
1210            run_walk(cli.globals, cfg);
1211        }
1212        Command::Count(args) => {
1213            let count_lock = Arc::new(Mutex::new(Count::default()));
1214            let cfg = Config {
1215                count_lock: Some(count_lock.clone()),
1216                ..Config::new(Action::Count(args.nodes.into()), &cli.globals, preproc)
1217            };
1218            run_walk(cli.globals, cfg);
1219
1220            let count = Arc::try_unwrap(count_lock)
1221                .expect("all worker threads have joined; Arc refcount is 1")
1222                .into_inner()
1223                .expect("mutex not poisoned");
1224            println!("{count}");
1225        }
1226        Command::StripComments(args) => {
1227            let action = Action::StripComments {
1228                in_place: args.in_place,
1229            };
1230            let cfg = Config::new(action, &cli.globals, preproc);
1231            run_walk(cli.globals, cfg);
1232        }
1233        Command::Check(args) => {
1234            run_check(cli.globals, args, preproc);
1235        }
1236        Command::Preproc(args) => {
1237            let preproc_lock = Arc::new(Mutex::new(PreprocResults::default()));
1238            let output = args.output;
1239            let cfg = Config {
1240                preproc_lock: Some(preproc_lock.clone()),
1241                ..Config::new(Action::PreprocProduce, &cli.globals, None)
1242            };
1243            let all_files = run_walk(cli.globals, cfg);
1244
1245            let mut data = Arc::try_unwrap(preproc_lock)
1246                .expect("all worker threads have joined; Arc refcount is 1")
1247                .into_inner()
1248                .expect("mutex not poisoned");
1249            fix_includes(&mut data.files, &all_files);
1250
1251            let serialized = serde_json::to_string(&data)
1252                .unwrap_or_else(|e| die(format_args!("failed to serialize preproc data: {e}")));
1253            if let Some(output_path) = output {
1254                write_file(&output_path, serialized.as_bytes())
1255                    .unwrap_or_else(|e| die_io("write preproc output to", &output_path, e));
1256            } else {
1257                println!("{serialized}");
1258            }
1259        }
1260    }
1261}
1262
/// Every subcommand name the restructured CLI accepts.
///
/// `legacy_hint` consults this list to decide whether an invocation already
/// targets the new CLI. The list is kept in sync with the `Command` enum by
/// `tests::subcommands_match_command_enum`, which fails if the two drift.
const SUBCOMMANDS: &[&str] = &[
    "metrics",
    "ops",
    "report",
    "dump",
    "find",
    "count",
    "functions",
    "strip-comments",
    "preproc",
    "list-metrics",
    "check",
];
1279
/// Decode the value of `-O <v>` / `--output-format <v>` /
/// `--output-format=<v>` / `-O<v>` / `-O=<v>` from a flat argv slice.
///
/// Returns the first match (callers pre-filter the slice to the legacy
/// invocation's tokens, so a single occurrence is the realistic case), or
/// `None` when no token carries a value:
///
/// - a bare `-O` / `--output-format` takes the *next* token as its value;
///   when it is the last token there is no value;
/// - `--output-format=<v>` yields `<v>` as written (possibly empty);
/// - the glued short forms `-O<v>` and `-O=<v>` yield `<v>`; the optional
///   `=` separator is dropped so `-O=json` reads as `json`, not `=json`.
///   An empty remainder is not a match.
fn parse_output_format_value(args: &[String]) -> Option<&str> {
    args.iter().enumerate().find_map(|(i, a)| {
        let s = a.as_str();
        if s == "-O" || s == "--output-format" {
            args.get(i + 1).map(String::as_str)
        } else if let Some(rest) = s.strip_prefix("--output-format=") {
            Some(rest)
        } else {
            s.strip_prefix("-O")
                .map(|rest| rest.strip_prefix('=').unwrap_or(rest))
                .filter(|rest| !rest.is_empty())
        }
    })
}
1297
1298/// Scan `args` for `-O <offender>` / `--output-format <offender>` /
1299/// `--output-format=<offender>` against the four moved formats (any
1300/// variant of [`AggregatedFormat`]) and build a migration hint
1301/// pointing at `bca check`. Returns `None` when no offender format
1302/// is found, so the caller can fall through to clap's own error.
1303fn offender_format_migration_hint(args: &[String]) -> Option<String> {
1304    let fmt =
1305        parse_output_format_value(args).filter(|f| AggregatedFormat::from_str(f, true).is_ok())?;
1306    Some(format!(
1307        "note: -O {fmt} moved to `bca check` in #235; offender formats are no longer accepted on `bca metrics` / `bca ops`.\n  bca metrics -O {fmt} ...  ->  bca check --threshold <metric>=<limit> --output-format {fmt} [--output FILE]\n  Run `bca check --help` for the threshold and output-format flags.\n"
1308    ))
1309}
1310
1311/// If `argv` looks like an invocation of the pre-restructure CLI, return a
1312/// hint pointing the user at the new equivalent. Called only when clap
1313/// rejects the input, so the goal is to make the failure actionable.
1314///
1315/// The hint is best-effort and conservative: it triggers only on tokens
1316/// that are unambiguously legacy (action flags removed in the rewrite, or
1317/// `-O markdown` whose value no longer exists on `metrics`).
1318fn legacy_hint(argv: impl IntoIterator<Item = OsString>) -> Option<String> {
1319    let args: Vec<String> = argv
1320        .into_iter()
1321        .skip(1) // program name
1322        .filter_map(|s| s.into_string().ok())
1323        .collect();
1324    if args.is_empty() {
1325        return None;
1326    }
1327
1328    // If the user invoked a known new-CLI subcommand, they're not on
1329    // the legacy CLI; stay quiet so we don't second-guess legitimate
1330    // args that happen to look like old flags (e.g. `find --dump`
1331    // where the user intended `--dump` as a positional node-type
1332    // value). The one exception is `bca metrics|ops --output-format
1333    // <offender>` — the four offender formats moved to `bca check`
1334    // (issue #235) and the user still needs a one-line pointer at
1335    // the new home.
1336    if let Some(sub) = args.iter().find(|a| SUBCOMMANDS.contains(&a.as_str())) {
1337        if matches!(sub.as_str(), "metrics" | "ops")
1338            && let Some(hint) = offender_format_migration_hint(&args)
1339        {
1340            return Some(hint);
1341        }
1342        return None;
1343    }
1344
1345    // Action flags removed by the rewrite. Each one is unambiguously legacy.
1346    let action_map: &[(&str, &str)] = &[
1347        ("--metrics", "bca metrics"),
1348        ("-m", "bca metrics"),
1349        ("--ops", "bca ops"),
1350        ("--dump", "bca dump"),
1351        ("-d", "bca dump"),
1352        ("--comments", "bca strip-comments [--in-place]"),
1353        ("--function", "bca functions"),
1354        ("-F", "bca functions"),
1355        ("--find", "bca find <NODE> [<NODE>...]"),
1356        ("-f", "bca find <NODE> [<NODE>...]"),
1357        ("--count", "bca count <NODE> [<NODE>...]"),
1358        ("-C", "bca count <NODE> [<NODE>...]"),
1359        ("--list-metrics", "bca list-metrics [names|descriptions]"),
1360        (
1361            "--preproc",
1362            "bca preproc -o OUT.json  (or --preproc-data on consumers)",
1363        ),
1364    ];
1365
1366    let mut lines: Vec<String> = Vec::new();
1367    let mut saw_legacy_action = false;
1368
1369    for arg in &args {
1370        let head = arg.split('=').next().unwrap_or(arg);
1371        if let Some((_, replacement)) = action_map.iter().find(|(old, _)| *old == head) {
1372            saw_legacy_action = true;
1373            lines.push(format!("  {head}  ->  {replacement}"));
1374        }
1375    }
1376
1377    // -O markdown / --output-format markdown is the canonical legacy form
1378    // for the aggregated report. `markdown` is no longer a valid metrics
1379    // format value, so seeing it here is unambiguous.
1380    let format_value = parse_output_format_value(&args);
1381    if format_value == Some("markdown") {
1382        saw_legacy_action = true;
1383        lines.push(String::from(
1384            "  -O markdown  ->  bca report markdown|html [--top N] [--strip-prefix P]",
1385        ));
1386    } else if let Some(fmt) = format_value
1387        && saw_legacy_action
1388    {
1389        // Only suggest a metrics-format mapping when we already confirmed
1390        // this is a legacy invocation; otherwise `-O json` survives in the
1391        // new CLI and we shouldn't second-guess it.
1392        lines.push(format!("  -O {fmt}  ->  bca metrics -O {fmt}"));
1393    }
1394
1395    if !saw_legacy_action {
1396        return None;
1397    }
1398
1399    let mut hint = String::from(
1400        "note: the CLI was restructured into subcommands. See migration.md for the full mapping.\n",
1401    );
1402    for line in &lines {
1403        hint.push_str(line);
1404        hint.push('\n');
1405    }
1406    hint.push_str("  Run `bca --help` for the new command list.\n");
1407    Some(hint)
1408}
1409
1410#[cfg(test)]
1411#[allow(
1412    clippy::float_cmp,
1413    clippy::cast_precision_loss,
1414    clippy::cast_possible_truncation,
1415    clippy::cast_sign_loss,
1416    clippy::similar_names,
1417    clippy::doc_markdown,
1418    clippy::needless_raw_string_hashes,
1419    clippy::too_many_lines
1420)]
1421mod tests {
1422    use super::*;
1423
1424    fn test_config(action: Action) -> Config {
1425        Config {
1426            action,
1427            output: None,
1428            language: None,
1429            line_start: None,
1430            line_end: None,
1431            preproc_lock: None,
1432            preproc: None,
1433            count_lock: None,
1434            markdown_tx: None,
1435            strip_prefix: String::new(),
1436            threshold_set: None,
1437            check_tx: None,
1438            files_dispatched: None,
1439            suppression_policy: SuppressionPolicy::Honor,
1440            warning: false,
1441            skip_generated: true,
1442            report_skipped: false,
1443            exclude_tests: false,
1444        }
1445    }
1446
1447    #[test]
1448    fn process_dir_path_noop_outside_preproc() {
1449        let cfg = test_config(Action::Dump);
1450        let mut all_files = HashMap::new();
1451        process_dir_path(&mut all_files, Path::new("/some/file.cpp"), &cfg);
1452        assert!(all_files.is_empty());
1453    }
1454
1455    #[test]
1456    fn process_dir_path_inserts_valid_utf8_filename() {
1457        let cfg = test_config(Action::PreprocProduce);
1458        let mut all_files = HashMap::new();
1459        process_dir_path(&mut all_files, Path::new("/some/dir/foo.cpp"), &cfg);
1460        assert_eq!(all_files.len(), 1);
1461        assert_eq!(
1462            all_files["foo.cpp"],
1463            vec![PathBuf::from("/some/dir/foo.cpp")]
1464        );
1465    }
1466
1467    #[test]
1468    fn process_dir_path_groups_duplicate_filenames() {
1469        let cfg = test_config(Action::PreprocProduce);
1470        let mut all_files = HashMap::new();
1471        process_dir_path(&mut all_files, Path::new("/a/foo.cpp"), &cfg);
1472        process_dir_path(&mut all_files, Path::new("/b/foo.cpp"), &cfg);
1473        assert_eq!(all_files.len(), 1);
1474        assert_eq!(
1475            all_files["foo.cpp"],
1476            vec![PathBuf::from("/a/foo.cpp"), PathBuf::from("/b/foo.cpp")]
1477        );
1478    }
1479
/// A file name that is not valid UTF-8 must not be recorded. Unix-only
/// because the test builds the name from raw bytes via `OsStrExt`.
#[cfg(unix)]
#[test]
fn process_dir_path_skips_non_utf8_filename() {
    use std::ffi::OsStr;
    use std::os::unix::ffi::OsStrExt;

    // 0xFF 0xFE is not a valid UTF-8 sequence.
    let mut path = PathBuf::from("/some/dir");
    path.push(OsStr::from_bytes(b"\xff\xfe"));

    let mut by_name = HashMap::new();
    let produce_cfg = test_config(Action::PreprocProduce);
    process_dir_path(&mut by_name, &path, &produce_cfg);

    assert!(by_name.is_empty());
}
1493
1494    // CLI parsing tests. The shape is now subcommand-driven, so these
1495    // exercise the shape of the top-level parser, not the legacy flag
1496    // mutual-exclusion rules.
1497
1498    fn parse(args: &[&str]) -> clap::error::Result<Cli> {
1499        Cli::try_parse_from(std::iter::once(&"cli").chain(args.iter()))
1500    }
1501
/// An empty argument list must not parse.
#[test]
fn no_subcommand_prints_help() {
    // arg_required_else_help: no args -> clap prints help and exits.
    // Only the failure is checked here (either DisplayHelp or
    // MissingSubcommand), not the specific error kind.
    let outcome = parse(&[]);
    assert!(outcome.is_err());
}
1508
/// `metrics` with no further arguments is accepted.
#[test]
fn metrics_alone_parses() {
    let outcome = parse(&["metrics"]);
    assert!(outcome.is_ok());
}
1513
/// `metrics -O json` is accepted.
#[test]
fn metrics_with_format_parses() {
    let argv = ["metrics", "-O", "json"];
    assert!(parse(&argv).is_ok());
}
1518
1519    // Offender formats (Checkstyle, SARIF, clang-warning,
1520    // msvc-warning) moved from `bca metrics` to
1521    // `bca check --output-format` in issue #235. `MetricsFormat` no
1522    // longer enumerates them, so clap rejects them at parse time on
1523    // `metrics` and `ops`.
#[test]
fn metrics_rejects_checkstyle_format() {
    // `checkstyle` is an offender format; `metrics` must refuse it.
    let argv = ["metrics", "-O", "checkstyle"];
    assert!(parse(&argv).is_err());
}
1528
#[test]
fn metrics_rejects_sarif_format() {
    // `sarif` is an offender format; `metrics` must refuse it.
    let argv = ["metrics", "-O", "sarif"];
    assert!(parse(&argv).is_err());
}
1533
#[test]
fn metrics_rejects_clang_warning_format() {
    // `clang-warning` is an offender format; `metrics` must refuse it.
    let argv = ["metrics", "-O", "clang-warning"];
    assert!(parse(&argv).is_err());
}
1538
#[test]
fn metrics_rejects_msvc_warning_format() {
    // `msvc-warning` is an offender format; `metrics` must refuse it.
    let argv = ["metrics", "-O", "msvc-warning"];
    assert!(parse(&argv).is_err());
}
1543
/// `check` accepts `sarif` through the short `-O` spelling.
#[test]
fn check_accepts_sarif_output_format() {
    let argv = ["check", "--threshold", "cyclomatic=10", "-O", "sarif"];
    assert!(parse(&argv).is_ok());
}
1548
/// `check` accepts `checkstyle` through the long `--output-format`
/// spelling.
#[test]
fn check_accepts_checkstyle_output_format() {
    let argv = [
        "check",
        "--threshold",
        "cyclomatic=10",
        "--output-format",
        "checkstyle",
    ];
    let outcome = parse(&argv);
    assert!(outcome.is_ok());
}
1562
#[test]
fn check_rejects_per_file_format_as_output_format() {
    // Per-file formats (json, csv, ...) live on `bca metrics`;
    // `bca check` only accepts the four offender formats.
    let argv = [
        "check",
        "--threshold",
        "cyclomatic=10",
        "--output-format",
        "json",
    ];
    let outcome = parse(&argv);
    assert!(outcome.is_err());
}
1578
1579    // Note: runtime rejection of `ops -O csv` is covered by
1580    // `ops_rejects_csv_format_at_runtime` in
1581    // tests/action_enforcement.rs, which spawns the binary so the
1582    // dispatcher's die() can be observed.
1583
#[test]
fn metrics_rejects_markdown_format() {
    // ReportFormat::Markdown is not in MetricsFormat by construction.
    let argv = ["metrics", "-O", "markdown"];
    assert!(parse(&argv).is_err());
}
1589
#[test]
fn metrics_rejects_top_flag() {
    // --top lives only on `report`.
    let argv = ["metrics", "--top", "5"];
    assert!(parse(&argv).is_err());
}
1595
/// `metrics` does not accept `--strip-prefix`.
#[test]
fn metrics_rejects_strip_prefix_flag() {
    let argv = ["metrics", "--strip-prefix", "/x"];
    assert!(parse(&argv).is_err());
}
1600
/// `report markdown` is accepted.
#[test]
fn report_markdown_parses() {
    let outcome = parse(&["report", "markdown"]);
    assert!(outcome.is_ok());
}
1605
#[test]
fn report_html_parses() {
    // Check the parsed variant, not just success, so that a future
    // alias / value-rename mapping `html` to `Markdown` fails here.
    let cli = parse(&["report", "html"]).expect("`report html` parses");
    let args = match cli.command {
        Command::Report(args) => args,
        other => panic!("expected Command::Report, got {other:?}"),
    };
    assert_eq!(args.format, ReportFormat::Html);
}
1616
/// `report` without a format argument is rejected.
#[test]
fn report_requires_format() {
    let outcome = parse(&["report"]);
    assert!(outcome.is_err());
}
1621
/// `report markdown` accepts `--top` and `--strip-prefix` together.
#[test]
fn report_with_top_and_strip_prefix() {
    let argv = ["report", "markdown", "--top", "10", "--strip-prefix", "/x/"];
    let outcome = parse(&argv);
    assert!(outcome.is_ok());
}
1626
/// `report html --top 10 --strip-prefix /x/` parses, and each value
/// lands in the matching field of the report arguments.
#[test]
fn report_html_with_top_and_strip_prefix() {
    let argv = ["report", "html", "--top", "10", "--strip-prefix", "/x/"];
    let cli = parse(&argv).expect("flags parse");
    let args = match cli.command {
        Command::Report(args) => args,
        other => panic!("expected Command::Report, got {other:?}"),
    };
    assert_eq!(args.format, ReportFormat::Html);
    assert_eq!(args.top, 10);
    assert_eq!(args.strip_prefix, "/x/");
}
1640
/// `--top 0` is rejected for the markdown report.
#[test]
fn report_top_zero_rejected() {
    let argv = ["report", "markdown", "--top", "0"];
    assert!(parse(&argv).is_err());
}
1645
/// `--top 0` is rejected for the HTML report.
#[test]
fn report_html_top_zero_rejected() {
    let argv = ["report", "html", "--top", "0"];
    assert!(parse(&argv).is_err());
}
1650
/// `ops -O json` is accepted.
#[test]
fn ops_parses() {
    let argv = ["ops", "-O", "json"];
    assert!(parse(&argv).is_ok());
}
1655
/// `dump` with no further arguments is accepted.
#[test]
fn dump_parses() {
    let outcome = parse(&["dump"]);
    assert!(outcome.is_ok());
}
1660
/// `find` is rejected without an argument and accepted with one.
#[test]
fn find_requires_a_node() {
    let without_node = parse(&["find"]);
    assert!(without_node.is_err());

    let with_node = parse(&["find", "call_expression"]);
    assert!(with_node.is_ok());
}
1666
/// `count` is rejected without an argument and accepted with one.
#[test]
fn count_requires_a_node() {
    let without_node = parse(&["count"]);
    assert!(without_node.is_err());

    let with_node = parse(&["count", "if_statement"]);
    assert!(with_node.is_ok());
}
1672
/// `functions` with no further arguments is accepted.
#[test]
fn functions_parses() {
    let outcome = parse(&["functions"]);
    assert!(outcome.is_ok());
}
1677
/// `strip-comments` is accepted both bare and with `--in-place`.
#[test]
fn strip_comments_parses() {
    let bare = parse(&["strip-comments"]);
    assert!(bare.is_ok());

    let in_place = parse(&["strip-comments", "--in-place"]);
    assert!(in_place.is_ok());
}
1683
/// `preproc` is accepted both bare and with `-o <path>`.
#[test]
fn preproc_parses() {
    let bare = parse(&["preproc"]);
    assert!(bare.is_ok());

    let with_output = parse(&["preproc", "-o", "/tmp/x.json"]);
    assert!(with_output.is_ok());
}
1689
/// `list-metrics` parses to the `Command::ListMetrics` variant.
#[test]
fn list_metrics_parses() {
    let cli = parse(&["list-metrics"]).expect("parses");
    let is_list_metrics = matches!(cli.command, Command::ListMetrics(_));
    assert!(is_list_metrics);
}
1695
/// `list-metrics descriptions` parses to `Command::ListMetrics` with
/// the mode set to `ListMetricsMode::Descriptions`.
#[test]
fn list_metrics_with_descriptions() {
    let cli = parse(&["list-metrics", "descriptions"]).expect("parses");
    let args = match cli.command {
        Command::ListMetrics(args) => args,
        // Print the variant that was parsed, as the `report_*` tests
        // do, so a failure shows where the input was routed.
        other => panic!("expected Command::ListMetrics, got {other:?}"),
    };
    assert_eq!(args.mode, ListMetricsMode::Descriptions);
}
1704
/// An unrecognised `list-metrics` mode is rejected.
#[test]
fn list_metrics_invalid_mode_rejected() {
    let argv = ["list-metrics", "bogus"];
    assert!(parse(&argv).is_err());
}
1709
/// `--paths` is accepted on either side of the subcommand name.
#[test]
fn global_paths_works_before_or_after_subcommand() {
    let flag_first = parse(&["--paths", "x", "metrics"]);
    assert!(flag_first.is_ok());

    let flag_last = parse(&["metrics", "--paths", "x"]);
    assert!(flag_last.is_ok());
}
1715
/// Converts a slice of string literals into owned `OsString`s,
/// preserving order.
fn os_args(args: &[&str]) -> Vec<OsString> {
    args.iter().copied().map(OsString::from).collect()
}
1719
/// Legacy `--metrics -O markdown` yields a hint that mentions both
/// `report markdown` and the `--metrics` flag.
#[test]
fn legacy_hint_recognizes_old_metrics() {
    let argv = os_args(&["cli", "--metrics", "-O", "markdown"]);
    let hint = legacy_hint(argv).expect("hint");
    for needle in ["report markdown", "--metrics"] {
        assert!(hint.contains(needle), "{hint}");
    }
}
1726
#[test]
fn legacy_hint_recognizes_output_format_json_with_legacy_action() {
    // `--output-format json` alongside the legacy `-m` action flag is
    // unambiguously legacy and should map to `bca metrics -O json`.
    let argv = os_args(&["cli", "-m", "--output-format", "json"]);
    let hint = legacy_hint(argv).expect("hint");
    assert!(hint.contains("metrics -O json"), "{hint}");
}
1734
#[test]
fn legacy_hint_returns_none_for_clean_args() {
    // Valid new-CLI args that just happen to also contain `-O` should
    // not trigger a legacy hint.
    let hint = legacy_hint(os_args(&["cli", "metrics", "-O", "json"]));
    // Print the unexpected hint on failure, as the sibling
    // `legacy_hint_quiet_*` test does.
    assert!(hint.is_none(), "{hint:?}");
}
1742
/// With only the program name present there is nothing to hint about.
#[test]
fn legacy_hint_returns_none_for_no_args() {
    let hint = legacy_hint(os_args(&["cli"]));
    // Print the unexpected hint on failure.
    assert!(hint.is_none(), "{hint:?}");
}
1748
#[test]
fn legacy_hint_recognizes_dash_o_markdown_alone() {
    // `-O markdown` is unambiguously legacy: markdown is not a
    // MetricsFormat value, so this pattern can only have come from
    // the pre-restructure CLI.
    let argv = os_args(&["cli", "-O", "markdown"]);
    let hint = legacy_hint(argv).expect("hint");
    assert!(hint.contains("report markdown"), "{hint}");
}
1757
#[test]
fn legacy_hint_redirects_metrics_offender_format_to_check() {
    // Issue #235: `bca metrics -O sarif` is no longer valid because
    // the offender formats now live on `bca check`. The hint should
    // point at the new home.
    let argv = os_args(&["cli", "metrics", "-O", "sarif"]);
    let hint = legacy_hint(argv).expect("hint");
    for needle in ["bca check", "sarif"] {
        assert!(hint.contains(needle), "{hint}");
    }
}
1767
/// The long `--output-format checkstyle` spelling on `metrics` also
/// produces a hint mentioning `bca check` and the format name.
#[test]
fn legacy_hint_redirects_metrics_checkstyle_long_form() {
    let argv = os_args(&["cli", "metrics", "--output-format", "checkstyle"]);
    let hint = legacy_hint(argv).expect("hint");
    for needle in ["bca check", "checkstyle"] {
        assert!(hint.contains(needle), "{hint}");
    }
}
1780
#[test]
fn legacy_hint_redirects_ops_offender_format_to_check() {
    // Same migration story for `bca ops -O <offender>`.
    let argv = os_args(&["cli", "ops", "-O", "clang-warning"]);
    let hint = legacy_hint(argv).expect("hint");
    for needle in ["bca check", "clang-warning"] {
        assert!(hint.contains(needle), "{hint}");
    }
}
1788
#[test]
fn legacy_hint_quiet_for_metrics_with_per_file_format() {
    // `bca metrics -O json` is still valid, so no hint should fire.
    let argv = os_args(&["cli", "metrics", "-O", "json"]);
    let hint = legacy_hint(argv);
    assert!(hint.is_none(), "{hint:?}");
}
1795
#[test]
fn legacy_hint_quiet_when_user_invoked_known_subcommand() {
    // `bca find --dump`: the user wants `--dump` as a positional node
    // type, not a legacy flag. Presence of a known subcommand (`find`)
    // suppresses the hint; clap's own "to pass '--dump' as a value,
    // use '-- --dump'" tip remains the right guidance.
    let hint = legacy_hint(os_args(&["cli", "find", "--dump"]));
    // Print the unexpected hint on failure.
    assert!(hint.is_none(), "{hint:?}");
}
1805
#[test]
fn legacy_hint_recognizes_dash_d() {
    // -d was the short form of --dump in the legacy CLI.
    let argv = os_args(&["cli", "-d", "--paths", "."]);
    let hint = legacy_hint(argv).expect("hint");
    assert!(hint.contains("bca dump"), "{hint}");
}
1812
/// Sanity check: building the `clap::Command` for `Cli` and running
/// clap's `debug_assert` on it must not panic. Catches misconfigured
/// derive attributes (e.g., conflicting short flags) at test time.
#[test]
fn cli_is_well_formed() {
    let command = <Cli as clap::CommandFactory>::command();
    command.debug_assert();
}
1820
/// The `SUBCOMMANDS` const (which `legacy_hint` uses to gate the
/// migration message) has to name every variant of the `Command` enum.
/// If a verb is added to `Command` without updating the const,
/// `legacy_hint` will false-positive on that verb's arguments.
#[test]
fn subcommands_match_command_enum() {
    use clap::CommandFactory;
    use std::collections::HashSet;

    let command = Cli::command();
    let from_clap: HashSet<String> = command
        .get_subcommands()
        .map(|sub| sub.get_name())
        .filter(|name| *name != "help") // clap auto-generates `help`
        .map(str::to_owned)
        .collect();
    let from_const: HashSet<String> = SUBCOMMANDS
        .iter()
        .map(|name| (*name).to_string())
        .collect();

    // Both differences are computed up front so the failure message
    // can name the drift in each direction.
    let missing: Vec<_> = from_clap.difference(&from_const).collect();
    let extra: Vec<_> = from_const.difference(&from_clap).collect();
    assert_eq!(
        from_clap,
        from_const,
        "SUBCOMMANDS const drifted from Command enum: \
         missing from const = {missing:?}, missing from enum = {extra:?}",
    );
}
1844}