big_code_analysis_cli/
lib.rs

1//! Library surface for the `bca` CLI.
2//!
3//! Exists so the workspace `xtask` crate can render man pages from the
4//! same `clap::Command` tree that `bca` parses at runtime — the binary
5//! `main` is a one-liner that delegates to [`run`].
6//!
7//! # Embedder contract
8//!
9//! This crate is published to crates.io to support man-page generation
10//! and to keep the binary's `main` trivial; it is **not** a re-entrant
11//! library API. [`run`] and the internal helpers it calls
12//! (`die` / `die_io`, `run_check`, etc.) terminate the calling process
13//! via [`std::process::exit`] on user-input errors (bad threshold
14//! specs, missing paths, parser failures, broken pipes, and so on)
15//! and on the `check` subcommand's "thresholds exceeded" exit-2 path.
16//! Hosting [`run`] inside another process will tear that process down
17//! without unwinding. If you need a re-entrant entry point, drive the
18//! [`big_code_analysis`] library crate directly.
19
20#![allow(
21    clippy::too_many_lines,
22    clippy::struct_excessive_bools,
23    clippy::similar_names,
24    clippy::needless_pass_by_value,
25    // `run` panics on a handful of provably-unreachable invariants
26    // (mutex poisoning where every worker thread has joined, channel
27    // sends after run_walk returns). Each one is documented at the
28    // call site with an `expect` reason — surfacing them in a `# Panics`
29    // section on the entry point adds noise without adding signal.
30    clippy::missing_panics_doc
31)]
32mod baseline;
33mod check_format;
34mod format_util;
35mod formats;
36mod html_report;
37mod markdown_report;
38mod metric_catalog;
39mod thresholds;
40
41use std::collections::{BTreeMap, HashMap, hash_map};
42use std::ffi::OsString;
43use std::fmt::Display;
44use std::io::{ErrorKind, Write};
45use std::path::{Path, PathBuf};
46use std::process;
47use std::sync::atomic::{AtomicUsize, Ordering};
48use std::sync::{Arc, Mutex};
49use std::thread::available_parallelism;
50
51use clap::{Args, Parser, Subcommand, ValueEnum};
52use globset::{Glob, GlobSet, GlobSetBuilder};
53
54use baseline::Baseline;
55use check_format::{AggregatedFormat, violation_to_offender};
56use formats::{CBOR_STDOUT_ERROR, MetricsDispatch, MetricsFormat, ReportFormat, dump_csv};
57use html_report::generate_html_report;
58use markdown_report::{FunctionSummary, extract_summaries, generate_report};
59use metric_catalog::{ListMetricsMode, write_metrics};
60use thresholds::{ThresholdConfig, ThresholdSet, Violation, parse_cli_threshold};
61
62use big_code_analysis::LANG;
63use big_code_analysis::ParserTrait;
64
/// `expect` message used at every `action::<_>` call site below.
///
/// The CLI pins `big-code-analysis` with `features = ["all-languages"]`,
/// so a `LANG` value that reached this point must be enabled at compile
/// time. Any future caller that loosens the feature pin must change
/// this invariant explicitly.
///
/// Because the text is handed to `expect`, it is also what the panic
/// message shows if that invariant is ever violated.
const FEATURES_PINNED: &str = "CLI pins big-code-analysis features = [\"all-languages\"]";
72use big_code_analysis::{
73    CommentRm, CommentRmCfg, ConcurrentRunner, Count, CountCfg, Dump, DumpCfg, FilesData, Find,
74    FindCfg, Function, FunctionCfg, Metrics, MetricsCfg, MetricsOptions, OpsCfg, OpsCode,
75    PreprocParser, PreprocResults, SuppressionPolicy,
76};
77// The CLI is the canonical path-based caller: `bca` walks a tree on
78// disk and naturally has a `&Path` for every file it processes, so
79// the deprecated path-positional shims (`get_function_spaces_with_options`)
80// are still the most direct entry point here. Migration to the new
81// `Source` / `analyze` API tracks issue #254's follow-up; for now,
82// scope the deprecation lint to this single import to keep the rest
83// of the file clean.
84#[allow(deprecated)]
85use big_code_analysis::get_function_spaces_with_options;
86use big_code_analysis::{
87    action, fix_includes, get_from_ext, get_ops, guess_language, is_generated, preprocess,
88    read_file, read_file_with_eol, write_file,
89};
90
/// Report a fatal error and terminate the process.
///
/// Writes `Error: <msg>` to stderr and then exits with status 1, so
/// this function never returns to its caller.
fn die(msg: impl Display) -> ! {
    // Exit status shared by every fatal-error path that goes through `die`.
    const FAILURE_STATUS: i32 = 1;

    eprintln!("Error: {msg}");
    process::exit(FAILURE_STATUS)
}
95
96/// Die with `failed to <verb> <path>: <err>`. Centralizes the most common
97/// I/O error shape: open/read/parse/write of a user-supplied path that
98/// failed with an error implementing `Display`.
99fn die_io(verb: &str, path: &Path, err: impl Display) -> ! {
100    die(format_args!("failed to {verb} {}: {err}", path.display()))
101}
102
103/// Write `bytes` to stdout, tolerating `BrokenPipe` (the typical case when
104/// the consumer is `head`, `less`, etc.) and `die`ing on anything else.
105fn write_stdout_or_die(bytes: &[u8]) {
106    if let Err(e) = std::io::stdout().lock().write_all(bytes)
107        && e.kind() != ErrorKind::BrokenPipe
108    {
109        die(e);
110    }
111}
112
/// Analyze source code.
//
// Single-line doc-comment kept in sync with the `about = "..."` attribute
// below — clap promotes a doc-comment to `long_about`, which clap-mangen
// renders into the manpage DESCRIPTION. The embedder contract for this
// crate (which is why `Cli` is `pub` at all) lives in the crate-level
// `//!` docs above, not here.
#[derive(Parser, Debug)]
#[clap(
    name = "bca",
    version,
    author,
    about = "Analyze source code.",
    subcommand_required = true,
    arg_required_else_help = true,
    after_help = "Migrating from the flag-style CLI? See the migration guide:\n  big-code-analysis-book/src/migration.md"
)]
pub struct Cli {
    // Options shared by every subcommand, flattened into the top-level
    // argument list instead of living under their own group.
    #[clap(flatten)]
    globals: GlobalOpts,
    // The subcommand to run. `subcommand_required = true` above makes
    // clap reject an invocation that omits it.
    #[command(subcommand)]
    command: Command,
}
136
// Options that apply to every subcommand. Each argument below is
// declared with `global = true`, which asks clap to propagate it to the
// subcommands as well as the top-level parser.
//
// The `///` text on each field is consumed by clap as that flag's help
// output, so rewording a doc-comment here is a user-visible change.
// Maintainer-only notes belong in plain `//` comments like this one.
#[derive(Args, Debug, Default)]
struct GlobalOpts {
    /// Input files or directories to analyze.
    #[clap(long, short, value_parser, global = true)]
    paths: Vec<PathBuf>,
    /// Glob to include files.
    #[clap(long, short = 'I', num_args(0..), global = true)]
    include: Vec<String>,
    /// Glob to exclude files.
    #[clap(long, short = 'X', num_args(0..), global = true)]
    exclude: Vec<String>,
    /// Number of jobs.
    #[clap(long, short = 'j', global = true)]
    num_jobs: Option<usize>,
    /// Force a language type instead of inferring from extension.
    #[clap(long, short = 'l', global = true)]
    language_type: Option<String>,
    /// Line start (used by `dump` and `find`).
    #[clap(long = "ls", global = true)]
    line_start: Option<usize>,
    /// Line end (used by `dump` and `find`).
    #[clap(long = "le", global = true)]
    line_end: Option<usize>,
    /// Print warnings (skipped files, unrecognized languages).
    #[clap(long, short, global = true)]
    warning: bool,
    /// Disable auto-skip of files marked as generated (e.g. `@generated`,
    /// `DO NOT EDIT`, `GENERATED CODE` near the top). By default the CLI
    /// skips such files so generated bindings do not skew metrics.
    #[clap(long, global = true)]
    no_skip_generated: bool,
    /// Log a "skipped (generated): <path>" line to stderr for each file
    /// auto-skipped by the generated-code detector. Useful for auditing
    /// which files were excluded.
    #[clap(long, global = true)]
    report_skipped: bool,
    /// Existing preprocessor-data JSON to consume during C/C++ analysis.
    /// Use `bca preproc` to produce one.
    #[clap(long, value_parser, global = true)]
    preproc_data: Option<PathBuf>,
    /// Read newline-separated input paths from a file. Use `-` to read
    /// from stdin. Combined as a union with any `--paths` values; globs
    /// still apply. Blank lines are skipped; `#` is treated as a path
    /// character (not a comment). To pass a file literally named `-`,
    /// use `./-`.
    #[clap(long = "paths-from", value_parser, global = true)]
    paths_from: Option<PathBuf>,
    /// Read additional `--exclude` glob patterns from a file (one per
    /// line, `.gitignore`-style). Blank lines and lines whose first
    /// non-whitespace character is `#` are skipped. Use `-` to read
    /// from stdin; to pass a file literally named `-`, use `./-`.
    /// Patterns are unioned with any `--exclude` values into a single
    /// deny-set; order does not matter. Convention is a `.bcaignore`
    /// at the repo root, mirroring `.gitignore` / `.dockerignore`.
    #[clap(long = "exclude-from", value_parser, global = true)]
    exclude_from: Option<PathBuf>,
    /// Disable `.gitignore` / `.ignore` / global gitignore awareness
    /// when expanding directory seeds. Explicit file paths are always
    /// honored regardless of this flag.
    #[clap(long = "no-ignore", global = true)]
    no_ignore: bool,
    /// Exclude inline test code from metric computation. Currently
    /// applies to Rust only (skips `#[test]`, `#[cfg(test)]`,
    /// `#[tokio::test]`, `#[rstest]`, `#![cfg(test)]` items and
    /// their subtrees). Default is off — every node is counted, so
    /// numbers match the pre-#182 behaviour byte-for-byte. Languages
    /// without a `Checker::should_skip_subtree` override ignore this
    /// flag.
    #[clap(long = "exclude-tests", global = true)]
    exclude_tests: bool,
}
208
// One variant per `bca` subcommand. Variants with a payload carry that
// subcommand's own arguments; `Dump` and `Functions` take nothing
// beyond the global options. The `///` text on each variant is the
// description clap shows for the subcommand, so it is user-visible.
#[derive(Subcommand, Debug)]
enum Command {
    /// Compute per-file metrics and emit them in a structured format.
    Metrics(StructuredArgs),
    /// Extract per-file operands and operators.
    Ops(StructuredArgs),
    /// Generate an aggregated report across the analyzed source.
    Report(ReportArgs),
    /// Dump the AST to stdout.
    Dump,
    /// Find nodes of one or more types.
    Find(NodesArgs),
    /// Count nodes of one or more types.
    Count(NodesArgs),
    /// List functions/methods and their spans.
    Functions,
    /// Remove comments from source files.
    StripComments(StripCommentsArgs),
    /// Generate preprocessor-data JSON for C/C++ analysis.
    Preproc(PreprocArgs),
    /// List the metrics this tool can compute and exit.
    ListMetrics(ListMetricsArgs),
    /// Check per-function metrics against thresholds. Exits 2 when any
    /// threshold is exceeded; reserve exit 1 for tool errors so CI can
    /// distinguish "metric regression" from "tool crashed".
    Check(CheckArgs),
}
236
/// Shared shape for `metrics` and `ops`: same format set, same output
/// semantics (directory of per-file emissions; stdout if omitted).
#[derive(Args, Debug)]
struct StructuredArgs {
    /// Output format.
    // Optional: `None` means no `--output-format` was given.
    // NOTE(review): presumably forwarded into `Action::Metrics` /
    // `Action::Ops` as `format` — confirm against `run`.
    #[clap(long, short = 'O', value_enum)]
    output_format: Option<MetricsFormat>,
    /// Output directory. Filenames mirror input paths plus the format
    /// extension. Stdout if omitted (CBOR requires this flag).
    #[clap(long, short, value_parser)]
    output: Option<PathBuf>,
    /// Pretty-print JSON / TOML output.
    #[clap(long)]
    pretty: bool,
}
252
// Arguments for the `report` subcommand (`Command::Report`).
#[derive(Args, Debug)]
struct ReportArgs {
    /// Report format.
    // Positional and mandatory: no `long`/`short`, and no default.
    #[clap(value_enum)]
    format: ReportFormat,
    /// Output file. Stdout if omitted.
    #[clap(long, short, value_parser)]
    output: Option<PathBuf>,
    /// Maximum number of entries per hotspot table.
    // `range(1..)` makes clap reject `--top 0` at parse time.
    #[clap(long, default_value_t = 20, value_parser = clap::value_parser!(u32).range(1..))]
    top: u32,
    /// Path prefix to strip from displayed file paths.
    // Defaults to the empty string when the flag is not given.
    #[clap(long, default_value = "")]
    strip_prefix: String,
}
268
// Arguments shared by the `find` and `count` subcommands
// (`Command::Find` / `Command::Count`).
#[derive(Args, Debug)]
struct NodesArgs {
    /// Node-type names. Pass one or more, space-separated.
    // Positional; `required = true` with `num_args = 1..` means at least
    // one name must be supplied.
    #[clap(required = true, num_args = 1..)]
    nodes: Vec<String>,
}
275
// Arguments for the `strip-comments` subcommand
// (`Command::StripComments`).
#[derive(Args, Debug)]
struct StripCommentsArgs {
    /// Rewrite each input file in place instead of writing to stdout.
    #[clap(long)]
    in_place: bool,
}
282
// Arguments for the `preproc` subcommand (`Command::Preproc`).
#[derive(Args, Debug)]
struct PreprocArgs {
    /// Output JSON file. Stdout if omitted.
    #[clap(long, short, value_parser)]
    output: Option<PathBuf>,
}
289
// Arguments for the `check` subcommand (`Command::Check`). The `///`
// text on each field is rendered by clap as help output, so wording
// changes there are user-visible.
#[derive(Args, Debug)]
struct CheckArgs {
    /// Threshold expressed as `<metric>=<limit>`. Repeatable. Metric
    /// names match `bca list-metrics`; sub-metrics use a dotted form
    /// (e.g. `loc.lloc`, `halstead.volume`). CLI flags override values
    /// from `--config`. Limits must be finite and non-negative; `0` is
    /// allowed and means "no value permitted".
    // Each occurrence is parsed by `parse_cli_threshold` into one pair
    // (presumably metric name, then limit — confirm in `thresholds`).
    #[clap(long = "threshold", value_parser = parse_cli_threshold)]
    thresholds: Vec<(String, f64)>,
    /// Path to a TOML config with a `[thresholds]` table:
    ///
    /// ```toml
    /// [thresholds]
    /// cyclomatic = 15
    /// "loc.lloc" = 200
    /// ```
    #[clap(long, value_parser)]
    config: Option<PathBuf>,
    /// Print offenders to stderr but exit 0 even when thresholds are
    /// exceeded. Useful while adopting baselines without flipping CI red.
    /// Default: exit 2 when any threshold is exceeded.
    #[clap(long = "no-fail")]
    no_fail: bool,
    /// Ignore in-source suppression markers (`bca: suppress`,
    /// `#lizard forgives`, etc.). Every threshold violation is
    /// reported regardless of comment-based silencers. CI auditors
    /// pass this to see the raw, un-silenced offender list.
    #[clap(long = "no-suppress")]
    no_suppress: bool,
    /// CI/IDE document format for offender records (Checkstyle 4.3 XML,
    /// SARIF 2.1.0 JSON, clang/GCC warning lines, MSVC warning lines).
    /// When omitted, only the human-readable stderr stream is emitted;
    /// the exit-code contract is unaffected.
    #[clap(long = "output-format", short = 'O', value_enum)]
    output_format: Option<AggregatedFormat>,
    /// File path for the aggregated offender document. Stdout if omitted.
    /// Only meaningful together with `--output-format`. Parent
    /// directories are created on demand.
    #[clap(long, short, value_parser)]
    output: Option<PathBuf>,
    /// Filter known offenders listed in this TOML baseline. A baselined
    /// function whose metric value has not worsened is suppressed; a
    /// worsened value (or any new offender) still fails. See the
    /// "Baselines" recipe in the book for the full adoption flow.
    // Mutually exclusive with `--write-baseline`; the conflict is
    // declared on both fields.
    #[clap(long = "baseline", value_parser, conflicts_with = "write_baseline")]
    baseline: Option<PathBuf>,
    /// Walk the tree and write the current offender set to this path
    /// instead of failing. The resulting file pins today's metric
    /// values as the baseline; subsequent `--baseline <path>` runs
    /// ratchet down from there. Conflicts with `--baseline`,
    /// `--output-format`, and `--output` — the baseline file is the
    /// output.
    #[clap(
        long = "write-baseline",
        value_parser,
        conflicts_with_all = ["baseline", "output_format", "output"],
    )]
    write_baseline: Option<PathBuf>,
}
349
// Arguments for the `list-metrics` subcommand (`Command::ListMetrics`).
#[derive(Args, Debug)]
struct ListMetricsArgs {
    /// What to print: `names` (one per line) or `descriptions`
    /// (name + one-line summary).
    // Positional and optional; falls back to `ListMetricsMode::Names`.
    #[clap(value_enum, default_value_t = ListMetricsMode::Names)]
    mode: ListMetricsMode,
}
357
/// What `act_on_file` should do per file. Drives the inner dispatch and
/// replaces the prior cluster of mutually-exclusive bool flags.
#[derive(Debug)]
enum Action {
    /// Runs `action::<Dump>`, forwarding `Config::line_start` and
    /// `Config::line_end` through `DumpCfg`.
    Dump,
    /// Computes per-file metrics. With a `format`, the function-space
    /// tree is serialized through that format (`pretty` is passed to the
    /// generic dumpers); with `None`, the file goes through
    /// `action::<Metrics>` instead.
    Metrics {
        format: Option<MetricsFormat>,
        pretty: bool,
    },
    /// Extracts operands/operators. With a `format`, the result of
    /// `get_ops` is serialized through the generic dumper (the CSV
    /// dispatch arm does nothing); with `None`, the file goes through
    /// `action::<OpsCode>` instead.
    Ops {
        format: Option<MetricsFormat>,
        pretty: bool,
    },
    /// Runs `action::<CommentRm>`; `in_place` is forwarded unchanged in
    /// `CommentRmCfg`.
    StripComments {
        in_place: bool,
    },
    /// Runs `action::<Function>` for the file.
    Functions,
    /// Runs `action::<Find>` with these filters plus the configured
    /// line range.
    Find(Arc<[String]>),
    /// Runs `action::<Count>` with these filters, accumulating into the
    /// shared `Config::count_lock`.
    Count(Arc<[String]>),
    /// Same walk as `Metrics`, but taps each space tree to stream
    /// `FunctionSummary` records for the post-walk aggregator.
    Report,
    /// Walks source to accumulate preprocessor data (no per-file output).
    PreprocProduce,
    /// Walks source and streams threshold violations to a channel.
    Check,
}
385
/// Per-run settings shared (by reference) with every `act_on_file`
/// call. Built by [`Config::new`] from the global options, then
/// completed with per-command extras at the call site.
#[derive(Debug)]
struct Config {
    /// The operation `act_on_file` performs on each file.
    action: Action,
    /// Output location handed to the structured dumpers for
    /// `Action::Metrics` / `Action::Ops`; `None` leaves the choice of
    /// sink to the dumper.
    output: Option<PathBuf>,
    /// Forced language. When `None`, `act_on_file` falls back to
    /// `guess_language` for each file.
    language: Option<LANG>,
    /// Optional first line, forwarded to `DumpCfg` and `FindCfg`.
    line_start: Option<usize>,
    /// Optional last line, forwarded to `DumpCfg` and `FindCfg`.
    line_end: Option<usize>,
    /// Shared accumulator that `Action::PreprocProduce` fills in; the
    /// producer arm does nothing when this is `None`.
    preproc_lock: Option<Arc<Mutex<PreprocResults>>>,
    /// Previously computed preprocessor data, cloned (as an `Arc`) and
    /// passed along to every analysis call in `act_on_file`.
    preproc: Option<Arc<PreprocResults>>,
    /// Shared tally for `Action::Count`. Must be `Some` before a count
    /// walk starts: `act_on_file` `expect`s it.
    count_lock: Option<Arc<Mutex<Count>>>,
    /// Sender for streaming `FunctionSummary` records when running `report`.
    /// Wrapped in `Mutex` because `mpsc::Sender` is `Send` but not `Sync`.
    markdown_tx: Option<Mutex<std::sync::mpsc::Sender<FunctionSummary>>>,
    /// Path prefix stripped from file paths in the markdown report.
    strip_prefix: String,
    /// Pre-resolved thresholds for `Action::Check`. `None` for every
    /// other action.
    threshold_set: Option<Arc<ThresholdSet>>,
    /// Sender for streaming [`Violation`] records when running `check`.
    /// Wrapped in `Mutex` for the same reason as `markdown_tx`.
    check_tx: Option<Mutex<std::sync::mpsc::Sender<Violation>>>,
    /// Counts how many files survived expansion and glob filtering and
    /// were actually dispatched to `act_on_file`. `Action::Check` reads
    /// this after the walk to distinguish "all clean" (counter > 0,
    /// no violations) from "no files matched" (counter == 0), so a
    /// typo in `--paths` does not silently pass CI.
    files_dispatched: Option<Arc<AtomicUsize>>,
    /// Whether to honor or ignore in-source suppression markers when
    /// emitting threshold violations. Only meaningful for
    /// `Action::Check`; the field is defaulted to `Honor` for every
    /// other action so the new code path is invisible to existing
    /// flows. Flipped to `Ignore` by `--no-suppress`.
    suppression_policy: SuppressionPolicy,
    /// When true, `act_on_file` reports each skipped file (empty,
    /// generated, unrecognized language, ...) on stderr.
    warning: bool,
    /// When true, files whose head matches a generated-code marker are
    /// skipped before parsing. Defaults on; flipped off by
    /// `--no-skip-generated`.
    skip_generated: bool,
    /// When true, log a stderr line for each file auto-skipped by the
    /// generated-code detector. Also enabled by `warning` (which logs
    /// every skip reason); `report_skipped` is the dedicated flag for
    /// users who want the generated-skip audit without the rest of the
    /// warning stream.
    report_skipped: bool,
    /// When true, [`get_function_spaces_with_options`] is used in
    /// place of [`get_function_spaces`] and [`MetricsOptions::exclude_tests`]
    /// is set, so language modules that override
    /// `Checker::should_skip_subtree` (currently only Rust) prune
    /// their test subtrees before metric computation. See
    /// `GlobalOpts::exclude_tests` for the user-facing description.
    exclude_tests: bool,
}
438
439impl Config {
440    /// Build a `Config` for `action`, populating the fields every command
441    /// shares from `globals`. Per-command extras (`output`, `count_lock`,
442    /// `markdown_tx`, `strip_prefix`) are set on the returned value at the
443    /// call site.
444    fn new(action: Action, globals: &GlobalOpts, preproc: Option<Arc<PreprocResults>>) -> Self {
445        let language = resolve_language(globals.language_type.as_deref(), &action);
446        Self {
447            action,
448            output: None,
449            language,
450            line_start: globals.line_start,
451            line_end: globals.line_end,
452            preproc_lock: None,
453            preproc,
454            count_lock: None,
455            markdown_tx: None,
456            strip_prefix: String::new(),
457            threshold_set: None,
458            check_tx: None,
459            files_dispatched: None,
460            suppression_policy: SuppressionPolicy::Honor,
461            warning: globals.warning,
462            skip_generated: !globals.no_skip_generated,
463            report_skipped: globals.report_skipped,
464            exclude_tests: globals.exclude_tests,
465        }
466    }
467
468    /// Project this `Config` onto the library's `MetricsOptions`
469    /// surface. Centralising the projection here means new metric
470    /// options land in one place instead of being duplicated across
471    /// every `act_on_file` arm that drives a metric computation.
472    #[inline]
473    fn metrics_options(&self) -> MetricsOptions {
474        MetricsOptions::default().with_exclude_tests(self.exclude_tests)
475    }
476}
477
478fn mk_globset(elems: Vec<String>) -> Result<GlobSet, String> {
479    if elems.is_empty() {
480        return Ok(GlobSet::empty());
481    }
482
483    let mut globset = GlobSetBuilder::new();
484    for e in &elems {
485        if e.is_empty() {
486            continue;
487        }
488        globset.add(Glob::new(e).map_err(|err| format!("invalid glob pattern {e:?}: {err}"))?);
489    }
490    globset
491        .build()
492        .map_err(|err| format!("failed to build glob set: {err}"))
493}
494
// `act_on_file` is the per-file dispatch hub for the CLI. Every
// Action variant that needs metric data calls
// `get_function_spaces_with_options`, which is now `#[deprecated]`
// in favour of `analyze(Source { ... }, ...)`. The CLI is the
// canonical path-based caller (it always has a `&Path` for the file
// it just read), so the deprecated shim remains the most direct
// entry point here. Migration tracks issue #254's follow-up; the
// function-scope `#[allow(deprecated)]` keeps the surrounding code
// readable without per-call-site attributes.
/// Run the configured [`Action`] against a single file.
///
/// Steps, in order:
///
/// 1. bump `cfg.files_dispatched` when a counter is installed;
/// 2. read the file with `read_file_with_eol`, returning early when it
///    yields `None` (reported as an empty file under `cfg.warning`);
/// 3. skip files flagged by `is_generated`, unless `cfg.skip_generated`
///    is off or the action is `Action::PreprocProduce`;
/// 4. settle the language: `cfg.language` when forced, otherwise
///    `guess_language`; files with no language are skipped;
/// 5. dispatch on `cfg.action`.
///
/// # Errors
///
/// Propagates the `std::io::Error` from reading the file and from the
/// dump / `action::<_>` call of the selected arm.
///
/// # Panics
///
/// Panics if an `action::<_>` call returns `Err` (see
/// `FEATURES_PINNED`), if `Action::Count` is dispatched without
/// `cfg.count_lock`, or if the preprocessor mutex is poisoned.
#[allow(deprecated)]
fn act_on_file(path: PathBuf, cfg: &Config) -> std::io::Result<()> {
    if let Some(counter) = &cfg.files_dispatched {
        // Count every dispatched file, including those skipped below for
        // empty content / unrecognized language. The user pointed at
        // these files and the runner walked them — they count as "the
        // input was non-empty" for the zero-files-matched check in
        // `run_check`.
        counter.fetch_add(1, Ordering::Relaxed);
    }

    let Some(source) = read_file_with_eol(&path)? else {
        if cfg.warning {
            eprintln!("warning: skipping empty file: {}", path.display());
        }
        return Ok(());
    };

    // The generated-code skip runs before language detection so we don't
    // pay parse cost for files we'll discard. It's a CLI-level filter
    // (preproc has its own pipeline that genuinely needs every C/C++ file
    // walked), so leave Action::PreprocProduce alone.
    if cfg.skip_generated && !matches!(cfg.action, Action::PreprocProduce) && is_generated(&source)
    {
        if cfg.report_skipped || cfg.warning {
            eprintln!("skipped (generated): {}", path.display());
        }
        return Ok(());
    }

    let Some(language) = cfg.language.or_else(|| guess_language(&source, &path).0) else {
        if cfg.warning {
            eprintln!(
                "warning: skipping file with unrecognized language: {}",
                path.display()
            );
        }
        return Ok(());
    };

    // Clone of the optional `Arc`, so each analysis call below can take
    // the preprocessor data by value.
    let pr = cfg.preproc.clone();
    match &cfg.action {
        Action::Dump => {
            let dump_cfg = DumpCfg {
                line_start: cfg.line_start,
                line_end: cfg.line_end,
            };
            // The CLI pins the library's `all-languages` feature, so
            // `LanguageDisabled` from `action::<T>` is unreachable; the
            // `expect` documents that invariant.
            action::<Dump>(&language, source, &path, pr, dump_cfg).expect(FEATURES_PINNED)
        }
        Action::Metrics { format, pretty } => {
            if let Some(fmt) = format {
                // A failed analysis is not reported here: the file simply
                // produces no output and the arm still returns `Ok(())`.
                if let Ok(space) = get_function_spaces_with_options(
                    &language,
                    source,
                    &path,
                    pr,
                    cfg.metrics_options(),
                ) {
                    match fmt.dispatch() {
                        MetricsDispatch::Generic(g) => {
                            g.dump(space, path, cfg.output.as_ref(), *pretty)?;
                        }
                        MetricsDispatch::Csv => {
                            dump_csv(&space, path, cfg.output.as_ref())?;
                        }
                    }
                }
                Ok(())
            } else {
                // `path` moves into the config, so take a copy back out
                // for the `&Path` argument that `action` also needs.
                let metrics_cfg = MetricsCfg::new(path).with_options(cfg.metrics_options());
                let path = metrics_cfg.path.clone();
                action::<Metrics>(&language, source, &path, pr, metrics_cfg).expect(FEATURES_PINNED)
            }
        }
        Action::Ops { format, pretty } => {
            if let Some(fmt) = format {
                // As with metrics, an `Err` from `get_ops` is dropped
                // silently and the file yields no output.
                if let Ok(ops) = get_ops(&language, source, &path, pr) {
                    // CSV is rejected upstream in `run()` for the
                    // Ops command, so the dispatch here is always
                    // Generic. The match is still exhaustive to keep
                    // the compiler honest if that upstream guard ever
                    // drifts.
                    match fmt.dispatch() {
                        MetricsDispatch::Generic(g) => {
                            g.dump(ops, path, cfg.output.as_ref(), *pretty)?;
                        }
                        MetricsDispatch::Csv => {}
                    }
                }
                Ok(())
            } else {
                let ops_cfg = OpsCfg { path };
                let path = ops_cfg.path.clone();
                action::<OpsCode>(&language, source, &path, pr, ops_cfg).expect(FEATURES_PINNED)
            }
        }
        Action::StripComments { in_place } => {
            let comment_cfg = CommentRmCfg {
                in_place: *in_place,
                path,
            };
            let path = comment_cfg.path.clone();
            // C++ comment removal goes through the dedicated Ccomment grammar
            // even when the file's primary language is Cpp.
            let lang = if language == LANG::Cpp {
                LANG::Ccomment
            } else {
                language
            };
            action::<CommentRm>(&lang, source, &path, pr, comment_cfg).expect(FEATURES_PINNED)
        }
        Action::Functions => {
            let fn_cfg = FunctionCfg { path: path.clone() };
            action::<Function>(&language, source, &path, pr, fn_cfg).expect(FEATURES_PINNED)
        }
        Action::Find(filters) => {
            let find_cfg = FindCfg {
                path: path.clone(),
                filters: Arc::clone(filters),
                line_start: cfg.line_start,
                line_end: cfg.line_end,
            };
            action::<Find>(&language, source, &path, pr, find_cfg).expect(FEATURES_PINNED)
        }
        Action::Count(filters) => {
            let stats = cfg
                .count_lock
                .clone()
                .expect("Count handler initializes count_lock before dispatch");
            let count_cfg = CountCfg {
                filters: Arc::clone(filters),
                stats,
            };
            action::<Count>(&language, source, &path, pr, count_cfg).expect(FEATURES_PINNED)
        }
        Action::Report => {
            // All three conditions must hold: the analysis succeeded, a
            // report channel is installed, and the language is a real
            // source language rather than `Preproc` / `Ccomment`.
            if let Ok(space) = get_function_spaces_with_options(
                &language,
                source,
                &path,
                pr,
                cfg.metrics_options(),
            ) && let Some(ref tx) = cfg.markdown_tx
                && !matches!(language, LANG::Preproc | LANG::Ccomment)
            {
                // Markdown reports are human-readable text and the
                // downstream `FunctionSummary::file: String` is rendered
                // into the report body, so non-UTF-8 paths cannot
                // round-trip through this pipeline regardless of how we
                // carry them upstream. Skip with a warning. The
                // threshold pipeline (Action::Check) carries `&Path`
                // end-to-end because its JSON/SARIF outputs can
                // preserve raw bytes.
                let Some(file_str) = path.to_str() else {
                    if cfg.warning {
                        eprintln!(
                            "warning: skipping non-UTF-8 path in report: {}",
                            path.display()
                        );
                    }
                    return Ok(());
                };
                // Collect first so the channel lock is only taken once
                // the summaries are ready to send.
                let mut summaries = Vec::new();
                extract_summaries(
                    &space,
                    file_str,
                    language,
                    &cfg.strip_prefix,
                    &mut summaries,
                );
                let Ok(sender) = tx.lock() else {
                    if cfg.warning {
                        eprintln!(
                            "warning: skipping {}: report channel lock poisoned",
                            path.display()
                        );
                    }
                    return Ok(());
                };
                // A send error (receiver gone) is deliberately ignored.
                for s in summaries {
                    let _ = sender.send(s);
                }
            }
            Ok(())
        }
        Action::Check => {
            // Same gating as `Report`, but requiring both the threshold
            // set and the violation channel to be installed.
            if let Ok(space) = get_function_spaces_with_options(
                &language,
                source,
                &path,
                pr,
                cfg.metrics_options(),
            ) && let (Some(set), Some(tx)) = (cfg.threshold_set.as_ref(), cfg.check_tx.as_ref())
                && !matches!(language, LANG::Preproc | LANG::Ccomment)
            {
                // Pass the path through as `&Path` so non-UTF-8 bytes
                // are preserved on each emitted `Violation`. Display /
                // offender serialization decide their own lossy
                // strategy at the output boundary; the threshold
                // pipeline itself stays byte-faithful.
                let mut violations = Vec::new();
                set.evaluate_with_policy(&path, &space, cfg.suppression_policy, &mut violations);
                if !violations.is_empty() {
                    // NOTE(review): on a poisoned lock the violations for
                    // this file are dropped, and only mentioned when
                    // `cfg.warning` is set.
                    let Ok(sender) = tx.lock() else {
                        if cfg.warning {
                            eprintln!(
                                "warning: skipping {}: check channel lock poisoned",
                                path.display()
                            );
                        }
                        return Ok(());
                    };
                    // Receiver lives until `run_check` drains `rx`, which
                    // happens only after `run_walk` joins all worker
                    // threads — so `send` cannot fail here. Use `let _`
                    // rather than `expect` to avoid panicking the worker
                    // pool on the (unreachable) drop path.
                    for v in violations {
                        let _ = sender.send(v);
                    }
                }
            }
            Ok(())
        }
        Action::PreprocProduce => {
            // `resolve_language` forces `cfg.language` to `LANG::Preproc`
            // for this action, so the outer `language` says nothing about
            // the file itself; detect it again and only feed C++ files
            // to the preprocessor pass.
            if let Some(preproc_lock) = &cfg.preproc_lock
                && let Some(language) = guess_language(&source, &path).0
                && language == LANG::Cpp
            {
                let mut results = preproc_lock.lock().expect("mutex not poisoned");
                preprocess(
                    &PreprocParser::new(source, &path, None),
                    &path,
                    &mut results,
                );
            }
            Ok(())
        }
    }
}
747
748fn process_dir_path(all_files: &mut HashMap<String, Vec<PathBuf>>, path: &Path, cfg: &Config) {
749    if !matches!(cfg.action, Action::PreprocProduce) {
750        return;
751    }
752    let Some(fname) = path.file_name().and_then(|n| n.to_str()) else {
753        return;
754    };
755    let file_name = fname.to_string();
756    match all_files.entry(file_name) {
757        hash_map::Entry::Occupied(l) => {
758            l.into_mut().push(path.to_path_buf());
759        }
760        hash_map::Entry::Vacant(p) => {
761            p.insert(vec![path.to_path_buf()]);
762        }
763    }
764}
765
766fn resolve_language(typ: Option<&str>, action: &Action) -> Option<LANG> {
767    // Force `Preproc` for the producer so `act_on_file`'s "skip
768    // unrecognized" guard never fires — every walked file must reach the
769    // dispatch where the producer runs its own Cpp check.
770    if matches!(action, Action::PreprocProduce) {
771        return Some(LANG::Preproc);
772    }
773    match typ.unwrap_or("") {
774        "" => None,
775        "ccomment" => Some(LANG::Ccomment),
776        "preproc" => Some(LANG::Preproc),
777        other => get_from_ext(other),
778    }
779}
780
781fn resolve_num_jobs(requested: Option<usize>) -> usize {
782    requested.map_or_else(
783        || {
784            std::cmp::max(
785                2,
786                available_parallelism()
787                    .unwrap_or_else(|e| {
788                        die(format_args!("could not get available parallelism: {e}"))
789                    })
790                    .get(),
791            ) - 1
792        },
793        |num_jobs| std::cmp::max(2, num_jobs) - 1,
794    )
795}
796
797/// Load existing preproc JSON for the consumer side. The producer side
798/// (`bca preproc`) builds its own `Mutex<PreprocResults>` directly.
799fn load_preproc_data(path: &Path) -> Arc<PreprocResults> {
800    let data = read_file(path).unwrap_or_else(|e| die_io("read preproc data", path, e));
801    let parsed = serde_json::from_slice::<PreprocResults>(&data)
802        .unwrap_or_else(|e| die_io("parse preproc JSON from", path, e));
803    Arc::new(parsed)
804}
805
/// Read newline-separated paths from `src` (a path on disk or `-`
/// for stdin). Skips blank/whitespace-only lines; `#` is treated as a
/// path character, not a comment.
///
/// Thin wrapper over `read_lines_from` that fixes the error-message
/// label to `--paths-from` and the retention policy to
/// `path_pattern_filter`.
///
/// Returns `Err(message)` when `src` cannot be opened or when reading
/// a line fails (the message then carries the failing line number);
/// the CLI caller translates this into a `die` exit.
fn read_paths_from(src: &Path) -> Result<Vec<PathBuf>, String> {
    read_lines_from(src, "--paths-from", path_pattern_filter)
}
814
/// Line policy for `--paths-from`: an already-trimmed line is kept
/// verbatim as a literal path unless it is empty.
///
/// `#` has no special meaning here — it is an ordinary path character.
/// This is the inverse of [`exclude_pattern_filter`]; the two are named
/// functions so unit tests can pin each `read_*_from` wrapper to the
/// predicate it is supposed to use.
fn path_pattern_filter(trimmed: &str) -> Option<PathBuf> {
    if trimmed.is_empty() {
        None
    } else {
        Some(PathBuf::from(trimmed))
    }
}
823
/// Read newline-separated `--exclude` glob patterns from `src` (a
/// path on disk or `-` for stdin). Blank lines and lines whose first
/// non-whitespace character is `#` (`.gitignore`-style comments) are
/// skipped; surrounding whitespace and any UTF-8 BOM on retained
/// lines are trimmed.
///
/// Thin wrapper over `read_lines_from` that fixes the error-message
/// label to `--exclude-from` and the retention policy to
/// `exclude_pattern_filter`.
///
/// Returns `Err(message)` when `src` cannot be opened or when reading
/// a line fails, with the path / failing line in the message; the CLI
/// caller translates this into a `die` exit.
fn read_exclude_patterns_from(src: &Path) -> Result<Vec<String>, String> {
    read_lines_from(src, "--exclude-from", exclude_pattern_filter)
}
834
/// Line policy for `--exclude-from`: an already-trimmed line becomes an
/// exclude pattern unless it is empty or begins with `#`.
///
/// Kept as a named function so unit tests exercise the exact predicate
/// the production reader uses rather than a copy of it.
fn exclude_pattern_filter(trimmed: &str) -> Option<String> {
    match trimmed.chars().next() {
        // Blank line, or a `#` comment.
        None | Some('#') => None,
        Some(_) => Some(trimmed.to_owned()),
    }
}
842
843/// Open `src` (a path on disk or `-` for stdin), buffer it, and
844/// hand each trimmed non-comment line to `map`. Items the closure
845/// returns `Some` for are collected; `None` skips the line. `flag`
846/// is the user-facing CLI flag name (e.g. `--paths-from`), included
847/// in error messages so users can tell which input failed.
848///
849/// Returns `Err(message)` on file-open failure or per-line I/O
850/// failure rather than calling `die` itself, so unit tests and
851/// future non-CLI callers can recover. The CLI wrappers above
852/// translate the `Err` into a `die` exit at their layer.
853fn read_lines_from<T>(
854    src: &Path,
855    flag: &str,
856    map: impl Fn(&str) -> Option<T>,
857) -> Result<Vec<T>, String> {
858    if src.as_os_str() == "-" {
859        let label = format!("{flag} -");
860        collect_lines(std::io::stdin().lock(), &label, map)
861    } else {
862        let label = format!("{flag} {}", src.display());
863        let f = std::fs::File::open(src).map_err(|e| format!("{label}: {e}"))?;
864        collect_lines(std::io::BufReader::new(f), &label, map)
865    }
866}
867
/// Read `reader` one line at a time, strip surrounding whitespace and
/// UTF-8 BOMs from each line, and keep whatever `map` returns `Some`
/// for, in input order.
///
/// Stops at the first I/O failure and returns `Err(message)`; the
/// message embeds `label` and the 1-based number of the failing line so
/// the caller needs no further context to report it.
///
/// The BOM is stripped on every line, not just the first. `\u{feff}` is
/// not whitespace per `char::is_whitespace`, so a plain `trim()` would
/// leave it attached and a BOM-prefixed pattern (e.g. a `.bcaignore`
/// saved as UTF-8-with-BOM) would turn into a glob that starts with
/// U+FEFF and matches nothing — silently disabling the first exclude.
/// Whitespace and BOM are trimmed as one character class in a single
/// pass so that `\u{feff}  pattern` and `pattern\u{feff}` both reduce
/// to `pattern`; trimming them in separate, ordered steps does not.
fn collect_lines<R, T>(
    reader: R,
    label: &str,
    map: impl Fn(&str) -> Option<T>,
) -> Result<Vec<T>, String>
where
    R: std::io::BufRead,
{
    let mut kept = Vec::new();
    for (idx, read) in reader.lines().enumerate() {
        let line =
            read.map_err(|e| format!("{label}: read error on line {}: {e}", idx + 1))?;
        let cleaned = line.trim_matches(|c: char| c == '\u{feff}' || c.is_whitespace());
        if let Some(item) = map(cleaned) {
            kept.push(item);
        }
    }
    Ok(kept)
}
903
904/// Expand seed paths for the walk: union `--paths` with
905/// `--paths-from`, then for each seed:
906///   - file → keep as-is (explicit override of any ignore rules);
907///   - directory → expand via `ignore::WalkBuilder`, gitignore-aware
908///     unless `no_ignore` is set.
909///
910/// Returns a flat `Vec<PathBuf>` of files. Include/exclude globs are
911/// applied later by `explore()`, matching today's semantics.
912fn expand_seed_paths(
913    paths: Vec<PathBuf>,
914    paths_from: Option<PathBuf>,
915    no_ignore: bool,
916) -> Vec<PathBuf> {
917    use ignore::WalkBuilder;
918    let mut seeds = paths;
919    if let Some(src) = paths_from {
920        seeds.extend(read_paths_from(&src).unwrap_or_else(|e| die(e)));
921    }
922    let mut out: Vec<PathBuf> = Vec::new();
923    for seed in seeds {
924        if !seed.exists() {
925            // Match today's `explore()` behavior: warn, do not die.
926            eprintln!("Warning: File doesn't exist: {}", seed.display());
927            continue;
928        }
929        if seed.is_file() {
930            out.push(seed);
931            continue;
932        }
933        let mut wb = WalkBuilder::new(&seed);
934        wb.hidden(true)
935            .follow_links(false)
936            .require_git(false)
937            .git_ignore(!no_ignore)
938            .git_exclude(!no_ignore)
939            .git_global(!no_ignore)
940            .ignore(!no_ignore)
941            .parents(!no_ignore);
942        for entry in wb.build() {
943            let entry = entry
944                .unwrap_or_else(|e| die(format_args!("walk error in {}: {e}", seed.display())));
945            if entry.file_type().is_some_and(|t| t.is_file()) {
946                out.push(entry.into_path());
947            }
948        }
949    }
950    out
951}
952
953fn run_walk(globals: GlobalOpts, cfg: Config) -> HashMap<String, Vec<PathBuf>> {
954    let include = mk_globset(globals.include).unwrap_or_else(|e| die(e));
955    let mut exclude_patterns = globals.exclude;
956    if let Some(src) = globals.exclude_from {
957        exclude_patterns.extend(read_exclude_patterns_from(&src).unwrap_or_else(|e| die(e)));
958    }
959    let exclude = mk_globset(exclude_patterns).unwrap_or_else(|e| die(e));
960    let num_jobs = resolve_num_jobs(globals.num_jobs);
961    let paths = expand_seed_paths(globals.paths, globals.paths_from, globals.no_ignore);
962    let files_data = FilesData {
963        include,
964        exclude,
965        paths,
966    };
967    ConcurrentRunner::new(num_jobs, act_on_file)
968        .set_proc_dir_paths(process_dir_path)
969        .run(cfg, files_data)
970        .unwrap_or_else(|e| die(format_args!("{e:?}")))
971}
972
973/// Load a `[thresholds]` table from `path`, returning the parsed map.
974/// On any I/O or parse error the process dies with exit code 1, keeping
975/// exit 2 reserved for the "thresholds exceeded" case.
976fn load_threshold_config(path: &Path) -> BTreeMap<String, f64> {
977    let bytes = read_file(path).unwrap_or_else(|e| die_io("read threshold config", path, e));
978    let text = std::str::from_utf8(&bytes)
979        .unwrap_or_else(|e| die_io("decode UTF-8 from threshold config", path, e));
980    let cfg: ThresholdConfig =
981        toml::from_str(text).unwrap_or_else(|e| die_io("parse threshold config", path, e));
982    cfg.thresholds
983}
984
985/// Load a baseline file. Same error contract as `load_threshold_config`:
986/// any I/O, UTF-8, or schema error dies with exit code 1.
987fn load_baseline(path: &Path) -> Baseline {
988    let bytes = read_file(path).unwrap_or_else(|e| die_io("read baseline", path, e));
989    let text = std::str::from_utf8(&bytes)
990        .unwrap_or_else(|e| die_io("decode UTF-8 from baseline", path, e));
991    Baseline::from_str(text).unwrap_or_else(|e| die_io("parse baseline", path, e))
992}
993
/// Write `bytes` to `path` atomically: create the parent directory if
/// needed, write to `<path>.bca-tmp`, then rename. Survives a `kill -9`
/// mid-write — the consumer sees either the previous file or the
/// fully-written new file, never a half-written one.
///
/// The suffix is *appended* to the full path rather than replacing the
/// extension, so a user-supplied path like `foo.tmp` does not collide
/// with the temporary file.
///
/// # Errors
///
/// Returns the first I/O error from creating the parent directory,
/// writing the temporary file, or renaming it into place (e.g.
/// cross-filesystem `EXDEV`, permission denied). When the write *or*
/// the rename fails, the temporary file is removed best-effort before
/// the original error is returned, so a failed call (for example a
/// disk-full partial write) does not leave a stray `.bca-tmp` behind.
fn write_atomic(path: &Path, bytes: &[u8]) -> std::io::Result<()> {
    // A bare file name has an empty parent; there is nothing to create.
    if let Some(parent) = path.parent().filter(|p| !p.as_os_str().is_empty()) {
        std::fs::create_dir_all(parent)?;
    }

    let mut tmp = path.as_os_str().to_os_string();
    tmp.push(".bca-tmp");
    let tmp = PathBuf::from(tmp);

    let result = std::fs::write(&tmp, bytes).and_then(|()| std::fs::rename(&tmp, path));
    if result.is_err() {
        // Cleanup is best-effort; the caller already has an error to
        // report, and a failed removal would only obscure it.
        let _ = std::fs::remove_file(&tmp);
    }
    result
}
1021
1022/// Drive the `check` subcommand: build the threshold set, walk the
1023/// source tree, drain violations, and exit 0 / 2 per the contract.
1024fn run_check(globals: GlobalOpts, args: CheckArgs, preproc: Option<Arc<PreprocResults>>) {
1025    // Validate --output / --output-format pairing before the walk so
1026    // a misconfigured invocation fails fast instead of after a full
1027    // parse. `--output` without `--output-format` is silently ignored
1028    // — only the human stderr stream is emitted, which is the
1029    // default contract — to keep the simplest invocation
1030    // (`bca check --threshold ... --no-fail > /dev/null`) frictionless.
1031    if let Some(fmt) = args.output_format
1032        && let Some(ref out) = args.output
1033        && out.exists()
1034        && out.is_dir()
1035    {
1036        die(format_args!(
1037            "--output must be a file path for `check --output-format {}`",
1038            fmt.name()
1039        ));
1040    }
1041
1042    let mut merged: BTreeMap<String, f64> = args
1043        .config
1044        .as_deref()
1045        .map(load_threshold_config)
1046        .unwrap_or_default();
1047    // CLI flags override config values for the same metric name.
1048    for (name, limit) in args.thresholds {
1049        merged.insert(name, limit);
1050    }
1051    let set = ThresholdSet::build(&merged).unwrap_or_else(|e| die(e));
1052    if set.is_empty() {
1053        die("no thresholds configured; pass --threshold or --config");
1054    }
1055    let set = Arc::new(set);
1056
1057    let (tx, rx) = std::sync::mpsc::channel();
1058    let files_dispatched = Arc::new(AtomicUsize::new(0));
1059    let cfg = Config {
1060        threshold_set: Some(Arc::clone(&set)),
1061        check_tx: Some(Mutex::new(tx)),
1062        files_dispatched: Some(Arc::clone(&files_dispatched)),
1063        suppression_policy: SuppressionPolicy::from_no_suppress(args.no_suppress),
1064        ..Config::new(Action::Check, &globals, preproc)
1065    };
1066    run_walk(globals, cfg);
1067
1068    if files_dispatched.load(Ordering::Relaxed) == 0 {
1069        // No files survived `--paths` expansion + `--include`/`--exclude`
1070        // filtering. Treat this as a tool error (exit 1), not a clean
1071        // pass (exit 0): a typo in `--paths` would otherwise silently
1072        // green-light CI.
1073        die("bca check: no input files matched; check --paths, --include, --exclude");
1074    }
1075
1076    // Workers have all joined by the time `run_walk` returns, so the
1077    // sender side is dropped and `rx.into_iter()` terminates cleanly.
1078    let mut violations: Vec<Violation> = rx.into_iter().collect();
1079    // Stable, deterministic stderr output: by path, then start line, then
1080    // metric name. Different runs over the same tree produce identical
1081    // output, which CI diff tooling relies on.
1082    violations.sort_by(|a, b| {
1083        a.path
1084            .cmp(&b.path)
1085            .then(a.start_line.cmp(&b.start_line))
1086            .then(a.metric.cmp(b.metric))
1087    });
1088
1089    if let Some(path) = args.write_baseline {
1090        let file = baseline::from_violations(violations);
1091        let entry_count = file.entries.len();
1092        let text = baseline::render(&file)
1093            .unwrap_or_else(|e| die(format_args!("serialize baseline: {e}")));
1094        write_atomic(&path, text.as_bytes()).unwrap_or_else(|e| die_io("write baseline", &path, e));
1095        eprintln!(
1096            "bca: wrote {entry_count} baseline entries to {}",
1097            path.display()
1098        );
1099        return;
1100    }
1101
1102    let violations: Vec<Violation> = if let Some(path) = args.baseline.as_deref() {
1103        let baseline = load_baseline(path);
1104        let before = violations.len();
1105        let kept: Vec<Violation> = violations
1106            .into_iter()
1107            .filter(|v| !baseline.covers(v))
1108            .collect();
1109        let filtered = before - kept.len();
1110        if filtered > 0 {
1111            eprintln!("bca: filtered {filtered} violations via baseline");
1112        }
1113        kept
1114    } else {
1115        violations
1116    };
1117
1118    // BrokenPipe on stderr (e.g. when piped to `head`) is the only
1119    // realistic write failure here; swallow it rather than die so the
1120    // exit-code contract is honored.
1121    let mut stderr = std::io::stderr().lock();
1122    for v in &violations {
1123        let _ = writeln!(stderr, "{v}");
1124    }
1125
1126    // Emit the aggregated CI/IDE document if requested. Empty input
1127    // produces a well-formed but offender-free document, which CI
1128    // consumers can ingest unchanged on clean runs. The exit-code
1129    // contract below is unaffected by this branch.
1130    let any_violations = !violations.is_empty();
1131    if let Some(fmt) = args.output_format {
1132        let offenders: Vec<_> = violations.into_iter().map(violation_to_offender).collect();
1133        fmt.dump(&offenders, args.output.as_deref())
1134            .unwrap_or_else(|e| die(format_args!("failed to write {}: {e}", fmt.name())));
1135    }
1136
1137    if any_violations && !args.no_fail {
1138        process::exit(2);
1139    }
1140}
1141
1142/// Parse `std::env::args_os()` and execute the selected `bca`
1143/// subcommand. Intended to be called from the `bca` binary's `main`,
1144/// which is a one-liner over this function.
1145///
1146/// # Termination contract
1147///
1148/// This function **may terminate the calling process** rather than
1149/// return. It is not a re-entrant library entry point:
1150///
1151/// - clap argument-parsing failures bubble up through
1152///   [`clap::Error::exit`] (exit 0 on `--help` / `--version`, exit 2
1153///   on usage errors).
1154/// - User-input errors (invalid threshold spec, unreadable preproc
1155///   data, missing `--output` parent directory, walk errors, mutually
1156///   exclusive output-format combinations, broken-pipe writes, etc.)
1157///   call `process::exit(1)` via internal `die` / `die_io` helpers.
1158/// - The `check` subcommand calls `process::exit(2)` when any
1159///   threshold is exceeded, reserving exit 1 for tool errors so CI can
1160///   distinguish "metric regression" from "tool crashed".
1161///
1162/// Hosts that call [`run`] will be torn down on any of those paths
1163/// without unwinding. If you need to drive the same functionality from
1164/// inside another process, use the [`big_code_analysis`] library crate
1165/// directly instead of going through this entry point.
1166pub fn run() {
1167    let cli = match Cli::try_parse() {
1168        Ok(cli) => cli,
1169        Err(err) => {
1170            if matches!(
1171                err.kind(),
1172                clap::error::ErrorKind::UnknownArgument
1173                    | clap::error::ErrorKind::InvalidSubcommand
1174                    | clap::error::ErrorKind::InvalidValue
1175                    | clap::error::ErrorKind::MissingSubcommand
1176                    | clap::error::ErrorKind::DisplayHelpOnMissingArgumentOrSubcommand
1177            ) && let Some(hint) = legacy_hint(std::env::args_os())
1178            {
1179                eprintln!("{hint}");
1180            }
1181            err.exit();
1182        }
1183    };
1184
1185    let preproc = cli
1186        .globals
1187        .preproc_data
1188        .as_ref()
1189        .map(|p| load_preproc_data(p));
1190
1191    match cli.command {
1192        Command::ListMetrics(args) => {
1193            let mut buf = Vec::new();
1194            write_metrics(&mut buf, args.mode).expect("writing to Vec<u8> is infallible");
1195            write_stdout_or_die(&buf);
1196        }
1197        Command::Dump => {
1198            let cfg = Config::new(Action::Dump, &cli.globals, preproc);
1199            run_walk(cli.globals, cfg);
1200        }
1201        Command::Functions => {
1202            let cfg = Config::new(Action::Functions, &cli.globals, preproc);
1203            run_walk(cli.globals, cfg);
1204        }
1205        Command::Metrics(args) => {
1206            if matches!(args.output_format, Some(MetricsFormat::Cbor)) && args.output.is_none() {
1207                die(CBOR_STDOUT_ERROR);
1208            }
1209            if args.output_format.is_some()
1210                && let Some(ref out) = args.output
1211                && out.exists()
1212                && !out.is_dir()
1213            {
1214                die("--output must be a directory for `metrics`");
1215            }
1216            let action = Action::Metrics {
1217                format: args.output_format,
1218                pretty: args.pretty,
1219            };
1220            let cfg = Config {
1221                output: args.output,
1222                ..Config::new(action, &cli.globals, preproc)
1223            };
1224            run_walk(cli.globals, cfg);
1225        }
1226        Command::Ops(args) => {
1227            if matches!(args.output_format, Some(MetricsFormat::Cbor)) && args.output.is_none() {
1228                die(CBOR_STDOUT_ERROR);
1229            }
1230            if let Some(MetricsDispatch::Csv) = args.output_format.map(MetricsFormat::dispatch) {
1231                die(
1232                    "CSV is not supported by `ops` because its column schema is metric-shaped; use `bca metrics --output-format <fmt>`",
1233                );
1234            }
1235            if args.output_format.is_some()
1236                && let Some(ref out) = args.output
1237                && out.exists()
1238                && !out.is_dir()
1239            {
1240                die("--output must be a directory for `ops`");
1241            }
1242            let action = Action::Ops {
1243                format: args.output_format,
1244                pretty: args.pretty,
1245            };
1246            let cfg = Config {
1247                output: args.output,
1248                ..Config::new(action, &cli.globals, preproc)
1249            };
1250            run_walk(cli.globals, cfg);
1251        }
1252        Command::Report(args) => {
1253            if let Some(ref output) = args.output {
1254                if output.exists() && output.is_dir() {
1255                    die("--output must be a file path for `report`");
1256                }
1257                if let Some(parent) = output.parent()
1258                    && !parent.as_os_str().is_empty()
1259                    && !parent.exists()
1260                {
1261                    die(format_args!(
1262                        "parent directory of --output does not exist: {}",
1263                        parent.display()
1264                    ));
1265                }
1266            }
1267            let (tx, rx) = std::sync::mpsc::channel();
1268            let cfg = Config {
1269                markdown_tx: Some(Mutex::new(tx)),
1270                strip_prefix: args.strip_prefix,
1271                ..Config::new(Action::Report, &cli.globals, preproc)
1272            };
1273            run_walk(cli.globals, cfg);
1274
1275            // ConcurrentRunner::run() consumed Config (and thus the Sender).
1276            // All worker threads have joined, so `rx.into_iter()` terminates.
1277            let summaries: Vec<FunctionSummary> = rx.into_iter().collect();
1278            let report = match args.format {
1279                ReportFormat::Markdown => generate_report(&summaries, args.top as usize),
1280                ReportFormat::Html => generate_html_report(&summaries, args.top as usize),
1281            };
1282            if let Some(ref output_path) = args.output {
1283                std::fs::write(output_path, &report)
1284                    .unwrap_or_else(|e| die_io("write report to", output_path, e));
1285            } else {
1286                write_stdout_or_die(report.as_bytes());
1287            }
1288        }
1289        Command::Find(args) => {
1290            let cfg = Config::new(Action::Find(args.nodes.into()), &cli.globals, preproc);
1291            run_walk(cli.globals, cfg);
1292        }
1293        Command::Count(args) => {
1294            let count_lock = Arc::new(Mutex::new(Count::default()));
1295            let cfg = Config {
1296                count_lock: Some(count_lock.clone()),
1297                ..Config::new(Action::Count(args.nodes.into()), &cli.globals, preproc)
1298            };
1299            run_walk(cli.globals, cfg);
1300
1301            let count = Arc::try_unwrap(count_lock)
1302                .expect("all worker threads have joined; Arc refcount is 1")
1303                .into_inner()
1304                .expect("mutex not poisoned");
1305            println!("{count}");
1306        }
1307        Command::StripComments(args) => {
1308            let action = Action::StripComments {
1309                in_place: args.in_place,
1310            };
1311            let cfg = Config::new(action, &cli.globals, preproc);
1312            run_walk(cli.globals, cfg);
1313        }
1314        Command::Check(args) => {
1315            run_check(cli.globals, args, preproc);
1316        }
1317        Command::Preproc(args) => {
1318            let preproc_lock = Arc::new(Mutex::new(PreprocResults::default()));
1319            let output = args.output;
1320            let cfg = Config {
1321                preproc_lock: Some(preproc_lock.clone()),
1322                ..Config::new(Action::PreprocProduce, &cli.globals, None)
1323            };
1324            let all_files = run_walk(cli.globals, cfg);
1325
1326            let mut data = Arc::try_unwrap(preproc_lock)
1327                .expect("all worker threads have joined; Arc refcount is 1")
1328                .into_inner()
1329                .expect("mutex not poisoned");
1330            fix_includes(&mut data.files, &all_files);
1331
1332            let serialized = serde_json::to_string(&data)
1333                .unwrap_or_else(|e| die(format_args!("failed to serialize preproc data: {e}")));
1334            if let Some(output_path) = output {
1335                write_file(&output_path, serialized.as_bytes())
1336                    .unwrap_or_else(|e| die_io("write preproc output to", &output_path, e));
1337            } else {
1338                println!("{serialized}");
1339            }
1340        }
1341    }
1342}
1343
/// Names of every subcommand on the new CLI. Kept in sync with the
/// `Command` enum by `tests::subcommands_match_command_enum`, which
/// fails if the two ever drift.
///
/// `legacy_hint` consults this list to decide whether an invocation
/// already targets the new CLI: an argument counts only if it equals
/// one of these names exactly.
const SUBCOMMANDS: &[&str] = &[
    "metrics",
    "ops",
    "report",
    "dump",
    "find",
    "count",
    "functions",
    "strip-comments",
    "preproc",
    "list-metrics",
    "check",
];
1360
/// Decode the value of `-O <v>` / `--output-format <v>` /
/// `--output-format=<v>` / `-O<v>` / `-O=<v>` from a flat argv slice.
///
/// Returns the first match, borrowed from `args` (callers pre-filter
/// the slice to the legacy invocation's tokens, so a single occurrence
/// is the realistic case). Returns `None` when no token carries a
/// value. A separated flag with nothing after it, or an attached short
/// form with an empty value (`-O=`), is not a match, and scanning
/// continues with the next token.
fn parse_output_format_value(args: &[String]) -> Option<&str> {
    args.iter().enumerate().find_map(|(i, a)| {
        let s = a.as_str();
        if s == "-O" || s == "--output-format" {
            // Separated form: the value is the following token.
            args.get(i + 1).map(String::as_str)
        } else if let Some(rest) = s.strip_prefix("--output-format=") {
            Some(rest)
        } else {
            // Attached short form. An optional `=` may sit between the
            // flag and its value (`-O=json`); drop it so the caller
            // sees `json`, not `=json`.
            s.strip_prefix("-O")
                .map(|rest| rest.strip_prefix('=').unwrap_or(rest))
                .filter(|rest| !rest.is_empty())
        }
    })
}
1378
1379/// Scan `args` for `-O <offender>` / `--output-format <offender>` /
1380/// `--output-format=<offender>` against the four moved formats (any
1381/// variant of [`AggregatedFormat`]) and build a migration hint
1382/// pointing at `bca check`. Returns `None` when no offender format
1383/// is found, so the caller can fall through to clap's own error.
1384fn offender_format_migration_hint(args: &[String]) -> Option<String> {
1385    let fmt =
1386        parse_output_format_value(args).filter(|f| AggregatedFormat::from_str(f, true).is_ok())?;
1387    Some(format!(
1388        "note: -O {fmt} moved to `bca check` in #235; offender formats are no longer accepted on `bca metrics` / `bca ops`.\n  bca metrics -O {fmt} ...  ->  bca check --threshold <metric>=<limit> --output-format {fmt} [--output FILE]\n  Run `bca check --help` for the threshold and output-format flags.\n"
1389    ))
1390}
1391
1392/// If `argv` looks like an invocation of the pre-restructure CLI, return a
1393/// hint pointing the user at the new equivalent. Called only when clap
1394/// rejects the input, so the goal is to make the failure actionable.
1395///
1396/// The hint is best-effort and conservative: it triggers only on tokens
1397/// that are unambiguously legacy (action flags removed in the rewrite, or
1398/// `-O markdown` whose value no longer exists on `metrics`).
1399fn legacy_hint(argv: impl IntoIterator<Item = OsString>) -> Option<String> {
1400    let args: Vec<String> = argv
1401        .into_iter()
1402        .skip(1) // program name
1403        .filter_map(|s| s.into_string().ok())
1404        .collect();
1405    if args.is_empty() {
1406        return None;
1407    }
1408
1409    // If the user invoked a known new-CLI subcommand, they're not on
1410    // the legacy CLI; stay quiet so we don't second-guess legitimate
1411    // args that happen to look like old flags (e.g. `find --dump`
1412    // where the user intended `--dump` as a positional node-type
1413    // value). The one exception is `bca metrics|ops --output-format
1414    // <offender>` — the four offender formats moved to `bca check`
1415    // (issue #235) and the user still needs a one-line pointer at
1416    // the new home.
1417    if let Some(sub) = args.iter().find(|a| SUBCOMMANDS.contains(&a.as_str())) {
1418        if matches!(sub.as_str(), "metrics" | "ops")
1419            && let Some(hint) = offender_format_migration_hint(&args)
1420        {
1421            return Some(hint);
1422        }
1423        return None;
1424    }
1425
1426    // Action flags removed by the rewrite. Each one is unambiguously legacy.
1427    let action_map: &[(&str, &str)] = &[
1428        ("--metrics", "bca metrics"),
1429        ("-m", "bca metrics"),
1430        ("--ops", "bca ops"),
1431        ("--dump", "bca dump"),
1432        ("-d", "bca dump"),
1433        ("--comments", "bca strip-comments [--in-place]"),
1434        ("--function", "bca functions"),
1435        ("-F", "bca functions"),
1436        ("--find", "bca find <NODE> [<NODE>...]"),
1437        ("-f", "bca find <NODE> [<NODE>...]"),
1438        ("--count", "bca count <NODE> [<NODE>...]"),
1439        ("-C", "bca count <NODE> [<NODE>...]"),
1440        ("--list-metrics", "bca list-metrics [names|descriptions]"),
1441        (
1442            "--preproc",
1443            "bca preproc -o OUT.json  (or --preproc-data on consumers)",
1444        ),
1445    ];
1446
1447    let mut lines: Vec<String> = Vec::new();
1448    let mut saw_legacy_action = false;
1449
1450    for arg in &args {
1451        let head = arg.split('=').next().unwrap_or(arg);
1452        if let Some((_, replacement)) = action_map.iter().find(|(old, _)| *old == head) {
1453            saw_legacy_action = true;
1454            lines.push(format!("  {head}  ->  {replacement}"));
1455        }
1456    }
1457
1458    // -O markdown / --output-format markdown is the canonical legacy form
1459    // for the aggregated report. `markdown` is no longer a valid metrics
1460    // format value, so seeing it here is unambiguous.
1461    let format_value = parse_output_format_value(&args);
1462    if format_value == Some("markdown") {
1463        saw_legacy_action = true;
1464        lines.push(String::from(
1465            "  -O markdown  ->  bca report markdown|html [--top N] [--strip-prefix P]",
1466        ));
1467    } else if let Some(fmt) = format_value
1468        && saw_legacy_action
1469    {
1470        // Only suggest a metrics-format mapping when we already confirmed
1471        // this is a legacy invocation; otherwise `-O json` survives in the
1472        // new CLI and we shouldn't second-guess it.
1473        lines.push(format!("  -O {fmt}  ->  bca metrics -O {fmt}"));
1474    }
1475
1476    if !saw_legacy_action {
1477        return None;
1478    }
1479
1480    let mut hint = String::from(
1481        "note: the CLI was restructured into subcommands. See migration.md for the full mapping.\n",
1482    );
1483    for line in &lines {
1484        hint.push_str(line);
1485        hint.push('\n');
1486    }
1487    hint.push_str("  Run `bca --help` for the new command list.\n");
1488    Some(hint)
1489}
1490
#[cfg(test)]
// NOTE(review): none of the tests currently in this module compare floats or
// perform numeric casts, so `float_cmp` and the `cast_*` allows look unused —
// confirm with clippy before pruning them.
#[allow(
    clippy::float_cmp,
    clippy::cast_precision_loss,
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::similar_names,
    clippy::doc_markdown,
    clippy::needless_raw_string_hashes,
    clippy::too_many_lines
)]
mod tests {
    use super::*;

    /// Builds a `Config` for `action` with every optional field unset, an
    /// empty strip prefix, `SuppressionPolicy::Honor`, and `skip_generated`
    /// switched on. All other boolean switches are off.
    fn test_config(action: Action) -> Config {
        Config {
            action,
            output: None,
            language: None,
            line_start: None,
            line_end: None,
            preproc_lock: None,
            preproc: None,
            count_lock: None,
            markdown_tx: None,
            strip_prefix: String::new(),
            threshold_set: None,
            check_tx: None,
            files_dispatched: None,
            suppression_policy: SuppressionPolicy::Honor,
            warning: false,
            skip_generated: true,
            report_skipped: false,
            exclude_tests: false,
        }
    }

    /// Under `Action::Dump`, `process_dir_path` must leave the map untouched.
    #[test]
    fn process_dir_path_noop_outside_preproc() {
        let cfg = test_config(Action::Dump);
        let mut all_files = HashMap::new();
        process_dir_path(&mut all_files, Path::new("/some/file.cpp"), &cfg);
        assert!(all_files.is_empty());
    }

    /// Under `Action::PreprocProduce`, a path is recorded under its file name.
    #[test]
    fn process_dir_path_inserts_valid_utf8_filename() {
        let cfg = test_config(Action::PreprocProduce);
        let mut all_files = HashMap::new();
        process_dir_path(&mut all_files, Path::new("/some/dir/foo.cpp"), &cfg);
        assert_eq!(all_files.len(), 1);
        assert_eq!(
            all_files["foo.cpp"],
            vec![PathBuf::from("/some/dir/foo.cpp")]
        );
    }

    /// Two paths sharing a file name land in the same entry, in call order.
    #[test]
    fn process_dir_path_groups_duplicate_filenames() {
        let cfg = test_config(Action::PreprocProduce);
        let mut all_files = HashMap::new();
        process_dir_path(&mut all_files, Path::new("/a/foo.cpp"), &cfg);
        process_dir_path(&mut all_files, Path::new("/b/foo.cpp"), &cfg);
        assert_eq!(all_files.len(), 1);
        assert_eq!(
            all_files["foo.cpp"],
            vec![PathBuf::from("/a/foo.cpp"), PathBuf::from("/b/foo.cpp")]
        );
    }

    /// A file name that is not valid UTF-8 is not inserted into the map.
    #[cfg(unix)]
    #[test]
    fn process_dir_path_skips_non_utf8_filename() {
        use std::ffi::OsStr;
        use std::os::unix::ffi::OsStrExt;

        let cfg = test_config(Action::PreprocProduce);
        let mut all_files = HashMap::new();
        // 0xFF 0xFE is never valid UTF-8, so the name cannot become a `str` key.
        let bad_name = OsStr::from_bytes(b"\xff\xfe");
        let path = PathBuf::from("/some/dir").join(bad_name);
        process_dir_path(&mut all_files, &path, &cfg);
        assert!(all_files.is_empty());
    }

    // CLI parsing tests. The shape is now subcommand-driven, so these
    // exercise the shape of the top-level parser, not the legacy flag
    // mutual-exclusion rules.

    /// Parses `args` through `Cli`, prepending a dummy `cli` program name so
    /// callers only spell out the real arguments.
    fn parse(args: &[&str]) -> clap::error::Result<Cli> {
        Cli::try_parse_from(std::iter::once(&"cli").chain(args.iter()))
    }

    /// An empty argument list must not parse successfully.
    #[test]
    fn no_subcommand_prints_help() {
        // arg_required_else_help: no args -> clap prints help and exits.
        // We just check parsing fails (either DisplayHelp or MissingSubcommand).
        assert!(parse(&[]).is_err());
    }

    /// `metrics` with no further arguments parses.
    #[test]
    fn metrics_alone_parses() {
        assert!(parse(&["metrics"]).is_ok());
    }

    /// `metrics -O json` parses.
    #[test]
    fn metrics_with_format_parses() {
        assert!(parse(&["metrics", "-O", "json"]).is_ok());
    }

    // Offender formats (Checkstyle, SARIF, clang-warning,
    // msvc-warning) moved from `bca metrics` to
    // `bca check --output-format` in issue #235. `MetricsFormat` no
    // longer enumerates them, so clap rejects them at parse time on
    // `metrics` and `ops`.

    /// `metrics -O checkstyle` is rejected at parse time.
    #[test]
    fn metrics_rejects_checkstyle_format() {
        assert!(parse(&["metrics", "-O", "checkstyle"]).is_err());
    }

    /// `metrics -O sarif` is rejected at parse time.
    #[test]
    fn metrics_rejects_sarif_format() {
        assert!(parse(&["metrics", "-O", "sarif"]).is_err());
    }

    /// `metrics -O clang-warning` is rejected at parse time.
    #[test]
    fn metrics_rejects_clang_warning_format() {
        assert!(parse(&["metrics", "-O", "clang-warning"]).is_err());
    }

    /// `metrics -O msvc-warning` is rejected at parse time.
    #[test]
    fn metrics_rejects_msvc_warning_format() {
        assert!(parse(&["metrics", "-O", "msvc-warning"]).is_err());
    }

    /// `check` accepts `sarif` through the short `-O` flag.
    #[test]
    fn check_accepts_sarif_output_format() {
        assert!(parse(&["check", "--threshold", "cyclomatic=10", "-O", "sarif"]).is_ok());
    }

    /// `check` accepts `checkstyle` through the long `--output-format` flag.
    #[test]
    fn check_accepts_checkstyle_output_format() {
        assert!(
            parse(&[
                "check",
                "--threshold",
                "cyclomatic=10",
                "--output-format",
                "checkstyle",
            ])
            .is_ok()
        );
    }

    /// `check --output-format json` is rejected at parse time.
    #[test]
    fn check_rejects_per_file_format_as_output_format() {
        // Per-file formats (json, csv, ...) live on `bca metrics`;
        // `bca check` only accepts the four offender formats.
        assert!(
            parse(&[
                "check",
                "--threshold",
                "cyclomatic=10",
                "--output-format",
                "json",
            ])
            .is_err()
        );
    }

    // Note: runtime rejection of `ops -O csv` is covered by
    // `ops_rejects_csv_format_at_runtime` in
    // tests/action_enforcement.rs, which spawns the binary so the
    // dispatcher's die() can be observed.

    /// `metrics -O markdown` is rejected at parse time.
    #[test]
    fn metrics_rejects_markdown_format() {
        // ReportFormat::Markdown is not in MetricsFormat by construction.
        assert!(parse(&["metrics", "-O", "markdown"]).is_err());
    }

    /// `metrics --top 5` is rejected at parse time.
    #[test]
    fn metrics_rejects_top_flag() {
        // --top lives only on `report`.
        assert!(parse(&["metrics", "--top", "5"]).is_err());
    }

    /// `metrics --strip-prefix /x` is rejected at parse time.
    #[test]
    fn metrics_rejects_strip_prefix_flag() {
        assert!(parse(&["metrics", "--strip-prefix", "/x"]).is_err());
    }

    /// `report markdown` parses.
    #[test]
    fn report_markdown_parses() {
        assert!(parse(&["report", "markdown"]).is_ok());
    }

    /// `report html` parses into `Command::Report` with `ReportFormat::Html`.
    #[test]
    fn report_html_parses() {
        // Inspect the parsed variant so a future alias / value-rename
        // that maps `html` to `Markdown` cannot pass this test.
        let cli = parse(&["report", "html"]).expect("`report html` parses");
        match cli.command {
            Command::Report(args) => assert_eq!(args.format, ReportFormat::Html),
            other => panic!("expected Command::Report, got {other:?}"),
        }
    }

    /// `report` without a format argument is rejected.
    #[test]
    fn report_requires_format() {
        assert!(parse(&["report"]).is_err());
    }

    /// `report markdown` accepts `--top` and `--strip-prefix` together.
    #[test]
    fn report_with_top_and_strip_prefix() {
        assert!(parse(&["report", "markdown", "--top", "10", "--strip-prefix", "/x/"]).is_ok());
    }

    /// `report html --top 10 --strip-prefix /x/` carries all three values
    /// through to the parsed arguments.
    #[test]
    fn report_html_with_top_and_strip_prefix() {
        let cli = parse(&["report", "html", "--top", "10", "--strip-prefix", "/x/"])
            .expect("flags parse");
        match cli.command {
            Command::Report(args) => {
                assert_eq!(args.format, ReportFormat::Html);
                assert_eq!(args.top, 10);
                assert_eq!(args.strip_prefix, "/x/");
            }
            other => panic!("expected Command::Report, got {other:?}"),
        }
    }

    /// `report markdown --top 0` is rejected.
    #[test]
    fn report_top_zero_rejected() {
        assert!(parse(&["report", "markdown", "--top", "0"]).is_err());
    }

    /// `report html --top 0` is rejected.
    #[test]
    fn report_html_top_zero_rejected() {
        assert!(parse(&["report", "html", "--top", "0"]).is_err());
    }

    /// `ops -O json` parses.
    #[test]
    fn ops_parses() {
        assert!(parse(&["ops", "-O", "json"]).is_ok());
    }

    /// `dump` with no further arguments parses.
    #[test]
    fn dump_parses() {
        assert!(parse(&["dump"]).is_ok());
    }

    /// `find` is rejected without a node argument and accepted with one.
    #[test]
    fn find_requires_a_node() {
        assert!(parse(&["find"]).is_err());
        assert!(parse(&["find", "call_expression"]).is_ok());
    }

    /// `count` is rejected without a node argument and accepted with one.
    #[test]
    fn count_requires_a_node() {
        assert!(parse(&["count"]).is_err());
        assert!(parse(&["count", "if_statement"]).is_ok());
    }

    /// `functions` with no further arguments parses.
    #[test]
    fn functions_parses() {
        assert!(parse(&["functions"]).is_ok());
    }

    /// `strip-comments` parses both with and without `--in-place`.
    #[test]
    fn strip_comments_parses() {
        assert!(parse(&["strip-comments"]).is_ok());
        assert!(parse(&["strip-comments", "--in-place"]).is_ok());
    }

    /// `preproc` parses both with and without `-o <file>`.
    #[test]
    fn preproc_parses() {
        assert!(parse(&["preproc"]).is_ok());
        assert!(parse(&["preproc", "-o", "/tmp/x.json"]).is_ok());
    }

    /// `list-metrics` parses into `Command::ListMetrics`.
    #[test]
    fn list_metrics_parses() {
        let cli = parse(&["list-metrics"]).expect("parses");
        assert!(matches!(cli.command, Command::ListMetrics(_)));
    }

    /// `list-metrics descriptions` selects `ListMetricsMode::Descriptions`.
    #[test]
    fn list_metrics_with_descriptions() {
        let cli = parse(&["list-metrics", "descriptions"]).expect("parses");
        match cli.command {
            Command::ListMetrics(args) => assert_eq!(args.mode, ListMetricsMode::Descriptions),
            // Bind and print the unexpected variant, matching the `report_*`
            // tests, so a failure says what was actually parsed.
            other => panic!("expected Command::ListMetrics, got {other:?}"),
        }
    }

    /// An unknown `list-metrics` mode is rejected.
    #[test]
    fn list_metrics_invalid_mode_rejected() {
        assert!(parse(&["list-metrics", "bogus"]).is_err());
    }

    /// `--paths` parses whether it appears before or after the subcommand.
    #[test]
    fn global_paths_works_before_or_after_subcommand() {
        assert!(parse(&["--paths", "x", "metrics"]).is_ok());
        assert!(parse(&["metrics", "--paths", "x"]).is_ok());
    }

    /// Converts string slices into the owned `OsString` argv that
    /// `legacy_hint` takes.
    fn os_args(args: &[&str]) -> Vec<OsString> {
        args.iter().map(|s| OsString::from(*s)).collect()
    }

    /// `--metrics -O markdown` yields a hint naming both the old flag and
    /// `report markdown`.
    #[test]
    fn legacy_hint_recognizes_old_metrics() {
        let hint = legacy_hint(os_args(&["cli", "--metrics", "-O", "markdown"])).expect("hint");
        assert!(hint.contains("report markdown"), "{hint}");
        assert!(hint.contains("--metrics"), "{hint}");
    }

    /// `-m --output-format json` yields a hint containing `metrics -O json`.
    #[test]
    fn legacy_hint_recognizes_output_format_json_with_legacy_action() {
        // -O json next to --metrics is unambiguously legacy and should
        // map to `bca metrics -O json`.
        let hint = legacy_hint(os_args(&["cli", "-m", "--output-format", "json"])).expect("hint");
        assert!(hint.contains("metrics -O json"), "{hint}");
    }

    /// `metrics -O json` produces no hint.
    #[test]
    fn legacy_hint_returns_none_for_clean_args() {
        // Valid new-CLI args that just happen to also contain `-O` should
        // not trigger a legacy hint.
        let hint = legacy_hint(os_args(&["cli", "metrics", "-O", "json"]));
        assert!(hint.is_none(), "{hint:?}");
    }

    /// A bare program name produces no hint.
    #[test]
    fn legacy_hint_returns_none_for_no_args() {
        let hint = legacy_hint(os_args(&["cli"]));
        assert!(hint.is_none(), "{hint:?}");
    }

    /// `-O markdown` on its own yields a hint containing `report markdown`.
    #[test]
    fn legacy_hint_recognizes_dash_o_markdown_alone() {
        // -O markdown is unambiguously legacy: markdown is not a
        // MetricsFormat value, so this pattern can only have come from the
        // pre-restructure CLI.
        let hint = legacy_hint(os_args(&["cli", "-O", "markdown"])).expect("hint");
        assert!(hint.contains("report markdown"), "{hint}");
    }

    /// `metrics -O sarif` yields a hint mentioning `bca check` and `sarif`.
    #[test]
    fn legacy_hint_redirects_metrics_offender_format_to_check() {
        // Issue #235: `bca metrics -O sarif` is no longer valid — the
        // offender formats live on `bca check` now. The hint should
        // point at the new home.
        let hint = legacy_hint(os_args(&["cli", "metrics", "-O", "sarif"])).expect("hint");
        assert!(hint.contains("bca check"), "{hint}");
        assert!(hint.contains("sarif"), "{hint}");
    }

    /// The long `--output-format checkstyle` spelling on `metrics` yields a
    /// hint mentioning `bca check` and `checkstyle`.
    #[test]
    fn legacy_hint_redirects_metrics_checkstyle_long_form() {
        let hint = legacy_hint(os_args(&[
            "cli",
            "metrics",
            "--output-format",
            "checkstyle",
        ]))
        .expect("hint");
        assert!(hint.contains("bca check"), "{hint}");
        assert!(hint.contains("checkstyle"), "{hint}");
    }

    /// `ops -O clang-warning` yields a hint mentioning `bca check` and
    /// `clang-warning`.
    #[test]
    fn legacy_hint_redirects_ops_offender_format_to_check() {
        // Same migration story for `bca ops -O <offender>`.
        let hint = legacy_hint(os_args(&["cli", "ops", "-O", "clang-warning"])).expect("hint");
        assert!(hint.contains("bca check"), "{hint}");
        assert!(hint.contains("clang-warning"), "{hint}");
    }

    /// `metrics -O json` stays quiet.
    #[test]
    fn legacy_hint_quiet_for_metrics_with_per_file_format() {
        // `bca metrics -O json` is still valid — no hint should fire.
        let hint = legacy_hint(os_args(&["cli", "metrics", "-O", "json"]));
        assert!(hint.is_none(), "{hint:?}");
    }

    /// `find --dump` stays quiet even though `--dump` is a legacy flag name.
    #[test]
    fn legacy_hint_quiet_when_user_invoked_known_subcommand() {
        // `bca find --dump` — user wants `--dump` as a positional node
        // type, not a legacy flag. Presence of a known subcommand (`find`)
        // suppresses the hint; clap's own "to pass '--dump' as a value,
        // use '-- --dump'" tip remains the right guidance.
        let hint = legacy_hint(os_args(&["cli", "find", "--dump"]));
        assert!(hint.is_none(), "{hint:?}");
    }

    /// `-d --paths .` yields a hint containing `bca dump`.
    #[test]
    fn legacy_hint_recognizes_dash_d() {
        // -d was the short form of --dump in the legacy CLI.
        let hint = legacy_hint(os_args(&["cli", "-d", "--paths", "."])).expect("hint");
        assert!(hint.contains("bca dump"), "{hint}");
    }

    /// Sanity: `Cli::command()` builds without panicking. Catches misconfigured
    /// derive attributes (e.g., conflicting short flags) at test time.
    #[test]
    fn cli_is_well_formed() {
        use clap::CommandFactory;
        Cli::command().debug_assert();
    }

    /// `SUBCOMMANDS` (used by `legacy_hint` to gate the migration message)
    /// must list every variant of the `Command` enum. If a future verb is
    /// added to `Command` and this list is not updated, `legacy_hint` will
    /// false-positive on that verb's arguments.
    #[test]
    fn subcommands_match_command_enum() {
        use clap::CommandFactory;
        use std::collections::HashSet;
        let from_clap: HashSet<String> = Cli::command()
            .get_subcommands()
            .map(|c| c.get_name().to_string())
            .filter(|n| n != "help") // clap auto-generates `help`
            .collect();
        let from_const: HashSet<String> = SUBCOMMANDS.iter().map(|s| (*s).to_string()).collect();
        // Both set differences are rendered so the failure message names the
        // drifted verb(s) in either direction.
        assert_eq!(
            from_clap,
            from_const,
            "SUBCOMMANDS const drifted from Command enum: \
             missing from const = {missing:?}, missing from enum = {extra:?}",
            missing = from_clap.difference(&from_const).collect::<Vec<_>>(),
            extra = from_const.difference(&from_clap).collect::<Vec<_>>(),
        );
    }

    /// With `exclude_pattern_filter`, blank lines and `#` comment lines are
    /// dropped and surviving patterns are trimmed on both sides.
    #[test]
    fn collect_lines_skips_blank_and_comment_lines() {
        // The literal trailing spaces on the last pattern are
        // intentional — they exercise the right-side trim. Keep
        // them; reformatters that strip trailing whitespace on save
        // would weaken the test.
        let input = concat!(
            "# comment at top\n",
            "target/\n",
            "\n",
            "  # indented comment\n",
            "node_modules/\n",
            "\n",
            "\t\n",
            "**/*.snap\n",
            "   tests/repositories/**   \n",
        );
        let got = collect_lines(std::io::Cursor::new(input), "test", exclude_pattern_filter)
            .expect("ASCII fixture decodes cleanly");
        assert_eq!(
            got,
            vec![
                "target/",
                "node_modules/",
                "**/*.snap",
                "tests/repositories/**"
            ],
            "blank lines, comment lines, and surrounding whitespace must all be stripped",
        );
    }

    /// A `#` in the middle of a pattern is kept; a line starting with `#` is
    /// dropped.
    #[test]
    fn collect_lines_treats_hash_inside_pattern_as_literal() {
        let input = "\
a/#weird/path
#full-line-comment
";
        let got = collect_lines(std::io::Cursor::new(input), "test", exclude_pattern_filter)
            .expect("ASCII fixture decodes cleanly");
        assert_eq!(
            got,
            vec!["a/#weird/path"],
            "only lines whose first non-whitespace char is `#` count as comments",
        );
    }

    /// Input made only of blank and comment lines yields an empty result.
    #[test]
    fn collect_lines_returns_empty_for_only_blanks_and_comments() {
        let input = "\n# only comments\n\t  \n# another\n";
        let got = collect_lines(std::io::Cursor::new(input), "test", exclude_pattern_filter)
            .expect("ASCII fixture decodes cleanly");
        assert!(got.is_empty(), "expected empty Vec, got {got:?}");
    }

    /// A BOM at the start of a non-first line is stripped from the pattern.
    #[test]
    fn collect_lines_strips_bom_on_inner_lines_not_just_first() {
        // BOM on the third pattern line. The doc comment for
        // `collect_lines` promises per-line BOM stripping; this
        // pins it. A regression that limited stripping to line 0
        // would leave `\u{feff}**/inner.py` as a literal-U+FEFF
        // glob and the assertion below would fail.
        let input = "**/a.py\n**/b.py\n\u{feff}**/inner.py\n";
        // The fixture contains U+FEFF, so it is UTF-8 rather than ASCII.
        let got = collect_lines(std::io::Cursor::new(input), "test", exclude_pattern_filter)
            .expect("UTF-8 fixture decodes cleanly");
        assert_eq!(
            got,
            vec!["**/a.py", "**/b.py", "**/inner.py"],
            "BOM on an inner line must be stripped, not just on line 0",
        );
    }

    /// A BOM at the end of a line is stripped from the pattern.
    #[test]
    fn collect_lines_strips_trailing_bom() {
        // Trailing BOM (e.g. from a concatenated or
        // half-broken-editor file). `trim_matches` with a
        // BOM-or-whitespace predicate must strip it from the end
        // too — otherwise the pattern carries a literal U+FEFF
        // suffix matching no real path.
        let input = "**/a.py\u{feff}\n";
        // The fixture contains U+FEFF, so it is UTF-8 rather than ASCII.
        let got = collect_lines(std::io::Cursor::new(input), "test", exclude_pattern_filter)
            .expect("UTF-8 fixture decodes cleanly");
        assert_eq!(got, vec!["**/a.py"], "trailing BOM must be stripped");
    }

    /// A BOM followed by spaces and then a pattern yields just the pattern.
    #[test]
    fn collect_lines_handles_bom_then_whitespace_then_pattern() {
        // `\u{feff}  **/foo.rs` — the order-sensitive
        // `trim().trim_start_matches('\u{feff}')` chain used to
        // leave literal leading spaces here because `trim()` stops
        // at the non-whitespace BOM. The fixed implementation
        // treats whitespace and BOM as one character class.
        let input = "\u{feff}  **/foo.rs\n";
        // The fixture contains U+FEFF, so it is UTF-8 rather than ASCII.
        let got = collect_lines(std::io::Cursor::new(input), "test", exclude_pattern_filter)
            .expect("UTF-8 fixture decodes cleanly");
        assert_eq!(
            got,
            vec!["**/foo.rs"],
            "BOM-then-whitespace combinations must strip cleanly with no literal leading spaces",
        );
    }

    /// With `path_pattern_filter`, a line starting with `#` is returned as a
    /// path rather than dropped.
    #[test]
    fn path_pattern_filter_keeps_hash_prefixed_lines_as_literal_paths() {
        // Pins the doc claim on `read_paths_from`: `#` is a path
        // character, not a comment. The test calls
        // `path_pattern_filter` directly so a refactor that
        // accidentally swapped in `exclude_pattern_filter` (the two
        // are adjacent and share the signature) would silently
        // filter `#`-prefixed paths AND fail this test.
        let input = "/tmp/normal/path\n#weird-but-valid-path\n";
        let got = collect_lines(std::io::Cursor::new(input), "test", path_pattern_filter)
            .expect("ASCII fixture decodes cleanly");
        assert_eq!(
            got,
            vec![
                PathBuf::from("/tmp/normal/path"),
                PathBuf::from("#weird-but-valid-path"),
            ],
            "`#`-prefixed lines are literal paths for `--paths-from`, NOT comments",
        );
    }

    /// Calls `path_pattern_filter` directly on an empty line, a `#`-prefixed
    /// line, and an absolute path.
    #[test]
    fn path_pattern_filter_direct_policy_check() {
        // Symmetric to `exclude_pattern_filter_direct_policy_check`
        // — exercises the helper in isolation, outside the
        // `collect_lines` integration path.
        assert_eq!(path_pattern_filter(""), None, "blank line skipped");
        assert_eq!(
            path_pattern_filter("# foo"),
            Some(PathBuf::from("# foo")),
            "`#`-prefix retained as path char (inverse of exclude_pattern_filter)",
        );
        assert_eq!(
            path_pattern_filter("/tmp/x"),
            Some(PathBuf::from("/tmp/x")),
            "absolute path retained",
        );
    }

    /// Calls `exclude_pattern_filter` directly on an empty line, a comment
    /// line, a plain pattern, and a pattern with `#` in the middle.
    #[test]
    fn exclude_pattern_filter_direct_policy_check() {
        // The function exists "so unit tests can exercise the
        // exact policy" per its doc — this is that exercise,
        // outside the `collect_lines` integration path.
        assert_eq!(exclude_pattern_filter(""), None, "blank line skipped");
        assert_eq!(
            exclude_pattern_filter("# top comment"),
            None,
            "`#`-prefix skipped"
        );
        assert_eq!(
            exclude_pattern_filter("**/foo.rs"),
            Some("**/foo.rs".to_owned()),
            "normal pattern retained",
        );
        assert_eq!(
            exclude_pattern_filter("a/#weird/path"),
            Some("a/#weird/path".to_owned()),
            "`#` mid-line is literal, only leading-`#` counts as comment",
        );
    }
}
big_code_analysis_cli/lib.rs

big_code_analysis_cli/
lib.rs