bomdrift 0.9.9 - Docs.rs

use std::fs;
use std::io::IsTerminal;
use std::path::Path;

use anyhow::{Context, Result};

use crate::cli::{BaselineAction, Cli, Command, DiffArgs, FailOn, InitArgs, OutputFormat};
use crate::diff::ChangeSet;
use crate::enrich::{Enrichment, Severity};
use crate::{
    attestation, baseline, cli, clock, config, diff, enrich, model, parse, plugin, refresh, render,
    vex,
};

/// Process exit code emitted by the diff command when a policy gate trips:
/// `--fail-on`, a churn budget (`--max-added` / `--max-removed` /
/// `--max-version-changed`), or `--fail-on-epss`.
///
/// The value is the same `2`-ish status clap uses for a usage error on parse
/// failure, but clap exits before `run` is called — there's no overlap
/// window where this code is ambiguous for a diff run.
///
/// NOTE(review): `baseline add` also exits with a literal `2` when the
/// advisory id is missing; confirm callers never need to tell those apart.
pub const FAIL_ON_EXIT_CODE: i32 = 2;

/// Top-level dispatcher: routes the parsed command line to the handler for
/// the selected subcommand and returns that handler's result unchanged.
pub fn run(cli: Cli) -> Result<()> {
    let command = cli.command;
    match command {
        Command::Init(init_args) => run_init(init_args),
        Command::Baseline { action } => run_baseline(action),
        Command::RefreshTyposquat(refresh_args) => refresh::run(refresh_args),
        // The diff arguments arrive behind a pointer (presumably boxed);
        // dereference so `run_diff` receives them by value.
        Command::Diff(diff_args) => run_diff(*diff_args),
    }
}

fn run_init(args: InitArgs) -> Result<()> {
    write_scaffold_file(Path::new(".bomdrift.toml"), INIT_CONFIG, args.force)?;
    if !args.config_only {
        write_scaffold_file(
            Path::new(".github/workflows/sbom-diff.yml"),
            INIT_SBOM_WORKFLOW,
            args.force,
        )?;
        write_scaffold_file(
            Path::new(".github/workflows/bomdrift-suppress.yml"),
            INIT_SUPPRESS_WORKFLOW,
            args.force,
        )?;
    }
    eprintln!("bomdrift: initialized repository files");
    Ok(())
}

fn write_scaffold_file(path: &Path, contents: &str, force: bool) -> Result<()> {
    if path.exists() && !force {
        anyhow::bail!(
            "{} already exists; re-run with --force to overwrite",
            path.display()
        );
    }
    if let Some(parent) = path.parent().filter(|p| !p.as_os_str().is_empty()) {
        fs::create_dir_all(parent)
            .with_context(|| format!("creating parent directory: {}", parent.display()))?;
    }
    fs::write(path, contents).with_context(|| format!("writing scaffold file: {}", path.display()))
}

/// Handles `bomdrift baseline add`: resolves the advisory id (and optional
/// reason) from either `--from-comment` or the positional arguments, records
/// the suppression in the baseline file, and reports the outcome on stderr.
///
/// Exits the process directly (status 1 or 2) when no advisory id can be
/// determined; every other failure is returned as an error.
fn run_baseline(action: BaselineAction) -> Result<()> {
    let BaselineAction::Add(args) = action;

    // Reject a malformed --expires before anything is written, so a typo'd
    // date can't produce an entry that only errors on the NEXT diff load.
    if let Some(raw_date) = &args.expires {
        clock::parse_ymd(raw_date)
            .with_context(|| format!("--expires must be YYYY-MM-DD, got {raw_date:?}"))?;
    }

    // --from-comment takes precedence over the positional id/reason (used by
    // the GitLab webhook bridge, Phase L). A body without a directive is a
    // hard failure: a silent no-op would make a mis-configured bridge look
    // like it worked.
    let (id, reason_owned) = match args.from_comment.as_ref() {
        Some(body) => {
            let Some(directive) = baseline::parse_comment_directive(body)? else {
                eprintln!(
                    "bomdrift: --from-comment body contained no `/bomdrift suppress <ID>` directive"
                );
                std::process::exit(1);
            };
            directive
        }
        None => match args.id.clone() {
            Some(positional_id) => (positional_id, args.reason.clone()),
            None => {
                eprintln!(
                    "bomdrift baseline add: missing required ADVISORY_ID (use a positional argument or --from-comment <BODY>)"
                );
                std::process::exit(2);
            }
        },
    };

    let outcome = baseline::add_suppression_full(
        &args.path,
        &id,
        args.expires.as_deref(),
        reason_owned.as_deref(),
    )?;

    let shown_id = id.trim();
    let shown_path = args.path.display();
    match outcome {
        baseline::AddOutcome::AlreadyPresent => {
            eprintln!("bomdrift: '{shown_id}' already present in {shown_path}; no change");
        }
        baseline::AddOutcome::Added => {
            eprintln!("bomdrift: added '{shown_id}' to {shown_path}");
        }
    }
    Ok(())
}

/// Runs the `diff` subcommand end to end.
///
/// Stages, in order: merge config into `args`; load the before/after SBOMs
/// (from a path or a verified attestation); compute the change set; run the
/// enrichers (OSV, EPSS, KEV, typosquat, version-jump, maintainer-age,
/// license policy, registry metadata, plugins); apply the baseline and then
/// VEX filtering; optionally emit a VEX document and calibration rows;
/// render the report to stdout or `--output-file`; finally evaluate the
/// policy gates.
///
/// When any gate trips (`--fail-on`, a churn budget, or `--fail-on-epss`)
/// the process exits with [`FAIL_ON_EXIT_CODE`] instead of returning.
///
/// # Errors
///
/// Returns an error for config problems, unreadable or unparsable SBOMs,
/// attestation failures, an unloadable plugin manifest or baseline, and
/// failed writes of `--emit-vex` / `--output-file`. Network-backed
/// enrichers and VEX loading only warn on stderr and continue.
fn run_diff(mut args: DiffArgs) -> Result<()> {
    config::apply_diff_config(&mut args)?;

    if args.require_attestation
        && (args.before_attestation.is_none() || args.after_attestation.is_none())
    {
        anyhow::bail!(
            "--require-attestation needs both --before-attestation and --after-attestation"
        );
    }

    // Resolve the optional settings to their defaults once, up front.
    let output = args.output.unwrap_or(OutputFormat::Terminal);
    let format = args.format.unwrap_or(cli::InputFormat::Auto);
    let fail_on = args.fail_on.unwrap_or(FailOn::None);

    let format_hint = format.to_sbom_format();
    let before = load_sbom_or_attestation(
        args.before.as_deref(),
        args.before_attestation.as_deref(),
        args.cosign_identity.as_deref(),
        args.cosign_issuer.as_deref(),
        format_hint,
        args.include_file_components,
        "before",
        args.debug_calibration,
        args.debug_calibration_format,
    )?;
    let after = load_sbom_or_attestation(
        args.after.as_deref(),
        args.after_attestation.as_deref(),
        args.cosign_identity.as_deref(),
        args.cosign_issuer.as_deref(),
        format_hint,
        args.include_file_components,
        "after",
        args.debug_calibration,
        args.debug_calibration_format,
    )?;

    let mut cs = diff::diff(&before, &after);

    let mut enrichment = if args.no_osv {
        enrich::Enrichment::default()
    } else {
        // OSV enrichment is best-effort. Network failures must not block the diff
        // from rendering — a PR review is still useful without CVE data.
        match enrich::osv::enrich_cached_with_ttl(&cs, args.no_osv_cache, args.cache_ttl_hours) {
            Ok(e) => e,
            Err(err) => {
                eprintln!("warning: OSV enrichment failed, continuing without it: {err:#}");
                enrich::Enrichment::default()
            }
        }
    };

    // EPSS / KEV enrichment piggyback on OSV's VulnRefs and only have
    // anything to do when there are CVE-aliased advisories. Skip both if
    // there are no vulns. Failures warn and continue, like OSV above.
    if !args.no_epss
        && !enrichment.vulns.is_empty()
        && let Err(err) = enrich::epss::enrich_with_ttl(&mut enrichment, args.cache_ttl_hours)
    {
        eprintln!("warning: EPSS enrichment failed, continuing without it: {err:#}");
    }
    if !args.no_kev
        && !enrichment.vulns.is_empty()
        && let Err(err) = enrich::kev::enrich_with_ttl(&mut enrichment, args.cache_ttl_hours)
    {
        eprintln!("warning: KEV enrichment failed, continuing without it: {err:#}");
    }

    // Typosquat detection is pure-compute (embedded reference list) and always
    // runs, regardless of `--no-osv`. Findings are informational.
    enrichment.typosquats =
        enrich::typosquat::enrich_with_threshold(&cs, args.typosquat_similarity_threshold);

    // Multi-major version-jump detection is pure-compute and also always runs.
    // Findings are informational.
    enrichment.version_jumps = enrich::version_jump::enrich_with(&cs, args.multi_major_delta);

    // Maintainer-age enrichment hits the GitHub REST API; gated behind
    // `--no-maintainer-age` for offline runs. Best-effort: failures warn and
    // continue, mirroring the OSV enricher's contract.
    if !args.no_maintainer_age {
        match enrich::maintainer::enrich_with(
            &cs,
            "https://api.github.com",
            std::time::Duration::from_secs(15),
            args.young_maintainer_days,
        ) {
            Ok(findings) => enrichment.maintainer_age = findings,
            Err(err) => {
                eprintln!(
                    "warning: maintainer-age enrichment failed, continuing without it: {err:#}"
                );
            }
        }
    }

    // License-policy enrichment (Phase D, v0.8). Pure-compute, runs after
    // OSV/EPSS/KEV. Empty allow + empty deny means "no policy" — the
    // enricher returns no violations.
    let license_policy = enrich::license::Policy {
        allow: args.allow_licenses.clone(),
        deny: args.deny_licenses.clone(),
        allow_ambiguous: args.allow_ambiguous_licenses,
        allow_exceptions: args.allow_exception.clone(),
        deny_exceptions: args.deny_exception.clone(),
    };
    enrichment.license_violations = enrich::license::enrich(&cs, &license_policy);

    // Registry-metadata enrichers (Phase K, v0.9). Best-effort — a
    // registry timeout returns Ok with no findings.
    if !args.no_registry {
        let findings =
            enrich::registry::enrich(&cs, args.recently_published_days, args.cache_ttl_hours);
        enrichment.recently_published = findings.recently_published;
        enrichment.deprecated = findings.deprecated;
        enrichment.maintainer_set_changed = findings.maintainer_set_changed;
    }

    // Plugin findings (Phase C, v0.9.6). Run after every built-in
    // enricher so plugins observe the same `cs` view bomdrift renders;
    // before baseline so plugin findings can be baselined too. Plugin
    // failures degrade gracefully — a malformed manifest aborts the
    // run (config error), but plugin runtime failures emit only a
    // BOMDRIFT_DEBUG-gated stderr warning and contribute no findings.
    if !args.plugin.is_empty() {
        let mut manifests = Vec::with_capacity(args.plugin.len());
        for path in &args.plugin {
            let manifest = plugin::load_manifest(path)
                .with_context(|| format!("loading --plugin {}", path.display()))?;
            manifests.push(manifest);
        }
        enrichment.plugin_findings = plugin::run_plugins(&manifests, &cs);
    }

    // Apply the baseline AFTER all enrichers run — suppression operates on
    // the realized finding set, not on intermediate inputs. This keeps the
    // baseline file format stable as new enrichers are added: a new finding
    // type that the baseline doesn't know about simply isn't suppressed.
    // `baseline_entries` stays empty when no baseline is configured; it is
    // kept around for the VEX emission step further down.
    let mut baseline_entries: Vec<crate::baseline::BaselineEntry> = Vec::new();
    if let Some(path) = &args.baseline {
        let baseline = baseline::Baseline::load(path)?;
        // Surface expired suppressions so the reappearing finding is explained.
        for ent in &baseline.expired_entries {
            eprintln!(
                "warning: baseline entry {id}{purl} expired {expires}; finding will surface in this run{reason}",
                id = ent.id,
                purl = ent
                    .purl
                    .as_deref()
                    .map(|p| format!(" ({p})"))
                    .unwrap_or_default(),
                expires = ent.expires.as_deref().unwrap_or(""),
                reason = ent
                    .reason
                    .as_deref()
                    .map(|r| format!(" — was: {r}"))
                    .unwrap_or_default(),
            );
        }
        baseline_entries = baseline.entries.clone();
        baseline::apply(&mut cs, &mut enrichment, &baseline);
    }

    // VEX consumption (Phase G, v0.9). Applied AFTER baseline so VEX
    // statements operate on the post-baseline view — this matches what
    // a downstream tool would see and avoids double-counting "already
    // suppressed" findings in the VEX-suppressed tally. A load failure
    // warns and leaves the findings unfiltered.
    if !args.vex.is_empty() {
        match vex::load(&args.vex) {
            Ok(stmts) => {
                let idx = vex::VexIndex::build(stmts);
                vex::apply(&mut enrichment, &idx);
            }
            Err(err) => {
                eprintln!("warning: VEX load failed, continuing without VEX filtering: {err:#}");
            }
        }
    }

    // VEX emission (Phase H, v0.9). Writes a single OpenVEX 0.2.0 doc
    // to the requested path, covering baseline-suppressed entries and
    // un-suppressed findings. Byte-deterministic when SOURCE_DATE_EPOCH
    // is set.
    if let Some(path) = &args.emit_vex {
        // Author precedence: --vex-author, then --repo-url, then
        // BOMDRIFT_REPO_URL; the first one present is used, and if it is
        // empty the literal "bomdrift" is used instead.
        let author = args
            .vex_author
            .clone()
            .or_else(|| args.repo_url.clone())
            .or_else(|| std::env::var("BOMDRIFT_REPO_URL").ok())
            .filter(|s| !s.is_empty())
            .unwrap_or_else(|| "bomdrift".to_string());
        let default_just = args
            .vex_default_justification
            .clone()
            .unwrap_or_else(|| "vulnerable_code_not_in_execute_path".to_string());
        let opts = vex::EmitOptions {
            author: &author,
            default_justification: &default_just,
            baseline_entries: &baseline_entries,
        };
        let body = vex::emit(&cs, &enrichment, &opts);
        std::fs::write(path, body)
            .with_context(|| format!("writing --emit-vex {}", path.display()))?;
    }

    // Calibration tap. Off by default; opt-in via `--debug-calibration`.
    // Emits one line per finding to stderr so an adopter can run the flag
    // across a representative N PRs and feed the result back as tuning
    // data (issue #5). The output is deliberately plain, with no schema
    // versioning, because the intended consumer is a one-off awk/jq
    // pipeline, not a long-lived integration. `args.debug_calibration_format`
    // selects the row format: pipe-delimited `kind|key|score|threshold`
    // or JSONL. No telemetry: the user owns the bytes and pipes them
    // wherever they want.
    if args.debug_calibration {
        write_calibration_lines(
            &enrichment,
            &mut std::io::stderr(),
            args.debug_calibration_format,
            CalibrationOverrides {
                similarity_threshold: args.typosquat_similarity_threshold,
                young_maintainer_days: args.young_maintainer_days,
                multi_major_delta: args.multi_major_delta,
            },
        );
    }

    // CLI flag wins; otherwise the first of these env vars that is set
    // supplies the default, in order: `BOMDRIFT_REPO_URL`, `CI_PROJECT_URL`
    // (GitLab CI's project URL, the analog of GitHub's
    // `GITHUB_REPOSITORY`-derived URL, so users on the GitLab template
    // don't have to plumb `BOMDRIFT_REPO_URL` themselves),
    // `BITBUCKET_GIT_HTTP_ORIGIN`, then `BUILD_REPOSITORY_URI` (presumably
    // the Bitbucket Pipelines and Azure DevOps equivalents).
    // The empty-string filter runs once, at the end of the chain: an empty
    // value from the first source that is set yields no URL at all rather
    // than falling through to a later source. That matches shell-script
    // callers that pass `BOMDRIFT_REPO_URL=` to clear the value rather
    // than `unset`.
    let repo_url = args
        .repo_url
        .clone()
        .or_else(|| std::env::var("BOMDRIFT_REPO_URL").ok())
        .or_else(|| std::env::var("CI_PROJECT_URL").ok())
        .or_else(|| std::env::var("BITBUCKET_GIT_HTTP_ORIGIN").ok())
        .or_else(|| std::env::var("BUILD_REPOSITORY_URI").ok())
        .filter(|s| !s.is_empty());

    // Platform precedence: explicit `--platform` (or `[diff] platform`
    // in `.bomdrift.toml`, already merged into `args.platform`) wins;
    // otherwise auto-detect from CI env. Detection order: GitLab
    // (`GITLAB_CI=true`), Bitbucket (`BITBUCKET_BUILD_NUMBER`), Azure
    // DevOps (`TF_BUILD`), then default GitHub.
    let platform = args.platform.unwrap_or_else(|| {
        if std::env::var("GITLAB_CI").is_ok_and(|v| v == "true") {
            crate::cli::Platform::GitLab
        } else if std::env::var("BITBUCKET_BUILD_NUMBER").is_ok() {
            crate::cli::Platform::Bitbucket
        } else if std::env::var("TF_BUILD").is_ok() {
            crate::cli::Platform::AzureDevOps
        } else {
            crate::cli::Platform::GitHub
        }
    });
    let md_options = render::markdown::Options {
        summary_only: args.summary_only,
        findings_only: args.findings_only,
        repo_url,
        platform: platform.into(),
    };
    let rendered = match output {
        OutputFormat::Terminal => {
            // ANSI escapes are only safe on a real TTY. Piped/redirected stdout
            // (e.g. captured by a CI step that posts a PR comment) must stay
            // plain markdown so it renders correctly in a comment body.
            if std::io::stdout().is_terminal() {
                render::term::render(&cs, &enrichment)
            } else {
                render::markdown::render_with_options(&cs, &enrichment, md_options)
            }
        }
        OutputFormat::Markdown => {
            render::markdown::render_with_options(&cs, &enrichment, md_options)
        }
        OutputFormat::Json => render::json::render(&cs, &enrichment),
        OutputFormat::Sarif => render::sarif::render(&cs, &enrichment),
    };

    // `--output-file` replaces stdout as the destination; it is not a tee.
    if let Some(path) = &args.output_file {
        std::fs::write(path, &rendered)
            .with_context(|| format!("writing --output-file {}", path.display()))?;
    } else {
        print!("{rendered}");
    }

    // Body must be fully written before we exit-2 — the action's `tee`
    // wrapper still wants the comment posted even when fail-on trips.
    // Each tripped gate logs its own explanation to stderr first.
    let budget_tripped = budget_tripped(
        &cs,
        args.max_added,
        args.max_removed,
        args.max_version_changed,
    );
    if budget_tripped {
        log_budget_trips(
            &cs,
            args.max_added,
            args.max_removed,
            args.max_version_changed,
        );
    }

    let epss_tripped = args
        .fail_on_epss
        .is_some_and(|threshold| any_epss_at_or_above(&enrichment, threshold));
    if epss_tripped {
        // `epss_tripped` implies the option is set; the fallback is never used.
        let threshold = args.fail_on_epss.unwrap_or(0.0);
        eprintln!(
            "bomdrift: policy gate tripped: --fail-on-epss {threshold:.2} (one or more advisories at or above this score)"
        );
    }

    if tripped(&cs, &enrichment, fail_on) || budget_tripped || epss_tripped {
        std::process::exit(FAIL_ON_EXIT_CODE);
    }

    Ok(())
}

/// Decides whether a `(changeset, enrichment)` pair trips the configured
/// fail-on threshold. Kept free of side effects so the policy can be
/// unit-tested without running the full pipeline.
///
/// `FailOn::CriticalCve` looks at real advisory severity and trips on
/// [`Severity::High`] or above. High is included deliberately: GHSA's
/// `CRITICAL` label is relatively rare and many actively-exploited
/// supply-chain advisories ship as `HIGH`, so "critical-cve" is treated as
/// "the actionable bucket" a CI policy author expects to block on.
pub fn tripped(cs: &ChangeSet, e: &Enrichment, threshold: FailOn) -> bool {
    let license_changed = || !cs.license_changed.is_empty();

    match threshold {
        FailOn::None => false,
        FailOn::Any => e.has_findings() || license_changed() || any_kev(e),
        FailOn::Cve => !e.vulns.is_empty(),
        FailOn::CriticalCve => any_advisory_at_or_above(e, Severity::High),
        FailOn::Kev => any_kev(e),
        FailOn::Typosquat => !e.typosquats.is_empty(),
        FailOn::LicenseChange => license_changed(),
        FailOn::LicenseViolation => !e.license_violations.is_empty(),
        FailOn::RecentlyPublished => !e.recently_published.is_empty(),
        FailOn::Deprecated => !e.deprecated.is_empty(),
    }
}

/// Reports whether at least one advisory, on any component, carries the
/// CISA KEV flag.
pub fn any_kev(e: &Enrichment) -> bool {
    e.vulns.values().flatten().any(|advisory| advisory.kev)
}

/// Reports whether at least one advisory has an EPSS score that is greater
/// than or equal to `threshold`. Advisories without a score never match.
pub fn any_epss_at_or_above(e: &Enrichment, threshold: f32) -> bool {
    e.vulns
        .values()
        .flatten()
        .filter_map(|advisory| advisory.epss_score)
        .any(|score| score >= threshold)
}

/// Reports whether any configured churn budget is exceeded. A budget of
/// `None` is never exceeded; a count equal to its limit is still within
/// budget (the comparison is strictly greater-than).
pub fn budget_tripped(
    cs: &ChangeSet,
    max_added: Option<usize>,
    max_removed: Option<usize>,
    max_version_changed: Option<usize>,
) -> bool {
    let over_budget = |count: usize, limit: Option<usize>| limit.is_some_and(|max| count > max);

    over_budget(cs.added.len(), max_added)
        || over_budget(cs.removed.len(), max_removed)
        || over_budget(cs.version_changed.len(), max_version_changed)
}

// Calibration output contract for `write_calibration_lines` (defined just
// below this struct): one line per finding is written to the given writer,
// capturing the score and the value it was compared against. Off by default
// (driven by `--debug-calibration`); when set, the user pipes stderr
// to a file and feeds the result back as tuning data.
//
// Pipe schema: `kind|key|score|threshold` — pipe-delimited rather than
// comma-delimited; the original rationale is that purls may contain commas,
// which would force CSV quoting. `kind` ∈ {`typosquat`, `version-jump`,
// `maintainer-age`, `cve`, `epss`, `kev`, `license`, `recently-published`,
// `deprecated`, `maintainer-set-changed`}. `score` is the underlying value
// the enricher computed (similarity for typosquat, major-version delta for
// version-jump, days-old for maintainer-age and recently-published, the
// severity bucket label for cve); `threshold` is the value the score was
// gated against. CVE rows surface every advisory (no internal threshold) so
// adopters can see the severity distribution before tuning
// `--fail-on critical-cve`.

/// Active overrides for the configurable calibration thresholds. Threaded
/// into [`write_calibration_lines`] so emitted rows reflect the effective
/// threshold the enricher actually used, not the unconditional const default.
#[derive(Debug, Default, Clone, Copy)]
pub(crate) struct CalibrationOverrides {
    /// Typosquat similarity threshold; `None` falls back to
    /// `enrich::typosquat::SIMILARITY_THRESHOLD`.
    pub similarity_threshold: Option<f64>,
    /// Maintainer-age cutoff in days; `None` falls back to
    /// `enrich::maintainer::YOUNG_MAINTAINER_DAYS`.
    pub young_maintainer_days: Option<i64>,
    /// Minimum major-version delta; `None` falls back to
    /// `enrich::version_jump::MIN_MAJOR_DELTA`.
    pub multi_major_delta: Option<u32>,
}

fn write_calibration_lines<W: std::io::Write>(
    e: &Enrichment,
    out: &mut W,
    format: crate::cli::DebugFormat,
    overrides: CalibrationOverrides,
) {
    use crate::enrich::maintainer::YOUNG_MAINTAINER_DAYS;
    use crate::enrich::typosquat::SIMILARITY_THRESHOLD;
    use crate::enrich::version_jump::MIN_MAJOR_DELTA;

    let active_similarity = overrides
        .similarity_threshold
        .unwrap_or(SIMILARITY_THRESHOLD);
    let active_young = overrides
        .young_maintainer_days
        .unwrap_or(YOUNG_MAINTAINER_DAYS);
    let active_major_delta = overrides.multi_major_delta.unwrap_or(MIN_MAJOR_DELTA);

    for f in &e.typosquats {
        write_calibration_row(
            out,
            "typosquat",
            f.component
                .purl
                .as_deref()
                .unwrap_or(f.component.name.as_str()),
            CalibrationScore::Float(f.score),
            CalibrationThreshold::Float(active_similarity),
            format,
        );
    }
    for f in &e.version_jumps {
        write_calibration_row(
            out,
            "version-jump",
            f.after.purl.as_deref().unwrap_or(f.after.name.as_str()),
            CalibrationScore::Int(f.after_major.saturating_sub(f.before_major) as i64),
            CalibrationThreshold::Int(active_major_delta as i64),
            format,
        );
    }
    for f in &e.maintainer_age {
        write_calibration_row(
            out,
            "maintainer-age",
            f.component
                .purl
                .as_deref()
                .unwrap_or(f.component.name.as_str()),
            CalibrationScore::Int(f.days_old),
            CalibrationThreshold::Int(active_young),
            format,
        );
    }
    for (purl, refs) in &e.vulns {
        for vuln in refs {
            // Severity has no numeric score in our model; emit the bucket
            // label as a non-numeric "score" so the row stays well-formed
            // (string in JSONL, plain token in pipe).
            write_calibration_row(
                out,
                "cve",
                &format!("{purl}#{}", vuln.id),
                CalibrationScore::Text(vuln.severity.as_str()),
                CalibrationThreshold::Text("high+"),
                format,
            );
            for cve in vuln.cves() {
                if let Some(score) = vuln.epss_score {
                    write_calibration_row(
                        out,
                        "epss",
                        &format!("{purl}+{cve}"),
                        CalibrationScore::Float(score as f64),
                        CalibrationThreshold::Float(0.5),
                        format,
                    );
                }
                if vuln.kev {
                    write_calibration_row(
                        out,
                        "kev",
                        &format!("{purl}+{cve}"),
                        CalibrationScore::Text("true"),
                        CalibrationThreshold::Text("kev"),
                        format,
                    );
                }
            }
        }
    }
    for v in &e.license_violations {
        // Threshold field carries the precise matched_rule (e.g.
        // "deny: GPL-3.0-only" or "exception:LLVM-exception denied")
        // so calibration consumers see the WHY, not just the kind tag.
        write_calibration_row(
            out,
            "license",
            v.component
                .purl
                .as_deref()
                .unwrap_or(v.component.name.as_str()),
            CalibrationScore::Text(&v.license),
            CalibrationThreshold::Text(&v.matched_rule),
            format,
        );
    }
    for f in &e.recently_published {
        write_calibration_row(
            out,
            "recently-published",
            f.component
                .purl
                .as_deref()
                .unwrap_or(f.component.name.as_str()),
            CalibrationScore::Int(f.days_old),
            CalibrationThreshold::Int(crate::enrich::registry::MIN_PUBLISHED_AGE_DAYS),
            format,
        );
    }
    for f in &e.deprecated {
        write_calibration_row(
            out,
            "deprecated",
            f.component
                .purl
                .as_deref()
                .unwrap_or(f.component.name.as_str()),
            CalibrationScore::Text(f.message.as_deref().unwrap_or("(deprecated)")),
            CalibrationThreshold::Text("any"),
            format,
        );
    }
    for f in &e.maintainer_set_changed {
        write_calibration_row(
            out,
            "maintainer-set-changed",
            f.after.purl.as_deref().unwrap_or(f.after.name.as_str()),
            CalibrationScore::Int((f.added.len() + f.removed.len()) as i64),
            CalibrationThreshold::Int(1),
            format,
        );
    }
}

/// Numeric or symbolic score for a calibration row. Float/Int rendered
/// without quotes in JSONL; Text rendered as a JSON string. In the pipe
/// format, Float is printed with four decimal places and Int/Text verbatim.
pub(crate) enum CalibrationScore<'a> {
    /// Floating-point score (e.g. typosquat similarity, EPSS).
    Float(f64),
    /// Integer score (e.g. a day count or a major-version delta).
    Int(i64),
    /// Non-numeric score borrowed from the finding (e.g. a severity label).
    Text(&'a str),
}

/// Value a calibration row's score was gated against. Mirrors
/// [`CalibrationScore`]: Float/Int are rendered as JSON numbers in JSONL and
/// Text as a JSON string; in the pipe format Float is printed with four
/// decimal places and Int/Text verbatim.
pub(crate) enum CalibrationThreshold<'a> {
    /// Floating-point threshold (e.g. the typosquat similarity cutoff).
    Float(f64),
    /// Integer threshold (e.g. a day count or a major-version delta).
    Int(i64),
    /// Symbolic threshold borrowed from the caller (e.g. a matched rule).
    Text(&'a str),
}

/// Single dispatch point for both pipe and JSONL calibration formats.
/// Adding a new finding kind is one call site, not two — the format
/// branches stay localized to this helper.
pub(crate) fn write_calibration_row<W: std::io::Write>(
    out: &mut W,
    kind: &str,
    key: &str,
    score: CalibrationScore<'_>,
    threshold: CalibrationThreshold<'_>,
    format: crate::cli::DebugFormat,
) {
    match format {
        crate::cli::DebugFormat::Pipe => {
            let score_s = match score {
                CalibrationScore::Float(v) => format!("{v:.4}"),
                CalibrationScore::Int(v) => v.to_string(),
                CalibrationScore::Text(s) => s.to_string(),
            };
            let thr_s = match threshold {
                CalibrationThreshold::Float(v) => format!("{v:.4}"),
                CalibrationThreshold::Int(v) => v.to_string(),
                CalibrationThreshold::Text(s) => s.to_string(),
            };
            let _ = writeln!(out, "{kind}|{key}|{score_s}|{thr_s}");
        }
        crate::cli::DebugFormat::Jsonl => {
            let score_v = match score {
                CalibrationScore::Float(v) => serde_json::Value::from(v),
                CalibrationScore::Int(v) => serde_json::Value::from(v),
                CalibrationScore::Text(s) => serde_json::Value::from(s),
            };
            let thr_v = match threshold {
                CalibrationThreshold::Float(v) => serde_json::Value::from(v),
                CalibrationThreshold::Int(v) => serde_json::Value::from(v),
                CalibrationThreshold::Text(s) => serde_json::Value::from(s),
            };
            let line = serde_json::json!({
                "kind": kind,
                "key": key,
                "score": score_v,
                "threshold": thr_v,
            });
            let _ = writeln!(out, "{line}");
        }
    }
}

/// Prints one stderr line for each churn budget that is exceeded, naming the
/// actual count and the configured limit. Budgets that are unset or not
/// exceeded print nothing.
fn log_budget_trips(
    cs: &ChangeSet,
    max_added: Option<usize>,
    max_removed: Option<usize>,
    max_version_changed: Option<usize>,
) {
    let added = cs.added.len();
    match max_added {
        Some(limit) if added > limit => eprintln!(
            "bomdrift: policy gate tripped: added count {} exceeds --max-added {}",
            added, limit
        ),
        _ => {}
    }

    let removed = cs.removed.len();
    match max_removed {
        Some(limit) if removed > limit => eprintln!(
            "bomdrift: policy gate tripped: removed count {} exceeds --max-removed {}",
            removed, limit
        ),
        _ => {}
    }

    let version_changed = cs.version_changed.len();
    match max_version_changed {
        Some(limit) if version_changed > limit => eprintln!(
            "bomdrift: policy gate tripped: version-changed count {} exceeds --max-version-changed {}",
            version_changed, limit
        ),
        _ => {}
    }
}

/// Reports whether at least one advisory, on any component, has a severity
/// greater than or equal to `threshold`.
fn any_advisory_at_or_above(e: &Enrichment, threshold: Severity) -> bool {
    e.vulns
        .values()
        .any(|advisories| advisories.iter().any(|adv| adv.severity >= threshold))
}

/// Starter `.bomdrift.toml` written by `run_init`: a `[diff]` section with
/// default policy values, plus commented-out churn budgets.
const INIT_CONFIG: &str = r#"# bomdrift repo policy.
# CLI flags override these defaults for one-off runs.

[diff]
fail_on = "critical-cve"
baseline = ".bomdrift/baseline.json"
findings_only = false

# Optional churn budgets. Uncomment to fail the workflow when a PR changes too
# many dependencies at once.
# max_added = 25
# max_removed = 50
# max_version_changed = 10
"#;

/// GitHub Actions workflow written by `run_init` to
/// `.github/workflows/sbom-diff.yml`: runs the bomdrift action on pull
/// requests, pointing it at `.bomdrift.toml`.
const INIT_SBOM_WORKFLOW: &str = r#"name: SBOM diff

on: pull_request

permissions:
  contents: read
  pull-requests: write

jobs:
  diff:
    runs-on: ubuntu-latest
    steps:
      - uses: Metbcy/bomdrift@v1
        with:
          config: .bomdrift.toml
"#;

/// GitHub Actions workflow written by `run_init` to
/// `.github/workflows/bomdrift-suppress.yml`: reacts to newly created
/// pull-request comments whose body starts with `/bomdrift suppress ` and
/// runs the comment-suppress action.
const INIT_SUPPRESS_WORKFLOW: &str = r#"name: bomdrift suppress

on:
  issue_comment:
    types: [created]

permissions:
  contents: write
  pull-requests: write

jobs:
  suppress:
    if: |
      github.event.issue.pull_request &&
      startsWith(github.event.comment.body, '/bomdrift suppress ')
    runs-on: ubuntu-latest
    steps:
      - uses: Metbcy/bomdrift/comment-suppress@v1
"#;

/// Reads the SBOM file at `path` as UTF-8 text and parses it, using the
/// displayed path as the source label in error context.
///
/// # Errors
///
/// Fails when the file cannot be read or its contents cannot be parsed.
fn load_sbom(
    path: &Path,
    format_hint: Option<model::SbomFormat>,
    include_file_components: bool,
) -> Result<model::Sbom> {
    let source_label = path.display().to_string();
    let contents = fs::read_to_string(path)
        .with_context(|| format!("reading SBOM file: {}", path.display()))?;
    parse_sbom_bytes(
        &contents,
        &source_label,
        format_hint,
        include_file_components,
    )
}

/// Parses `raw` as JSON and normalizes it into the internal SBOM model.
/// Unless `include_file_components` is set, file components are filtered
/// out of the result. `source_label` only appears in error context.
///
/// # Errors
///
/// Fails when `raw` is not valid JSON or cannot be normalized as an SBOM.
fn parse_sbom_bytes(
    raw: &str,
    source_label: &str,
    format_hint: Option<model::SbomFormat>,
    include_file_components: bool,
) -> Result<model::Sbom> {
    let json = serde_json::from_str::<serde_json::Value>(raw)
        .with_context(|| format!("parsing JSON in: {source_label}"))?;
    let mut normalized = parse::parse_with_format(json, format_hint)
        .with_context(|| format!("normalizing SBOM from: {source_label}"))?;

    if include_file_components {
        return Ok(normalized);
    }
    parse::filter_file_components(&mut normalized);
    Ok(normalized)
}

/// Resolves one SBOM input, either from a verified OCI attestation or from a
/// file on disk.
///
/// When `oci_ref` is set it takes precedence over `path`: both
/// `cosign_identity` and `cosign_issuer` must be present, the SBOM body comes
/// from `attestation::fetch_verified_sbom`, and — if `debug_calibration` is
/// on — one calibration row is written to stderr in `debug_format`. Without
/// an `oci_ref`, `path` is required and read via `load_sbom`.
///
/// `side` is only interpolated into flag names and error messages.
///
/// # Errors
///
/// Fails when a required cosign argument or the path is missing, when the
/// attestation fetch fails, or when the SBOM cannot be read or parsed.
#[allow(clippy::too_many_arguments)]
fn load_sbom_or_attestation(
    path: Option<&Path>,
    oci_ref: Option<&str>,
    cosign_identity: Option<&str>,
    cosign_issuer: Option<&str>,
    format_hint: Option<model::SbomFormat>,
    include_file_components: bool,
    side: &str,
    debug_calibration: bool,
    debug_format: crate::cli::DebugFormat,
) -> Result<model::Sbom> {
    let Some(oci) = oci_ref else {
        // No attestation requested: the SBOM has to come from disk.
        let path = path.ok_or_else(|| {
            anyhow::anyhow!(
                "internal: {side} requires either a positional path or --{side}-attestation"
            )
        })?;
        return load_sbom(path, format_hint, include_file_components);
    };

    // Identity is checked before issuer so the first missing flag is the one
    // reported.
    let Some(identity) = cosign_identity else {
        return Err(anyhow::anyhow!(
            "--{side}-attestation requires --cosign-identity (regex passed to cosign --certificate-identity-regexp)"
        ));
    };
    let Some(issuer) = cosign_issuer else {
        return Err(anyhow::anyhow!(
            "--{side}-attestation requires --cosign-issuer (URL passed to cosign --certificate-oidc-issuer)"
        ));
    };

    let body = attestation::fetch_verified_sbom(oci, identity, issuer)
        .with_context(|| format!("fetching --{side}-attestation {oci}"))?;

    if debug_calibration {
        // One row per verified attestation, carrying the identity regex that
        // was passed in. Best effort: a failed stderr write must not abort
        // the load.
        let mut stderr = std::io::stderr();
        let _ = write_attestation_calibration(&mut stderr, oci, identity, debug_format);
    }

    let label = format!("attestation:{oci}");
    parse_sbom_bytes(&body, &label, format_hint, include_file_components)
}

fn write_attestation_calibration<W: std::io::Write>(
    out: &mut W,
    oci_ref: &str,
    identity: &str,
    format: crate::cli::DebugFormat,
) -> std::io::Result<()> {
    match format {
        crate::cli::DebugFormat::Pipe => {
            writeln!(out, "attestation|{oci_ref}|verified|{identity}")
        }
        crate::cli::DebugFormat::Jsonl => {
            let row = serde_json::json!({
                "kind": "attestation",
                "key": oci_ref,
                "score": "verified",
                "threshold": identity,
            });
            writeln!(out, "{row}")
        }
    }
}

#[cfg(test)]
mod tests {
    #![allow(
        clippy::unwrap_used,
        clippy::expect_used,
        clippy::panic,
        clippy::todo,
        clippy::unimplemented
    )]
    use super::*;
    use std::collections::HashMap;

    use crate::enrich::typosquat::TyposquatFinding;
    use crate::enrich::version_jump::VersionJumpFinding;
    use crate::enrich::{LicenseViolation, Severity, VulnRef};
    use crate::model::{Component, Ecosystem, Relationship};

    /// Builds a minimal npm component named `name` at version `1.0.0`, with a
    /// matching purl and every optional field left empty.
    fn comp(name: &str) -> Component {
        let purl = format!("pkg:npm/{name}@1.0.0");
        Component {
            name: name.to_owned(),
            version: String::from("1.0.0"),
            ecosystem: Ecosystem::Npm,
            purl: Some(purl),
            licenses: vec![],
            supplier: None,
            hashes: vec![],
            relationship: Relationship::Unknown,
            source_url: None,
            bom_ref: None,
        }
    }

    /// Builds an enrichment holding exactly one advisory (`CVE-2025-1`) at
    /// the given `severity`, keyed by `pkg:npm/foo@1.0.0`, with no EPSS score
    /// and the KEV flag off.
    fn enrichment_with_cve_at(severity: Severity) -> Enrichment {
        let advisory = VulnRef {
            id: "CVE-2025-1".into(),
            severity,
            aliases: vec![],
            epss_score: None,
            kev: false,
        };
        let vulns: HashMap<String, Vec<VulnRef>> =
            HashMap::from([(String::from("pkg:npm/foo@1.0.0"), vec![advisory])]);
        Enrichment {
            vulns,
            ..Default::default()
        }
    }

    fn enrichment_with_cve() -> Enrichment {
        // Severity::None is what every v0.2-era test implicitly assumed — the
        // pre-severity world. Tests that don't care about the bucket use this.
        enrichment_with_cve_at(Severity::None)
    }

    fn enrichment_with_typosquat() -> Enrichment {
        Enrichment {
            typosquats: vec![TyposquatFinding {
                component: comp("plain-crypto-js"),
                closest: "crypto-js".to_string(),
                score: 0.95,
            }],
            ..Default::default()
        }
    }

    fn enrichment_with_version_jump() -> Enrichment {
        Enrichment {
            version_jumps: vec![VersionJumpFinding {
                before: comp("foo"),
                after: comp("foo"),
                before_major: 1,
                after_major: 4,
            }],
            ..Default::default()
        }
    }

    fn cs_with_license_change() -> ChangeSet {
        let mut before = comp("foo");
        before.licenses = vec!["MIT".into()];
        let mut after = comp("foo");
        after.licenses = vec!["GPL-3.0".into()];
        ChangeSet {
            license_changed: vec![(before, after)],
            ..Default::default()
        }
    }

    #[test]
    fn fail_on_none_never_trips() {
        // Nothing changed and nothing was found.
        let no_changes = ChangeSet::default();
        let no_findings = Enrichment::default();
        assert!(!tripped(&no_changes, &no_findings, FailOn::None));

        // A license change plus a CVE still must not trip `FailOn::None`.
        let license_drift = cs_with_license_change();
        let cve = enrichment_with_cve();
        assert!(!tripped(&license_drift, &cve, FailOn::None));
    }

    #[test]
    fn fail_on_cve_trips_only_on_cve_findings() {
        let no_changes = ChangeSet::default();
        let trips_on = |findings: Enrichment| tripped(&no_changes, &findings, FailOn::Cve);

        // A CVE trips the threshold; a typosquat or an empty enrichment does not.
        assert!(trips_on(enrichment_with_cve()));
        assert!(!trips_on(enrichment_with_typosquat()));
        assert!(!trips_on(Enrichment::default()));
    }

    #[test]
    fn fail_on_critical_cve_filters_on_severity_high_or_above() {
        let no_changes = ChangeSet::default();
        let trips_at = |severity: Severity| {
            tripped(
                &no_changes,
                &enrichment_with_cve_at(severity),
                FailOn::CriticalCve,
            )
        };

        // High counts toward the "critical-cve" bucket alongside Critical.
        assert!(trips_at(Severity::Critical));
        assert!(trips_at(Severity::High));
        // Medium and unresolved severities stay below the bar.
        assert!(!trips_at(Severity::Medium));
        assert!(!trips_at(Severity::None));
    }

    #[test]
    fn fail_on_cve_still_trips_on_severity_none_advisories() {
        // `--fail-on cve` is the broad "any advisory" bucket, so an advisory
        // whose severity is unresolved must still trip it; silently
        // suppressing such advisories would be the real footgun.
        let unresolved = enrichment_with_cve_at(Severity::None);
        let no_changes = ChangeSet::default();
        assert!(tripped(&no_changes, &unresolved, FailOn::Cve));
    }

    #[test]
    fn fail_on_typosquat_trips_only_on_typosquat_findings() {
        let no_changes = ChangeSet::default();

        let typosquat = enrichment_with_typosquat();
        assert!(tripped(&no_changes, &typosquat, FailOn::Typosquat));

        // A CVE alone is not a typosquat finding.
        let cve = enrichment_with_cve();
        assert!(!tripped(&no_changes, &cve, FailOn::Typosquat));
    }

    #[test]
    fn fail_on_any_trips_on_each_finding_kind_and_license_changes() {
        let no_changes = ChangeSet::default();
        let no_findings = Enrichment::default();

        // Each enrichment-side finding kind trips `Any` on its own.
        let cve = enrichment_with_cve();
        assert!(tripped(&no_changes, &cve, FailOn::Any));
        let typosquat = enrichment_with_typosquat();
        assert!(tripped(&no_changes, &typosquat, FailOn::Any));
        let version_jump = enrichment_with_version_jump();
        assert!(tripped(&no_changes, &version_jump, FailOn::Any));

        // A license change without a version bump lives on the ChangeSet, not
        // the enrichment, and is enough to trip `Any` by itself.
        let license_drift = cs_with_license_change();
        assert!(tripped(&license_drift, &no_findings, FailOn::Any));

        // With nothing at all, `Any` stays quiet.
        assert!(!tripped(&no_changes, &no_findings, FailOn::Any));
    }

    #[test]
    fn fail_on_license_change_trips_only_on_license_changes() {
        let license_drift = cs_with_license_change();
        let no_findings = Enrichment::default();
        assert!(tripped(&license_drift, &no_findings, FailOn::LicenseChange));

        // Enrichment findings without a license change must not trip it.
        let no_changes = ChangeSet::default();
        let cve = enrichment_with_cve();
        assert!(!tripped(&no_changes, &cve, FailOn::LicenseChange));
        let typosquat = enrichment_with_typosquat();
        assert!(!tripped(&no_changes, &typosquat, FailOn::LicenseChange));
    }

    #[test]
    fn fail_on_typosquat_ignores_license_change() {
        // `license_changed` is ChangeSet data, not an enrichment finding. The
        // typosquat threshold covers typosquat findings only, so license
        // drift must NOT trip it — otherwise `--fail-on=typosquat` users
        // would get unexpected exit-2's on every license correction.
        let license_drift = cs_with_license_change();
        let no_findings = Enrichment::default();
        assert!(!tripped(&license_drift, &no_findings, FailOn::Typosquat));
    }

    #[test]
    fn budget_trips_when_counts_exceed_limits() {
        // Two added, one removed, one version-changed component.
        let added = vec![comp("a"), comp("b")];
        let removed = vec![comp("c")];
        let version_changed = vec![(comp("d"), comp("d"))];
        let cs = ChangeSet {
            added,
            removed,
            version_changed,
            ..Default::default()
        };

        // Each limit trips on its own once the matching count exceeds it.
        assert!(budget_tripped(&cs, Some(1), None, None));
        assert!(budget_tripped(&cs, None, Some(0), None));
        assert!(budget_tripped(&cs, None, None, Some(0)));
        // Counts exactly at the limits are still within budget.
        assert!(!budget_tripped(&cs, Some(2), Some(1), Some(1)));
    }

    #[test]
    fn calibration_pipe_format_matches_v0_7_layout() {
        use crate::cli::DebugFormat;

        let findings = enrichment_with_typosquat();
        let mut out = Vec::new();
        write_calibration_lines(
            &findings,
            &mut out,
            DebugFormat::Pipe,
            CalibrationOverrides::default(),
        );

        let s = String::from_utf8(out).unwrap();
        assert!(s.starts_with("typosquat|"), "got: {s}");
        let separator_count = s.matches('|').count();
        assert_eq!(
            separator_count, 3,
            "pipe row has 4 fields → 3 separators; got: {s}"
        );
    }

    #[test]
    fn calibration_jsonl_format_emits_one_object_per_line() {
        use crate::cli::DebugFormat;

        let findings = enrichment_with_typosquat();
        let mut out = Vec::new();
        write_calibration_lines(
            &findings,
            &mut out,
            DebugFormat::Jsonl,
            CalibrationOverrides::default(),
        );

        // One finding in, exactly one JSON row out.
        let rendered = String::from_utf8(out).unwrap();
        let rows: Vec<&str> = rendered.lines().collect();
        assert_eq!(rows.len(), 1);

        let row: serde_json::Value = serde_json::from_str(rows[0]).expect("valid jsonl");
        assert_eq!(row["kind"], "typosquat");
        assert!(row["score"].is_number(), "numeric score in jsonl");
        assert!(row["threshold"].is_number());
        assert!(row["key"].is_string());
    }

    #[test]
    fn calibration_jsonl_keeps_severity_label_as_string() {
        use crate::cli::DebugFormat;

        let findings = enrichment_with_cve_at(Severity::High);
        let mut out = Vec::new();
        write_calibration_lines(
            &findings,
            &mut out,
            DebugFormat::Jsonl,
            CalibrationOverrides::default(),
        );

        let rendered = String::from_utf8(out).unwrap();
        let row: serde_json::Value = serde_json::from_str(rendered.trim()).unwrap();
        // For CVE rows, score and threshold are string labels, not numbers.
        for (field, expected) in [("kind", "cve"), ("score", "HIGH"), ("threshold", "high+")] {
            assert_eq!(row[field], expected);
        }
    }

    #[test]
    fn fail_on_kev_trips_when_any_advisory_kev_set() {
        let no_changes = ChangeSet::default();

        // Same Medium advisory, but with its KEV flag flipped on.
        let mut kev_listed = enrichment_with_cve_at(Severity::Medium);
        kev_listed
            .vulns
            .values_mut()
            .for_each(|advisories| advisories[0].kev = true);
        assert!(tripped(&no_changes, &kev_listed, FailOn::Kev));

        // Without the flag the threshold stays quiet.
        let not_listed = enrichment_with_cve_at(Severity::Medium);
        assert!(!tripped(&no_changes, &not_listed, FailOn::Kev));
    }

    #[test]
    fn any_epss_threshold_gating() {
        // Give the single advisory an EPSS score of 0.6.
        let mut scored = enrichment_with_cve_at(Severity::Medium);
        scored
            .vulns
            .values_mut()
            .for_each(|advisories| advisories[0].epss_score = Some(0.6));

        // Below and exactly at the score both match; above does not.
        assert!(any_epss_at_or_above(&scored, 0.5));
        assert!(any_epss_at_or_above(&scored, 0.6));
        assert!(!any_epss_at_or_above(&scored, 0.7));
    }

    #[test]
    fn calibration_emits_epss_and_kev_rows_when_set() {
        use crate::cli::DebugFormat;

        // Attach both an EPSS score and the KEV flag to the single advisory.
        let mut findings = enrichment_with_cve_at(Severity::High);
        findings.vulns.values_mut().for_each(|advisories| {
            advisories[0].epss_score = Some(0.87);
            advisories[0].kev = true;
        });

        let mut out = Vec::new();
        write_calibration_lines(
            &findings,
            &mut out,
            DebugFormat::Pipe,
            CalibrationOverrides::default(),
        );

        let s = String::from_utf8(out).unwrap();
        assert!(s.contains("epss|"), "missing epss row: {s}");
        assert!(s.contains("kev|"), "missing kev row: {s}");
    }

    #[test]
    fn calibration_license_row_includes_exception_detail() {
        use crate::cli::DebugFormat;
        use crate::enrich::LicenseViolationKind;

        // v0.9.5: when a license violation is driven by an exception, its
        // `matched_rule` must reach the calibration tap so operators tuning
        // policy can see why the row fired.
        let component = Component {
            ecosystem: Ecosystem::Cargo,
            purl: Some("pkg:cargo/llvm-sys@1.0.0".into()),
            licenses: vec!["Apache-2.0 WITH LLVM-exception".into()],
            ..comp("llvm-sys")
        };
        let violation = LicenseViolation {
            component,
            license: "Apache-2.0 WITH LLVM-exception".into(),
            matched_rule: "exception:LLVM-exception denied".into(),
            kind: LicenseViolationKind::Deny,
        };
        let findings = Enrichment {
            license_violations: vec![violation],
            ..Default::default()
        };

        let mut out = Vec::new();
        write_calibration_lines(
            &findings,
            &mut out,
            DebugFormat::Pipe,
            CalibrationOverrides::default(),
        );

        let s = String::from_utf8(out).unwrap();
        assert!(
            s.contains("license|"),
            "missing license calibration row: {s}"
        );
        assert!(
            s.contains("exception:LLVM-exception denied"),
            "row must surface matched_rule with exception detail: {s}"
        );
    }

    #[test]
    fn fail_on_license_violation_trips() {
        use crate::enrich::LicenseViolationKind;

        // A single deny-rule violation on `foo`.
        let violation = LicenseViolation {
            component: comp("foo"),
            license: "GPL-3.0-only".into(),
            matched_rule: "deny: GPL-3.0-only".into(),
            kind: LicenseViolationKind::Deny,
        };
        let with_violation = Enrichment {
            license_violations: vec![violation],
            ..Default::default()
        };
        let no_changes = ChangeSet::default();

        // The violation trips both its dedicated threshold and `Any`.
        assert!(tripped(&no_changes, &with_violation, FailOn::LicenseViolation));
        assert!(tripped(&no_changes, &with_violation, FailOn::Any));

        // No violation, no trip.
        let no_findings = Enrichment::default();
        assert!(!tripped(&no_changes, &no_findings, FailOn::LicenseViolation));
    }
}