nyx-scanner 0.6.1

A multi-language static analysis tool for detecting security vulnerabilities
Documentation
//! Analysis-engine options: stable, serializable toggles that control which
//! analysis passes run inside the scanner.
//!
//! These are the release-grade knobs that used to live as ad-hoc `NYX_*`
//! environment variables (`NYX_CONSTRAINT`, `NYX_ABSTRACT_INTERP`, `NYX_SYMEX`,
//! `NYX_CROSS_FILE_SYMEX`, `NYX_SYMEX_INTERPROC`, `NYX_CONTEXT_SENSITIVE`,
//! `NYX_PARSE_TIMEOUT_MS`, `NYX_SMT`).  They are now a single struct loaded
//! from the `[analysis.engine]` section of `nyx.conf` and overridable by CLI
//! flags.
//!
//! Engine code calls [`current`] to read the active options.  Before a scan
//! begins, the CLI entry point installs a resolved [`AnalysisOptions`] via
//! [`install`].  Library consumers that never call `install` get
//! [`AnalysisOptions::default`], which is the documented release default.
//!
//! The legacy `NYX_*` variables still read **only** when no runtime has been
//! installed and serve as a last-resort override for library users; running
//! the `nyx` binary always goes through the configured runtime.

use serde::{Deserialize, Serialize};
use std::sync::RwLock;

/// Default parse timeout (milliseconds).  See [`AnalysisOptions::parse_timeout_ms`].
pub const DEFAULT_PARSE_TIMEOUT_MS: u64 = 10_000;

/// Default upper bound on the number of taint origins tracked per lattice
/// value.  Raised from the historical `4` to `32` so realistic codebases
/// with wide joins (many param sources, deep helper chains) no longer
/// silently drop origin attribution.  Tunable via
/// [`AnalysisOptions::max_origins`], see
/// `src/taint/ssa_transfer/state.rs::effective_max_origins`.
pub const DEFAULT_MAX_ORIGINS: u32 = 32;

/// Minimum permitted `max_origins` value.  A cap of `0` would make origin
/// tracking impossible (every merge would truncate); the test override
/// still accepts `0` through its own path, but runtime config clamps to
/// at least `1` so production scans always carry *some* provenance.
pub const MIN_MAX_ORIGINS: u32 = 1;

/// Default upper bound on the number of abstract heap objects tracked per
/// intra-procedural points-to set.  Set to `32`, high enough that
/// realistic factory/builder/DI patterns (routine 10–30 allocation sites
/// aliased into one variable) stay precise, low enough to keep
/// `HeapState` join/clone cost bounded in the worklist.  Tunable via
/// [`AnalysisOptions::max_pointsto`], see
/// `src/ssa/heap.rs::effective_max_pointsto`.
pub const DEFAULT_MAX_POINTSTO: u32 = 32;

/// Minimum permitted `max_pointsto` value.  A cap of `0` would make
/// points-to tracking impossible; runtime config clamps to at least `1`.
pub const MIN_MAX_POINTSTO: u32 = 1;

/// Options for the symbolic-execution pipeline.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(default)]
pub struct SymexOptions {
    /// Run the symex pass at all.  When `false`, findings get no
    /// `symbolic` verdict and cross-file body extraction is skipped.
    pub enabled: bool,
    /// Persist and consult cross-file SSA bodies so symex can model
    /// callees defined in other files.
    pub cross_file: bool,
    /// Dive into intra-file callee bodies during symex (k ≥ 2 via the
    /// interprocedural frame stack).
    pub interprocedural: bool,
    /// Use the SMT backend when available.  Only meaningful when nyx is
    /// compiled with the `smt` feature; silently ignored otherwise.
    pub smt: bool,
}

impl Default for SymexOptions {
    fn default() -> Self {
        Self {
            enabled: true,
            cross_file: true,
            interprocedural: true,
            smt: true,
        }
    }
}

/// Stable configuration for the analysis engine.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(default)]
pub struct AnalysisOptions {
    /// Path-constraint solving.  Prunes infeasible paths from the taint
    /// worklist and records unsat contexts in findings.
    pub constraint_solving: bool,
    /// Abstract interpretation: interval/string/bit domains carried through
    /// the SSA worklist and used to suppress provably safe sinks.
    pub abstract_interpretation: bool,
    /// k=1 context-sensitive inlining for intra-file callees.
    pub context_sensitive: bool,
    /// Symbolic-execution pipeline.
    pub symex: SymexOptions,
    /// Demand-driven backwards taint analysis from sinks.
    ///
    /// When enabled, after forward pass 2 completes, a backwards walk runs
    /// from each sink's tainted SSA operands to corroborate or rule out the
    /// forward finding.  Corroborated findings get a `backwards-confirmed`
    /// note; flows the backward walk proves infeasible get a
    /// `backwards-infeasible` note that caps confidence.  Defaults off.
    pub backwards_analysis: bool,
    /// Per-file tree-sitter parse timeout in milliseconds.  `0` disables the
    /// cap entirely (not recommended outside of controlled benchmarks).
    pub parse_timeout_ms: u64,
    /// Maximum taint origins retained per lattice value.
    ///
    /// Controls both [`crate::taint::domain::VarTaint::origins`] and
    /// the equivalent per-object bound inside the heap state.  When a
    /// merge would exceed this bound, origins are dropped deterministically
    /// (sorted by source location) and an
    /// [`crate::engine_notes::EngineNote::OriginsTruncated`] note is
    /// recorded on the affected finding.  Raising this reduces the
    /// chance of silent under-reporting at the cost of slightly wider
    /// lattice values.  See [`DEFAULT_MAX_ORIGINS`].
    pub max_origins: u32,
    /// Maximum abstract heap objects retained per intra-procedural
    /// points-to set.
    ///
    /// When an allocation-site union would exceed this bound, the
    /// largest-keyed heap objects are dropped and an
    /// [`crate::engine_notes::EngineNote::PointsToTruncated`] note is
    /// recorded.  Taint flows that should have reached the dropped
    /// objects via this aliasing path are lost (under-report).  Raise
    /// for factory-heavy codebases where truncation is observed; lower
    /// only when points-to width is a measured bottleneck.  See
    /// [`DEFAULT_MAX_POINTSTO`].
    pub max_pointsto: u32,
}

impl Default for AnalysisOptions {
    fn default() -> Self {
        Self {
            constraint_solving: true,
            abstract_interpretation: true,
            context_sensitive: true,
            symex: SymexOptions::default(),
            backwards_analysis: false,
            parse_timeout_ms: DEFAULT_PARSE_TIMEOUT_MS,
            max_origins: DEFAULT_MAX_ORIGINS,
            max_pointsto: DEFAULT_MAX_POINTSTO,
        }
    }
}

/// Process-wide installed options.  Accessors fall back to
/// [`AnalysisOptions::default`] (with env-var overrides for backward
/// compatibility) until a caller installs a value.
///
/// A `RwLock` is used rather than a `OnceLock` so that long-lived callers
/// (notably `nyx serve`, which resolves the engine profile per scan
/// request) can replace the installed options between scans via
/// [`reinstall`].  Within a single scan run, engine toggles must not
/// change mid-flight, the caller is responsible for that invariant
/// (`JobManager`'s single-scan guarantee provides it in the server).
static RUNTIME: RwLock<Option<AnalysisOptions>> = RwLock::new(None);

/// Install the process-wide analysis options, first-wins.  Subsequent
/// calls are a no-op and return `false`, matching the semantics the CLI
/// entry point relies on (one install per process lifetime for non-serve
/// commands).  Servers that resolve options per request should use
/// [`reinstall`] instead.
pub fn install(opts: AnalysisOptions) -> bool {
    let mut guard = RUNTIME.write().expect("analysis options RwLock poisoned");
    if guard.is_some() {
        return false;
    }
    *guard = Some(opts);
    true
}

/// Replace the installed options unconditionally.  Intended for the HTTP
/// server's scan thread, which re-resolves the engine profile from each
/// incoming request; `install`'s first-wins semantics would otherwise
/// pin the first scan's choice for the lifetime of the server.  Callers
/// must ensure no scan is concurrently reading `current()`, in practice
/// this means calling `reinstall` before the scan's rayon pool starts.
pub fn reinstall(opts: AnalysisOptions) {
    *RUNTIME.write().expect("analysis options RwLock poisoned") = Some(opts);
}

/// Read the active options.  Returns the installed runtime when present,
/// otherwise defaults merged with env-var fallbacks (legacy path).
pub fn current() -> AnalysisOptions {
    if let Some(rt) = *RUNTIME.read().expect("analysis options RwLock poisoned") {
        return rt;
    }
    // Legacy env-var fallback: applies only when no runtime has been
    // installed (primarily for library consumers and old tests).  Logged
    // at debug level so CI/test output isn't spammed.
    AnalysisOptions {
        constraint_solving: env_bool_default("NYX_CONSTRAINT", true),
        abstract_interpretation: env_bool_default("NYX_ABSTRACT_INTERP", true),
        context_sensitive: env_bool_default("NYX_CONTEXT_SENSITIVE", true),
        symex: SymexOptions {
            enabled: env_bool_default("NYX_SYMEX", true),
            cross_file: env_bool_default("NYX_CROSS_FILE_SYMEX", true),
            interprocedural: env_bool_default("NYX_SYMEX_INTERPROC", true),
            smt: env_bool_default("NYX_SMT", true),
        },
        backwards_analysis: env_bool_default("NYX_BACKWARDS", false),
        parse_timeout_ms: env_u64_default("NYX_PARSE_TIMEOUT_MS", DEFAULT_PARSE_TIMEOUT_MS),
        max_origins: env_u32_default("NYX_MAX_ORIGINS", DEFAULT_MAX_ORIGINS).max(MIN_MAX_ORIGINS),
        max_pointsto: env_u32_default("NYX_MAX_POINTSTO", DEFAULT_MAX_POINTSTO)
            .max(MIN_MAX_POINTSTO),
    }
}

fn env_bool_default(key: &str, default: bool) -> bool {
    match std::env::var(key) {
        Ok(v) => !(v == "0" || v.eq_ignore_ascii_case("false")),
        Err(_) => default,
    }
}

fn env_u64_default(key: &str, default: u64) -> u64 {
    match std::env::var(key) {
        Ok(v) => v.parse::<u64>().unwrap_or(default),
        Err(_) => default,
    }
}

fn env_u32_default(key: &str, default: u32) -> u32 {
    match std::env::var(key) {
        Ok(v) => v.parse::<u32>().unwrap_or(default),
        Err(_) => default,
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn defaults_match_documented() {
        let opts = AnalysisOptions::default();
        assert!(opts.constraint_solving);
        assert!(opts.abstract_interpretation);
        assert!(opts.context_sensitive);
        assert!(opts.symex.enabled);
        assert!(opts.symex.cross_file);
        assert!(opts.symex.interprocedural);
        assert!(opts.symex.smt);
        assert!(!opts.backwards_analysis, "backwards analysis defaults off");
        assert_eq!(opts.parse_timeout_ms, DEFAULT_PARSE_TIMEOUT_MS);
        assert_eq!(opts.max_origins, DEFAULT_MAX_ORIGINS);
        assert_eq!(opts.max_pointsto, DEFAULT_MAX_POINTSTO);
    }

    #[test]
    fn toml_roundtrip() {
        let opts = AnalysisOptions {
            constraint_solving: false,
            abstract_interpretation: true,
            context_sensitive: false,
            symex: SymexOptions {
                enabled: true,
                cross_file: false,
                interprocedural: true,
                smt: false,
            },
            backwards_analysis: true,
            parse_timeout_ms: 5_000,
            max_origins: 64,
            max_pointsto: 48,
        };
        let s = toml::to_string(&opts).unwrap();
        let back: AnalysisOptions = toml::from_str(&s).unwrap();
        assert_eq!(opts, back);
    }
}