Skip to main content

nyx_scanner/utils/
analysis_options.rs

//! Analysis-engine options: stable, serializable toggles that control which
//! analysis passes run inside the scanner.
//!
//! These are the release-grade knobs that used to live as ad-hoc `NYX_*`
//! environment variables (`NYX_CONSTRAINT`, `NYX_ABSTRACT_INTERP`, `NYX_SYMEX`,
//! `NYX_CROSS_FILE_SYMEX`, `NYX_SYMEX_INTERPROC`, `NYX_CONTEXT_SENSITIVE`,
//! `NYX_PARSE_TIMEOUT_MS`, `NYX_SMT`).  They are now a single struct loaded
//! from the `[analysis.engine]` section of `nyx.conf` and overridable by CLI
//! flags.
//!
//! Engine code calls [`current`] to read the active options.  Before a scan
//! begins, the CLI entry point installs a resolved [`AnalysisOptions`] via
//! [`install`].  Library consumers that never call `install` get
//! [`AnalysisOptions::default`], which is the documented release default.
//!
//! The legacy `NYX_*` variables are still read, but **only** when no runtime
//! has been installed; they serve as a last-resort override for library
//! users.  Running the `nyx` binary always goes through the configured
//! runtime.
19
20use serde::{Deserialize, Serialize};
21use std::sync::RwLock;
22
/// Parse timeout, in milliseconds, applied when nothing else is configured.
/// See [`AnalysisOptions::parse_timeout_ms`].
pub const DEFAULT_PARSE_TIMEOUT_MS: u64 = 10_000;

/// Default cap on how many taint origins a single lattice value may carry.
///
/// The historical cap was `4`; it was raised to `32` so that realistic
/// codebases with wide joins (many param sources, deep helper chains) no
/// longer silently drop origin attribution.  Tunable via
/// [`AnalysisOptions::max_origins`]; see also
/// `src/taint/ssa_transfer/state.rs::effective_max_origins`.
pub const DEFAULT_MAX_ORIGINS: u32 = 32;

/// Smallest `max_origins` value that runtime configuration accepts.
///
/// With a cap of `0` every merge would truncate, making origin tracking
/// impossible.  The test override still accepts `0` through its own path,
/// but runtime config clamps to at least `1` so production scans always
/// carry *some* provenance.
pub const MIN_MAX_ORIGINS: u32 = 1;

/// Default cap on how many abstract heap objects one intra-procedural
/// points-to set may hold.
///
/// `32` is high enough that realistic factory/builder/DI patterns (routinely
/// 10–30 allocation sites aliased into one variable) stay precise, and low
/// enough to keep `HeapState` join/clone cost bounded in the worklist.
/// Tunable via [`AnalysisOptions::max_pointsto`]; see also
/// `src/ssa/heap.rs::effective_max_pointsto`.
pub const DEFAULT_MAX_POINTSTO: u32 = 32;

/// Smallest `max_pointsto` value that runtime configuration accepts.
///
/// A cap of `0` would make points-to tracking impossible, so runtime config
/// clamps to at least `1`.
pub const MIN_MAX_POINTSTO: u32 = 1;
52
53/// Options for the symbolic-execution pipeline.
54#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
55#[serde(default)]
56pub struct SymexOptions {
57    /// Run the symex pass at all.  When `false`, findings get no
58    /// `symbolic` verdict and cross-file body extraction is skipped.
59    pub enabled: bool,
60    /// Persist and consult cross-file SSA bodies so symex can model
61    /// callees defined in other files.
62    pub cross_file: bool,
63    /// Dive into intra-file callee bodies during symex (k ≥ 2 via the
64    /// interprocedural frame stack).
65    pub interprocedural: bool,
66    /// Use the SMT backend when available.  Only meaningful when nyx is
67    /// compiled with the `smt` feature; silently ignored otherwise.
68    pub smt: bool,
69}
70
71impl Default for SymexOptions {
72    fn default() -> Self {
73        Self {
74            enabled: true,
75            cross_file: true,
76            interprocedural: true,
77            smt: true,
78        }
79    }
80}
81
82/// Stable configuration for the analysis engine.
83#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
84#[serde(default)]
85pub struct AnalysisOptions {
86    /// Path-constraint solving.  Prunes infeasible paths from the taint
87    /// worklist and records unsat contexts in findings.
88    pub constraint_solving: bool,
89    /// Abstract interpretation: interval/string/bit domains carried through
90    /// the SSA worklist and used to suppress provably safe sinks.
91    pub abstract_interpretation: bool,
92    /// k=1 context-sensitive inlining for intra-file callees.
93    pub context_sensitive: bool,
94    /// Symbolic-execution pipeline.
95    pub symex: SymexOptions,
96    /// Demand-driven backwards taint analysis from sinks.
97    ///
98    /// When enabled, after forward pass 2 completes, a backwards walk runs
99    /// from each sink's tainted SSA operands to corroborate or rule out the
100    /// forward finding.  Corroborated findings get a `backwards-confirmed`
101    /// note; flows the backward walk proves infeasible get a
102    /// `backwards-infeasible` note that caps confidence.  Defaults off.
103    pub backwards_analysis: bool,
104    /// Per-file tree-sitter parse timeout in milliseconds.  `0` disables the
105    /// cap entirely (not recommended outside of controlled benchmarks).
106    pub parse_timeout_ms: u64,
107    /// Maximum taint origins retained per lattice value.
108    ///
109    /// Controls both [`crate::taint::domain::VarTaint::origins`] and
110    /// the equivalent per-object bound inside the heap state.  When a
111    /// merge would exceed this bound, origins are dropped deterministically
112    /// (sorted by source location) and an
113    /// [`crate::engine_notes::EngineNote::OriginsTruncated`] note is
114    /// recorded on the affected finding.  Raising this reduces the
115    /// chance of silent under-reporting at the cost of slightly wider
116    /// lattice values.  See [`DEFAULT_MAX_ORIGINS`].
117    pub max_origins: u32,
118    /// Maximum abstract heap objects retained per intra-procedural
119    /// points-to set.
120    ///
121    /// When an allocation-site union would exceed this bound, the
122    /// largest-keyed heap objects are dropped and an
123    /// [`crate::engine_notes::EngineNote::PointsToTruncated`] note is
124    /// recorded.  Taint flows that should have reached the dropped
125    /// objects via this aliasing path are lost (under-report).  Raise
126    /// for factory-heavy codebases where truncation is observed; lower
127    /// only when points-to width is a measured bottleneck.  See
128    /// [`DEFAULT_MAX_POINTSTO`].
129    pub max_pointsto: u32,
130}
131
132impl Default for AnalysisOptions {
133    fn default() -> Self {
134        Self {
135            constraint_solving: true,
136            abstract_interpretation: true,
137            context_sensitive: true,
138            symex: SymexOptions::default(),
139            backwards_analysis: false,
140            parse_timeout_ms: DEFAULT_PARSE_TIMEOUT_MS,
141            max_origins: DEFAULT_MAX_ORIGINS,
142            max_pointsto: DEFAULT_MAX_POINTSTO,
143        }
144    }
145}
146
147/// Process-wide installed options.  Accessors fall back to
148/// [`AnalysisOptions::default`] (with env-var overrides for backward
149/// compatibility) until a caller installs a value.
150///
151/// A `RwLock` is used rather than a `OnceLock` so that long-lived callers
152/// (notably `nyx serve`, which resolves the engine profile per scan
153/// request) can replace the installed options between scans via
154/// [`reinstall`].  Within a single scan run, engine toggles must not
155/// change mid-flight, the caller is responsible for that invariant
156/// (`JobManager`'s single-scan guarantee provides it in the server).
157static RUNTIME: RwLock<Option<AnalysisOptions>> = RwLock::new(None);
158
159/// Install the process-wide analysis options, first-wins.  Subsequent
160/// calls are a no-op and return `false`, matching the semantics the CLI
161/// entry point relies on (one install per process lifetime for non-serve
162/// commands).  Servers that resolve options per request should use
163/// [`reinstall`] instead.
164pub fn install(opts: AnalysisOptions) -> bool {
165    let mut guard = RUNTIME.write().expect("analysis options RwLock poisoned");
166    if guard.is_some() {
167        return false;
168    }
169    *guard = Some(opts);
170    true
171}
172
173/// Replace the installed options unconditionally.  Intended for the HTTP
174/// server's scan thread, which re-resolves the engine profile from each
175/// incoming request; `install`'s first-wins semantics would otherwise
176/// pin the first scan's choice for the lifetime of the server.  Callers
177/// must ensure no scan is concurrently reading `current()`, in practice
178/// this means calling `reinstall` before the scan's rayon pool starts.
179pub fn reinstall(opts: AnalysisOptions) {
180    *RUNTIME.write().expect("analysis options RwLock poisoned") = Some(opts);
181}
182
183/// Read the active options.  Returns the installed runtime when present,
184/// otherwise defaults merged with env-var fallbacks (legacy path).
185pub fn current() -> AnalysisOptions {
186    if let Some(rt) = *RUNTIME.read().expect("analysis options RwLock poisoned") {
187        return rt;
188    }
189    // Legacy env-var fallback: applies only when no runtime has been
190    // installed (primarily for library consumers and old tests).  Logged
191    // at debug level so CI/test output isn't spammed.
192    AnalysisOptions {
193        constraint_solving: env_bool_default("NYX_CONSTRAINT", true),
194        abstract_interpretation: env_bool_default("NYX_ABSTRACT_INTERP", true),
195        context_sensitive: env_bool_default("NYX_CONTEXT_SENSITIVE", true),
196        symex: SymexOptions {
197            enabled: env_bool_default("NYX_SYMEX", true),
198            cross_file: env_bool_default("NYX_CROSS_FILE_SYMEX", true),
199            interprocedural: env_bool_default("NYX_SYMEX_INTERPROC", true),
200            smt: env_bool_default("NYX_SMT", true),
201        },
202        backwards_analysis: env_bool_default("NYX_BACKWARDS", false),
203        parse_timeout_ms: env_u64_default("NYX_PARSE_TIMEOUT_MS", DEFAULT_PARSE_TIMEOUT_MS),
204        max_origins: env_u32_default("NYX_MAX_ORIGINS", DEFAULT_MAX_ORIGINS).max(MIN_MAX_ORIGINS),
205        max_pointsto: env_u32_default("NYX_MAX_POINTSTO", DEFAULT_MAX_POINTSTO)
206            .max(MIN_MAX_POINTSTO),
207    }
208}
209
/// Read a boolean toggle from the environment variable `key`.
///
/// Returns `default` when the variable is unset, is not valid Unicode, or
/// is blank (empty or whitespace only).  Otherwise surrounding whitespace
/// is ignored; `0` and `false` (any ASCII case) yield `false`, and every
/// other value yields `true`.
fn env_bool_default(key: &str, default: bool) -> bool {
    match std::env::var(key) {
        Ok(raw) => {
            // Trim first so values such as "0 " or "false\n" are not read as
            // "enabled".
            let value = raw.trim();
            if value.is_empty() {
                // `KEY=` carries no information; treat it like an unset variable.
                default
            } else {
                !(value == "0" || value.eq_ignore_ascii_case("false"))
            }
        }
        Err(_) => default,
    }
}
216
/// Read a `u64` from the environment variable `key`.
///
/// Surrounding whitespace is ignored.  Returns `default` when the variable
/// is unset, is not valid Unicode, or does not parse as a `u64`.
fn env_u64_default(key: &str, default: u64) -> u64 {
    std::env::var(key)
        .ok()
        // Trim so a padded value such as " 5000" is not silently discarded.
        .and_then(|raw| raw.trim().parse::<u64>().ok())
        .unwrap_or(default)
}
223
/// Read a `u32` from the environment variable `key`.
///
/// Surrounding whitespace is ignored.  Returns `default` when the variable
/// is unset, is not valid Unicode, or does not parse as a `u32` (including
/// values too large for `u32`).
fn env_u32_default(key: &str, default: u32) -> u32 {
    std::env::var(key)
        .ok()
        // Trim so a padded value such as " 64" is not silently discarded.
        .and_then(|raw| raw.trim().parse::<u32>().ok())
        .unwrap_or(default)
}
230
#[cfg(test)]
mod tests {
    use super::*;

    /// `Default` must agree with the documented release defaults.
    #[test]
    fn defaults_match_documented() {
        // Destructure exhaustively so a newly added field fails to compile
        // here until it gets an assertion of its own.
        let AnalysisOptions {
            constraint_solving,
            abstract_interpretation,
            context_sensitive,
            symex,
            backwards_analysis,
            parse_timeout_ms,
            max_origins,
            max_pointsto,
        } = AnalysisOptions::default();

        assert!(constraint_solving);
        assert!(abstract_interpretation);
        assert!(context_sensitive);

        let SymexOptions {
            enabled,
            cross_file,
            interprocedural,
            smt,
        } = symex;
        assert!(enabled);
        assert!(cross_file);
        assert!(interprocedural);
        assert!(smt);

        assert!(!backwards_analysis, "backwards analysis defaults off");
        assert_eq!(parse_timeout_ms, DEFAULT_PARSE_TIMEOUT_MS);
        assert_eq!(max_origins, DEFAULT_MAX_ORIGINS);
        assert_eq!(max_pointsto, DEFAULT_MAX_POINTSTO);
    }

    /// Non-default options must survive a TOML serialize/deserialize cycle
    /// unchanged.
    #[test]
    fn toml_roundtrip() {
        let symex = SymexOptions {
            enabled: true,
            cross_file: false,
            interprocedural: true,
            smt: false,
        };
        let original = AnalysisOptions {
            constraint_solving: false,
            abstract_interpretation: true,
            context_sensitive: false,
            symex,
            backwards_analysis: true,
            parse_timeout_ms: 5_000,
            max_origins: 64,
            max_pointsto: 48,
        };

        let encoded = toml::to_string(&original).unwrap();
        let decoded: AnalysisOptions = toml::from_str(&encoded).unwrap();
        assert_eq!(original, decoded);
    }
}