nyx_scanner/utils/analysis_options.rs
1//! Analysis-engine options: stable, serializable toggles that control which
2//! analysis passes run inside the scanner.
3//!
4//! These are the release-grade knobs that used to live as ad-hoc `NYX_*`
5//! environment variables (`NYX_CONSTRAINT`, `NYX_ABSTRACT_INTERP`, `NYX_SYMEX`,
6//! `NYX_CROSS_FILE_SYMEX`, `NYX_SYMEX_INTERPROC`, `NYX_CONTEXT_SENSITIVE`,
7//! `NYX_PARSE_TIMEOUT_MS`, `NYX_SMT`). They are now a single struct loaded
8//! from the `[analysis.engine]` section of `nyx.conf` and overridable by CLI
9//! flags.
10//!
11//! Engine code calls [`current`] to read the active options. Before a scan
12//! begins, the CLI entry point installs a resolved [`AnalysisOptions`] via
13//! [`install`]. Library consumers that never call `install` get
14//! [`AnalysisOptions::default`], which is the documented release default.
15//!
//! The legacy `NYX_*` variables are still read, but **only** when no runtime
//! has been installed; they serve as a last-resort override for library
//! users. Running the `nyx` binary always goes through the configured runtime.
19
20use serde::{Deserialize, Serialize};
21use std::sync::RwLock;
22
/// Parse timeout, in milliseconds, used when nothing else is configured.
/// See [`AnalysisOptions::parse_timeout_ms`].
pub const DEFAULT_PARSE_TIMEOUT_MS: u64 = 10_000;

/// Out-of-the-box cap on how many taint origins one lattice value may carry.
///
/// The historical cap was `4`; it was raised to `32` so that realistic
/// codebases with wide joins (many parameter sources, deep helper chains)
/// stop silently losing origin attribution. Adjustable through
/// [`AnalysisOptions::max_origins`]; see
/// `src/taint/ssa_transfer/state.rs::effective_max_origins`.
pub const DEFAULT_MAX_ORIGINS: u32 = 32;

/// Smallest `max_origins` that runtime configuration will accept.
///
/// With a cap of `0` every merge would truncate, making origin tracking
/// impossible. The test override can still pass `0` through its own path,
/// but runtime config clamps to at least `1` so production scans always
/// keep *some* provenance.
pub const MIN_MAX_ORIGINS: u32 = 1;

/// Out-of-the-box cap on how many abstract heap objects one
/// intra-procedural points-to set may hold.
///
/// `32` is large enough for realistic factory/builder/DI patterns
/// (routinely 10–30 allocation sites aliased into one variable) to remain
/// precise, yet small enough to bound `HeapState` join/clone cost in the
/// worklist. Adjustable through [`AnalysisOptions::max_pointsto`]; see
/// `src/ssa/heap.rs::effective_max_pointsto`.
pub const DEFAULT_MAX_POINTSTO: u32 = 32;

/// Smallest `max_pointsto` that runtime configuration will accept. A cap of
/// `0` would make points-to tracking impossible, so runtime config clamps
/// to at least `1`.
pub const MIN_MAX_POINTSTO: u32 = 1;
52
53/// Options for the symbolic-execution pipeline.
54#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
55#[serde(default)]
56pub struct SymexOptions {
57 /// Run the symex pass at all. When `false`, findings get no
58 /// `symbolic` verdict and cross-file body extraction is skipped.
59 pub enabled: bool,
60 /// Persist and consult cross-file SSA bodies so symex can model
61 /// callees defined in other files.
62 pub cross_file: bool,
63 /// Dive into intra-file callee bodies during symex (k ≥ 2 via the
64 /// interprocedural frame stack).
65 pub interprocedural: bool,
66 /// Use the SMT backend when available. Only meaningful when nyx is
67 /// compiled with the `smt` feature; silently ignored otherwise.
68 pub smt: bool,
69}
70
71impl Default for SymexOptions {
72 fn default() -> Self {
73 Self {
74 enabled: true,
75 cross_file: true,
76 interprocedural: true,
77 smt: true,
78 }
79 }
80}
81
82/// Stable configuration for the analysis engine.
83#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
84#[serde(default)]
85pub struct AnalysisOptions {
86 /// Path-constraint solving. Prunes infeasible paths from the taint
87 /// worklist and records unsat contexts in findings.
88 pub constraint_solving: bool,
89 /// Abstract interpretation: interval/string/bit domains carried through
90 /// the SSA worklist and used to suppress provably safe sinks.
91 pub abstract_interpretation: bool,
92 /// k=1 context-sensitive inlining for intra-file callees.
93 pub context_sensitive: bool,
94 /// Symbolic-execution pipeline.
95 pub symex: SymexOptions,
96 /// Demand-driven backwards taint analysis from sinks.
97 ///
98 /// When enabled, after forward pass 2 completes, a backwards walk runs
99 /// from each sink's tainted SSA operands to corroborate or rule out the
100 /// forward finding. Corroborated findings get a `backwards-confirmed`
101 /// note; flows the backward walk proves infeasible get a
102 /// `backwards-infeasible` note that caps confidence. Defaults off.
103 pub backwards_analysis: bool,
104 /// Per-file tree-sitter parse timeout in milliseconds. `0` disables the
105 /// cap entirely (not recommended outside of controlled benchmarks).
106 pub parse_timeout_ms: u64,
107 /// Maximum taint origins retained per lattice value.
108 ///
109 /// Controls both [`crate::taint::domain::VarTaint::origins`] and
110 /// the equivalent per-object bound inside the heap state. When a
111 /// merge would exceed this bound, origins are dropped deterministically
112 /// (sorted by source location) and an
113 /// [`crate::engine_notes::EngineNote::OriginsTruncated`] note is
114 /// recorded on the affected finding. Raising this reduces the
115 /// chance of silent under-reporting at the cost of slightly wider
116 /// lattice values. See [`DEFAULT_MAX_ORIGINS`].
117 pub max_origins: u32,
118 /// Maximum abstract heap objects retained per intra-procedural
119 /// points-to set.
120 ///
121 /// When an allocation-site union would exceed this bound, the
122 /// largest-keyed heap objects are dropped and an
123 /// [`crate::engine_notes::EngineNote::PointsToTruncated`] note is
124 /// recorded. Taint flows that should have reached the dropped
125 /// objects via this aliasing path are lost (under-report). Raise
126 /// for factory-heavy codebases where truncation is observed; lower
127 /// only when points-to width is a measured bottleneck. See
128 /// [`DEFAULT_MAX_POINTSTO`].
129 pub max_pointsto: u32,
130}
131
132impl Default for AnalysisOptions {
133 fn default() -> Self {
134 Self {
135 constraint_solving: true,
136 abstract_interpretation: true,
137 context_sensitive: true,
138 symex: SymexOptions::default(),
139 backwards_analysis: false,
140 parse_timeout_ms: DEFAULT_PARSE_TIMEOUT_MS,
141 max_origins: DEFAULT_MAX_ORIGINS,
142 max_pointsto: DEFAULT_MAX_POINTSTO,
143 }
144 }
145}
146
147/// Process-wide installed options. Accessors fall back to
148/// [`AnalysisOptions::default`] (with env-var overrides for backward
149/// compatibility) until a caller installs a value.
150///
151/// A `RwLock` is used rather than a `OnceLock` so that long-lived callers
152/// (notably `nyx serve`, which resolves the engine profile per scan
153/// request) can replace the installed options between scans via
154/// [`reinstall`]. Within a single scan run, engine toggles must not
155/// change mid-flight, the caller is responsible for that invariant
156/// (`JobManager`'s single-scan guarantee provides it in the server).
157static RUNTIME: RwLock<Option<AnalysisOptions>> = RwLock::new(None);
158
159/// Install the process-wide analysis options, first-wins. Subsequent
160/// calls are a no-op and return `false`, matching the semantics the CLI
161/// entry point relies on (one install per process lifetime for non-serve
162/// commands). Servers that resolve options per request should use
163/// [`reinstall`] instead.
164pub fn install(opts: AnalysisOptions) -> bool {
165 let mut guard = RUNTIME.write().expect("analysis options RwLock poisoned");
166 if guard.is_some() {
167 return false;
168 }
169 *guard = Some(opts);
170 true
171}
172
173/// Replace the installed options unconditionally. Intended for the HTTP
174/// server's scan thread, which re-resolves the engine profile from each
175/// incoming request; `install`'s first-wins semantics would otherwise
176/// pin the first scan's choice for the lifetime of the server. Callers
177/// must ensure no scan is concurrently reading `current()`, in practice
178/// this means calling `reinstall` before the scan's rayon pool starts.
179pub fn reinstall(opts: AnalysisOptions) {
180 *RUNTIME.write().expect("analysis options RwLock poisoned") = Some(opts);
181}
182
183/// Read the active options. Returns the installed runtime when present,
184/// otherwise defaults merged with env-var fallbacks (legacy path).
185pub fn current() -> AnalysisOptions {
186 if let Some(rt) = *RUNTIME.read().expect("analysis options RwLock poisoned") {
187 return rt;
188 }
189 // Legacy env-var fallback: applies only when no runtime has been
190 // installed (primarily for library consumers and old tests). Logged
191 // at debug level so CI/test output isn't spammed.
192 AnalysisOptions {
193 constraint_solving: env_bool_default("NYX_CONSTRAINT", true),
194 abstract_interpretation: env_bool_default("NYX_ABSTRACT_INTERP", true),
195 context_sensitive: env_bool_default("NYX_CONTEXT_SENSITIVE", true),
196 symex: SymexOptions {
197 enabled: env_bool_default("NYX_SYMEX", true),
198 cross_file: env_bool_default("NYX_CROSS_FILE_SYMEX", true),
199 interprocedural: env_bool_default("NYX_SYMEX_INTERPROC", true),
200 smt: env_bool_default("NYX_SMT", true),
201 },
202 backwards_analysis: env_bool_default("NYX_BACKWARDS", false),
203 parse_timeout_ms: env_u64_default("NYX_PARSE_TIMEOUT_MS", DEFAULT_PARSE_TIMEOUT_MS),
204 max_origins: env_u32_default("NYX_MAX_ORIGINS", DEFAULT_MAX_ORIGINS).max(MIN_MAX_ORIGINS),
205 max_pointsto: env_u32_default("NYX_MAX_POINTSTO", DEFAULT_MAX_POINTSTO)
206 .max(MIN_MAX_POINTSTO),
207 }
208}
209
/// Reads the environment variable `key` as a boolean toggle.
///
/// Returns `default` when the variable cannot be read (unset or not valid
/// Unicode). Otherwise returns `false` if the value, ignoring surrounding
/// whitespace, is `0` or `false` (ASCII case-insensitive), and `true` for
/// any other value.
fn env_bool_default(key: &str, default: bool) -> bool {
    match std::env::var(key) {
        Ok(raw) => {
            // Trim first so values such as "false\n" or " 0" are honoured
            // as "off" instead of being read as an enabling value.
            let value = raw.trim();
            !(value == "0" || value.eq_ignore_ascii_case("false"))
        }
        Err(_) => default,
    }
}
216
/// Reads the environment variable `key` as a `u64`.
///
/// Returns `default` when the variable cannot be read (unset or not valid
/// Unicode) or when its value, ignoring surrounding whitespace, does not
/// parse as a `u64`.
fn env_u64_default(key: &str, default: u64) -> u64 {
    match std::env::var(key) {
        // `parse` rejects surrounding whitespace, so trim first; otherwise a
        // value like "5000\n" would silently fall back to the default.
        Ok(raw) => raw.trim().parse::<u64>().unwrap_or(default),
        Err(_) => default,
    }
}
223
/// Reads the environment variable `key` as a `u32`.
///
/// Returns `default` when the variable cannot be read (unset or not valid
/// Unicode) or when its value, ignoring surrounding whitespace, does not
/// parse as a `u32` (including values too large for `u32`).
fn env_u32_default(key: &str, default: u32) -> u32 {
    match std::env::var(key) {
        // `parse` rejects surrounding whitespace, so trim first; otherwise a
        // value like " 64" would silently fall back to the default.
        Ok(raw) => raw.trim().parse::<u32>().unwrap_or(default),
        Err(_) => default,
    }
}
230
#[cfg(test)]
mod tests {
    use super::*;

    /// `AnalysisOptions::default` must agree with the documented defaults.
    #[test]
    fn defaults_match_documented() {
        let AnalysisOptions {
            constraint_solving,
            abstract_interpretation,
            context_sensitive,
            symex,
            backwards_analysis,
            parse_timeout_ms,
            max_origins,
            max_pointsto,
        } = AnalysisOptions::default();

        assert!(constraint_solving);
        assert!(abstract_interpretation);
        assert!(context_sensitive);
        assert!(symex.enabled);
        assert!(symex.cross_file);
        assert!(symex.interprocedural);
        assert!(symex.smt);
        assert!(!backwards_analysis, "backwards analysis defaults off");
        assert_eq!(parse_timeout_ms, DEFAULT_PARSE_TIMEOUT_MS);
        assert_eq!(max_origins, DEFAULT_MAX_ORIGINS);
        assert_eq!(max_pointsto, DEFAULT_MAX_POINTSTO);
    }

    /// Non-default options must survive a TOML serialize/deserialize cycle.
    #[test]
    fn toml_roundtrip() {
        let symex = SymexOptions {
            enabled: true,
            cross_file: false,
            interprocedural: true,
            smt: false,
        };
        let original = AnalysisOptions {
            constraint_solving: false,
            abstract_interpretation: true,
            context_sensitive: false,
            symex,
            backwards_analysis: true,
            parse_timeout_ms: 5_000,
            max_origins: 64,
            max_pointsto: 48,
        };

        let encoded = toml::to_string(&original).unwrap();
        let decoded: AnalysisOptions = toml::from_str(&encoded).unwrap();
        assert_eq!(original, decoded);
    }
}