#![allow(clippy::collapsible_if, clippy::type_complexity)]
pub(crate) use crate::ast::{
analyse_file_fused, extract_all_summaries_from_bytes, run_rules_on_bytes, run_rules_on_file,
};
use crate::callgraph::{CallGraph, FileBatch};
use crate::cli::{IndexMode, OutputFormat};
use crate::database::index::{Indexer, IssueRow};
use crate::errors::NyxResult;
use crate::patterns::{FindingCategory, Severity, SeverityFilter};
use crate::server::progress::{ScanMetrics, ScanProgress, ScanStage};
use crate::server::scan_log::ScanLogCollector;
use crate::summary::{self, GlobalSummaries};
use crate::utils::config::Config;
use crate::utils::project::get_project_info;
use crate::walk::spawn_file_walker;
use console::style;
use dashmap::DashMap;
use indicatif::{ProgressBar, ProgressStyle};
use r2d2::Pool;
use r2d2_sqlite::SqliteConnectionManager;
use rayon::prelude::*;
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
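/// Build a progress bar with the scanner's standard template, or a hidden bar
/// when progress display is disabled (quiet or machine-readable output).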
fn make_progress_bar(len: u64, msg: &str, show: bool) -> ProgressBar {
if !show {
return ProgressBar::hidden();
}
let pb = ProgressBar::new(len);
pb.set_style(
ProgressStyle::with_template(
"{spinner:.green} {msg} [{bar:30.cyan/blue}] {pos}/{len} ({eta})",
)
.unwrap()
.progress_chars("##-"),
);
pb.set_message(msg.to_string());
pb
}
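/// Record a persistence error, recovering the mutex if it was poisoned.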
fn record_persist_error(errors: &Arc<Mutex<Vec<String>>>, message: String) {
let mut guard = errors.lock().unwrap_or_else(|p| p.into_inner());
guard.push(message);
}
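/// Run `f`, optionally converting panics into `NyxError`s. With recovery
/// enabled, a panic in per-file analysis is downcast to a message, logged,
/// and returned as an error so a single bad file cannot abort the scan.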
fn recover_or_propagate<T>(
enabled: bool,
path: &Path,
logs: Option<&Arc<ScanLogCollector>>,
f: impl FnOnce() -> NyxResult<T>,
) -> NyxResult<T> {
if !enabled {
return f();
}
match std::panic::catch_unwind(std::panic::AssertUnwindSafe(f)) {
Ok(r) => r,
Err(panic) => {
let msg = panic
.downcast_ref::<&str>()
.copied()
.map(str::to_owned)
.or_else(|| panic.downcast_ref::<String>().cloned())
.unwrap_or_else(|| "<non-string panic>".to_string());
tracing::warn!(
path = %path.display(),
panic = %msg,
"analysis panicked; continuing"
);
if let Some(l) = logs {
l.warn(
format!("Analysis panicked: {msg}"),
Some(path.display().to_string()),
Some(msg.clone()),
);
}
Err(crate::errors::NyxError::Msg(format!(
"analysis panicked: {msg}"
)))
}
}
}
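/// Collapse accumulated persistence errors into one `NyxError`, quoting at
/// most the first three messages.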
fn fail_if_persist_errors(stage: &str, errors: Arc<Mutex<Vec<String>>>) -> NyxResult<()> {
let errors = errors.lock().unwrap_or_else(|p| p.into_inner());
if errors.is_empty() {
return Ok(());
}
let mut details = errors.iter().take(3).cloned().collect::<Vec<_>>();
if errors.len() > 3 {
details.push(format!("... and {} more", errors.len() - 3));
}
Err(crate::errors::NyxError::Msg(format!(
"{stage} failed to persist scan state: {}",
details.join("; ")
)))
}
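/// A single finding as emitted to the console, JSON, and SARIF back ends.
/// Optional and empty fields are skipped during serialization to keep
/// machine output compact.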
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Diag {
pub path: String,
pub line: usize,
pub col: usize,
pub severity: Severity,
pub id: String,
pub category: FindingCategory,
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
pub path_validated: bool,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub guard_kind: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub message: Option<String>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub labels: Vec<(String, String)>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub confidence: Option<crate::evidence::Confidence>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub evidence: Option<crate::evidence::Evidence>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub rank_score: Option<f64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub rank_reason: Option<Vec<(String, String)>>,
#[serde(default, skip_serializing_if = "is_false")]
pub suppressed: bool,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub suppression: Option<crate::suppress::SuppressionMeta>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub rollup: Option<RollupData>,
#[serde(default, skip_serializing_if = "String::is_empty")]
pub finding_id: String,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub alternative_finding_ids: Vec<String>,
}
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct RollupData {
pub count: usize,
pub occurrences: Vec<Location>,
}
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Location {
pub line: usize,
pub col: usize,
}
pub struct SuppressionStats {
pub quality_dropped: usize,
pub low_budget_dropped: usize,
pub max_results_dropped: usize,
pub include_quality: bool,
#[allow(dead_code)]
pub show_all: bool,
pub max_low: u32,
pub max_low_per_file: u32,
pub max_low_per_rule: u32,
}
impl SuppressionStats {
pub fn total_suppressed(&self) -> usize {
self.quality_dropped + self.low_budget_dropped + self.max_results_dropped
}
}
fn is_false(b: &bool) -> bool {
!*b
}
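/// Return a config clone with framework detection filled in, or `None` when
/// the caller's config already carries a framework context.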
pub(crate) fn ensure_framework_ctx(root: &Path, cfg: &Config) -> Option<Config> {
if cfg.framework_ctx.is_some() {
return None;
}
let mut c = cfg.clone();
c.framework_ctx = Some(crate::utils::detect_frameworks(root));
Some(c)
}
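/// True for C/C++ sources (`.c`/`.cpp`, case-insensitive), which are
/// preview tier and trigger a one-time warning in `handle`.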
pub(crate) fn is_preview_tier_path(path: &Path) -> bool {
matches!(
path.extension()
.and_then(|e| e.to_str())
.map(str::to_ascii_lowercase)
.as_deref(),
Some("c" | "cpp")
)
}
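/// Top-level scan entry point for the CLI: canonicalizes the path, runs
/// either a pure filesystem scan or an indexed scan, filters and prioritizes
/// findings, emits them in the requested format, and exits with status 1
/// when an unsuppressed finding meets the `fail_on` threshold.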
#[allow(clippy::too_many_arguments)]
pub fn handle(
path: &str,
index_mode: IndexMode,
format: OutputFormat,
severity_filter: Option<SeverityFilter>,
fail_on: Option<Severity>,
show_suppressed: bool,
show_instances: Option<&str>,
database_dir: &Path,
config: &Config,
) -> NyxResult<()> {
let scan_path = Path::new(path).canonicalize()?;
let (project_name, db_path) = get_project_info(&scan_path, database_dir)?;
let config = &{
let mut cfg = config.clone();
if cfg.framework_ctx.is_none() {
let fw = crate::utils::detect_frameworks(&scan_path);
if !fw.frameworks.is_empty() {
tracing::info!(frameworks = ?fw.frameworks, "detected frameworks");
}
cfg.framework_ctx = Some(fw);
}
cfg
};
let is_machine = format == OutputFormat::Json || format == OutputFormat::Sarif;
let suppress_status = config.output.quiet || is_machine;
if !suppress_status {
eprintln!(
"{} {}...\n",
style("Checking").green().bold(),
&project_name
);
}
let show_progress = !is_machine && !config.output.quiet;
let preview_tier_seen = Arc::new(AtomicBool::new(false));
let mut diags: Vec<Diag> = if index_mode == IndexMode::Off {
scan_filesystem_with_observer(
&scan_path,
config,
show_progress,
None,
None,
None,
Some(&preview_tier_seen),
)?
} else {
if index_mode == IndexMode::Rebuild || !db_path.exists() {
tracing::debug!("Scanning filesystem index filesystem");
crate::commands::index::build_index(
&project_name,
&scan_path,
&db_path,
config,
show_progress,
)?;
}
let pool = Indexer::init(&db_path)?;
if config.database.vacuum_on_startup {
let idx = Indexer::from_pool(&project_name, &pool)?;
idx.vacuum()?;
}
scan_with_index_parallel_observer(
&project_name,
pool,
config,
show_progress,
&scan_path,
None,
None,
None,
Some(&preview_tier_seen),
)?
};
if !suppress_status && preview_tier_seen.load(Ordering::Relaxed) {
eprintln!(
"{}: Nyx is in Preview for C/C++. Pointer aliasing, function pointers,",
style("warning").yellow().bold()
);
eprintln!("array-element taint, and STL container flows are not modeled. Findings are");
eprintln!("a starting point for review; pair with clang-tidy or Clang Static Analyzer");
eprintln!("for production gates.\n");
}
tracing::debug!("Found {:?} issues (pre-filter).", diags.len());
if let Some(ref filter) = severity_filter {
diags.retain(|d| filter.matches(d.severity));
}
if let Some(min) = config.output.min_score {
let threshold = f64::from(min);
diags.retain(|d| d.rank_score.unwrap_or(0.0) >= threshold);
}
if let Some(min_conf) = config.output.min_confidence {
diags.retain(|d| d.confidence.is_none_or(|c| c >= min_conf));
}
if config.output.require_converged {
retain_converged_findings(&mut diags);
}
apply_suppressions(&mut diags);
if !show_suppressed {
diags.retain(|d| !d.suppressed);
}
let stats = prioritize(&mut diags, &config.output, show_instances);
tracing::debug!("Emitting {:?} issues (post-filter).", diags.len());
match format {
OutputFormat::Json => {
let json = serde_json::to_string(&diags)
.map_err(|e| crate::errors::NyxError::Msg(e.to_string()))?;
println!("{json}");
}
OutputFormat::Sarif => {
let sarif = crate::output::build_sarif(&diags, &scan_path);
let json = serde_json::to_string_pretty(&sarif)
.map_err(|e| crate::errors::NyxError::Msg(e.to_string()))?;
println!("{json}");
}
OutputFormat::Console => {
tracing::debug!("Printing to console");
print!(
"{}",
crate::fmt::render_console(&diags, &project_name, Some(&stats))
);
}
}
if crate::convergence_telemetry::is_enabled() {
let path = crate::convergence_telemetry::default_path(&scan_path);
match crate::convergence_telemetry::write_jsonl(&path) {
Ok(n) if n > 0 => {
tracing::info!(
records = n,
path = %path.display(),
"wrote convergence telemetry sidecar"
);
}
Ok(_) => {}
Err(e) => {
tracing::warn!(
error = %e,
path = %path.display(),
"failed to write convergence telemetry sidecar"
);
}
}
}
if let Some(threshold) = fail_on {
let breached = diags
.iter()
.any(|d| !d.suppressed && d.severity <= threshold);
if breached {
std::process::exit(1);
}
}
Ok(())
}
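/// Shared post-scan pipeline: dedup taint flows, backfill confidence,
/// optionally apply attack-surface ranking, and enforce `max_results`.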
pub(crate) fn post_process_diags(diags: &mut Vec<Diag>, cfg: &Config) {
deduplicate_taint_flows(diags);
for d in diags.iter_mut() {
if d.confidence.is_none() {
d.confidence = Some(crate::evidence::compute_confidence(d));
}
}
if cfg.output.attack_surface_ranking {
crate::rank::rank_diags(diags);
}
if let Some(max) = cfg.output.max_results {
diags.truncate(max as usize);
}
}
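/// Keep findings with no loss-direction engine notes, or whose worst
/// direction is `UnderReport`/`Informational`; anything else is dropped.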
pub fn retain_converged_findings(diags: &mut Vec<Diag>) {
use crate::engine_notes::{LossDirection, worst_direction};
diags.retain(|d| {
d.evidence
.as_ref()
.and_then(|ev| worst_direction(&ev.engine_notes))
.is_none_or(|dir| {
matches!(
dir,
LossDirection::UnderReport | LossDirection::Informational
)
})
});
}
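/// Collapse taint-flow findings that share (path, line, severity, sink
/// capabilities), keeping the flow with the tightest evidence: same-function
/// flows win, then fewer hops, then the source closest to the sink. See
/// `dedup_taint_flow_tests` for worked cases.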
pub(crate) fn deduplicate_taint_flows(diags: &mut Vec<Diag>) {
use std::collections::HashMap;
const TAINT_BASE: &str = "taint-unsanitised-flow";
fn is_taint_flow(id: &str) -> bool {
id.starts_with(TAINT_BASE)
}
fn sink_cap_bits(d: &Diag) -> u16 {
d.evidence.as_ref().map(|e| e.sink_caps).unwrap_or(0)
}
let mut groups: HashMap<(String, usize, Severity, u16), Vec<usize>> = HashMap::new();
for (i, d) in diags.iter().enumerate() {
if is_taint_flow(&d.id) {
groups
.entry((d.path.clone(), d.line, d.severity, sink_cap_bits(d)))
.or_default()
.push(i);
}
}
fn score(d: &Diag) -> (u32, u32, usize, u32, u32) {
let ev = d.evidence.as_ref();
let src = ev.and_then(|e| e.source.as_ref());
let src_line = src.map(|s| s.line).unwrap_or(u32::MAX);
let src_col = src.map(|s| s.col).unwrap_or(u32::MAX);
let same_function_flag: u32 = ev
.and_then(|e| {
let steps = &e.flow_steps;
if steps.is_empty() {
return None;
}
let first = &steps[0];
let last = &steps[steps.len() - 1];
match (first.function.as_ref(), last.function.as_ref()) {
(Some(a), Some(b)) => Some(if a == b { 0u32 } else { 2u32 }),
_ => Some(1u32),
}
})
.unwrap_or(1u32);
let sink_line = d.line as u32;
let source_distance = if src_line == u32::MAX {
usize::MAX
} else {
(sink_line as i64 - src_line as i64).unsigned_abs() as usize
};
let hop_count = ev
.and_then(|e| e.hop_count)
.map(|h| h as u32)
.unwrap_or(u32::MAX);
(
same_function_flag,
hop_count,
source_distance,
src_line,
src_col,
)
}
let mut drop: Vec<usize> = Vec::new();
for indices in groups.values() {
if indices.len() <= 1 {
continue;
}
let mut scored: Vec<(usize, _)> = indices.iter().map(|&i| (i, score(&diags[i]))).collect();
scored.sort_by_key(|a| a.1);
for &(i, _) in scored.iter().skip(1) {
drop.push(i);
}
}
if drop.is_empty() {
return;
}
drop.sort_unstable();
drop.dedup();
for &i in drop.iter().rev() {
diags.remove(i);
}
}
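/// Build the cross-file call graph from pass-1 summaries and run its
/// analysis (including SCC detection), logging basic size metrics.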
fn build_and_analyse_call_graph(
global_summaries: &GlobalSummaries,
) -> (
crate::callgraph::CallGraph,
crate::callgraph::CallGraphAnalysis,
) {
let _span = tracing::info_span!("build_call_graph").entered();
let call_graph = crate::callgraph::build_call_graph(global_summaries, &[]);
let cg_analysis = crate::callgraph::analyse(&call_graph);
tracing::info!(
nodes = call_graph.graph.node_count(),
edges = call_graph.graph.edge_count(),
unresolved_not_found = call_graph.unresolved_not_found.len(),
unresolved_ambiguous = call_graph.unresolved_ambiguous.len(),
sccs = cg_analysis.sccs.len(),
"call graph built"
);
(call_graph, cg_analysis)
}
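/// Debug-log each distinct unresolved callee name once per category.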
fn log_unresolved_callees(call_graph: &CallGraph) {
use std::collections::HashSet;
let mut seen_not_found: HashSet<&str> = HashSet::new();
for u in &call_graph.unresolved_not_found {
if seen_not_found.insert(&u.callee_name) {
tracing::debug!(caller=%u.caller.name, callee=%u.callee_name, "unresolved callee: not found");
}
}
let mut seen_ambiguous: HashSet<&str> = HashSet::new();
for a in &call_graph.unresolved_ambiguous {
if seen_ambiguous.insert(&a.callee_name) {
tracing::debug!(caller=%a.caller.name, callee=%a.callee_name, candidates=a.candidates.len(), "unresolved callee: ambiguous");
}
}
}
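/// Prefixes for the notes attached to findings from SCC batches that hit the
/// fixed-point iteration cap.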
pub const SCC_UNCONVERGED_NOTE_PREFIX: &str = "scc_unconverged:";
pub const SCC_UNCONVERGED_CROSS_FILE_NOTE_PREFIX: &str = "scc_unconverged:cross-file ";
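/// Diff two capability-summary snapshots, returning every function key that
/// was added, removed, or changed.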
fn changed_cap_keys_of(
before: &HashMap<crate::symbol::FuncKey, (u16, u16, u16, Vec<usize>)>,
after: &HashMap<crate::symbol::FuncKey, (u16, u16, u16, Vec<usize>)>,
) -> HashSet<crate::symbol::FuncKey> {
let mut changed = HashSet::new();
for (k, v_after) in after {
match before.get(k) {
Some(v_before) if v_before == v_after => {}
_ => {
changed.insert(k.clone());
}
}
}
for k in before.keys() {
if !after.contains_key(k) {
changed.insert(k.clone());
}
}
changed
}
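/// SSA-summary analogue of `changed_cap_keys_of`.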
fn changed_ssa_keys_of(
before: &HashMap<crate::symbol::FuncKey, crate::summary::ssa_summary::SsaFuncSummary>,
after: &HashMap<crate::symbol::FuncKey, crate::summary::ssa_summary::SsaFuncSummary>,
) -> HashSet<crate::symbol::FuncKey> {
let mut changed = HashSet::new();
for (k, v_after) in after {
match before.get(k) {
Some(v_before) if v_before == v_after => {}
_ => {
changed.insert(k.clone());
}
}
}
for k in before.keys() {
if !after.contains_key(k) {
changed.insert(k.clone());
}
}
changed
}
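/// Clamp confidence to at most `Low` (values already lower are kept) and
/// attach an unconverged-SCC note plus a structured engine note to every
/// finding from a batch that hit the fixed-point cap.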
fn tag_unconverged_findings(
diags: &mut [Diag],
iterations: usize,
cap: usize,
cross_file: bool,
reason: crate::engine_notes::CapHitReason,
) {
use crate::engine_notes::{EngineNote, push_unique};
use crate::evidence::{Confidence, Evidence};
let engine_note = EngineNote::CrossFileFixpointCapped {
iterations: iterations as u32,
reason: reason.clone(),
};
let reason_tag = reason.tag();
for d in diags.iter_mut() {
d.confidence = match d.confidence {
Some(c) if c < Confidence::Low => Some(c), _ => Some(Confidence::Low),
};
let note = if cross_file {
format!(
"{SCC_UNCONVERGED_CROSS_FILE_NOTE_PREFIX}SCC did not converge within \
{iterations} iterations (cap {cap}, reason={reason_tag}); \
cross-file taint may be imprecise"
)
} else {
format!(
"{SCC_UNCONVERGED_NOTE_PREFIX}SCC did not converge within {iterations} \
iterations (cap {cap}, reason={reason_tag}); results may be imprecise"
)
};
match d.evidence.as_mut() {
Some(ev) => {
if !ev.notes.iter().any(|n| n == &note) {
ev.notes.push(note);
}
push_unique(&mut ev.engine_notes, engine_note.clone());
}
None => {
let mut ev = Evidence::default();
ev.notes.push(note);
push_unique(&mut ev.engine_notes, engine_note.clone());
d.evidence = Some(ev);
}
}
}
}
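/// Default upper bound on SCC fixed-point iterations.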
const SCC_FIXPOINT_SAFETY_CAP: usize = 64;
static LAST_SCC_MAX_ITERATIONS: AtomicUsize = AtomicUsize::new(0);
pub fn last_scc_max_iterations() -> usize {
LAST_SCC_MAX_ITERATIONS.load(Ordering::Relaxed)
}
static SCC_FIXPOINT_CAP_OVERRIDE: AtomicUsize = AtomicUsize::new(0);
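/// Override the SCC fixed-point iteration cap; `0` restores
/// `SCC_FIXPOINT_SAFETY_CAP`. Intended for tests exercising the
/// non-convergence paths.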
pub fn set_scc_fixpoint_cap_override(cap: usize) {
SCC_FIXPOINT_CAP_OVERRIDE.store(cap, Ordering::Relaxed);
}
fn effective_scc_cap() -> usize {
let o = SCC_FIXPOINT_CAP_OVERRIDE.load(Ordering::Relaxed);
if o == 0 { SCC_FIXPOINT_SAFETY_CAP } else { o }
}
static LAST_TOPO_NONRECURSIVE_REFINEMENTS: AtomicUsize = AtomicUsize::new(0);
pub fn last_topo_nonrecursive_refinements() -> usize {
LAST_TOPO_NONRECURSIVE_REFINEMENTS.load(Ordering::Relaxed)
}
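/// Summary refinement for non-recursive batches is on by default; set
/// `NYX_TOPO_REFINE=0` (or `false`) to fall back to the legacy rules-only
/// pass.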
fn topo_refine_enabled() -> bool {
match std::env::var("NYX_TOPO_REFINE") {
Ok(v) => !matches!(v.as_str(), "0" | "false" | "FALSE" | "False"),
Err(_) => true,
}
}
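/// Pass 2 driver over topologically ordered file batches. Mutually recursive
/// (SCC) batches iterate to a fixed point over summaries, re-analysing only
/// files whose callers' summaries changed; non-recursive batches run once,
/// persisting refined summaries unless refinement is disabled. Orphan files
/// outside the call graph are analysed last with rules only.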
#[allow(clippy::too_many_arguments)]
fn run_topo_batches(
batches: &[FileBatch<'_>],
orphans: &[&PathBuf],
global_summaries: &mut GlobalSummaries,
call_graph: &CallGraph,
cfg: &Config,
scan_root: Option<&Path>,
pb: &ProgressBar,
progress: Option<&Arc<ScanProgress>>,
logs: Option<&Arc<ScanLogCollector>>,
) -> Vec<Diag> {
let root_str = scan_root.map(|r| r.to_string_lossy());
let root_str_ref = root_str.as_deref();
let mut result: Vec<Diag> = Vec::new();
LAST_SCC_MAX_ITERATIONS.store(0, Ordering::Relaxed);
LAST_TOPO_NONRECURSIVE_REFINEMENTS.store(0, Ordering::Relaxed);
let refine_nonrecursive = topo_refine_enabled();
for (batch_idx, batch) in batches.iter().enumerate() {
if batch.has_mutual_recursion {
let scc_cap = effective_scc_cap();
let cross_file_scc = batch.cross_file;
if cross_file_scc {
tracing::debug!(
batch = batch_idx,
files = batch.files.len(),
"cross-file SCC fixed-point: iterating with joint \
summary + inline convergence"
);
}
let mut converged = false;
let mut iters_used: usize = 0;
let mut delta_trajectory: smallvec::SmallVec<[u32; 4]> = smallvec::SmallVec::new();
let mut dirty_files: HashSet<std::path::PathBuf> =
batch.files.iter().map(|p| (*p).clone()).collect();
let mut diags_by_file: HashMap<std::path::PathBuf, Vec<Diag>> = HashMap::new();
for iter in 0..scc_cap {
iters_used = iter + 1;
let snap_before = global_summaries.snapshot_caps();
let ssa_snap_before = global_summaries.snapshot_ssa().clone();
let iter_files: Vec<&PathBuf> = batch
.files
.iter()
.filter(|p| dirty_files.contains(**p))
.copied()
.collect();
let batch_results: Vec<(
std::path::PathBuf,
Vec<Diag>,
Vec<crate::summary::FuncSummary>,
Vec<(
crate::symbol::FuncKey,
crate::summary::ssa_summary::SsaFuncSummary,
)>,
Vec<(
crate::symbol::FuncKey,
crate::taint::ssa_transfer::CalleeSsaBody,
)>,
)> = iter_files
.par_iter()
.map(|path| {
if let Some(p) = progress {
p.set_current_file(&path.to_string_lossy());
}
let bytes = match std::fs::read(path) {
Ok(b) => b,
Err(e) => {
tracing::warn!(
"pass 2 (SCC iter {}): cannot read {}: {e}",
iter,
path.display()
);
if let Some(l) = logs {
l.warn(
format!("Cannot read file for pass 2: {e}"),
Some(path.display().to_string()),
None,
);
}
return (path.to_path_buf(), vec![], vec![], vec![], vec![]);
}
};
match recover_or_propagate(
cfg.scanner.enable_panic_recovery,
path,
logs,
|| {
analyse_file_fused(
&bytes,
path,
cfg,
Some(global_summaries),
scan_root,
)
},
) {
Ok(r) => {
// Redraw without advancing; SCC batch progress is counted once per batch below.
pb.inc(0);
(
path.to_path_buf(),
r.diags,
r.summaries,
r.ssa_summaries,
r.ssa_bodies,
)
}
Err(e) => {
tracing::warn!(
"pass 2 (SCC iter {}): {}: {e}",
iter,
path.display()
);
if let Some(l) = logs {
l.warn(
format!("Pass 2 (SCC iter {iter}) analysis failed: {e}"),
Some(path.display().to_string()),
None,
);
}
(path.to_path_buf(), vec![], vec![], vec![], vec![])
}
}
})
.collect();
let mut ssa_count: usize = 0;
for (path, diags, summaries, ssa_summaries, _ssa_bodies) in batch_results {
diags_by_file.insert(path, diags);
for s in summaries {
let key = s.func_key(root_str_ref);
global_summaries.insert(key, s);
}
for (key, ssa_sum) in ssa_summaries {
global_summaries.insert_ssa(key, ssa_sum);
ssa_count += 1;
}
}
let snap_after = global_summaries.snapshot_caps();
let ssa_converged = ssa_snap_before == *global_summaries.snapshot_ssa();
let iter_converged = snap_before == snap_after && ssa_converged;
let changed_cap_keys = changed_cap_keys_of(&snap_before, &snap_after);
let changed_ssa_keys =
changed_ssa_keys_of(&ssa_snap_before, global_summaries.snapshot_ssa());
let all_changed_keys: HashSet<crate::symbol::FuncKey> =
changed_cap_keys.union(&changed_ssa_keys).cloned().collect();
let changed_caps_count = changed_cap_keys.len();
let changed_ssa_count = changed_ssa_keys.len();
let iter_delta = changed_caps_count + changed_ssa_count;
if delta_trajectory.len() == 4 {
delta_trajectory.remove(0);
}
delta_trajectory.push(iter_delta as u32);
let namespaces_needing_reanalysis =
crate::callgraph::namespaces_for_callers(call_graph, &all_changed_keys);
let next_dirty: HashSet<std::path::PathBuf> = batch
.files
.iter()
.filter(|p| {
let abs = p.to_string_lossy();
let rel = crate::symbol::normalize_namespace(&abs, root_str_ref);
namespaces_needing_reanalysis.contains(&rel)
})
.map(|p| (*p).clone())
.collect();
dirty_files = next_dirty;
tracing::debug!(
batch = batch_idx,
files = batch.files.len(),
recursive = true,
iteration = iter,
ssa_summaries_updated = ssa_count,
ssa_converged,
converged = iter_converged,
delta = iter_delta,
dirty_next = dirty_files.len(),
"SCC batch iteration"
);
if iter_converged && dirty_files.is_empty() {
converged = true;
break;
}
if iter_converged {
tracing::debug!(
batch = batch_idx,
dirty = dirty_files.len(),
"SCC converged by snapshot but dirty_files non-empty; \
call graph disagrees with summary diff — accepting \
snapshot as authoritative"
);
converged = true;
break;
}
}
let mut iteration_diags: Vec<Diag> = Vec::new();
for p in &batch.files {
if let Some(v) = diags_by_file.remove(*p) {
iteration_diags.extend(v);
}
}
LAST_SCC_MAX_ITERATIONS.fetch_max(iters_used, Ordering::Relaxed);
crate::convergence_telemetry::record(
crate::convergence_telemetry::ConvergenceEvent::SccBatch(
crate::convergence_telemetry::SccBatchRecord {
schema: crate::convergence_telemetry::SCHEMA_VERSION,
batch_index: batch_idx,
file_count: batch.files.len(),
cross_file: cross_file_scc,
iterations: iters_used,
cap: scc_cap,
converged,
trajectory: delta_trajectory.clone(),
},
),
);
if !converged {
let reason = crate::engine_notes::CapHitReason::classify(&delta_trajectory);
tracing::warn!(
batch = batch_idx,
files = batch.files.len(),
iterations = iters_used,
cap = scc_cap,
cross_file = cross_file_scc,
reason = reason.tag(),
"SCC batch did not converge within safety cap — results \
may be imprecise. This usually indicates a very large \
mutually-recursive region or a non-monotone summary \
refinement; please file a bug with a reproducer."
);
if let Some(l) = logs {
l.warn(
format!(
"SCC batch {batch_idx} ({} files, cross_file={cross_file_scc}) \
did not converge within {scc_cap} iterations (reason={})",
batch.files.len(),
reason.tag()
),
None,
None,
);
}
tag_unconverged_findings(
&mut iteration_diags,
iters_used,
scc_cap,
cross_file_scc,
reason,
);
}
pb.inc(batch.files.len() as u64);
if let Some(p) = progress {
p.inc_analyzed(batch.files.len() as u64);
p.inc_batches_completed(1);
}
result.extend(iteration_diags);
} else if refine_nonrecursive {
#[allow(clippy::type_complexity)]
let batch_results: Vec<(
std::path::PathBuf,
Vec<Diag>,
Vec<crate::summary::FuncSummary>,
Vec<(
crate::symbol::FuncKey,
crate::summary::ssa_summary::SsaFuncSummary,
)>,
Vec<(
crate::symbol::FuncKey,
crate::taint::ssa_transfer::CalleeSsaBody,
)>,
Vec<(
crate::symbol::FuncKey,
crate::auth_analysis::model::AuthCheckSummary,
)>,
)> = batch
.files
.par_iter()
.map(|path| {
if let Some(p) = progress {
p.set_current_file(&path.to_string_lossy());
}
let bytes = match std::fs::read(path) {
Ok(b) => b,
Err(e) => {
tracing::warn!(
"pass 2 (non-recursive): cannot read {}: {e}",
path.display()
);
if let Some(l) = logs {
l.warn(
format!("Cannot read file for pass 2: {e}"),
Some(path.display().to_string()),
None,
);
}
pb.inc(1);
if let Some(p) = progress {
p.inc_analyzed(1);
}
return (path.to_path_buf(), vec![], vec![], vec![], vec![], vec![]);
}
};
match recover_or_propagate(
cfg.scanner.enable_panic_recovery,
path,
logs,
|| analyse_file_fused(&bytes, path, cfg, Some(global_summaries), scan_root),
) {
Ok(r) => {
pb.inc(1);
if let Some(p) = progress {
p.inc_analyzed(1);
}
(
path.to_path_buf(),
r.diags,
r.summaries,
r.ssa_summaries,
r.ssa_bodies,
r.auth_summaries,
)
}
Err(e) => {
tracing::warn!("pass 2 (non-recursive): {}: {e}", path.display());
if let Some(l) = logs {
l.warn(
format!("Pass 2 analysis failed: {e}"),
Some(path.display().to_string()),
None,
);
}
pb.inc(1);
if let Some(p) = progress {
p.inc_analyzed(1);
}
(path.to_path_buf(), vec![], vec![], vec![], vec![], vec![])
}
}
})
.collect();
let mut batch_diags: Vec<Diag> = Vec::new();
let mut refined_summaries: usize = 0;
let mut refined_ssa: usize = 0;
let mut refined_bodies: usize = 0;
let mut refined_auth: usize = 0;
for (_path, diags, summaries, ssa_summaries, ssa_bodies, auth_summaries) in
batch_results
{
batch_diags.extend(diags);
for s in summaries {
let key = s.func_key(root_str_ref);
global_summaries.insert(key, s);
refined_summaries += 1;
}
for (key, ssa_sum) in ssa_summaries {
global_summaries.insert_ssa(key, ssa_sum);
refined_ssa += 1;
}
for (key, body) in ssa_bodies {
global_summaries.insert_body(key, body);
refined_bodies += 1;
}
for (key, auth_sum) in auth_summaries {
global_summaries.insert_auth(key, auth_sum);
refined_auth += 1;
}
}
let total_refinements = refined_summaries + refined_ssa + refined_bodies + refined_auth;
LAST_TOPO_NONRECURSIVE_REFINEMENTS.fetch_add(total_refinements, Ordering::Relaxed);
tracing::debug!(
batch = batch_idx,
files = batch.files.len(),
recursive = false,
refined_summaries,
refined_ssa,
refined_bodies,
refined_auth,
"non-recursive batch complete (refinements persisted)"
);
if let Some(p) = progress {
p.inc_batches_completed(1);
}
result.extend(batch_diags);
} else {
let batch_diags: Vec<Diag> = batch
.files
.par_iter()
.flat_map_iter(|path| {
if let Some(p) = progress {
p.set_current_file(&path.to_string_lossy());
}
let d = match recover_or_propagate(
cfg.scanner.enable_panic_recovery,
path,
logs,
|| run_rules_on_file(path, cfg, Some(global_summaries), scan_root),
) {
Ok(d) => d,
Err(e) => {
tracing::warn!("pass 2: {}: {e}", path.display());
if let Some(l) = logs {
l.warn(
format!("Pass 2 analysis failed: {e}"),
Some(path.display().to_string()),
None,
);
}
vec![]
}
};
pb.inc(1);
if let Some(p) = progress {
p.inc_analyzed(1);
}
d
})
.collect();
tracing::debug!(
batch = batch_idx,
files = batch.files.len(),
recursive = false,
"non-recursive batch complete (legacy, refinement disabled)"
);
if let Some(p) = progress {
p.inc_batches_completed(1);
}
result.extend(batch_diags);
}
}
if !orphans.is_empty() {
let orphan_diags: Vec<Diag> = orphans
.par_iter()
.flat_map_iter(|path| {
if let Some(p) = progress {
p.set_current_file(&path.to_string_lossy());
}
let d = match recover_or_propagate(
cfg.scanner.enable_panic_recovery,
path,
logs,
|| run_rules_on_file(path, cfg, Some(global_summaries), scan_root),
) {
Ok(d) => d,
Err(e) => {
tracing::warn!("pass 2: {}: {e}", path.display());
if let Some(l) = logs {
l.warn(
format!("Pass 2 analysis failed: {e}"),
Some(path.display().to_string()),
None,
);
}
vec![]
}
};
pb.inc(1);
if let Some(p) = progress {
p.inc_analyzed(1);
}
d
})
.collect();
if let Some(p) = progress {
p.inc_batches_completed(1);
}
result.extend(orphan_diags);
}
result
}
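/// Convenience wrapper over `scan_filesystem_with_observer` with no
/// observers attached.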
pub(crate) fn scan_filesystem(
root: &Path,
cfg: &Config,
show_progress: bool,
) -> NyxResult<Vec<Diag>> {
scan_filesystem_with_observer(root, cfg, show_progress, None, None, None, None)
}
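/// Index-free scan: walk the tree, extract summaries (pass 1), build the
/// call graph, then run topo-ordered analysis (pass 2). In AST-only modes
/// the summary and call-graph stages are skipped and a single fused pass
/// runs instead. Observer hooks (`progress`, `metrics`, `logs`) are
/// optional.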
#[allow(clippy::too_many_arguments)]
pub(crate) fn scan_filesystem_with_observer(
root: &Path,
cfg: &Config,
show_progress: bool,
progress: Option<&Arc<ScanProgress>>,
metrics: Option<&Arc<ScanMetrics>>,
logs: Option<&Arc<ScanLogCollector>>,
preview_tier_seen: Option<&Arc<AtomicBool>>,
) -> NyxResult<Vec<Diag>> {
let owned_cfg = ensure_framework_ctx(root, cfg);
let cfg = owned_cfg.as_ref().unwrap_or(cfg);
if let Some(p) = progress {
p.set_stage(ScanStage::Discovering);
}
let walk_start = std::time::Instant::now();
let all_paths: Vec<PathBuf> = {
let _span = tracing::info_span!("walk_files").entered();
let (rx, handle) = spawn_file_walker(root, cfg);
let paths: Vec<PathBuf> = rx.into_iter().flatten().collect();
if let Err(err) = handle.join() {
tracing::error!("walker thread panicked: {:#?}", err);
if let Some(l) = logs {
l.error("Walker thread panicked", None, Some(format!("{err:#?}")));
}
}
paths
};
tracing::info!(file_count = all_paths.len(), "file walk complete");
if let Some(flag) = preview_tier_seen {
if all_paths.iter().any(|p| is_preview_tier_path(p)) {
flag.store(true, Ordering::Relaxed);
}
}
if let Some(p) = progress {
p.record_walk_ms(walk_start.elapsed().as_millis() as u64);
p.set_files_discovered(all_paths.len() as u64);
}
if let Some(l) = logs {
l.info(
format!(
"File walk complete: {} files discovered in {}ms",
all_paths.len(),
walk_start.elapsed().as_millis()
),
None,
);
}
let needs_taint = matches!(
cfg.scanner.mode,
crate::utils::config::AnalysisMode::Full
| crate::utils::config::AnalysisMode::Cfg
| crate::utils::config::AnalysisMode::Taint
);
if !needs_taint {
if let Some(p) = progress {
p.set_stage(ScanStage::Indexing);
}
if let Some(l) = logs {
l.info("Starting AST-only analysis (no taint)", None);
}
let _span = tracing::info_span!("ast_only_analysis", files = all_paths.len()).entered();
let pb = make_progress_bar(all_paths.len() as u64, "Running analysis", show_progress);
let mut diags: Vec<Diag> = all_paths
.par_iter()
.flat_map_iter(|path| {
let bytes = match std::fs::read(path) {
Ok(b) => b,
Err(e) => {
tracing::warn!("analysis: cannot read {}: {e}", path.display());
if let Some(l) = logs {
l.warn(
format!("Cannot read file: {e}"),
Some(path.display().to_string()),
None,
);
}
pb.inc(1);
if let Some(p) = progress {
p.inc_parsed(1);
p.inc_analyzed(1);
p.set_current_file(&path.to_string_lossy());
}
return Vec::<Diag>::new();
}
};
let result = match recover_or_propagate(
cfg.scanner.enable_panic_recovery,
path,
logs,
|| analyse_file_fused(&bytes, path, cfg, None, Some(root)),
) {
Ok(r) => r.diags,
Err(e) => {
tracing::warn!("analysis: {}: {e}", path.display());
if let Some(l) = logs {
l.warn(
format!("Analysis failed: {e}"),
Some(path.display().to_string()),
None,
);
}
vec![]
}
};
pb.inc(1);
if let Some(p) = progress {
p.inc_parsed(1);
p.inc_analyzed(1);
p.set_current_file(&path.to_string_lossy());
}
result
})
.collect();
pb.finish_and_clear();
if let Some(p) = progress {
p.set_stage(ScanStage::Complete);
}
post_process_diags(&mut diags, cfg);
return Ok(diags);
}
if let Some(p) = progress {
p.set_stage(ScanStage::Indexing);
}
if let Some(l) = logs {
l.info(
format!(
"Starting pass 1: extracting summaries from {} files",
all_paths.len()
),
None,
);
}
let pass1_start = std::time::Instant::now();
let mut global_summaries: GlobalSummaries = {
let _span = tracing::info_span!("pass1_fused", files = all_paths.len()).entered();
let pb = make_progress_bar(
all_paths.len() as u64,
"Pass 1: Extracting summaries",
show_progress,
);
let root_str = root.to_string_lossy();
let gs = all_paths
.par_iter()
.fold(GlobalSummaries::new, |mut local_gs, path| {
if let Ok(bytes) = std::fs::read(path) {
match recover_or_propagate(
cfg.scanner.enable_panic_recovery,
path,
logs,
|| analyse_file_fused(&bytes, path, cfg, None, Some(root)),
) {
Ok(r) => {
let first_lang = r.summaries.first().map(|s| s.lang.clone());
for s in r.summaries {
let key = s.func_key(Some(&root_str));
local_gs.insert(key, s);
}
if !r.ssa_summaries.is_empty() {
for (key, ssa_sum) in r.ssa_summaries {
local_gs.insert_ssa(key, ssa_sum);
}
}
for (key, body) in r.ssa_bodies {
local_gs.insert_body(key, body);
}
for (key, auth_sum) in r.auth_summaries {
local_gs.insert_auth(key, auth_sum);
}
if let Some(p) = progress {
if let Some(ref lang) = first_lang {
p.record_language(lang);
}
}
}
Err(e) => {
tracing::warn!("pass 1: {}: {e}", path.display());
if let Some(l) = logs {
l.warn(
format!("Pass 1 analysis failed: {e}"),
Some(path.display().to_string()),
None,
);
}
}
}
} else {
tracing::warn!("pass 1: cannot read {}", path.display());
if let Some(l) = logs {
l.warn("Cannot read file", Some(path.display().to_string()), None);
}
}
pb.inc(1);
if let Some(p) = progress {
p.inc_parsed(1);
p.set_current_file(&path.to_string_lossy());
}
local_gs
})
.reduce(GlobalSummaries::new, |mut a, b| {
a.merge(b);
a
});
pb.finish_and_clear();
tracing::info!("pass 1 complete");
gs
};
if let Some(p) = progress {
p.record_pass1_ms(pass1_start.elapsed().as_millis() as u64);
}
tracing::debug!(
cross_file_bodies = global_summaries.bodies_len(),
"pass 1: cross-file SSA bodies available for taint"
);
if let Some(l) = logs {
l.info(
format!(
"Pass 1 complete in {}ms ({} cross-file SSA bodies, {} auth summaries)",
pass1_start.elapsed().as_millis(),
global_summaries.bodies_len(),
global_summaries.auth_len(),
),
None,
);
}
if let Some(l) = logs {
l.info("Building call graph", None);
}
let cg_start = std::time::Instant::now();
global_summaries.install_hierarchy();
let (call_graph, cg_analysis) = build_and_analyse_call_graph(&global_summaries);
log_unresolved_callees(&call_graph);
if let Some(p) = progress {
p.record_call_graph_ms(cg_start.elapsed().as_millis() as u64);
}
if let Some(m) = metrics {
m.call_edges.store(
call_graph.graph.edge_count() as u64,
std::sync::atomic::Ordering::Relaxed,
);
m.functions_analyzed.store(
call_graph.graph.node_count() as u64,
std::sync::atomic::Ordering::Relaxed,
);
m.unresolved_calls.store(
(call_graph.unresolved_not_found.len() + call_graph.unresolved_ambiguous.len()) as u64,
std::sync::atomic::Ordering::Relaxed,
);
}
if let Some(l) = logs {
l.info(
format!(
"Call graph built in {}ms: {} nodes, {} edges, {} unresolved",
cg_start.elapsed().as_millis(),
call_graph.graph.node_count(),
call_graph.graph.edge_count(),
call_graph.unresolved_not_found.len() + call_graph.unresolved_ambiguous.len(),
),
None,
);
}
if let Some(p) = progress {
p.set_stage(ScanStage::Analyzing);
}
if let Some(l) = logs {
l.info(
format!(
"Starting pass 2: taint analysis on {} files",
all_paths.len()
),
None,
);
}
let pass2_start = std::time::Instant::now();
let mut diags: Vec<Diag> = {
let _span = tracing::info_span!("pass2_analysis", files = all_paths.len()).entered();
let pb = make_progress_bar(
all_paths.len() as u64,
"Pass 2: Running analysis",
show_progress,
);
let (batches, orphans) = crate::callgraph::scc_file_batches_with_metadata(
&call_graph,
&cg_analysis,
&all_paths,
root,
);
tracing::info!(
batches = batches.len(),
orphan_files = orphans.len(),
"topo-ordered file batches computed"
);
if let Some(l) = logs {
l.info(
format!(
"Topo-ordered file batches: {} batches, {} orphan files",
batches.len(),
orphans.len()
),
None,
);
}
let mut gs = global_summaries;
let total_batches = batches.len() as u64 + u64::from(!orphans.is_empty());
if let Some(p) = progress {
p.set_batches_total(total_batches);
}
let result = run_topo_batches(
&batches,
&orphans,
&mut gs,
&call_graph,
cfg,
Some(root),
&pb,
progress,
logs,
);
pb.finish_and_clear();
result
};
tracing::info!(diags = diags.len(), "pass 2 complete");
if let Some(p) = progress {
p.record_pass2_ms(pass2_start.elapsed().as_millis() as u64);
}
if let Some(l) = logs {
l.info(
format!(
"Pass 2 complete in {}ms: {} raw findings",
pass2_start.elapsed().as_millis(),
diags.len()
),
None,
);
}
let pp_start = std::time::Instant::now();
if let Some(p) = progress {
p.set_stage(ScanStage::PostProcessing);
}
post_process_diags(&mut diags, cfg);
if let Some(p) = progress {
p.record_post_process_ms(pp_start.elapsed().as_millis() as u64);
p.set_stage(ScanStage::Complete);
}
if let Some(l) = logs {
l.info(
format!(
"Post-processing complete in {}ms: {} final findings",
pp_start.elapsed().as_millis(),
diags.len()
),
None,
);
}
Ok(diags)
}
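/// Convenience wrapper over `scan_with_index_parallel_observer` with no
/// observers attached.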
pub fn scan_with_index_parallel(
project: &str,
pool: Arc<Pool<SqliteConnectionManager>>,
cfg: &Config,
show_progress: bool,
scan_root: &Path,
) -> NyxResult<Vec<Diag>> {
scan_with_index_parallel_observer(
project,
pool,
cfg,
show_progress,
scan_root,
None,
None,
None,
None,
)
}
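/// Indexed scan: prune deleted files from the index, refresh persisted
/// summaries only for files whose content hash changed, load cross-file
/// summaries from the database, then run the same call-graph-driven pass 2
/// as the filesystem path, persisting findings back to the index. AST-only
/// modes reuse cached issues for unchanged files instead.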
#[allow(clippy::too_many_arguments)]
pub fn scan_with_index_parallel_observer(
project: &str,
pool: Arc<Pool<SqliteConnectionManager>>,
cfg: &Config,
show_progress: bool,
scan_root: &Path,
progress: Option<&Arc<ScanProgress>>,
metrics: Option<&Arc<ScanMetrics>>,
logs: Option<&Arc<ScanLogCollector>>,
preview_tier_seen: Option<&Arc<AtomicBool>>,
) -> NyxResult<Vec<Diag>> {
let owned_cfg = ensure_framework_ctx(scan_root, cfg);
let cfg = owned_cfg.as_ref().unwrap_or(cfg);
if let Some(p) = progress {
p.set_stage(ScanStage::Discovering);
}
let walk_start = std::time::Instant::now();
let indexed_files = {
let idx = Indexer::from_pool(project, &pool)?;
idx.get_files(project)?
};
let (rx, handle) = spawn_file_walker(scan_root, cfg);
let files: Vec<PathBuf> = rx.into_iter().flatten().collect();
if let Err(err) = handle.join() {
tracing::error!("walker thread panicked: {:#?}", err);
if let Some(l) = logs {
l.error(
"Walker thread panicked during indexed scan",
None,
Some(format!("{err:#?}")),
);
}
}
if let Some(flag) = preview_tier_seen {
if files.iter().any(|p| is_preview_tier_path(p)) {
flag.store(true, Ordering::Relaxed);
}
}
if let Some(p) = progress {
p.record_walk_ms(walk_start.elapsed().as_millis() as u64);
p.set_files_discovered(files.len() as u64);
}
if let Some(l) = logs {
l.info(
format!(
"Indexed scan discovered {} files in {}ms",
files.len(),
walk_start.elapsed().as_millis()
),
None,
);
}
let current_files: HashSet<PathBuf> = files.iter().cloned().collect();
let removed_files: Vec<PathBuf> = indexed_files
.into_iter()
.filter(|path| !current_files.contains(path))
.collect();
if !removed_files.is_empty() {
let mut idx = Indexer::from_pool(project, &pool)?;
for path in &removed_files {
idx.remove_file_and_related(path)?;
}
tracing::info!(
removed = removed_files.len(),
"pruned deleted files from indexed scan state"
);
if let Some(l) = logs {
l.info(
format!(
"Pruned {} deleted files from indexed state",
removed_files.len()
),
None,
);
}
}
let needs_taint = matches!(
cfg.scanner.mode,
crate::utils::config::AnalysisMode::Full
| crate::utils::config::AnalysisMode::Cfg
| crate::utils::config::AnalysisMode::Taint
);
if needs_taint {
if let Some(p) = progress {
p.set_stage(ScanStage::Indexing);
}
if let Some(l) = logs {
l.info(
format!("Refreshing persisted summaries for {} files", files.len()),
None,
);
}
let _span = tracing::info_span!("pass1_indexed", files = files.len()).entered();
let pb = make_progress_bar(
files.len() as u64,
"Pass 1: Extracting summaries",
show_progress,
);
let pass1_start = std::time::Instant::now();
let persist_errors = Arc::new(Mutex::new(Vec::new()));
let skipped_files = Arc::new(std::sync::atomic::AtomicU64::new(0));
let scan_root_ref = scan_root.to_path_buf();
let persist_errors_ref = Arc::clone(&persist_errors);
let skipped_files_ref = Arc::clone(&skipped_files);
let progress_ref = progress.cloned();
files.par_iter().for_each_init(
|| Indexer::from_pool(project, &pool).expect("db pool"),
|idx, path| {
if let Some(p) = &progress_ref {
p.set_current_file(&path.to_string_lossy());
}
if let Ok(bytes) = std::fs::read(path) {
let hash = Indexer::digest_bytes(&bytes);
let needs_scan = idx.should_scan_with_hash(path, &hash).unwrap_or(true);
if needs_scan {
match recover_or_propagate(
cfg.scanner.enable_panic_recovery,
path,
logs,
|| {
extract_all_summaries_from_bytes(
&bytes,
path,
cfg,
Some(&scan_root_ref),
)
},
) {
Ok((func_sums, ssa_sums, ssa_bodies, auth_sums)) => {
if let Some(p) = &progress_ref {
p.inc_parsed(1);
if let Some(lang) = func_sums.first().map(|s| s.lang.as_str()) {
p.record_language(lang);
}
}
let ssa_rows: Vec<_> = ssa_sums
.into_iter()
.map(|(key, sum)| {
(
key.name,
key.arity.unwrap_or(0),
key.lang.as_str().to_string(),
key.namespace,
key.container,
key.disambig,
key.kind,
sum,
)
})
.collect();
let body_rows: Vec<_> = ssa_bodies
.into_iter()
.map(|(key, body)| {
(
key.name,
key.arity.unwrap_or(0),
key.lang.as_str().to_string(),
key.namespace,
key.container,
key.disambig,
key.kind,
body,
)
})
.collect();
let auth_rows: Vec<_> = auth_sums
.into_iter()
.map(|(key, sum)| {
(
key.name,
key.arity.unwrap_or(0),
key.lang.as_str().to_string(),
key.namespace,
key.container,
key.disambig,
key.kind,
sum,
)
})
.collect();
if let Err(e) = idx.replace_all_for_file(
path, &hash, &func_sums, &ssa_rows, &body_rows, &auth_rows,
) {
record_persist_error(
&persist_errors_ref,
format!("summaries {}: {e}", path.display()),
);
}
}
Err(e) => {
tracing::warn!("pass 1: {}: {e}", path.display());
}
}
} else {
skipped_files_ref.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
if let Some(p) = &progress_ref {
p.inc_skipped(1);
}
}
} else {
tracing::warn!("pass 1: cannot read {}", path.display());
}
pb.inc(1);
},
);
pb.finish_and_clear();
let skipped = skipped_files.load(std::sync::atomic::Ordering::Relaxed);
if let Some(p) = progress {
p.set_files_skipped(skipped);
p.record_pass1_ms(pass1_start.elapsed().as_millis() as u64);
}
if let Some(m) = metrics {
m.summaries_reused
.store(skipped, std::sync::atomic::Ordering::Relaxed);
}
if let Some(l) = logs {
l.info(
format!(
"Indexed pass 1 complete: {} refreshed, {} reused",
files.len().saturating_sub(skipped as usize),
skipped
),
None,
);
}
fail_if_persist_errors("Pass 1", persist_errors)?;
}
let root_str = scan_root.to_string_lossy();
let global_summaries: Option<GlobalSummaries> = if needs_taint {
if let Some(p) = progress {
p.set_stage(ScanStage::LoadingSummaries);
}
let _span = tracing::info_span!("load_summaries_db").entered();
let idx = Indexer::from_pool(project, &pool)?;
let all = idx.load_all_summaries()?;
tracing::info!(summaries = all.len(), "loaded cross-file summaries from DB");
let mut gs = summary::merge_summaries(all, Some(&root_str));
let ssa_rows = idx.load_all_ssa_summaries()?;
let ssa_count = ssa_rows.len();
if !ssa_rows.is_empty() {
tracing::info!(
ssa_summaries = ssa_rows.len(),
"loaded SSA summaries from DB"
);
for (file_path, name, lang_str, arity, namespace, container, disambig, kind, ssa_sum) in
ssa_rows
{
let lang =
crate::symbol::Lang::from_slug(&lang_str).unwrap_or(crate::symbol::Lang::Rust);
let ns = if namespace.is_empty() {
crate::symbol::normalize_namespace(&file_path, Some(&root_str))
} else {
namespace
};
let key = crate::symbol::FuncKey {
lang,
namespace: ns,
container,
name,
arity: if arity >= 0 {
Some(arity as usize)
} else {
None
},
disambig,
kind,
};
gs.insert_ssa(key, ssa_sum);
}
}
let body_count = if crate::symex::cross_file_symex_enabled() {
match idx.load_all_ssa_bodies() {
Ok(body_rows) => {
let count = body_rows.len();
for (
file_path,
name,
lang_str,
arity,
namespace,
container,
disambig,
kind,
body,
) in body_rows
{
let lang = crate::symbol::Lang::from_slug(&lang_str)
.unwrap_or(crate::symbol::Lang::Rust);
let ns = if namespace.is_empty() {
crate::symbol::normalize_namespace(&file_path, Some(&root_str))
} else {
namespace
};
let key = crate::symbol::FuncKey {
lang,
namespace: ns,
container,
name,
arity: if arity >= 0 {
Some(arity as usize)
} else {
None
},
disambig,
kind,
};
gs.insert_body(key, body);
}
count
}
Err(e) => {
tracing::warn!("failed to load SSA bodies from DB: {e}");
0
}
}
} else {
0
};
let auth_rows = idx.load_all_auth_summaries()?;
let auth_count = auth_rows.len();
if !auth_rows.is_empty() {
tracing::info!(
auth_summaries = auth_rows.len(),
"loaded auth summaries from DB"
);
for (
file_path,
name,
lang_str,
arity,
namespace,
container,
disambig,
kind,
auth_sum,
) in auth_rows
{
let lang =
crate::symbol::Lang::from_slug(&lang_str).unwrap_or(crate::symbol::Lang::Rust);
let ns = if namespace.is_empty() {
crate::symbol::normalize_namespace(&file_path, Some(&root_str))
} else {
namespace
};
let key = crate::symbol::FuncKey {
lang,
namespace: ns,
container,
name,
arity: if arity >= 0 {
Some(arity as usize)
} else {
None
},
disambig,
kind,
};
gs.insert_auth(key, auth_sum);
}
}
tracing::debug!(
cross_file_bodies = body_count,
"indexed scan: cross-file SSA bodies available for taint"
);
if let Some(l) = logs {
l.info(
format!(
"Loaded {} coarse summaries, {} SSA summaries, {} SSA bodies, {} auth summaries from DB",
gs.snapshot_caps().len(),
ssa_count,
body_count,
auth_count,
),
None,
);
}
Some(gs)
} else {
None
};
if !needs_taint {
if let Some(p) = progress {
p.set_stage(ScanStage::Analyzing);
}
if let Some(l) = logs {
l.info("Starting AST-only indexed analysis", None);
}
let pass2_start = std::time::Instant::now();
let _span = tracing::info_span!("pass2_indexed_ast_only").entered();
let pb2 = make_progress_bar(
files.len() as u64,
"Pass 2: Running analysis",
show_progress,
);
let diag_map: DashMap<String, Vec<Diag>> = DashMap::new();
let persist_errors = Arc::new(Mutex::new(Vec::new()));
let skipped_files = Arc::new(std::sync::atomic::AtomicU64::new(0));
let persist_errors_ref = Arc::clone(&persist_errors);
let skipped_files_ref = Arc::clone(&skipped_files);
let progress_ref = progress.cloned();
files.into_par_iter().for_each_init(
|| Indexer::from_pool(project, &pool).expect("db pool"),
|idx, path| {
if let Some(p) = &progress_ref {
p.set_current_file(&path.to_string_lossy());
}
let bytes_opt = std::fs::read(&path).ok();
let hash = bytes_opt.as_ref().map(|b| Indexer::digest_bytes(b));
let needs_scan = match (&hash, &bytes_opt) {
(Some(h), _) => idx.should_scan_with_hash(&path, h).unwrap_or(true),
_ => true,
};
let mut diags = if needs_scan {
if let Some(p) = &progress_ref {
p.inc_parsed(1);
p.inc_analyzed(1);
}
let d = recover_or_propagate(
cfg.scanner.enable_panic_recovery,
&path,
logs,
|| match &bytes_opt {
Some(bytes) => {
run_rules_on_bytes(bytes, &path, cfg, None, Some(scan_root))
}
None => run_rules_on_file(&path, cfg, None, Some(scan_root)),
},
)
.unwrap_or_default();
let file_id = match &hash {
Some(h) => idx.upsert_file_with_hash(&path, h),
None => idx.upsert_file(&path),
};
match file_id {
Ok(file_id) => {
if let Err(e) = idx.replace_issues(
file_id,
d.iter().map(|d| IssueRow {
rule_id: &d.id,
severity: d.severity.as_db_str(),
line: d.line as i64,
col: d.col as i64,
}),
) {
record_persist_error(
&persist_errors_ref,
format!("issues {}: {e}", path.display()),
);
}
}
Err(e) => {
record_persist_error(
&persist_errors_ref,
format!("file row {}: {e}", path.display()),
);
}
}
d
} else {
skipped_files_ref.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
if let Some(p) = &progress_ref {
p.inc_skipped(1);
}
idx.get_issues_from_file(&path).unwrap_or_default()
};
diags.retain(|d| !d.id.starts_with("taint") && !d.id.starts_with("cfg-"));
if !diags.is_empty() {
diag_map
.entry(path.to_string_lossy().to_string())
.or_default()
.append(&mut diags);
}
pb2.inc(1);
},
);
pb2.finish_and_clear();
let skipped = skipped_files.load(std::sync::atomic::Ordering::Relaxed);
if let Some(p) = progress {
p.set_files_skipped(skipped);
p.record_pass2_ms(pass2_start.elapsed().as_millis() as u64);
p.set_stage(ScanStage::PostProcessing);
}
if let Some(m) = metrics {
m.summaries_reused
.store(skipped, std::sync::atomic::Ordering::Relaxed);
}
fail_if_persist_errors("AST-only pass 2", persist_errors)?;
let mut diags: Vec<Diag> = diag_map.into_iter().flat_map(|(_, v)| v).collect();
let post_process_start = std::time::Instant::now();
post_process_diags(&mut diags, cfg);
if let Some(p) = progress {
p.record_post_process_ms(post_process_start.elapsed().as_millis() as u64);
p.set_stage(ScanStage::Complete);
}
if let Some(l) = logs {
l.info(
format!(
"AST-only indexed scan complete in {}ms: {} findings, {} reused files",
pass2_start.elapsed().as_millis(),
diags.len(),
skipped
),
None,
);
}
return Ok(diags);
}
let mut global_summaries = global_summaries.ok_or_else(|| {
crate::errors::NyxError::Msg(
"internal: global_summaries missing in taint-mode pass 2".to_string(),
)
})?;
if let Some(p) = progress {
p.set_stage(ScanStage::BuildingCallGraph);
}
let cg_start = std::time::Instant::now();
global_summaries.install_hierarchy();
let (call_graph, cg_analysis) = build_and_analyse_call_graph(&global_summaries);
log_unresolved_callees(&call_graph);
if let Some(p) = progress {
p.record_call_graph_ms(cg_start.elapsed().as_millis() as u64);
}
if let Some(m) = metrics {
m.call_edges.store(
call_graph.graph.edge_count() as u64,
std::sync::atomic::Ordering::Relaxed,
);
m.functions_analyzed.store(
call_graph.graph.node_count() as u64,
std::sync::atomic::Ordering::Relaxed,
);
m.unresolved_calls.store(
(call_graph.unresolved_not_found.len() + call_graph.unresolved_ambiguous.len()) as u64,
std::sync::atomic::Ordering::Relaxed,
);
}
if let Some(l) = logs {
l.info(
format!(
"Call graph built in {}ms: {} nodes, {} edges, {} unresolved",
cg_start.elapsed().as_millis(),
call_graph.graph.node_count(),
call_graph.graph.edge_count(),
call_graph.unresolved_not_found.len() + call_graph.unresolved_ambiguous.len(),
),
None,
);
}
let (batches, orphans) = crate::callgraph::scc_file_batches_with_metadata(
&call_graph,
&cg_analysis,
&files,
scan_root,
);
tracing::info!(
batches = batches.len(),
orphan_files = orphans.len(),
"topo-ordered file batches computed (indexed)"
);
if let Some(l) = logs {
l.info(
format!(
"Topo-ordered indexed analysis plan: {} batches, {} orphan files",
batches.len(),
orphans.len()
),
None,
);
}
let _span = tracing::info_span!("pass2_indexed").entered();
if let Some(p) = progress {
p.set_stage(ScanStage::Analyzing);
p.set_batches_total(batches.len() as u64 + u64::from(!orphans.is_empty()));
}
let pass2_start = std::time::Instant::now();
let pb2 = make_progress_bar(
files.len() as u64,
"Pass 2: Running analysis",
show_progress,
);
let topo_diags = run_topo_batches(
&batches,
&orphans,
&mut global_summaries,
&call_graph,
cfg,
Some(scan_root),
&pb2,
progress,
logs,
);
pb2.finish_and_clear();
if let Some(p) = progress {
p.record_pass2_ms(pass2_start.elapsed().as_millis() as u64);
p.set_stage(ScanStage::PostProcessing);
}
if let Some(l) = logs {
l.info(
format!(
"Indexed pass 2 complete in {}ms: {} raw findings",
pass2_start.elapsed().as_millis(),
topo_diags.len()
),
None,
);
}
{
let mut by_file: HashMap<&str, Vec<&Diag>> = HashMap::new();
for d in &topo_diags {
by_file.entry(&d.path).or_default().push(d);
}
let mut idx = Indexer::from_pool(project, &pool)?;
for path in &files {
if !path.exists() {
idx.remove_file_and_related(path)?;
continue;
}
let file_id = idx.upsert_file(path)?;
let empty: [&Diag; 0] = [];
let file_diags = by_file
.get(path.to_string_lossy().as_ref())
.map(Vec::as_slice)
.unwrap_or(&empty);
idx.replace_issues(
file_id,
file_diags.iter().map(|d| IssueRow {
rule_id: &d.id,
severity: d.severity.as_db_str(),
line: d.line as i64,
col: d.col as i64,
}),
)?;
}
}
if let Some(l) = logs {
l.info(
format!("Persisted findings for {} files", files.len()),
None,
);
}
let mut diags = topo_diags;
let post_process_start = std::time::Instant::now();
post_process_diags(&mut diags, cfg);
if let Some(p) = progress {
p.record_post_process_ms(post_process_start.elapsed().as_millis() as u64);
p.set_stage(ScanStage::Complete);
}
if let Some(l) = logs {
l.info(
format!(
"Indexed scan complete in {}ms: {} final findings",
pass2_start.elapsed().as_millis(),
diags.len()
),
None,
);
}
Ok(diags)
}
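/// Low-severity quality rules eligible for per-file rollup in
/// `rollup_findings`.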
const ROLLUP_RULES: &[&str] = &[
"rs.quality.unwrap",
"rs.quality.expect",
"rs.quality.panic_macro",
];
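/// Apply output budgets after severity and score filters: optionally drop
/// quality findings, roll up noisy low-severity rules, enforce low-severity
/// budgets, and cap total results preferring High, then Medium, then Low.
/// Returns the suppression counts for the console summary.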
pub(crate) fn prioritize(
diags: &mut Vec<Diag>,
config: &crate::utils::config::OutputConfig,
show_instances: Option<&str>,
) -> SuppressionStats {
let mut stats = SuppressionStats {
quality_dropped: 0,
low_budget_dropped: 0,
max_results_dropped: 0,
include_quality: config.include_quality,
show_all: config.show_all,
max_low: config.max_low,
max_low_per_file: config.max_low_per_file,
max_low_per_rule: config.max_low_per_rule,
};
if config.show_all {
return stats;
}
if !config.include_quality {
let before = diags.len();
diags.retain(|d| d.category != FindingCategory::Quality);
stats.quality_dropped = before - diags.len();
}
rollup_findings(diags, config, show_instances);
apply_low_budgets(diags, config, &mut stats);
if let Some(max) = config.max_results {
let max = max as usize;
if diags.len() > max {
let high_count = diags
.iter()
.filter(|d| d.severity == Severity::High)
.count();
let med_count = diags
.iter()
.filter(|d| d.severity == Severity::Medium)
.count();
let take = if high_count >= max {
diags.retain(|d| d.severity == Severity::High);
diags.truncate(max);
max
} else if high_count + med_count >= max {
let med_slots = max - high_count;
let mut med_seen = 0usize;
diags.retain(|d| {
if d.severity == Severity::High {
true
} else if d.severity == Severity::Medium && med_seen < med_slots {
med_seen += 1;
true
} else {
false
}
});
max
} else {
let low_slots = max - high_count - med_count;
let mut low_seen = 0usize;
diags.retain(|d| {
if d.severity == Severity::High || d.severity == Severity::Medium {
true
} else if low_seen < low_slots {
low_seen += 1;
true
} else {
false
}
});
max
};
// Every branch keeps exactly `take` findings, so the drop count is the
// pre-cap total minus `take`.
stats.max_results_dropped = before.saturating_sub(take);
}
}
stats
}
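/// Collapse repeated low-severity quality findings (rules in `ROLLUP_RULES`)
/// per (file, rule) into a single finding carrying the total count and up to
/// `rollup_examples` example locations.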
fn rollup_findings(
diags: &mut Vec<Diag>,
config: &crate::utils::config::OutputConfig,
show_instances: Option<&str>,
) {
use std::collections::HashMap;
let mut groups: HashMap<(String, String), Vec<usize>> = HashMap::new();
for (i, d) in diags.iter().enumerate() {
if d.severity != Severity::Low {
continue;
}
if d.category != FindingCategory::Quality {
continue;
}
if !ROLLUP_RULES.contains(&d.id.as_str()) {
continue;
}
if show_instances == Some(d.id.as_str()) {
continue;
}
groups
.entry((d.path.clone(), d.id.clone()))
.or_default()
.push(i);
}
let mut to_remove: Vec<usize> = Vec::new();
let mut rollups: Vec<Diag> = Vec::new();
for ((_path, _rule_id), mut indices) in groups {
if indices.len() <= 1 {
continue;
}
indices.sort_by_key(|&i| (diags[i].line, diags[i].col));
let canonical_idx = indices[0];
let total = indices.len();
let examples: Vec<Location> = indices
.iter()
.take(config.rollup_examples as usize)
.map(|&i| Location {
line: diags[i].line,
col: diags[i].col,
})
.collect();
let canonical = &diags[canonical_idx];
let rollup_diag = Diag {
path: canonical.path.clone(),
line: canonical.line,
col: canonical.col,
severity: canonical.severity,
id: canonical.id.clone(),
category: canonical.category,
path_validated: false,
guard_kind: None,
message: canonical.message.clone(),
labels: vec![],
confidence: canonical.confidence,
evidence: None,
rank_score: None,
rank_reason: None,
suppressed: false,
suppression: None,
rollup: Some(RollupData {
count: total,
occurrences: examples,
}),
finding_id: String::new(),
alternative_finding_ids: Vec::new(),
};
rollups.push(rollup_diag);
to_remove.extend(indices);
}
if to_remove.is_empty() {
return;
}
to_remove.sort_unstable();
to_remove.dedup();
for &i in to_remove.iter().rev() {
diags.remove(i);
}
rollups.sort_by(|a, b| {
a.path
.cmp(&b.path)
.then(a.id.cmp(&b.id))
.then(a.line.cmp(&b.line))
});
diags.extend(rollups);
}
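/// Retain low-severity findings within the per-file, per-rule, and global
/// caps, counting them in their current order; other severities always pass.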
fn apply_low_budgets(
diags: &mut Vec<Diag>,
config: &crate::utils::config::OutputConfig,
stats: &mut SuppressionStats,
) {
use std::collections::HashMap;
let mut per_file: HashMap<String, u32> = HashMap::new();
let mut per_rule: HashMap<String, u32> = HashMap::new();
let mut total_low: u32 = 0;
let before = diags.len();
diags.retain(|d| {
if d.severity != Severity::Low {
return true;
}
let file_count = per_file.entry(d.path.clone()).or_insert(0);
if *file_count >= config.max_low_per_file {
return false;
}
let rule_count = per_rule.entry(d.id.clone()).or_insert(0);
if *rule_count >= config.max_low_per_rule {
return false;
}
if total_low >= config.max_low {
return false;
}
*file_count += 1;
*rule_count += 1;
total_low += 1;
true
});
stats.low_budget_dropped = before - diags.len();
}
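/// Mark findings matched by inline suppression comments, reading each source
/// file once and grouping findings by path.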
fn apply_suppressions(diags: &mut [Diag]) {
use std::collections::HashMap;
let mut by_path: HashMap<String, Vec<usize>> = HashMap::new();
for (i, d) in diags.iter().enumerate() {
by_path.entry(d.path.clone()).or_default().push(i);
}
for (path, indices) in &by_path {
let Ok(source) = std::fs::read_to_string(path) else {
continue;
};
let file_path = Path::new(path.as_str());
let index = crate::suppress::parse_inline_suppressions(file_path, &source);
if index.is_empty() {
continue;
}
for &i in indices {
if let Some(meta) = index.check(diags[i].line, &diags[i].id) {
diags[i].suppressed = true;
diags[i].suppression = Some(meta);
}
}
}
}
#[cfg(test)]
mod dedup_taint_flow_tests {
use super::*;
use crate::evidence::{Evidence, FlowStep, FlowStepKind, SpanEvidence};
fn make_taint(path: &str, line: usize, col: usize, source_line: u32, source_col: u32) -> Diag {
Diag {
path: path.into(),
line,
col,
severity: Severity::High,
id: format!("taint-unsanitised-flow (source {source_line}:{source_col})"),
category: FindingCategory::Security,
path_validated: false,
guard_kind: None,
message: None,
labels: vec![],
confidence: None,
evidence: Some(Evidence {
source: Some(SpanEvidence {
path: path.into(),
line: source_line,
col: source_col,
kind: "source".into(),
snippet: None,
}),
sink: Some(SpanEvidence {
path: path.into(),
line: line as u32,
col: col as u32,
kind: "sink".into(),
snippet: None,
}),
hop_count: Some(1),
flow_steps: vec![
FlowStep {
step: 1,
kind: FlowStepKind::Source,
file: path.into(),
line: source_line,
col: source_col,
snippet: None,
variable: None,
callee: None,
function: Some("f".into()),
is_cross_file: false,
},
FlowStep {
step: 2,
kind: FlowStepKind::Sink,
file: path.into(),
line: line as u32,
col: col as u32,
snippet: None,
variable: None,
callee: None,
function: Some("f".into()),
is_cross_file: false,
},
],
..Default::default()
}),
rank_score: None,
rank_reason: None,
suppressed: false,
suppression: None,
rollup: None,
finding_id: String::new(),
alternative_finding_ids: Vec::new(),
}
}
#[test]
fn dedup_collapses_two_sources_to_same_sink_keeps_tighter_source() {
let mut diags = vec![
make_taint("a.rs", 10, 5, 3, 1),
make_taint("a.rs", 10, 5, 8, 1),
];
deduplicate_taint_flows(&mut diags);
assert_eq!(diags.len(), 1);
assert!(
diags[0].id.contains("(source 8:1)"),
"should keep tighter source, got id={}",
diags[0].id
);
}
#[test]
fn dedup_does_not_drop_different_sink_locations() {
let mut diags = vec![
make_taint("a.rs", 10, 5, 3, 1),
make_taint("a.rs", 12, 5, 3, 1),
];
deduplicate_taint_flows(&mut diags);
assert_eq!(diags.len(), 2);
}
#[test]
fn dedup_does_not_drop_across_severities() {
let mut diags = vec![
make_taint("a.rs", 10, 5, 3, 1),
make_taint("a.rs", 10, 5, 8, 1),
];
diags[1].severity = Severity::Medium;
deduplicate_taint_flows(&mut diags);
assert_eq!(diags.len(), 2);
}
#[test]
fn dedup_does_not_drop_across_paths() {
let mut diags = vec![
make_taint("a.rs", 10, 5, 3, 1),
make_taint("b.rs", 10, 5, 3, 1),
];
deduplicate_taint_flows(&mut diags);
assert_eq!(diags.len(), 2);
}
#[test]
fn dedup_leaves_non_taint_rule_ids_alone() {
let mut diags = vec![
make_taint("a.rs", 10, 5, 3, 1),
make_taint("a.rs", 10, 5, 8, 1),
];
diags[1].id = "js.code_exec.eval".into();
deduplicate_taint_flows(&mut diags);
assert_eq!(diags.len(), 2);
}
#[test]
fn dedup_collapses_same_line_different_columns() {
let mut diags = vec![
make_taint("a.rs", 10, 3, 4, 1),
make_taint("a.rs", 10, 17, 8, 1),
];
deduplicate_taint_flows(&mut diags);
assert_eq!(diags.len(), 1);
assert!(
diags[0].id.contains("(source 8:1)"),
"should keep tighter source (distance 2), got id={}",
diags[0].id
);
}
#[test]
fn dedup_does_not_drop_different_sink_caps_on_same_line() {
let mut diags = vec![
make_taint("a.rs", 10, 5, 3, 1),
make_taint("a.rs", 10, 5, 3, 1),
];
if let Some(ev) = diags[0].evidence.as_mut() {
ev.sink_caps = crate::labels::Cap::SQL_QUERY.bits();
}
if let Some(ev) = diags[1].evidence.as_mut() {
ev.sink_caps = crate::labels::Cap::SHELL_ESCAPE.bits();
}
deduplicate_taint_flows(&mut diags);
assert_eq!(
diags.len(),
2,
"findings with different sink caps must not dedup"
);
}
#[test]
fn dedup_collapses_same_sink_caps_on_same_line() {
let mut diags = vec![
make_taint("a.rs", 10, 5, 3, 1),
make_taint("a.rs", 10, 5, 8, 1),
];
if let Some(ev) = diags[0].evidence.as_mut() {
ev.sink_caps = crate::labels::Cap::SHELL_ESCAPE.bits();
}
if let Some(ev) = diags[1].evidence.as_mut() {
ev.sink_caps = crate::labels::Cap::SHELL_ESCAPE.bits();
}
deduplicate_taint_flows(&mut diags);
assert_eq!(diags.len(), 1);
}
#[test]
fn dedup_prefers_same_function_over_cross_function() {
let mut diags = vec![
make_taint("a.rs", 10, 5, 8, 1),
make_taint("a.rs", 10, 5, 2, 1),
];
if let Some(ev) = diags[1].evidence.as_mut() {
if let Some(first) = ev.flow_steps.first_mut() {
first.function = Some("other".into());
}
}
deduplicate_taint_flows(&mut diags);
assert_eq!(diags.len(), 1);
assert!(diags[0].id.contains("(source 8:1)"));
}
}
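// Tests for SCC convergence tagging: unconverged findings have their
// confidence capped at Low and an explanatory note appended to their evidence.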
#[cfg(test)]
mod scc_tagging_tests {
use super::*;
use crate::evidence::{Confidence, Evidence};
fn make_diag(confidence: Option<Confidence>) -> Diag {
Diag {
path: "a.py".into(),
line: 1,
col: 1,
severity: Severity::High,
id: "taint-unsanitised-flow".into(),
category: FindingCategory::Security,
path_validated: false,
guard_kind: None,
message: None,
labels: vec![],
confidence,
evidence: Some(Evidence::default()),
rank_score: None,
rank_reason: None,
suppressed: false,
suppression: None,
rollup: None,
finding_id: String::new(),
alternative_finding_ids: Vec::new(),
}
}
#[test]
fn tag_unconverged_caps_confidence_and_appends_note() {
let mut diags = vec![make_diag(Some(Confidence::High)), make_diag(None)];
tag_unconverged_findings(
&mut diags,
64,
64,
false,
crate::engine_notes::CapHitReason::Unknown,
);
assert_eq!(diags[0].confidence, Some(Confidence::Low));
assert_eq!(diags[1].confidence, Some(Confidence::Low));
for d in &diags {
let ev = d.evidence.as_ref().expect("evidence populated");
assert!(
ev.notes
.iter()
.any(|n| n.starts_with(SCC_UNCONVERGED_NOTE_PREFIX)),
"expected scc_unconverged note, got {:?}",
ev.notes
);
}
}
#[test]
fn tag_unconverged_preserves_lower_than_low_confidence() {
let mut diags = vec![make_diag(Some(Confidence::Low))];
tag_unconverged_findings(
&mut diags,
10,
64,
false,
crate::engine_notes::CapHitReason::Unknown,
);
assert_eq!(diags[0].confidence, Some(Confidence::Low));
}
#[test]
fn tag_unconverged_creates_evidence_when_missing() {
let mut d = make_diag(None);
d.evidence = None;
let mut diags = vec![d];
tag_unconverged_findings(
&mut diags,
7,
64,
false,
crate::engine_notes::CapHitReason::Unknown,
);
let ev = diags[0].evidence.as_ref().expect("evidence created");
assert!(
ev.notes
.iter()
.any(|n| n.starts_with(SCC_UNCONVERGED_NOTE_PREFIX))
);
}
#[test]
fn tag_unconverged_does_not_duplicate_notes_on_rerun() {
let mut diags = vec![make_diag(None)];
tag_unconverged_findings(
&mut diags,
64,
64,
false,
crate::engine_notes::CapHitReason::Unknown,
);
tag_unconverged_findings(
&mut diags,
64,
64,
false,
crate::engine_notes::CapHitReason::Unknown,
);
let notes = &diags[0].evidence.as_ref().unwrap().notes;
let count = notes
.iter()
.filter(|n| n.starts_with(SCC_UNCONVERGED_NOTE_PREFIX))
.count();
assert_eq!(count, 1, "should not duplicate scc_unconverged note");
}
#[test]
fn tag_unconverged_cross_file_variant_uses_tighter_prefix() {
let mut diags = vec![make_diag(None)];
tag_unconverged_findings(
&mut diags,
64,
64,
true,
crate::engine_notes::CapHitReason::Unknown,
);
let ev = diags[0].evidence.as_ref().expect("evidence populated");
assert!(SCC_UNCONVERGED_CROSS_FILE_NOTE_PREFIX.starts_with(SCC_UNCONVERGED_NOTE_PREFIX));
assert!(
ev.notes
.iter()
.any(|n| n.starts_with(SCC_UNCONVERGED_CROSS_FILE_NOTE_PREFIX)),
"expected cross-file scc_unconverged note, got {:?}",
ev.notes
);
}
#[test]
fn tag_unconverged_non_cross_file_does_not_use_cross_file_prefix() {
let mut diags = vec![make_diag(None)];
tag_unconverged_findings(
&mut diags,
64,
64,
false,
crate::engine_notes::CapHitReason::Unknown,
);
let ev = diags[0].evidence.as_ref().expect("evidence populated");
assert!(
!ev.notes
.iter()
.any(|n| n.starts_with(SCC_UNCONVERGED_CROSS_FILE_NOTE_PREFIX)),
"intra-file SCC should not carry cross-file note, got {:?}",
ev.notes
);
}
}
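// Indexed-scan integration tests: build an on-disk index in a temp project,
// then exercise scan_with_index_parallel against it.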
#[test]
fn scan_with_index_parallel_uses_existing_index_without_rescanning() {
let mut cfg = Config::default();
cfg.performance.worker_threads = Some(1);
cfg.performance.channel_multiplier = 1;
cfg.performance.batch_size = 2;
let td = tempfile::tempdir().unwrap();
let project_dir = td.path().join("proj");
std::fs::create_dir(&project_dir).unwrap();
std::fs::write(project_dir.join("foo.txt"), "abc").unwrap();
let (project_name, db_path) = get_project_info(&project_dir, td.path()).unwrap();
crate::commands::index::build_index(&project_name, &project_dir, &db_path, &cfg, false)
.unwrap();
let pool = Indexer::init(&db_path).unwrap();
assert_eq!(
Indexer::from_pool(&project_name, &pool)
.unwrap()
.get_files(&project_name)
.unwrap()
.len(),
1
);
let diags =
scan_with_index_parallel(&project_name, Arc::clone(&pool), &cfg, false, &project_dir)
.expect("scan should succeed");
assert!(diags.is_empty());
}
#[test]
fn scan_with_index_parallel_discovers_new_files_after_index_build() {
let mut cfg = Config::default();
cfg.performance.worker_threads = Some(1);
cfg.performance.channel_multiplier = 1;
cfg.performance.batch_size = 2;
let td = tempfile::tempdir().unwrap();
let project_dir = td.path().join("proj");
std::fs::create_dir(&project_dir).unwrap();
std::fs::write(project_dir.join("foo.txt"), "abc").unwrap();
let (project_name, db_path) = get_project_info(&project_dir, td.path()).unwrap();
crate::commands::index::build_index(&project_name, &project_dir, &db_path, &cfg, false)
.unwrap();
std::fs::write(project_dir.join("bar.txt"), "xyz").unwrap();
let pool = Indexer::init(&db_path).unwrap();
scan_with_index_parallel(&project_name, Arc::clone(&pool), &cfg, false, &project_dir)
.expect("scan should succeed");
let files = Indexer::from_pool(&project_name, &pool)
.unwrap()
.get_files(&project_name)
.unwrap();
assert_eq!(
files.len(),
2,
"new files should be discovered without rebuild"
);
}
#[test]
fn scan_with_index_parallel_clears_stale_issues_when_file_becomes_clean() {
let mut cfg = Config::default();
cfg.performance.worker_threads = Some(1);
cfg.performance.channel_multiplier = 1;
cfg.performance.batch_size = 2;
let td = tempfile::tempdir().unwrap();
let project_dir = td.path().join("proj");
std::fs::create_dir(&project_dir).unwrap();
let app = project_dir.join("app.js");
std::fs::write(
&app,
r#"
function run() {
const cmd = process.env.CMD;
eval(cmd);
}
"#,
)
.unwrap();
let (project_name, db_path) = get_project_info(&project_dir, td.path()).unwrap();
crate::commands::index::build_index(&project_name, &project_dir, &db_path, &cfg, false)
.unwrap();
let pool = Indexer::init(&db_path).unwrap();
let idx = Indexer::from_pool(&project_name, &pool).unwrap();
assert!(
!idx.get_issues_from_file(&app).unwrap().is_empty(),
"the initial indexed build should persist at least one issue"
);
std::fs::write(
&app,
r#"
function run() {
const cmd = "safe";
console.log(cmd);
}
"#,
)
.unwrap();
let diags =
scan_with_index_parallel(&project_name, Arc::clone(&pool), &cfg, false, &project_dir)
.expect("scan should succeed");
assert!(
diags.is_empty(),
"the cleaned file should no longer report findings"
);
let idx = Indexer::from_pool(&project_name, &pool).unwrap();
assert!(
idx.get_issues_from_file(&app).unwrap().is_empty(),
"DB issues should be cleared when a file becomes clean"
);
}
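// The severity filter is applied at the output stage: only findings at or
// above the requested level survive.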
#[test]
fn severity_filter_applied_at_output_stage() {
let diags = vec![
Diag {
path: "tests/test.py".into(),
line: 1,
col: 1,
severity: Severity::Medium,
id: "taint-unsanitised-flow".into(),
category: FindingCategory::Security,
path_validated: false,
guard_kind: None,
message: None,
labels: vec![],
confidence: None,
evidence: None,
rank_score: None,
rank_reason: None,
suppressed: false,
suppression: None,
rollup: None,
finding_id: String::new(),
alternative_finding_ids: Vec::new(),
},
Diag {
path: "src/main.rs".into(),
line: 10,
col: 5,
severity: Severity::High,
id: "taint-unsanitised-flow".into(),
category: FindingCategory::Security,
path_validated: false,
guard_kind: None,
message: None,
labels: vec![],
confidence: None,
evidence: None,
rank_score: None,
rank_reason: None,
suppressed: false,
suppression: None,
rollup: None,
finding_id: String::new(),
alternative_finding_ids: Vec::new(),
},
];
let filter = SeverityFilter::parse("HIGH").unwrap();
let filtered: Vec<_> = diags
.into_iter()
.filter(|d| filter.matches(d.severity))
.collect();
assert_eq!(filtered.len(), 1);
assert_eq!(filtered[0].severity, Severity::High);
assert_eq!(filtered[0].path, "src/main.rs");
}
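// Tests for the prioritize pipeline: quality dropping, rollup grouping,
// Low-severity budgets, and deterministic ordering.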
#[cfg(test)]
mod prioritize_tests {
use super::*;
use crate::utils::config::OutputConfig;
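/// Builds a minimal Diag at the given location with the given severity, rule
/// id, and category; all optional fields are left unset.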
fn make_diag(
path: &str,
line: usize,
severity: Severity,
id: &str,
cat: FindingCategory,
) -> Diag {
Diag {
path: path.into(),
line,
col: 1,
severity,
id: id.into(),
category: cat,
path_validated: false,
guard_kind: None,
message: None,
labels: vec![],
confidence: None,
evidence: None,
rank_score: None,
rank_reason: None,
suppressed: false,
suppression: None,
rollup: None,
finding_id: String::new(),
alternative_finding_ids: Vec::new(),
}
}
fn default_config() -> OutputConfig {
OutputConfig::default()
}
#[test]
fn quality_dropped_by_default() {
let mut diags = vec![
make_diag(
"a.rs",
1,
Severity::Low,
"rs.quality.unwrap",
FindingCategory::Quality,
),
make_diag(
"a.rs",
2,
Severity::High,
"taint-flow",
FindingCategory::Security,
),
];
let stats = prioritize(&mut diags, &default_config(), None);
assert_eq!(diags.len(), 1);
assert_eq!(diags[0].id, "taint-flow");
assert_eq!(stats.quality_dropped, 1);
}
#[test]
fn quality_kept_with_include_quality() {
let mut diags = vec![
make_diag(
"a.rs",
1,
Severity::Low,
"rs.quality.unwrap",
FindingCategory::Quality,
),
make_diag(
"a.rs",
2,
Severity::High,
"taint-flow",
FindingCategory::Security,
),
];
let mut cfg = default_config();
cfg.include_quality = true;
let stats = prioritize(&mut diags, &cfg, None);
assert_eq!(diags.len(), 2);
assert_eq!(stats.quality_dropped, 0);
}
#[test]
fn show_all_disables_everything() {
let mut diags = vec![
make_diag(
"a.rs",
1,
Severity::Low,
"rs.quality.unwrap",
FindingCategory::Quality,
),
make_diag(
"a.rs",
2,
Severity::Low,
"rs.quality.unwrap",
FindingCategory::Quality,
),
make_diag(
"a.rs",
3,
Severity::Low,
"rs.quality.unwrap",
FindingCategory::Quality,
),
];
let mut cfg = default_config();
cfg.show_all = true;
let stats = prioritize(&mut diags, &cfg, None);
assert_eq!(diags.len(), 3);
assert_eq!(stats.quality_dropped, 0);
assert_eq!(stats.low_budget_dropped, 0);
assert!(diags.iter().all(|d| d.rollup.is_none()));
}
#[test]
fn rollup_groups_by_file_and_rule() {
let mut diags = vec![
make_diag(
"a.rs",
10,
Severity::Low,
"rs.quality.unwrap",
FindingCategory::Quality,
),
make_diag(
"a.rs",
20,
Severity::Low,
"rs.quality.unwrap",
FindingCategory::Quality,
),
make_diag(
"a.rs",
30,
Severity::Low,
"rs.quality.unwrap",
FindingCategory::Quality,
),
make_diag(
"b.rs",
5,
Severity::Low,
"rs.quality.unwrap",
FindingCategory::Quality,
),
make_diag(
"b.rs",
15,
Severity::Low,
"rs.quality.unwrap",
FindingCategory::Quality,
),
];
let mut cfg = default_config();
cfg.include_quality = true;
let _stats = prioritize(&mut diags, &cfg, None);
let rollups: Vec<_> = diags.iter().filter(|d| d.rollup.is_some()).collect();
assert_eq!(rollups.len(), 2);
let a_rollup = rollups.iter().find(|d| d.path == "a.rs").unwrap();
assert_eq!(a_rollup.rollup.as_ref().unwrap().count, 3);
let b_rollup = rollups.iter().find(|d| d.path == "b.rs").unwrap();
assert_eq!(b_rollup.rollup.as_ref().unwrap().count, 2);
}
#[test]
fn rollup_examples_limited() {
let mut diags: Vec<Diag> = (1..=20)
.map(|i| {
make_diag(
"a.rs",
i,
Severity::Low,
"rs.quality.unwrap",
FindingCategory::Quality,
)
})
.collect();
let mut cfg = default_config();
cfg.include_quality = true;
cfg.rollup_examples = 3;
let _stats = prioritize(&mut diags, &cfg, None);
let rollup = diags.iter().find(|d| d.rollup.is_some()).unwrap();
assert_eq!(rollup.rollup.as_ref().unwrap().count, 20);
assert_eq!(rollup.rollup.as_ref().unwrap().occurrences.len(), 3);
}
#[test]
fn rollup_canonical_is_first_sorted() {
let mut diags = vec![
make_diag(
"a.rs",
50,
Severity::Low,
"rs.quality.unwrap",
FindingCategory::Quality,
),
make_diag(
"a.rs",
10,
Severity::Low,
"rs.quality.unwrap",
FindingCategory::Quality,
),
make_diag(
"a.rs",
30,
Severity::Low,
"rs.quality.unwrap",
FindingCategory::Quality,
),
];
let mut cfg = default_config();
cfg.include_quality = true;
let _stats = prioritize(&mut diags, &cfg, None);
let rollup = diags.iter().find(|d| d.rollup.is_some()).unwrap();
assert_eq!(rollup.line, 10);
}
#[test]
fn low_budget_per_file() {
let mut diags = vec![
make_diag(
"a.rs",
1,
Severity::Low,
"some-rule",
FindingCategory::Security,
),
make_diag(
"a.rs",
2,
Severity::Low,
"some-rule-2",
FindingCategory::Security,
),
make_diag(
"b.rs",
1,
Severity::Low,
"some-rule",
FindingCategory::Security,
),
];
let mut cfg = default_config();
cfg.max_low_per_file = 1;
cfg.max_low = 100;
cfg.max_low_per_rule = 100;
let stats = prioritize(&mut diags, &cfg, None);
assert_eq!(diags.len(), 2);
assert_eq!(stats.low_budget_dropped, 1);
}
#[test]
fn low_budget_per_rule() {
let mut diags = vec![
make_diag(
"a.rs",
1,
Severity::Low,
"rule-x",
FindingCategory::Security,
),
make_diag(
"b.rs",
1,
Severity::Low,
"rule-x",
FindingCategory::Security,
),
make_diag(
"c.rs",
1,
Severity::Low,
"rule-x",
FindingCategory::Security,
),
];
let mut cfg = default_config();
cfg.max_low_per_file = 100;
cfg.max_low = 100;
cfg.max_low_per_rule = 2;
let stats = prioritize(&mut diags, &cfg, None);
assert_eq!(diags.len(), 2);
assert_eq!(stats.low_budget_dropped, 1);
}
#[test]
fn low_budget_total() {
let mut diags: Vec<Diag> = (1..=5)
.map(|i| {
make_diag(
&format!("f{i}.rs"),
1,
Severity::Low,
&format!("rule-{i}"),
FindingCategory::Security,
)
})
.collect();
let mut cfg = default_config();
cfg.max_low_per_file = 100;
cfg.max_low_per_rule = 100;
cfg.max_low = 3;
let stats = prioritize(&mut diags, &cfg, None);
assert_eq!(diags.len(), 3);
assert_eq!(stats.low_budget_dropped, 2);
}
#[test]
fn high_medium_never_dropped_by_low_budget() {
let mut diags = vec![
make_diag(
"a.rs",
1,
Severity::High,
"vuln-1",
FindingCategory::Security,
),
make_diag(
"a.rs",
2,
Severity::Medium,
"vuln-2",
FindingCategory::Security,
),
make_diag(
"a.rs",
3,
Severity::Low,
"vuln-3",
FindingCategory::Security,
),
];
let mut cfg = default_config();
cfg.max_low = 0;
cfg.max_low_per_file = 0;
cfg.max_low_per_rule = 0;
let stats = prioritize(&mut diags, &cfg, None);
assert_eq!(diags.len(), 2);
assert!(diags.iter().all(|d| d.severity != Severity::Low));
assert_eq!(stats.low_budget_dropped, 1);
}
#[test]
fn rollup_counts_as_one_for_budget() {
let mut diags: Vec<Diag> = (1..=10)
.map(|i| {
make_diag(
"a.rs",
i,
Severity::Low,
"rs.quality.unwrap",
FindingCategory::Quality,
)
})
.collect();
diags.push(make_diag(
"a.rs",
100,
Severity::Low,
"other-rule",
FindingCategory::Security,
));
let mut cfg = default_config();
cfg.include_quality = true;
cfg.max_low_per_file = 2;
cfg.max_low = 100;
cfg.max_low_per_rule = 100;
let _stats = prioritize(&mut diags, &cfg, None);
assert_eq!(diags.len(), 2);
}
#[test]
fn show_instances_bypasses_rollup_for_rule() {
let mut diags = vec![
make_diag(
"a.rs",
1,
Severity::Low,
"rs.quality.unwrap",
FindingCategory::Quality,
),
make_diag(
"a.rs",
2,
Severity::Low,
"rs.quality.unwrap",
FindingCategory::Quality,
),
make_diag(
"a.rs",
3,
Severity::Low,
"rs.quality.expect",
FindingCategory::Quality,
),
make_diag(
"a.rs",
4,
Severity::Low,
"rs.quality.expect",
FindingCategory::Quality,
),
];
let mut cfg = default_config();
cfg.include_quality = true;
cfg.max_low = 100;
cfg.max_low_per_file = 100;
cfg.max_low_per_rule = 100;
let _stats = prioritize(&mut diags, &cfg, Some("rs.quality.unwrap"));
let unwrap_count = diags.iter().filter(|d| d.id == "rs.quality.unwrap").count();
let expect_rollup = diags
.iter()
.find(|d| d.id == "rs.quality.expect" && d.rollup.is_some());
assert_eq!(unwrap_count, 2);
assert!(expect_rollup.is_some());
}
#[test]
fn json_includes_rollup_data() {
let d = Diag {
path: "a.rs".into(),
line: 10,
col: 1,
severity: Severity::Low,
id: "rs.quality.unwrap".into(),
category: FindingCategory::Quality,
path_validated: false,
guard_kind: None,
message: None,
labels: vec![],
confidence: None,
evidence: None,
rank_score: None,
rank_reason: None,
suppressed: false,
suppression: None,
rollup: Some(RollupData {
count: 38,
occurrences: vec![Location { line: 10, col: 1 }, Location { line: 20, col: 5 }],
}),
finding_id: String::new(),
alternative_finding_ids: Vec::new(),
};
let json = serde_json::to_string(&d).unwrap();
assert!(json.contains("\"rollup\""));
assert!(json.contains("\"count\":38"));
assert!(json.contains("\"occurrences\""));
}
#[test]
fn deterministic_output() {
let make_diags = || {
vec![
make_diag(
"b.rs",
5,
Severity::Low,
"rs.quality.unwrap",
FindingCategory::Quality,
),
make_diag(
"a.rs",
10,
Severity::Low,
"rs.quality.unwrap",
FindingCategory::Quality,
),
make_diag(
"a.rs",
3,
Severity::Low,
"rs.quality.unwrap",
FindingCategory::Quality,
),
make_diag(
"b.rs",
1,
Severity::Low,
"rs.quality.unwrap",
FindingCategory::Quality,
),
]
};
let mut cfg = default_config();
cfg.include_quality = true;
let mut d1 = make_diags();
let mut d2 = make_diags();
let _s1 = prioritize(&mut d1, &cfg, None);
let _s2 = prioritize(&mut d2, &cfg, None);
let j1 = serde_json::to_string(&d1).unwrap();
let j2 = serde_json::to_string(&d2).unwrap();
assert_eq!(j1, j2, "same input should produce same output");
}
}