pub(crate) use crate::ast::{
analyse_file_fused, extract_summaries_from_bytes, run_rules_on_bytes, run_rules_on_file,
};
use crate::database::index::{Indexer, IssueRow};
use crate::errors::NyxResult;
use crate::patterns::Severity;
use crate::summary::{self, GlobalSummaries};
use crate::utils::config::Config;
use crate::utils::project::get_project_info;
use crate::walk::spawn_file_walker;
use console::style;
use dashmap::DashMap;
use indicatif::{ProgressBar, ProgressStyle};
use r2d2::Pool;
use r2d2_sqlite::SqliteConnectionManager;
use rayon::prelude::*;
use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
/// Creates the progress bar used by the scan passes.
///
/// With `show == false` a hidden bar is returned, so callers can call `inc`
/// and `finish_and_clear` unconditionally. Otherwise the bar has length `len`,
/// carries `msg` as its message and renders spinner, bar, position and ETA.
fn make_progress_bar(len: u64, msg: &str, show: bool) -> ProgressBar {
    if show {
        let bar = ProgressBar::new(len);
        // The template is a compile-time constant, so parsing it cannot fail at runtime.
        let bar_style = ProgressStyle::with_template(
            "{spinner:.green} {msg} [{bar:30.cyan/blue}] {pos}/{len} ({eta})",
        )
        .unwrap()
        .progress_chars("##-");
        bar.set_style(bar_style);
        bar.set_message(msg.to_string());
        bar
    } else {
        ProgressBar::hidden()
    }
}
/// A single finding produced by a scan.
///
/// Values of this type are returned by the scan functions in this file and are
/// serialized as-is for the JSON output of `handle`.
#[derive(Debug, Clone, serde::Serialize)]
pub struct Diag {
/// Path of the file the finding belongs to; console output groups findings by this value.
pub path: String,
/// Line of the finding, printed as `line:col` in console output.
/// NOTE(review): whether this is 0- or 1-based is not visible here; confirm at the producer.
pub line: usize,
/// Column of the finding, printed as `line:col` in console output.
/// NOTE(review): base (0 or 1) not visible here; confirm at the producer.
pub col: usize,
/// Severity of the finding; rendered with `colored_tag()` on the console and
/// stored with `as_db_str()` when issues are written to the index.
pub severity: Severity,
/// Identifier of the rule that fired (stored as `rule_id` in the index).
/// Ids starting with `taint` or `cfg-` are the ones kept or dropped by the
/// analysis-mode filter in `scan_with_index_parallel`.
pub id: String,
}
/// Entry point of the scan command: analyses the project at `path` and prints the findings.
///
/// * `path` - directory to scan; it is canonicalized before use.
/// * `no_index` - scan straight from the filesystem and skip the on-disk index.
/// * `rebuild_index` - force a rebuild of the index before scanning (the index is
///   also built when the database file does not exist yet).
/// * `format` - `"json"`, `"sarif"` or `"console"`. An empty string falls back to
///   `config.output.default_format` for all three kinds. Any other value
///   produces no report.
/// * `database_dir` - directory passed to `get_project_info` to locate the project database.
/// * `config` - effective configuration.
///
/// Status and progress output is suppressed for the machine-readable formats and
/// when `config.output.quiet` is set, so stdout carries only the report.
///
/// # Errors
/// Returns an error when `path` cannot be canonicalized, when the project
/// information, the index or the scan fails, or when the report cannot be serialized.
pub fn handle(
    path: &str,
    no_index: bool,
    rebuild_index: bool,
    format: String,
    database_dir: &Path,
    config: &Config,
) -> NyxResult<()> {
    let scan_path = Path::new(path).canonicalize()?;
    let (project_name, db_path) = get_project_info(&scan_path, database_dir)?;

    // Resolve the effective format once: an explicit `format` wins, an empty one
    // defers to the configured default for every output kind, not just "console".
    let selected = |name: &str| {
        format == name || (format.is_empty() && config.output.default_format == name)
    };
    let want_json = selected("json");
    let want_sarif = selected("sarif");
    let want_console = selected("console");

    // Machine-readable output must not be interleaved with status text or progress bars.
    let suppress_status = config.output.quiet || want_json || want_sarif;
    let show_progress = !suppress_status;

    if !suppress_status {
        println!(
            "{} {}...\n",
            style("Checking").green().bold(),
            &project_name
        );
    }

    let diags: Vec<Diag> = if no_index {
        scan_filesystem(&scan_path, config, show_progress)?
    } else {
        if rebuild_index || !db_path.exists() {
            tracing::debug!("Building index from filesystem");
            crate::commands::index::build_index(
                &project_name,
                &scan_path,
                &db_path,
                config,
                show_progress,
            )?;
        }
        let pool = Indexer::init(&db_path)?;
        if config.database.vacuum_on_startup {
            let idx = Indexer::from_pool(&project_name, &pool)?;
            idx.vacuum()?;
        }
        scan_with_index_parallel(&project_name, pool, config, show_progress)?
    };
    tracing::debug!("Found {:?} issues.", diags.len());

    if want_json {
        let json = serde_json::to_string(&diags)
            .map_err(|e| crate::errors::NyxError::Msg(e.to_string()))?;
        println!("{json}");
        return Ok(());
    }

    if want_sarif {
        let sarif = crate::output::build_sarif(&diags, &scan_path);
        let json = serde_json::to_string_pretty(&sarif)
            .map_err(|e| crate::errors::NyxError::Msg(e.to_string()))?;
        println!("{json}");
        return Ok(());
    }

    if want_console {
        tracing::debug!("Printing to console");
        // BTreeMap keeps the per-file groups sorted by path.
        let mut grouped: BTreeMap<&str, Vec<&Diag>> = BTreeMap::new();
        for d in &diags {
            grouped.entry(d.path.as_str()).or_default().push(d);
        }
        for (path, issues) in &grouped {
            println!("{}", style(path).blue().underlined());
            for d in issues {
                println!(
                    "  {:>4}:{:<4}  {}  {}",
                    d.line,
                    d.col,
                    d.severity.colored_tag(),
                    style(&d.id).bold()
                );
            }
            println!();
        }
        println!(
            "{} '{}' generated {} issues.",
            style("warning").yellow().bold(),
            style(&project_name).white().bold(),
            style(diags.len()).bold()
        );
        println!("\t");
    }
    Ok(())
}
pub(crate) fn scan_filesystem(
root: &Path,
cfg: &Config,
show_progress: bool,
) -> NyxResult<Vec<Diag>> {
let all_paths: Vec<PathBuf> = {
let _span = tracing::info_span!("walk_files").entered();
let (rx, handle) = spawn_file_walker(root, cfg);
let paths: Vec<PathBuf> = rx.into_iter().flatten().collect();
if let Err(err) = handle.join() {
tracing::error!("walker thread panicked: {:#?}", err);
}
paths
};
tracing::info!(file_count = all_paths.len(), "file walk complete");
let needs_taint = cfg.scanner.mode == crate::utils::config::AnalysisMode::Full
|| cfg.scanner.mode == crate::utils::config::AnalysisMode::Taint;
if !needs_taint {
let _span = tracing::info_span!("ast_only_analysis", files = all_paths.len()).entered();
let pb = make_progress_bar(all_paths.len() as u64, "Running analysis", show_progress);
let mut diags: Vec<Diag> = all_paths
.par_iter()
.flat_map_iter(|path| {
let result = match analyse_file_fused(
&std::fs::read(path).unwrap_or_default(),
path,
cfg,
None,
Some(root),
) {
Ok(r) => r.diags,
Err(e) => {
tracing::warn!("analysis: {}: {e}", path.display());
vec![]
}
};
pb.inc(1);
result
})
.collect();
pb.finish_and_clear();
if let Some(max) = cfg.output.max_results {
diags.truncate(max as usize);
}
return Ok(diags);
}
let global_summaries: GlobalSummaries = {
let _span = tracing::info_span!("pass1_fused", files = all_paths.len()).entered();
let pb = make_progress_bar(
all_paths.len() as u64,
"Pass 1: Extracting summaries",
show_progress,
);
let root_str = root.to_string_lossy();
let gs = all_paths
.par_iter()
.fold(GlobalSummaries::new, |mut local_gs, path| {
if let Ok(bytes) = std::fs::read(path) {
match analyse_file_fused(&bytes, path, cfg, None, Some(root)) {
Ok(r) => {
for s in r.summaries {
let key = s.func_key(Some(&root_str));
local_gs.insert(key, s);
}
}
Err(e) => {
tracing::warn!("pass 1: {}: {e}", path.display());
}
}
} else {
tracing::warn!("pass 1: cannot read {}", path.display());
}
pb.inc(1);
local_gs
})
.reduce(GlobalSummaries::new, |mut a, b| {
a.merge(b);
a
});
pb.finish_and_clear();
tracing::info!("pass 1 complete");
gs
};
let mut diags: Vec<Diag> = {
let _span = tracing::info_span!("pass2_analysis", files = all_paths.len()).entered();
let pb = make_progress_bar(
all_paths.len() as u64,
"Pass 2: Running analysis",
show_progress,
);
let result: Vec<Diag> = all_paths
.par_iter()
.flat_map_iter(|path| {
let result = match run_rules_on_file(path, cfg, Some(&global_summaries), Some(root))
{
Ok(d) => d,
Err(e) => {
tracing::warn!("pass 2: {}: {e}", path.display());
vec![]
}
};
pb.inc(1);
result
})
.collect();
pb.finish_and_clear();
result
};
tracing::info!(diags = diags.len(), "pass 2 complete");
if let Some(max) = cfg.output.max_results {
diags.truncate(max as usize);
}
Ok(diags)
}
/// Scans the files registered in the index for `project`, reusing stored results where possible.
///
/// * `project` - project name used to open an `Indexer` on `pool` and to list its files.
/// * `pool` - connection pool of the project database; every rayon worker opens
///   its own `Indexer` from it.
/// * `cfg` - effective configuration (analysis mode, `output.max_results`).
/// * `show_progress` - whether progress bars are drawn.
///
/// In `Full` / `Taint` mode, pass 1 re-extracts summaries for files whose content
/// hash changed and stores them; all stored summaries are then loaded and merged
/// for pass 2. Pass 2 always re-runs the rules in those modes. In the other mode
/// a file is re-analysed only when `should_scan_with_hash` says so (or when it
/// cannot be read); otherwise its stored issues are returned. Fresh results are
/// written back to the index. Finally the diagnostics are filtered by analysis mode.
///
/// Failures on individual files (read, analysis, database writes) are logged with
/// `tracing::warn!` and do not abort the scan. A failed file upsert skips issue
/// persistence for that file instead of writing under a defaulted id.
///
/// The result is sorted by path, line, column and rule id so that output and the
/// `max_results` cut-off are deterministic.
///
/// # Errors
/// Returns an error when the file list or the stored summaries cannot be loaded.
///
/// # Panics
/// Panics inside a worker when an `Indexer` cannot be created from `pool`.
pub fn scan_with_index_parallel(
    project: &str,
    pool: Arc<Pool<SqliteConnectionManager>>,
    cfg: &Config,
    show_progress: bool,
) -> NyxResult<Vec<Diag>> {
    let files = {
        let idx = Indexer::from_pool(project, &pool)?;
        idx.get_files(project)?
    };
    let needs_taint = cfg.scanner.mode == crate::utils::config::AnalysisMode::Full
        || cfg.scanner.mode == crate::utils::config::AnalysisMode::Taint;

    // Pass 1: refresh stored summaries for files whose content changed.
    if needs_taint {
        let _span = tracing::info_span!("pass1_indexed", files = files.len()).entered();
        let pb = make_progress_bar(
            files.len() as u64,
            "Pass 1: Extracting summaries",
            show_progress,
        );
        files.par_iter().for_each_init(
            || Indexer::from_pool(project, &pool).expect("db pool"),
            |idx, path| {
                match std::fs::read(path) {
                    Ok(bytes) => {
                        let hash = Indexer::digest_bytes(&bytes);
                        // If the staleness check itself fails, err on the side of rescanning.
                        if idx.should_scan_with_hash(path, &hash).unwrap_or(true) {
                            match extract_summaries_from_bytes(&bytes, path, cfg) {
                                Ok(sums) => {
                                    if let Err(e) =
                                        idx.replace_summaries_for_file(path, &hash, &sums)
                                    {
                                        tracing::warn!(
                                            "pass 1: cannot store summaries for {}: {e}",
                                            path.display()
                                        );
                                    }
                                }
                                Err(e) => {
                                    tracing::warn!("pass 1: {}: {e}", path.display());
                                }
                            }
                        }
                    }
                    Err(e) => {
                        tracing::warn!("pass 1: cannot read {}: {e}", path.display());
                    }
                }
                pb.inc(1);
            },
        );
        pb.finish_and_clear();
    }

    let global_summaries: Option<GlobalSummaries> = if needs_taint {
        let _span = tracing::info_span!("load_summaries_db").entered();
        let idx = Indexer::from_pool(project, &pool)?;
        let all = idx.load_all_summaries()?;
        tracing::info!(summaries = all.len(), "loaded cross-file summaries from DB");
        Some(summary::merge_summaries(all, None))
    } else {
        None
    };

    // Pass 2: run the rules, or reuse stored issues for unchanged files.
    let _span = tracing::info_span!("pass2_indexed").entered();
    let pb2 = make_progress_bar(
        files.len() as u64,
        "Pass 2: Running analysis",
        show_progress,
    );
    let diag_map: DashMap<String, Vec<Diag>> = DashMap::new();
    files.into_par_iter().for_each_init(
        || Indexer::from_pool(project, &pool).expect("db pool"),
        |idx, path| {
            let bytes_opt = std::fs::read(&path).ok();
            let hash = bytes_opt.as_ref().map(|b| Indexer::digest_bytes(b));

            // With taint analysis every file is re-run. Otherwise the stored
            // result is reused unless the content changed, the check failed, or
            // the file could not be read.
            let needs_scan = needs_taint
                || match &hash {
                    Some(h) => idx.should_scan_with_hash(&path, h).unwrap_or(true),
                    None => true,
                };

            let mut diags = if needs_scan {
                let found = match &bytes_opt {
                    Some(bytes) => {
                        run_rules_on_bytes(bytes, &path, cfg, global_summaries.as_ref(), None)
                            .unwrap_or_else(|e| {
                                tracing::warn!("pass 2: {}: {e}", path.display());
                                Vec::new()
                            })
                    }
                    None => run_rules_on_file(&path, cfg, global_summaries.as_ref(), None)
                        .unwrap_or_else(|e| {
                            tracing::warn!("pass 2: {}: {e}", path.display());
                            Vec::new()
                        }),
                };

                let upserted = match &hash {
                    Some(h) => idx.upsert_file_with_hash(&path, h),
                    None => idx.upsert_file(&path),
                };
                match upserted {
                    Ok(file_id) => {
                        let rows = found.iter().map(|d| IssueRow {
                            rule_id: &d.id,
                            severity: d.severity.as_db_str(),
                            line: d.line as i64,
                            col: d.col as i64,
                        });
                        if let Err(e) = idx.replace_issues(file_id, rows) {
                            tracing::warn!(
                                "pass 2: cannot store issues for {}: {e}",
                                path.display()
                            );
                        }
                    }
                    Err(e) => {
                        // Without a valid file id there is nothing safe to attach the issues to.
                        tracing::warn!(
                            "pass 2: cannot register {} in index: {e}",
                            path.display()
                        );
                    }
                }
                found
            } else {
                idx.get_issues_from_file(&path).unwrap_or_else(|e| {
                    tracing::warn!(
                        "pass 2: cannot load stored issues for {}: {e}",
                        path.display()
                    );
                    Vec::new()
                })
            };

            // Stored issues may come from a different analysis mode; keep only
            // what the current mode is supposed to report.
            match cfg.scanner.mode {
                crate::utils::config::AnalysisMode::Ast => {
                    diags.retain(|d| !d.id.starts_with("taint") && !d.id.starts_with("cfg-"));
                }
                crate::utils::config::AnalysisMode::Taint => {
                    diags.retain(|d| d.id.starts_with("taint") || d.id.starts_with("cfg-"));
                }
                crate::utils::config::AnalysisMode::Full => {}
            }

            if !diags.is_empty() {
                diag_map
                    .entry(path.to_string_lossy().to_string())
                    .or_default()
                    .append(&mut diags);
            }
            pb2.inc(1);
        },
    );
    pb2.finish_and_clear();

    let mut diags: Vec<Diag> = diag_map.into_iter().flat_map(|(_, v)| v).collect();
    // The concurrent map yields entries in arbitrary order; sort so the output
    // and the `max_results` cut-off are reproducible between runs.
    diags.sort_by(|a, b| {
        a.path
            .cmp(&b.path)
            .then(a.line.cmp(&b.line))
            .then(a.col.cmp(&b.col))
            .then_with(|| a.id.cmp(&b.id))
    });
    if let Some(max) = cfg.output.max_results {
        diags.truncate(max as usize);
    }
    Ok(diags)
}
/// Builds an index for a one-file project, then checks that an indexed scan over
/// that existing index succeeds and reports no diagnostics.
#[test]
fn scan_with_index_parallel_uses_existing_index_without_rescanning() {
    // Keep the scan small and single-threaded.
    let mut cfg = Config::default();
    cfg.performance.worker_threads = Some(1);
    cfg.performance.channel_multiplier = 1;
    cfg.performance.batch_size = 2;

    // Project layout: <tmp>/proj/foo.txt, with the database stored under <tmp>.
    let tmp = tempfile::tempdir().unwrap();
    let proj_root = tmp.path().join("proj");
    std::fs::create_dir(&proj_root).unwrap();
    std::fs::write(proj_root.join("foo.txt"), "abc").unwrap();

    let (name, db_file) = get_project_info(&proj_root, tmp.path()).unwrap();
    crate::commands::index::build_index(&name, &proj_root, &db_file, &cfg, false).unwrap();

    // The freshly built index must contain exactly the one file written above.
    let pool = Indexer::init(&db_file).unwrap();
    let indexed_files = Indexer::from_pool(&name, &pool)
        .unwrap()
        .get_files(&name)
        .unwrap();
    assert_eq!(indexed_files.len(), 1);

    let found = scan_with_index_parallel(&name, Arc::clone(&pool), &cfg, false)
        .expect("scan should succeed");
    assert!(found.is_empty());
}