use crate::callgraph::{
CallGraph, CallGraphBuilder, extract_function_calls, extract_function_definitions,
};
use crate::imports::{FileImports, extract_file_imports};
use crate::{AnalysisSummary, AnalyzerEngine, FileAnalysis};
use anyhow::Result;
use rayon::prelude::*;
use rma_common::{RmaConfig, Severity};
use rma_parser::{ParsedFile, ParserEngine};
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use std::time::Instant;
use tracing::{debug, info, instrument, warn};
/// Aggregated output of a whole-project analysis run.
#[derive(Debug, Default)]
pub struct ProjectAnalysisResult {
    /// Number of files that produced an analysis result.
    pub files_analyzed: usize,
    /// Per-file analysis output, one entry per analyzed file.
    pub file_results: Vec<FileAnalysis>,
    /// Heuristic taint flows crossing file boundaries; empty unless
    /// cross-file analysis was enabled.
    pub cross_file_taints: Vec<CrossFileTaint>,
    /// Project-wide call graph; `None` unless cross-file analysis ran.
    pub call_graph: Option<CallGraph>,
    /// Map from each file to the files it imports; empty unless
    /// cross-file analysis ran.
    pub import_graph: HashMap<PathBuf, Vec<PathBuf>>,
    /// Totals aggregated across all file results.
    pub summary: AnalysisSummary,
    /// Wall-clock duration of the whole run, in milliseconds.
    pub duration_ms: u64,
}
/// A potential taint flow whose source and sink live in different files.
#[derive(Debug, Clone)]
pub struct CrossFileTaint {
    /// Where the (assumed) tainted value originates.
    pub source: TaintLocation,
    /// The security-sensitive call the value is believed to reach.
    pub sink: TaintLocation,
    /// Intermediate locations along the flow (currently the call site).
    pub path: Vec<TaintLocation>,
    /// Severity assigned to this finding.
    pub severity: Severity,
    /// Human-readable explanation of the flow.
    pub description: String,
}
/// A single point in a taint flow: a named entity inside a function.
#[derive(Debug, Clone)]
pub struct TaintLocation {
    /// Source file containing the location.
    pub file: PathBuf,
    /// Enclosing function name.
    pub function: String,
    /// Line number as reported by the call graph (call site for sources,
    /// definition line for sinks — see `detect_cross_file_taints`).
    pub line: usize,
    /// Label for the entity at this location (e.g. a function name, or a
    /// role marker such as "input"/"call").
    pub name: String,
}
/// Drives whole-project analysis: file discovery, (optionally parallel)
/// parsing, per-file analysis, and optional cross-file passes.
pub struct ProjectAnalyzer {
    /// Shared configuration; clones of it are handed to the parser and
    /// analyzer engines at construction time.
    config: std::sync::Arc<RmaConfig>,
    /// Parses discovered source files into syntax trees.
    parser: ParserEngine,
    /// Runs the per-file analysis pass over parsed files.
    analyzer: AnalyzerEngine,
    /// When true, `analyze_project` also builds call/import graphs and
    /// runs cross-file taint detection.
    cross_file_enabled: bool,
    /// When true, parsing and import extraction run on rayon's thread pool.
    parallel_enabled: bool,
}
impl ProjectAnalyzer {
    /// Builds a new analyzer from `config`.
    ///
    /// Cross-file analysis starts disabled and parallel parsing starts
    /// enabled; use [`Self::with_cross_file`] / [`Self::with_parallel`]
    /// to change either.
    pub fn new(config: RmaConfig) -> Self {
        let parser = ParserEngine::new(config.clone());
        let analyzer = AnalyzerEngine::new(config.clone());
        Self {
            config: std::sync::Arc::new(config),
            parser,
            analyzer,
            cross_file_enabled: false,
            parallel_enabled: true,
        }
    }

    /// Enables or disables the cross-file (call-graph / import-graph /
    /// taint) analysis pass.
    pub fn with_cross_file(mut self, enabled: bool) -> Self {
        self.cross_file_enabled = enabled;
        self
    }

    /// Enables or disables parallel parsing and import extraction.
    pub fn with_parallel(mut self, enabled: bool) -> Self {
        self.parallel_enabled = enabled;
        self
    }

    /// Analyzes every supported source file under `path`.
    ///
    /// Unreadable or unparsable files are logged and skipped rather than
    /// aborting the run. Cross-file results (call graph, import graph,
    /// taint flows) are produced only when cross-file analysis is enabled.
    ///
    /// # Errors
    /// Returns an error if file discovery or the per-file analysis pass
    /// fails.
    #[instrument(skip(self), fields(path = %path.display()))]
    pub fn analyze_project(&self, path: &Path) -> Result<ProjectAnalysisResult> {
        let start = Instant::now();
        info!("Starting project analysis for {}", path.display());
        let files = discover_files(path, &self.config)?;
        info!("Discovered {} source files", files.len());
        if files.is_empty() {
            return Ok(ProjectAnalysisResult::default());
        }
        // Read + parse each file; the per-file logic is shared between the
        // parallel and sequential paths (it was previously duplicated).
        // Only `&ParserEngine` is captured, so the `Sync` requirements are
        // the same as before.
        let parser = &self.parser;
        let parsed_files: Vec<ParsedFile> = if self.parallel_enabled {
            files
                .par_iter()
                .filter_map(|f| Self::read_and_parse(parser, f))
                .collect()
        } else {
            files
                .iter()
                .filter_map(|f| Self::read_and_parse(parser, f))
                .collect()
        };
        info!("Parsed {} files successfully", parsed_files.len());
        let (file_results, summary) = self.analyzer.analyze_files(&parsed_files)?;
        let (call_graph, import_graph, cross_file_taints) = if self.cross_file_enabled {
            self.run_cross_file_analysis(&parsed_files, path)?
        } else {
            (None, HashMap::new(), Vec::new())
        };
        let duration = start.elapsed();
        info!(
            "Project analysis complete in {:?}: {} files, {} findings",
            duration,
            file_results.len(),
            summary.total_findings
        );
        Ok(ProjectAnalysisResult {
            files_analyzed: file_results.len(),
            file_results,
            cross_file_taints,
            call_graph,
            import_graph,
            summary,
            duration_ms: duration.as_millis() as u64,
        })
    }

    /// Reads and parses one file, logging failures instead of propagating
    /// them so a single bad file cannot abort the whole run.
    ///
    /// Previously parse errors were silently discarded (`.ok()`) while
    /// read errors were warned; both are now surfaced consistently.
    fn read_and_parse(parser: &ParserEngine, file: &Path) -> Option<ParsedFile> {
        match std::fs::read_to_string(file) {
            Ok(content) => match parser.parse_file(file, &content) {
                Ok(parsed) => Some(parsed),
                Err(e) => {
                    warn!("Failed to parse {}: {}", file.display(), e);
                    None
                }
            },
            Err(e) => {
                warn!("Failed to read {}: {}", file.display(), e);
                None
            }
        }
    }

    /// Runs the cross-file pass: per-file import extraction, call-graph
    /// construction, and heuristic cross-file taint detection.
    fn run_cross_file_analysis(
        &self,
        parsed_files: &[ParsedFile],
        project_root: &Path,
    ) -> Result<(
        Option<CallGraph>,
        HashMap<PathBuf, Vec<PathBuf>>,
        Vec<CrossFileTaint>,
    )> {
        info!("Running cross-file analysis...");
        // One closure shared by the parallel and sequential paths (it was
        // previously duplicated verbatim).
        let collect_imports = |parsed: &ParsedFile| {
            let imports = extract_file_imports(
                &parsed.tree,
                parsed.content.as_bytes(),
                &parsed.path,
                parsed.language,
                project_root,
            );
            (parsed.path.clone(), imports)
        };
        let file_imports: HashMap<PathBuf, FileImports> = if self.parallel_enabled {
            parsed_files.par_iter().map(collect_imports).collect()
        } else {
            parsed_files.iter().map(collect_imports).collect()
        };
        let import_graph = build_import_graph(&file_imports);
        debug!("Built import graph with {} nodes", import_graph.len());
        let mut call_graph_builder = CallGraphBuilder::new();
        for parsed in parsed_files {
            let source = parsed.content.as_bytes();
            let functions = extract_function_definitions(&parsed.tree, source, parsed.language);
            let calls = extract_function_calls(&parsed.tree, source, parsed.language);
            let imports = file_imports.get(&parsed.path).cloned().unwrap_or_default();
            call_graph_builder.add_file(&parsed.path, parsed.language, functions, calls, imports);
        }
        let call_graph = call_graph_builder.build();
        info!(
            "Built call graph: {} functions, {} edges",
            call_graph.function_count(),
            call_graph.edge_count()
        );
        let cross_file_taints = detect_cross_file_taints(&call_graph, parsed_files);
        if !cross_file_taints.is_empty() {
            info!(
                "Detected {} cross-file taint flows",
                cross_file_taints.len()
            );
        }
        Ok((Some(call_graph), import_graph, cross_file_taints))
    }

    /// Borrow of the underlying per-file analyzer engine.
    pub fn analyzer(&self) -> &AnalyzerEngine {
        &self.analyzer
    }

    /// Borrow of the underlying parser engine.
    pub fn parser(&self) -> &ParserEngine {
        &self.parser
    }
}
/// Walks `path` and returns every analyzable source file beneath it.
fn discover_files(path: &Path, config: &RmaConfig) -> Result<Vec<PathBuf>> {
    let mut collected = Vec::new();
    discover_files_recursive(path, config, &mut collected)?;
    Ok(collected)
}
/// Depth-first walk that appends matching files to `files`.
///
/// Nonexistent paths are silently ignored. Well-known dependency/build
/// directories and any dot-directory are pruned from the walk.
/// NOTE(review): the pruning also applies to the initial root itself if
/// its own name matches (e.g. a project folder named ".app") — confirm
/// that is intended.
fn discover_files_recursive(
    path: &Path,
    config: &RmaConfig,
    files: &mut Vec<PathBuf>,
) -> Result<()> {
    // Directories that never contain first-party sources worth analyzing.
    const EXCLUDED_DIRS: [&str; 9] = [
        "node_modules",
        ".git",
        "target",
        "build",
        "dist",
        "__pycache__",
        ".venv",
        "venv",
        "vendor",
    ];
    if !path.exists() {
        return Ok(());
    }
    if path.is_file() {
        if should_include_file(path, config) {
            files.push(path.to_path_buf());
        }
        return Ok(());
    }
    if !path.is_dir() {
        // Neither a regular file nor a directory (e.g. broken symlink).
        return Ok(());
    }
    if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
        if name.starts_with('.') || EXCLUDED_DIRS.contains(&name) {
            return Ok(());
        }
    }
    for entry in std::fs::read_dir(path)? {
        discover_files_recursive(&entry?.path(), config, files)?;
    }
    Ok(())
}
/// Returns true when `path` has an extension for a supported language.
///
/// The config parameter is currently unused but kept for signature
/// stability with callers.
fn should_include_file(path: &Path, _config: &RmaConfig) -> bool {
    matches!(
        path.extension().and_then(|e| e.to_str()),
        Some("rs" | "js" | "jsx" | "ts" | "tsx" | "mjs" | "cjs" | "py" | "go" | "java")
    )
}
/// Flattens per-file import info into a file -> imported-files adjacency map.
fn build_import_graph(
    file_imports: &HashMap<PathBuf, FileImports>,
) -> HashMap<PathBuf, Vec<PathBuf>> {
    file_imports
        .iter()
        .map(|(file, info)| {
            let deps: Vec<PathBuf> = info
                .imports
                .iter()
                .map(|imp| imp.source_file.clone())
                .collect();
            (file.clone(), deps)
        })
        .collect()
}
/// Callee-name fragments that mark a call target as security sensitive.
///
/// Matched by *substring* in `detect_cross_file_taints`, so an entry like
/// "exec" also covers "execute"/"execSync".
/// NOTE(review): broad entries such as "write" will also match unrelated
/// names (e.g. "rewrite") — confirm the false-positive rate is acceptable.
const SECURITY_SENSITIVE_FUNCTIONS: &[&str] = &[
    "exec",
    "eval",
    "query",
    "execute",
    "system",
    "popen",
    "spawn",
    "fork",
    "innerHTML",
    "setInnerHTML",
    "write",
    "writeln",
    "insertAdjacentHTML",
];
/// Heuristic cross-file taint detection: flags every cross-file call edge
/// whose callee name contains a security-sensitive substring.
///
/// This is name-based only; no data-flow tracking is performed, so the
/// "source"/"path" entries describe the call site rather than a proven
/// tainted value.
fn detect_cross_file_taints(
    call_graph: &CallGraph,
    _parsed_files: &[ParsedFile],
) -> Vec<CrossFileTaint> {
    let mut findings = Vec::new();
    for edge in call_graph.cross_file_edges() {
        let callee = &edge.callee;
        let is_sensitive = SECURITY_SENSITIVE_FUNCTIONS
            .iter()
            .any(|s| callee.name.contains(s));
        if !is_sensitive {
            continue;
        }
        let call_site = TaintLocation {
            file: edge.caller.file.clone(),
            function: edge.caller.name.clone(),
            line: edge.call_site.line,
            name: "call".to_string(),
        };
        let source = TaintLocation {
            file: edge.caller.file.clone(),
            function: edge.caller.name.clone(),
            line: edge.call_site.line,
            name: "input".to_string(),
        };
        let sink = TaintLocation {
            file: callee.file.clone(),
            function: callee.name.clone(),
            line: callee.line,
            name: callee.name.clone(),
        };
        findings.push(CrossFileTaint {
            source,
            sink,
            path: vec![call_site],
            severity: Severity::Warning,
            description: format!(
                "Potential taint flow from {} to security-sensitive function {}",
                edge.caller.name, edge.callee.name
            ),
        });
    }
    findings
}
/// Orders files so importers come before the files they import
/// (Kahn's algorithm over the import graph).
///
/// Files participating in an import cycle cannot be topologically ordered;
/// they (and anything reachable only through the cycle) are appended at the
/// end in sorted-path order instead of being silently dropped, so every
/// file in the graph appears exactly once in the result. Acyclic inputs
/// produce the same ordering as before.
pub fn topological_order(import_graph: &HashMap<PathBuf, Vec<PathBuf>>) -> Vec<PathBuf> {
    // Collect every node, including files that only appear as dependencies.
    let mut all_files: HashSet<PathBuf> = HashSet::new();
    for (file, deps) in import_graph {
        all_files.insert(file.clone());
        for dep in deps {
            all_files.insert(dep.clone());
        }
    }
    // In-degree = number of files importing this one.
    let mut in_degree: HashMap<PathBuf, usize> =
        all_files.iter().map(|f| (f.clone(), 0)).collect();
    for deps in import_graph.values() {
        for dep in deps {
            // Every dep was inserted above, so the entry always exists.
            // (The old `unwrap_or(&mut 0)` incremented a throwaway
            // temporary on a miss — a silent no-op anti-pattern.)
            if let Some(deg) = in_degree.get_mut(dep) {
                *deg += 1;
            }
        }
    }
    // Start from files nobody imports.
    let mut queue: Vec<PathBuf> = in_degree
        .iter()
        .filter(|(_, deg)| **deg == 0)
        .map(|(f, _)| f.clone())
        .collect();
    let mut result = Vec::new();
    while let Some(file) = queue.pop() {
        result.push(file.clone());
        if let Some(deps) = import_graph.get(&file) {
            for dep in deps {
                if let Some(deg) = in_degree.get_mut(dep) {
                    *deg = deg.saturating_sub(1);
                    if *deg == 0 {
                        queue.push(dep.clone());
                    }
                }
            }
        }
    }
    // Anything still carrying a nonzero in-degree is in (or behind) a
    // cycle; append deterministically rather than losing those files.
    if result.len() < all_files.len() {
        let mut leftover: Vec<PathBuf> = in_degree
            .into_iter()
            .filter(|(_, deg)| *deg > 0)
            .map(|(f, _)| f)
            .collect();
        leftover.sort();
        result.extend(leftover);
    }
    result
}
#[cfg(test)]
mod tests {
    use super::*;

    /// For the chain a.js -> b.js -> c.js, the order must be importer-first.
    #[test]
    fn test_topological_order_simple() {
        let mut graph = HashMap::new();
        graph.insert(PathBuf::from("a.js"), vec![PathBuf::from("b.js")]);
        graph.insert(PathBuf::from("b.js"), vec![PathBuf::from("c.js")]);
        graph.insert(PathBuf::from("c.js"), vec![]);
        let order = topological_order(&graph);
        assert_eq!(order.len(), 3);
        let a_idx = order.iter().position(|f| f.ends_with("a.js")).unwrap();
        let b_idx = order.iter().position(|f| f.ends_with("b.js")).unwrap();
        let c_idx = order.iter().position(|f| f.ends_with("c.js")).unwrap();
        // Previously this only checked presence; a topological sort's whole
        // point is the ordering, so pin it: a before b before c.
        assert!(a_idx < b_idx);
        assert!(b_idx < c_idx);
    }

    /// Discovery on a missing path must succeed with no files, not error.
    #[test]
    fn test_discover_files() {
        let config = RmaConfig::default();
        let result = discover_files(Path::new("/nonexistent/path"), &config);
        assert!(result.is_ok());
        assert!(result.unwrap().is_empty());
    }

    /// One resolved import in a.js yields one edge; b.js has none.
    #[test]
    fn test_build_import_graph() {
        let mut imports = HashMap::new();
        let mut file_a = FileImports::default();
        file_a.imports.push(crate::imports::ResolvedImport {
            local_name: "foo".to_string(),
            source_file: PathBuf::from("b.js"),
            exported_name: "foo".to_string(),
            kind: crate::imports::ImportKind::Named,
            specifier: "./b".to_string(),
            line: 1,
        });
        imports.insert(PathBuf::from("a.js"), file_a);
        imports.insert(PathBuf::from("b.js"), FileImports::default());
        let graph = build_import_graph(&imports);
        assert_eq!(graph.len(), 2);
        assert_eq!(graph.get(&PathBuf::from("a.js")).unwrap().len(), 1);
        assert_eq!(graph.get(&PathBuf::from("b.js")).unwrap().len(), 0);
    }
}