Skip to main content

jsdet_cli/
npm2.rs

1use std::collections::BTreeMap;
2use std::fs;
3use std::io;
4use std::net::IpAddr;
5use std::path::{Component, Path, PathBuf};
6use std::sync::Arc;
7
8use anyhow::{Context, Result, bail};
9use flate2::read::GzDecoder;
10use reqwest::blocking::Client;
11use secfinding::Severity;
12use serde::Deserialize;
13use tempfile::TempDir;
14use vulnir::{
15    ConfidenceValue, Producer, ProducerKind, Provenance, VulnEdge, VulnIRGraph, VulnNode,
16};
17
18use crate::npm_detonation::NpmDetonationBridge;
19
/// Adds an `is_private()` query to [`IpAddr`], which the standard library only
/// offers on the individual address families.
pub trait IpAddrExt {
    /// Reports whether the address lies in a private range.
    ///
    /// IPv4 addresses defer to the IPv4 private-range check; IPv6 addresses
    /// count as private when they are unique local addresses (`fc00::/7`).
    fn is_private(&self) -> bool;
}

impl IpAddrExt for IpAddr {
    fn is_private(&self) -> bool {
        // `IpAddr` is `Copy`, so matching on the dereferenced value is free.
        match *self {
            Self::V4(address) => address.is_private(),
            Self::V6(address) => address.is_unique_local(),
        }
    }
}
37
/// Switches that control how an npm package scan is carried out.
#[derive(Debug, Clone)]
pub struct NpmScanOptions {
    /// When `true`, dynamic detonation findings are added to the static rule findings.
    pub detonate: bool,
}

impl Default for NpmScanOptions {
    /// Detonation is enabled unless the caller opts out.
    fn default() -> Self {
        NpmScanOptions { detonate: true }
    }
}
48
/// Severity type attached to rules and findings; an alias for [`Severity`].
pub type RuleSeverity = Severity;
50
/// A single issue reported for a scanned package, produced either by a static
/// rule match or by an observation made during dynamic detonation.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct PackageFinding {
    /// Identifier of the rule or dynamic check that produced the finding.
    pub id: String,
    /// Severity assigned to the finding.
    pub severity: RuleSeverity,
    /// Associated CWE identifier (for example `CWE-78`), when one is known.
    pub cwe: Option<String>,
    /// File in which the evidence was located.
    pub file: String,
    /// 1-based line number of the evidence within `file`.
    pub line: usize,
    /// Human-readable explanation, including a suggested fix.
    pub description: String,
    /// Source line or token that triggered the finding.
    pub evidence: String,
}
61
62#[must_use]
63pub fn severity_label(severity: RuleSeverity) -> &'static str {
64    match severity {
65        Severity::Critical => "CRITICAL",
66        Severity::High => "HIGH",
67        Severity::Medium => "MEDIUM",
68        Severity::Low => "LOW",
69        _ => "INFO",
70    }
71}
72
73#[derive(Debug, Clone)]
74pub struct PackageScanResult {
75    pub package_name: String,
76    pub package_version: String,
77    pub findings: Vec<PackageFinding>,
78}
79
80impl PackageScanResult {
81    #[must_use]
82    pub fn to_vulnir_graph(&self) -> VulnIRGraph<VulnNode, VulnEdge> {
83        let mut graph = VulnIRGraph::new();
84        let provenance = package_provenance();
85        let package_node = graph.add_node(VulnNode::TrustBoundary {
86            id: format!("npm-package-{}", self.package_name),
87            description: format!("npm package {}@{}", self.package_name, self.package_version),
88            source_type: "npm-package".to_string(),
89            confidence: ConfidenceValue::from(1.0),
90            taint_id: Some(self.package_name.clone()),
91            created_ns: None,
92            provenance: vec![provenance.clone()],
93            metadata: BTreeMap::from([
94                (
95                    "package_name".to_string(),
96                    serde_json::json!(&self.package_name),
97                ),
98                (
99                    "package_version".to_string(),
100                    serde_json::json!(&self.package_version),
101                ),
102            ])
103            .into_iter()
104            .collect(),
105        });
106
107        for finding in &self.findings {
108            let node = graph.add_node(VulnNode::Capability {
109                id: finding.id.clone(),
110                resource: "npm-package-analysis".to_string(),
111                permission: severity_label(finding.severity).to_ascii_lowercase(),
112                target: Some(finding.file.clone()),
113                arguments: vec![finding.description.clone(), finding.evidence.clone()],
114                duration_ns: None,
115                provenance: vec![provenance.clone()],
116                metadata: BTreeMap::from([
117                    ("rule_id".to_string(), serde_json::json!(&finding.id)),
118                    ("line".to_string(), serde_json::json!(finding.line)),
119                    ("cwe".to_string(), serde_json::json!(&finding.cwe)),
120                ])
121                .into_iter()
122                .collect(),
123            });
124            graph.add_edge(
125                package_node,
126                node,
127                VulnEdge::DataFlow {
128                    confidence: ConfidenceValue::from(1.0),
129                },
130            );
131        }
132
133        graph
134    }
135}
136
137#[must_use]
138pub fn vulnir_json(graph: &VulnIRGraph<VulnNode, VulnEdge>) -> serde_json::Value {
139    serde_json::json!({
140        "nodes": graph.node_indices().map(|index| serde_json::json!({
141            "index": index.index(),
142            "node": &graph[index],
143        })).collect::<Vec<_>>(),
144        "edges": graph.edge_indices().filter_map(|index| {
145            graph.edge_endpoints(index).map(|(source, target)| serde_json::json!({
146                "source": source.index(),
147                "target": target.index(),
148                "edge": &graph[index],
149            }))
150        }).collect::<Vec<_>>(),
151    })
152}
153
154pub fn scan_npm_package(path: impl AsRef<Path>) -> Result<PackageScanResult> {
155    scan_npm_package_with_options(path, &NpmScanOptions::default())
156}
157
158pub fn scan_npm_package_with_options(
159    path: impl AsRef<Path>,
160    options: &NpmScanOptions,
161) -> Result<PackageScanResult> {
162    let resolved = resolve_scan_target(path.as_ref())?;
163    let package_json_path = resolved.root.join("package.json");
164    let (package_json, manifest) = if package_json_path.is_file() {
165        let package_json = fs::read_to_string(&package_json_path).with_context(|| {
166            format!(
167                "failed to read package manifest at {}. Fix: provide a readable package.json",
168                package_json_path.display()
169            )
170        })?;
171        let manifest: PackageManifest = serde_json::from_str(&package_json).with_context(|| {
172            format!(
173                "failed to parse {}. Fix: ensure package.json is valid JSON",
174                package_json_path.display()
175            )
176        })?;
177        (package_json, manifest)
178    } else {
179        (
180            "{}".to_string(),
181            PackageManifest::from_directory(&resolved.root, resolved.display_name.clone()),
182        )
183    };
184
185    let files = collect_scan_files(&resolved.root)?;
186    if manifest.scripts.is_empty() && files.is_empty() && !package_json_path.is_file() {
187        return Ok(PackageScanResult {
188            package_name: manifest.name,
189            package_version: manifest.version,
190            findings: Vec::new(),
191        });
192    }
193
194    let rules = load_rules(&workspace_rules_dir())?;
195    let corpus = PackageCorpus::new(&package_json, manifest.clone(), &files);
196    let mut findings = Vec::new();
197
198    for rule in rules {
199        if !rule.matches(&corpus) {
200            continue;
201        }
202
203        let evidence_token = rule.primary_evidence_token();
204        let (file, line, evidence) = corpus.find_evidence_location(evidence_token);
205        findings.push(PackageFinding {
206            id: rule.id,
207            severity: rule.severity,
208            cwe: rule.cwe,
209            file,
210            line,
211            description: format!("{} Fix: {}", rule.title, rule.description),
212            evidence,
213        });
214    }
215
216    if options.detonate {
217        findings.extend(run_dynamic_detonation(&resolved.root, &files)?);
218    }
219
220    dedupe_findings(&mut findings);
221    findings.sort_by(|left, right| {
222        right
223            .severity
224            .cmp(&left.severity)
225            .then_with(|| left.id.cmp(&right.id))
226            .then_with(|| left.file.cmp(&right.file))
227            .then_with(|| left.line.cmp(&right.line))
228    });
229
230    Ok(PackageScanResult {
231        package_name: manifest.name,
232        package_version: manifest.version,
233        findings,
234    })
235}
236
237fn run_dynamic_detonation(package_dir: &Path, files: &[SourceFile]) -> Result<Vec<PackageFinding>> {
238    let module = jsdet_core::CompiledModule::new()
239        .context("failed to compile QuickJS WASM for npm detonation")?;
240    let config = jsdet_core::SandboxConfig::detonation();
241    let mut findings = Vec::new();
242
243    for source in files.iter().filter(|file| is_javascript(&file.path)) {
244        if source.text.len() > config.max_script_bytes {
245            continue;
246        }
247        let bridge = Arc::new(NpmDetonationBridge::new(package_dir, &source.path)?);
248        let result = module.execute(&[wrap_script(source)], bridge, &config)?;
249        findings.extend(dynamic_findings_for_script(source, &result.observations));
250    }
251
252    Ok(findings)
253}
254
255fn dynamic_findings_for_script(
256    source: &SourceFile,
257    observations: &[jsdet_core::Observation],
258) -> Vec<PackageFinding> {
259    let mut findings = Vec::new();
260
261    for observation in observations {
262        let jsdet_core::Observation::ApiCall { api, args, .. } = observation else {
263            continue;
264        };
265        match api.as_str() {
266            "eval" | "eval.tainted" => findings.push(build_finding(
267                source,
268                "npm-dynamic-eval-external-input",
269                if api == "eval.tainted" {
270                    Severity::Critical
271                } else {
272                    Severity::High
273                },
274                Some("CWE-95"),
275                "Dynamic code execution via eval() with external or tainted input. Fix: remove eval() or validate and parse untrusted data before execution.",
276                &first_non_empty(&args_json_strings(args).join(" "), "eval"),
277            )),
278            "Function" | "Function.tainted" => findings.push(build_finding(
279                source,
280                "npm-function-constructor-external-input",
281                if api == "Function.tainted" {
282                    Severity::Critical
283                } else {
284                    Severity::High
285                },
286                Some("CWE-95"),
287                "Dynamic code execution via Function constructor with external or tainted input. Fix: replace string compilation with explicit functions.",
288                &first_non_empty(&args_json_strings(args).join(" "), "Function"),
289            )),
290            "child_process.exec" | "child_process.execSync" => {
291                let command = args_json_strings(args).join(" ");
292                findings.push(build_finding(
293                    source,
294                    "npm-dynamic-command-exec",
295                    Severity::Critical,
296                    Some("CWE-78"),
297                    if looks_like_reverse_shell(&command) {
298                        "Command execution with reverse-shell indicators. Fix: remove child_process execution from package install/runtime paths."
299                    } else {
300                        "Command execution via child_process. Fix: avoid shell execution or hardcode safe commands and arguments."
301                    },
302                    &first_non_empty(&command, "child_process.exec"),
303                ));
304            }
305            "child_process.spawn" => findings.push(build_finding(
306                source,
307                if looks_like_reverse_shell(&args_json_strings(args).join(" ")) {
308                    "npm-reverse-shell-spawn"
309                } else {
310                    "npm-child-process-spawn"
311                },
312                Severity::Critical,
313                Some("CWE-78"),
314                "Process spawning from package code. Fix: remove runtime process creation or whitelist safe subprocesses in trusted tooling only.",
315                &first_non_empty(&args_json_strings(args).join(" "), "child_process.spawn"),
316            )),
317            "fs.readFileSync" => {
318                let path = first_json_string(args).unwrap_or_default();
319                if is_sensitive_path(&path) {
320                    findings.push(build_finding(
321                        source,
322                        "npm-sensitive-file-read",
323                        Severity::High,
324                        Some("CWE-538"),
325                        "Sensitive local file read from package code. Fix: remove host file access and consume explicit user-provided inputs instead.",
326                        &path,
327                    ));
328                }
329            }
330            "process.env.get" => {
331                let key = first_json_string(args).unwrap_or_default();
332                if is_sensitive_env_key(&key) {
333                    findings.push(build_finding(
334                        source,
335                        "npm-env-secret-access",
336                        Severity::High,
337                        Some("CWE-526"),
338                        "Package reads sensitive environment variables. Fix: stop harvesting secrets from process.env and request explicit credentials through a secure interface.",
339                        &key,
340                    ));
341                }
342            }
343            "dns.lookup" | "dns.resolveTxt" => {
344                let host = first_json_string(args).unwrap_or_default();
345                if is_suspicious_dns_query(&host) {
346                    findings.push(build_finding(
347                        source,
348                        "npm-dns-exfiltration",
349                        Severity::Critical,
350                        Some("CWE-200"),
351                        "Suspicious DNS lookup consistent with data exfiltration. Fix: remove DNS-based transport and avoid encoding host data into domain labels.",
352                        &host,
353                    ));
354                }
355            }
356            "http.get" | "http.request" | "https.get" | "https.request" => {
357                let target = first_json_string(args).unwrap_or_default();
358                if is_external_target(&target) {
359                    findings.push(build_finding(
360                        source,
361                        "npm-network-exfiltration",
362                        Severity::High,
363                        Some("CWE-319"),
364                        "Outbound network request from package code. Fix: remove unsolicited network calls or gate them behind explicit user configuration.",
365                        &target,
366                    ));
367                }
368            }
369            "net.connect" | "net.createConnection" => findings.push(build_finding(
370                source,
371                "npm-net-socket-exfiltration",
372                if is_suspicious_ip_or_host(&args_json_strings(args).join(" ")) {
373                    Severity::Critical
374                } else {
375                    Severity::High
376                },
377                Some("CWE-200"),
378                "Raw socket connection from package code. Fix: remove arbitrary outbound socket creation and reverse-shell patterns.",
379                &first_non_empty(&args_json_strings(args).join(" "), "net.connect"),
380            )),
381            "net.write" => {
382                let payload = args_json_strings(args).join(" ");
383                if looks_like_secret_material(&payload) {
384                    findings.push(build_finding(
385                        source,
386                        "npm-net-secret-exfiltration",
387                        Severity::Critical,
388                        Some("CWE-200"),
389                        "Socket write contains secret or credential material. Fix: stop transmitting harvested data to remote peers.",
390                        &first_non_empty(&payload, "net.write"),
391                    ));
392                }
393            }
394            "Buffer.from" => {
395                let payload = args_json_strings(args).join(" ");
396                if looks_like_memory_leak_pattern(&payload) {
397                    findings.push(build_finding(
398                        source,
399                        "npm-buffer-memory-leak",
400                        Severity::Medium,
401                        Some("CWE-200"),
402                        "Buffer operation suggests unsafe memory disclosure or encoding abuse. Fix: avoid exposing raw buffers derived from secrets or uninitialized memory.",
403                        &first_non_empty(&payload, "Buffer.from"),
404                    ));
405                }
406            }
407            _ => {}
408        }
409    }
410
411    findings
412}
413
414fn dedupe_findings(findings: &mut Vec<PackageFinding>) {
415    let mut deduped = BTreeMap::<(String, String, usize, String), PackageFinding>::new();
416    for finding in findings.drain(..) {
417        let key = (
418            finding.id.clone(),
419            finding.file.clone(),
420            finding.line,
421            finding.evidence.clone(),
422        );
423        deduped
424            .entry(key)
425            .and_modify(|current| {
426                if finding.severity > current.severity {
427                    *current = finding.clone();
428                }
429            })
430            .or_insert(finding);
431    }
432    *findings = deduped.into_values().collect();
433}
434
435fn build_finding(
436    source: &SourceFile,
437    id: &str,
438    severity: Severity,
439    cwe: Option<&str>,
440    description: &str,
441    evidence_token: &str,
442) -> PackageFinding {
443    let (line, evidence) = find_evidence_location(&source.text, evidence_token);
444    PackageFinding {
445        id: id.to_string(),
446        severity,
447        cwe: cwe.map(ToOwned::to_owned),
448        file: source.path.clone(),
449        line,
450        description: description.to_string(),
451        evidence,
452    }
453}
454
455fn find_evidence_location(text: &str, token: &str) -> (usize, String) {
456    if token.is_empty() {
457        return (1, first_non_empty_line(text));
458    }
459    let normalized = token.trim_matches('"');
460    if let Some(index) = text.find(normalized) {
461        let line = text[..index].bytes().filter(|byte| *byte == b'\n').count() + 1;
462        let evidence = text
463            .lines()
464            .nth(line.saturating_sub(1))
465            .map(str::trim)
466            .filter(|line| !line.is_empty())
467            .map(ToOwned::to_owned)
468            .unwrap_or_else(|| normalized.to_string());
469        return (line, evidence);
470    }
471    (1, first_non_empty(normalized, &first_non_empty_line(text)))
472}
473
/// Returns `primary` unchanged unless it is empty or whitespace-only, in which
/// case `fallback` is returned instead.
fn first_non_empty(primary: &str, fallback: &str) -> String {
    let chosen = if primary.trim().is_empty() {
        fallback
    } else {
        primary
    };
    chosen.to_owned()
}
481
/// Returns the first line of `text` that is not blank, trimmed of surrounding
/// whitespace, or `"package source"` when every line is blank.
fn first_non_empty_line(text: &str) -> String {
    for line in text.lines() {
        let trimmed = line.trim();
        if !trimmed.is_empty() {
            return trimmed.to_owned();
        }
    }
    "package source".to_string()
}
488
489fn args_json_strings(args: &[jsdet_core::Value]) -> Vec<String> {
490    args.iter().flat_map(value_to_strings).collect()
491}
492
493fn first_json_string(args: &[jsdet_core::Value]) -> Option<String> {
494    args_json_strings(args)
495        .into_iter()
496        .find(|value| !value.is_empty())
497}
498
499fn value_to_strings(value: &jsdet_core::Value) -> Vec<String> {
500    match value {
501        jsdet_core::Value::String(text, _) | jsdet_core::Value::Json(text, _) => {
502            if let Ok(parsed) = serde_json::from_str::<Vec<serde_json::Value>>(text) {
503                parsed
504                    .into_iter()
505                    .filter_map(|item| item.as_str().map(ToOwned::to_owned))
506                    .collect()
507            } else {
508                vec![text.clone()]
509            }
510        }
511        jsdet_core::Value::Bytes(bytes) => vec![String::from_utf8_lossy(bytes).into_owned()],
512        jsdet_core::Value::Bool(value) => vec![value.to_string()],
513        jsdet_core::Value::Int(value) => vec![value.to_string()],
514        jsdet_core::Value::Float(value) => vec![value.to_string()],
515        jsdet_core::Value::Undefined | jsdet_core::Value::Null => Vec::new(),
516    }
517}
518
519fn wrap_script(source: &SourceFile) -> String {
520    let dirname = Path::new(&source.path).parent().map_or_else(
521        || ".".to_string(),
522        |path| path.to_string_lossy().replace('\\', "/"),
523    );
524    format!(
525        "var __filename = {filename}; var __dirname = {dirname};\n{script}",
526        filename = serde_json::to_string(&source.path).unwrap_or_else(|_| "\"index.js\"".into()),
527        dirname = serde_json::to_string(&dirname).unwrap_or_else(|_| "\".\"".into()),
528        script = source.text
529    )
530}
531
532fn workspace_rules_dir() -> PathBuf {
533    Path::new(env!("CARGO_MANIFEST_DIR"))
534        .join("..")
535        .join("..")
536        .join("rules")
537}
538
539fn load_rules(rules_dir: &Path) -> Result<Vec<Rule>> {
540    let mut entries = fs::read_dir(rules_dir)
541        .with_context(|| {
542            format!(
543                "failed to read rules directory {}. Fix: ensure the workspace rules/ directory exists",
544                rules_dir.display()
545            )
546        })?
547        .collect::<std::io::Result<Vec<_>>>()
548        .with_context(|| format!("failed to enumerate {}", rules_dir.display()))?;
549    entries.sort_by_key(|entry| entry.path());
550
551    let mut rules = Vec::new();
552    for entry in entries {
553        let path = entry.path();
554        if path.extension().and_then(std::ffi::OsStr::to_str) != Some("toml") {
555            continue;
556        }
557        let Some(file_name) = path.file_name().and_then(std::ffi::OsStr::to_str) else {
558            continue;
559        };
560        if !file_name.starts_with("npm-") {
561            continue;
562        }
563        let text = fs::read_to_string(&path)
564            .with_context(|| format!("failed to read rule file {}", path.display()))?;
565        let rule: Rule = toml::from_str(&text).with_context(|| {
566            format!(
567                "failed to parse {}. Fix: make the rule valid TOML matching the scanner schema",
568                path.display()
569            )
570        })?;
571        rules.push(rule);
572    }
573
574    if rules.is_empty() {
575        bail!(
576            "no npm rules loaded from {}. Fix: add TOML rule files under rules/",
577            rules_dir.display()
578        );
579    }
580
581    Ok(rules)
582}
583
584fn collect_scan_files(package_dir: &Path) -> Result<Vec<SourceFile>> {
585    let mut stack = vec![package_dir.to_path_buf()];
586    let mut files = Vec::new();
587    while let Some(dir) = stack.pop() {
588        for entry in fs::read_dir(&dir)
589            .with_context(|| format!("failed to read directory {}", dir.display()))?
590        {
591            let entry =
592                entry.with_context(|| format!("failed to read entry in {}", dir.display()))?;
593            let path = entry.path();
594            let file_type = entry
595                .file_type()
596                .with_context(|| format!("failed to inspect {}", path.display()))?;
597            if file_type.is_dir() {
598                let name = entry.file_name().to_string_lossy().into_owned();
599                if name == "node_modules" || name == ".git" || name == "target" {
600                    continue;
601                }
602                stack.push(path);
603                continue;
604            }
605            if !file_type.is_file() {
606                continue;
607            }
608            let Some(ext) = path.extension().and_then(std::ffi::OsStr::to_str) else {
609                continue;
610            };
611            if !matches!(ext, "js" | "cjs" | "mjs" | "json") {
612                continue;
613            }
614            let text = fs::read_to_string(&path)
615                .with_context(|| format!("failed to read source file {}", path.display()))?;
616            let relative = path
617                .strip_prefix(package_dir)
618                .unwrap_or(path.as_path())
619                .to_string_lossy()
620                .replace('\\', "/");
621            files.push(SourceFile {
622                path: relative,
623                text,
624            });
625        }
626    }
627    files.sort_by(|left, right| left.path.cmp(&right.path));
628    Ok(files)
629}
630
/// Reports whether `path` ends in a JavaScript extension (`.js`, `.cjs`, or `.mjs`).
fn is_javascript(path: &str) -> bool {
    [".js", ".cjs", ".mjs"]
        .iter()
        .any(|suffix| path.ends_with(*suffix))
}
634
635fn package_provenance() -> Provenance {
636    Provenance::new(
637        Producer::new("jsdet-cli-npm", ProducerKind::Static)
638            .with_version(env!("CARGO_PKG_VERSION")),
639    )
640}
641
/// A text file collected from the package for scanning.
#[derive(Debug, Clone)]
struct SourceFile {
    /// Path used to identify the file in findings; `collect_scan_files` stores it
    /// relative to the package root with `/` separators.
    path: String,
    /// Full text content of the file.
    text: String,
}
647
648struct PackageCorpus {
649    manifest: PackageManifest,
650    package_json_lower: String,
651    javascript_lower: String,
652    package_lower: String,
653    files: Vec<SourceFile>,
654    script_names: Vec<String>,
655}
656
657impl PackageCorpus {
658    fn new(package_json: &str, manifest: PackageManifest, files: &[SourceFile]) -> Self {
659        let javascript = files
660            .iter()
661            .filter(|file| is_javascript(&file.path))
662            .map(|file| file.text.as_str())
663            .collect::<Vec<_>>()
664            .join("\n");
665        let package_text = files
666            .iter()
667            .map(|file| file.text.as_str())
668            .collect::<Vec<_>>()
669            .join("\n");
670        let script_names = manifest.scripts.keys().cloned().collect();
671        Self {
672            manifest,
673            package_json_lower: package_json.to_ascii_lowercase(),
674            javascript_lower: javascript.to_ascii_lowercase(),
675            package_lower: package_text.to_ascii_lowercase(),
676            files: files.to_vec(),
677            script_names,
678        }
679    }
680
681    fn scope_text(&self, scope: RuleScope) -> &str {
682        match scope {
683            RuleScope::Package => &self.package_lower,
684            RuleScope::PackageJson => &self.package_json_lower,
685            RuleScope::Javascript => &self.javascript_lower,
686        }
687    }
688
689    fn find_evidence_location(&self, token: &str) -> (String, usize, String) {
690        let token = token.to_ascii_lowercase();
691        for file in &self.files {
692            if let Some((_, line, evidence)) = find_token(&file.text, &token) {
693                return (file.path.clone(), line, evidence);
694            }
695        }
696        ("package.json".to_string(), 1, "package.json".to_string())
697    }
698}
699
700#[derive(Debug, Clone, Deserialize)]
701struct Rule {
702    id: String,
703    title: String,
704    description: String,
705    severity: Severity,
706    #[serde(default)]
707    cwe: Option<String>,
708    #[serde(default)]
709    scope: RuleScope,
710    #[serde(default)]
711    all: Vec<String>,
712    #[serde(default)]
713    any: Vec<String>,
714    #[serde(default)]
715    package_names: Vec<String>,
716    #[serde(default)]
717    script_names: Vec<String>,
718}
719
720impl Rule {
721    fn matches(&self, corpus: &PackageCorpus) -> bool {
722        let scope_text = corpus.scope_text(self.scope);
723        let has_all = self
724            .all
725            .iter()
726            .all(|needle| scope_text.contains(&needle.to_ascii_lowercase()));
727        let has_any = self.any.is_empty()
728            || self
729                .any
730                .iter()
731                .any(|needle| scope_text.contains(&needle.to_ascii_lowercase()));
732        let matches_package_name = self.package_names.is_empty()
733            || self
734                .package_names
735                .iter()
736                .any(|name| corpus.manifest.name.eq_ignore_ascii_case(name));
737        let matches_script_name = self.script_names.is_empty()
738            || self.script_names.iter().any(|name| {
739                corpus
740                    .script_names
741                    .iter()
742                    .any(|script_name| script_name.eq_ignore_ascii_case(name))
743            });
744        has_all && has_any && matches_package_name && matches_script_name
745    }
746
747    fn primary_evidence_token(&self) -> &str {
748        self.all
749            .first()
750            .or_else(|| self.any.first())
751            .or_else(|| self.script_names.first())
752            .or_else(|| self.package_names.first())
753            .map_or("package.json", String::as_str)
754    }
755}
756
/// Selects which text view of the package a rule's needles are searched in.
#[derive(Debug, Clone, Copy, Default, Deserialize)]
#[serde(rename_all = "snake_case")]
enum RuleScope {
    /// The text of every collected file (the default).
    #[default]
    Package,
    /// Only the raw `package.json` text.
    PackageJson,
    /// Only the text of the collected JavaScript files.
    Javascript,
}
765
766#[derive(Debug, Clone, Default, Deserialize)]
767struct PackageManifest {
768    #[serde(default)]
769    name: String,
770    #[serde(default)]
771    version: String,
772    #[serde(default)]
773    scripts: BTreeMap<String, String>,
774}
775
776impl PackageManifest {
777    fn from_directory(package_dir: &Path, display_name: String) -> Self {
778        Self {
779            name: if display_name.is_empty() {
780                package_dir
781                    .file_name()
782                    .and_then(std::ffi::OsStr::to_str)
783                    .map_or_else(|| "unknown-package".to_string(), ToOwned::to_owned)
784            } else {
785                display_name
786            },
787            version: "0.0.0".to_string(),
788            scripts: BTreeMap::new(),
789        }
790    }
791}
792
/// Searches `text` for `token`, ignoring ASCII case in `text`.
///
/// `token` is expected to be lowercase already. On a hit, returns the byte
/// offset of the first match, its 1-based line number, and the trimmed content
/// of that line (or the token itself when the line is blank).
fn find_token(text: &str, token: &str) -> Option<(usize, usize, String)> {
    // ASCII lowercasing keeps byte offsets identical, so the offset found in
    // the lowered copy is valid in the original text.
    let index = text.to_ascii_lowercase().find(token)?;
    let line_index = text[..index].matches('\n').count();
    let evidence = match text.lines().nth(line_index).map(str::trim) {
        Some(content) if !content.is_empty() => content.to_owned(),
        _ => token.to_owned(),
    };
    Some((index, line_index + 1, evidence))
}
807
/// A scan target resolved to a directory on disk.
struct ResolvedScanTarget {
    /// Temporary directory backing `root` when the target was extracted from a
    /// tarball; `None` for a plain directory. Held here so it lives as long as
    /// the resolved target does.
    _tempdir: Option<TempDir>,
    /// Directory that is scanned as the package root.
    root: PathBuf,
    /// Name to fall back on when the package has no manifest; may be empty.
    display_name: String,
}
813
814fn resolve_scan_target(path: &Path) -> Result<ResolvedScanTarget> {
815    if path.exists() {
816        if path.is_dir() {
817            return Ok(ResolvedScanTarget {
818                _tempdir: None,
819                root: path.to_path_buf(),
820                display_name: path
821                    .file_name()
822                    .and_then(std::ffi::OsStr::to_str)
823                    .map_or_else(String::new, ToOwned::to_owned),
824            });
825        }
826        if is_npm_tarball(path) {
827            let tempdir = extract_tarball(path)?;
828            let root = locate_package_root(tempdir.path())?;
829            return Ok(ResolvedScanTarget {
830                _tempdir: Some(tempdir),
831                root,
832                display_name: String::new(),
833            });
834        }
835        bail!(
836            "failed to scan {}. Fix: provide a package directory, tarball, or npm package name",
837            path.display()
838        );
839    }
840    download_registry_package(&path.to_string_lossy())
841}
842
/// Returns `true` when the path text ends in `.tgz`, `.tar.gz`, or `.tar`.
///
/// The comparison is case-sensitive and looks only at the path's textual
/// suffix; the file contents are not inspected.
fn is_npm_tarball(path: &Path) -> bool {
    const TARBALL_SUFFIXES: [&str; 3] = [".tgz", ".tar.gz", ".tar"];
    let rendered = path.to_string_lossy();
    TARBALL_SUFFIXES
        .iter()
        .any(|suffix| rendered.ends_with(*suffix))
}
847
848fn download_registry_package(spec: &str) -> Result<ResolvedScanTarget> {
849    let (name, version) = split_package_spec(spec);
850    let client = Client::builder()
851        .timeout(std::time::Duration::from_secs(30))
852        .build()
853        .context("failed to build npm registry client")?;
854    let metadata: RegistryPackage = client
855        .get(format!(
856            "https://registry.npmjs.org/{}",
857            urlencoding::encode(&name)
858        ))
859        .header(reqwest::header::USER_AGENT, "jsdet/0.1.0")
860        .send()
861        .with_context(|| format!("failed to fetch npm metadata for {name}"))?
862        .error_for_status()
863        .with_context(|| format!("npm registry returned an error for {name}"))?
864        .json()
865        .with_context(|| format!("failed to parse npm metadata for {name}"))?;
866    let resolved_version = version
867        .or_else(|| metadata.dist_tags.latest.clone())
868        .ok_or_else(|| anyhow::anyhow!("npm package {name} did not expose a resolvable version"))?;
869    let manifest = metadata
870        .versions
871        .get(&resolved_version)
872        .ok_or_else(|| anyhow::anyhow!("npm version {resolved_version} not found for {name}"))?;
873    let tarball_url = manifest.dist.tarball.clone().ok_or_else(|| {
874        anyhow::anyhow!("npm metadata for {name}@{resolved_version} has no tarball URL")
875    })?;
876    let download_dir = tempfile::tempdir().context("failed to create npm download directory")?;
877    let archive_path = download_dir.path().join("package.tgz");
878    let mut response = client
879        .get(&tarball_url)
880        .header(reqwest::header::USER_AGENT, "jsdet/0.1.0")
881        .send()
882        .with_context(|| format!("failed to download npm tarball from {tarball_url}"))?
883        .error_for_status()
884        .with_context(|| format!("npm tarball download failed for {name}@{resolved_version}"))?;
885    let mut file = fs::File::create(&archive_path)
886        .with_context(|| format!("failed to create {}", archive_path.display()))?;
887    io::copy(&mut response, &mut file)
888        .with_context(|| format!("failed to persist {}", archive_path.display()))?;
889    let extracted = extract_tarball(&archive_path)?;
890    let root = locate_package_root(extracted.path())?;
891    Ok(ResolvedScanTarget {
892        _tempdir: Some(extracted),
893        root,
894        display_name: manifest.name.clone().unwrap_or(name),
895    })
896}
897
/// Splits an npm package spec into `(name, version)`.
///
/// The version separator is the last `@` in the spec, ignoring a leading `@`
/// that marks a scoped package (`@scope/name`). An empty version (a trailing
/// `@`) is reported as `None`. A spec without a separator is returned whole
/// with no version.
fn split_package_spec(spec: &str) -> (String, Option<String>) {
    // Skip the scope marker so it is never mistaken for the version separator.
    let search_from = usize::from(spec.starts_with('@'));
    match spec[search_from..].rfind('@') {
        Some(relative_at) => {
            let at = search_from + relative_at;
            let version = &spec[at + 1..];
            (
                spec[..at].to_string(),
                (!version.is_empty()).then(|| version.to_string()),
            )
        }
        None => (spec.to_string(), None),
    }
}
917
918#[derive(Debug, Deserialize)]
919struct RegistryPackage {
920    #[serde(rename = "dist-tags")]
921    dist_tags: RegistryDistTags,
922    #[serde(default)]
923    versions: BTreeMap<String, RegistryVersion>,
924}
925
/// The `dist-tags` object of a registry metadata document; only the `latest`
/// tag is read.
#[derive(Debug, Default, Deserialize)]
struct RegistryDistTags {
    /// Version string the `latest` tag points at, if the tag is present.
    latest: Option<String>,
}
930
931#[derive(Debug, Default, Deserialize)]
932struct RegistryVersion {
933    name: Option<String>,
934    dist: RegistryDist,
935}
936
/// The `dist` object of a single version entry.
#[derive(Debug, Default, Deserialize)]
struct RegistryDist {
    /// URL of the version's tarball, if the registry provided one.
    tarball: Option<String>,
}
941
942fn extract_tarball(path: &Path) -> Result<TempDir> {
943    let file =
944        fs::File::open(path).with_context(|| format!("failed to read {}", path.display()))?;
945    let tempdir = tempfile::tempdir().context("failed to create extraction directory")?;
946    if matches!(
947        path.extension().and_then(|value| value.to_str()),
948        Some("tgz" | "gz")
949    ) {
950        unpack_tar(GzDecoder::new(file), tempdir.path())?;
951    } else {
952        unpack_tar(file, tempdir.path())?;
953    }
954    Ok(tempdir)
955}
956
957fn unpack_tar(reader: impl io::Read, destination: &Path) -> Result<()> {
958    let mut archive = tar::Archive::new(reader);
959    for entry in archive
960        .entries()
961        .context("failed to enumerate tar entries")?
962    {
963        let mut entry = entry.context("failed to read tar entry")?;
964        let raw_path = entry.path().context("failed to read tar entry path")?;
965        let path = sanitize_archive_path(&raw_path)?;
966        let target = destination.join(path);
967        if let Some(parent) = target.parent() {
968            fs::create_dir_all(parent)
969                .with_context(|| format!("failed to create {}", parent.display()))?;
970        }
971        entry
972            .unpack(&target)
973            .with_context(|| format!("failed to unpack {}", target.display()))?;
974    }
975    Ok(())
976}
977
978fn sanitize_archive_path(path: &Path) -> Result<PathBuf> {
979    let mut cleaned = PathBuf::new();
980    for component in path.components() {
981        match component {
982            Component::Normal(part) => cleaned.push(part),
983            Component::CurDir => {}
984            _ => bail!("archive contains unsafe path: {}", path.display()),
985        }
986    }
987    if cleaned.as_os_str().is_empty() {
988        bail!("archive contains empty path entry");
989    }
990    Ok(cleaned)
991}
992
993fn locate_package_root(extracted_root: &Path) -> Result<PathBuf> {
994    let direct = extracted_root.join("package");
995    if direct.join("package.json").exists() {
996        return Ok(direct);
997    }
998    for entry in fs::read_dir(extracted_root)
999        .with_context(|| format!("failed to read {}", extracted_root.display()))?
1000    {
1001        let entry = entry?;
1002        let path = entry.path();
1003        if path.is_dir() && path.join("package.json").exists() {
1004            return Ok(path);
1005        }
1006    }
1007    bail!("no extracted package root with package.json found")
1008}
1009
/// Returns `true` when an environment variable name contains, ignoring ASCII
/// case, one of the substrings `token`, `secret`, `key`, `passwd`,
/// `password`, or `auth`.
fn is_sensitive_env_key(key: &str) -> bool {
    const MARKERS: [&str; 6] = ["token", "secret", "key", "passwd", "password", "auth"];
    let normalized = key.to_ascii_lowercase();
    MARKERS.iter().any(|marker| normalized.contains(*marker))
}
1016
/// Returns `true` when `path` contains one of a fixed list of sensitive path
/// fragments (npm, SSH, AWS and git credential files, plus `/etc/passwd` and
/// `/etc/hostname`). Matching is a case-sensitive substring test.
fn is_sensitive_path(path: &str) -> bool {
    const FRAGMENTS: [&str; 7] = [
        ".npmrc",
        ".ssh/id_rsa",
        ".ssh/config",
        ".aws/credentials",
        "/etc/passwd",
        "/etc/hostname",
        ".git-credentials",
    ];
    FRAGMENTS.iter().any(|fragment| path.contains(*fragment))
}
1030
/// Heuristically flags a DNS name as suspicious.
///
/// A name is flagged when any of the following holds (ASCII case ignored):
/// * it is longer than 60 bytes;
/// * it contains `token`, `secret`, `passwd`, or `evil`;
/// * one of its dot-separated labels is longer than 20 bytes and consists
///   solely of ASCII letters and digits.
fn is_suspicious_dns_query(host: &str) -> bool {
    const KEYWORDS: [&str; 4] = ["token", "secret", "passwd", "evil"];

    let name = host.to_ascii_lowercase();
    if name.len() > 60 {
        return true;
    }
    if KEYWORDS.iter().any(|keyword| name.contains(*keyword)) {
        return true;
    }
    name.split('.').any(|label| {
        let long_enough = label.len() > 20;
        long_enough && label.chars().all(|ch| ch.is_ascii_alphanumeric())
    })
}
1044
/// Returns `true` when `target` begins with `http://` or `https://`
/// (case-sensitive).
fn is_external_target(target: &str) -> bool {
    ["http://", "https://"]
        .iter()
        .any(|scheme| target.starts_with(*scheme))
}
1048
1049fn is_suspicious_ip_or_host(target: &str) -> bool {
1050    args_like_host_values(target)
1051        .into_iter()
1052        .any(|host| match host.parse::<IpAddr>() {
1053            Ok(ip) => !(ip.is_loopback() || ip.is_private()),
1054            Err(_) => {
1055                let lower = host.to_ascii_lowercase();
1056                lower.contains("evil")
1057                    || lower.contains("attacker")
1058                    || lower.ends_with(".example")
1059                    || lower.ends_with(".invalid")
1060            }
1061        })
1062}
1063
/// Returns `true` when `payload` contains, ignoring ASCII case, one of the
/// markers `npm_token`, `github_token`, `secret`, `private key`, `authtoken`,
/// or `passwd`.
fn looks_like_secret_material(payload: &str) -> bool {
    const MARKERS: [&str; 6] = [
        "npm_token",
        "github_token",
        "secret",
        "private key",
        "authtoken",
        "passwd",
    ];
    let normalized = payload.to_ascii_lowercase();
    MARKERS.iter().any(|marker| normalized.contains(*marker))
}
1077
/// Returns `true` when `payload` contains, ignoring ASCII case, `allocunsafe`
/// or `uninitialized`.
fn looks_like_memory_leak_pattern(payload: &str) -> bool {
    let normalized = payload.to_ascii_lowercase();
    ["allocunsafe", "uninitialized"]
        .iter()
        .any(|marker| normalized.contains(*marker))
}
1082
/// Heuristically flags a command line as a reverse shell.
///
/// The command must mention a shell (`/bin/sh`, `/bin/bash`, or `powershell`)
/// and also a network primitive (`/dev/tcp`, `nc `, `netcat`, `socket`, or
/// `curl `). Matching ignores ASCII case.
fn looks_like_reverse_shell(command: &str) -> bool {
    const SHELLS: [&str; 3] = ["/bin/sh", "/bin/bash", "powershell"];
    const NETWORK_HINTS: [&str; 5] = ["/dev/tcp", "nc ", "netcat", "socket", "curl "];

    let normalized = command.to_ascii_lowercase();
    let mentions = |needles: &[&str]| needles.iter().any(|needle| normalized.contains(*needle));
    if !mentions(&SHELLS) {
        return false;
    }
    mentions(&NETWORK_HINTS)
}
1092
1093fn args_like_host_values(target: &str) -> Vec<String> {
1094    if let Ok(values) = serde_json::from_str::<Vec<serde_json::Value>>(target) {
1095        return values
1096            .into_iter()
1097            .filter_map(|item| item.as_str().map(ToOwned::to_owned))
1098            .collect();
1099    }
1100    vec![target.to_string()]
1101}