1use std::collections::BTreeMap;
2use std::fs;
3use std::io;
4use std::net::IpAddr;
5use std::path::{Component, Path, PathBuf};
6use std::sync::Arc;
7
8use anyhow::{Context, Result, bail};
9use flate2::read::GzDecoder;
10use reqwest::blocking::Client;
11use secfinding::Severity;
12use serde::Deserialize;
13use tempfile::TempDir;
14use vulnir::{
15 ConfidenceValue, Producer, ProducerKind, Provenance, VulnEdge, VulnIRGraph, VulnNode,
16};
17
18use crate::npm_detonation::NpmDetonationBridge;
19
/// Extension trait that adds a private-range check to [`IpAddr`].
pub trait IpAddrExt {
    /// Returns `true` when the address belongs to a private range.
    fn is_private(&self) -> bool;
}

impl IpAddrExt for IpAddr {
    /// IPv4 addresses defer to `Ipv4Addr::is_private`; IPv6 addresses defer to
    /// `Ipv6Addr::is_unique_local`.
    fn is_private(&self) -> bool {
        match *self {
            Self::V4(address) => address.is_private(),
            Self::V6(address) => address.is_unique_local(),
        }
    }
}
37
/// Options controlling an npm package scan.
#[derive(Debug, Clone)]
pub struct NpmScanOptions {
    /// When `true`, the scan also runs dynamic detonation over the package's JavaScript files
    /// in addition to the static rule checks.
    pub detonate: bool,
}

impl Default for NpmScanOptions {
    /// Detonation is enabled by default.
    fn default() -> Self {
        Self { detonate: true }
    }
}
48
/// Severity attached to rules and findings; an alias for [`secfinding::Severity`].
pub type RuleSeverity = Severity;
50
/// A single issue reported for a scanned package.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct PackageFinding {
    /// Identifier of the rule (static or dynamic) that produced the finding.
    pub id: String,
    /// Severity assigned to the finding.
    pub severity: RuleSeverity,
    /// Optional CWE identifier such as `"CWE-78"`.
    pub cwe: Option<String>,
    /// File the evidence was located in, as a path relative to the package root.
    pub file: String,
    /// 1-based line number of the evidence within `file`.
    pub line: usize,
    /// Human-readable explanation, including a suggested fix.
    pub description: String,
    /// Source line (or token) that triggered the finding.
    pub evidence: String,
}
61
62#[must_use]
63pub fn severity_label(severity: RuleSeverity) -> &'static str {
64 match severity {
65 Severity::Critical => "CRITICAL",
66 Severity::High => "HIGH",
67 Severity::Medium => "MEDIUM",
68 Severity::Low => "LOW",
69 _ => "INFO",
70 }
71}
72
/// Outcome of scanning one npm package.
#[derive(Debug, Clone)]
pub struct PackageScanResult {
    /// Package name taken from the manifest (or derived from the scan target when no
    /// `package.json` is present).
    pub package_name: String,
    /// Package version taken from the manifest (`"0.0.0"` when no `package.json` is present).
    pub package_version: String,
    /// Findings produced by the scan.
    pub findings: Vec<PackageFinding>,
}
79
80impl PackageScanResult {
81 #[must_use]
82 pub fn to_vulnir_graph(&self) -> VulnIRGraph<VulnNode, VulnEdge> {
83 let mut graph = VulnIRGraph::new();
84 let provenance = package_provenance();
85 let package_node = graph.add_node(VulnNode::TrustBoundary {
86 id: format!("npm-package-{}", self.package_name),
87 description: format!("npm package {}@{}", self.package_name, self.package_version),
88 source_type: "npm-package".to_string(),
89 confidence: ConfidenceValue::from(1.0),
90 taint_id: Some(self.package_name.clone()),
91 created_ns: None,
92 provenance: vec![provenance.clone()],
93 metadata: BTreeMap::from([
94 (
95 "package_name".to_string(),
96 serde_json::json!(&self.package_name),
97 ),
98 (
99 "package_version".to_string(),
100 serde_json::json!(&self.package_version),
101 ),
102 ])
103 .into_iter()
104 .collect(),
105 });
106
107 for finding in &self.findings {
108 let node = graph.add_node(VulnNode::Capability {
109 id: finding.id.clone(),
110 resource: "npm-package-analysis".to_string(),
111 permission: severity_label(finding.severity).to_ascii_lowercase(),
112 target: Some(finding.file.clone()),
113 arguments: vec![finding.description.clone(), finding.evidence.clone()],
114 duration_ns: None,
115 provenance: vec![provenance.clone()],
116 metadata: BTreeMap::from([
117 ("rule_id".to_string(), serde_json::json!(&finding.id)),
118 ("line".to_string(), serde_json::json!(finding.line)),
119 ("cwe".to_string(), serde_json::json!(&finding.cwe)),
120 ])
121 .into_iter()
122 .collect(),
123 });
124 graph.add_edge(
125 package_node,
126 node,
127 VulnEdge::DataFlow {
128 confidence: ConfidenceValue::from(1.0),
129 },
130 );
131 }
132
133 graph
134 }
135}
136
137#[must_use]
138pub fn vulnir_json(graph: &VulnIRGraph<VulnNode, VulnEdge>) -> serde_json::Value {
139 serde_json::json!({
140 "nodes": graph.node_indices().map(|index| serde_json::json!({
141 "index": index.index(),
142 "node": &graph[index],
143 })).collect::<Vec<_>>(),
144 "edges": graph.edge_indices().filter_map(|index| {
145 graph.edge_endpoints(index).map(|(source, target)| serde_json::json!({
146 "source": source.index(),
147 "target": target.index(),
148 "edge": &graph[index],
149 }))
150 }).collect::<Vec<_>>(),
151 })
152}
153
154pub fn scan_npm_package(path: impl AsRef<Path>) -> Result<PackageScanResult> {
155 scan_npm_package_with_options(path, &NpmScanOptions::default())
156}
157
158pub fn scan_npm_package_with_options(
159 path: impl AsRef<Path>,
160 options: &NpmScanOptions,
161) -> Result<PackageScanResult> {
162 let resolved = resolve_scan_target(path.as_ref())?;
163 let package_json_path = resolved.root.join("package.json");
164 let (package_json, manifest) = if package_json_path.is_file() {
165 let package_json = fs::read_to_string(&package_json_path).with_context(|| {
166 format!(
167 "failed to read package manifest at {}. Fix: provide a readable package.json",
168 package_json_path.display()
169 )
170 })?;
171 let manifest: PackageManifest = serde_json::from_str(&package_json).with_context(|| {
172 format!(
173 "failed to parse {}. Fix: ensure package.json is valid JSON",
174 package_json_path.display()
175 )
176 })?;
177 (package_json, manifest)
178 } else {
179 (
180 "{}".to_string(),
181 PackageManifest::from_directory(&resolved.root, resolved.display_name.clone()),
182 )
183 };
184
185 let files = collect_scan_files(&resolved.root)?;
186 if manifest.scripts.is_empty() && files.is_empty() && !package_json_path.is_file() {
187 return Ok(PackageScanResult {
188 package_name: manifest.name,
189 package_version: manifest.version,
190 findings: Vec::new(),
191 });
192 }
193
194 let rules = load_rules(&workspace_rules_dir())?;
195 let corpus = PackageCorpus::new(&package_json, manifest.clone(), &files);
196 let mut findings = Vec::new();
197
198 for rule in rules {
199 if !rule.matches(&corpus) {
200 continue;
201 }
202
203 let evidence_token = rule.primary_evidence_token();
204 let (file, line, evidence) = corpus.find_evidence_location(evidence_token);
205 findings.push(PackageFinding {
206 id: rule.id,
207 severity: rule.severity,
208 cwe: rule.cwe,
209 file,
210 line,
211 description: format!("{} Fix: {}", rule.title, rule.description),
212 evidence,
213 });
214 }
215
216 if options.detonate {
217 findings.extend(run_dynamic_detonation(&resolved.root, &files)?);
218 }
219
220 dedupe_findings(&mut findings);
221 findings.sort_by(|left, right| {
222 right
223 .severity
224 .cmp(&left.severity)
225 .then_with(|| left.id.cmp(&right.id))
226 .then_with(|| left.file.cmp(&right.file))
227 .then_with(|| left.line.cmp(&right.line))
228 });
229
230 Ok(PackageScanResult {
231 package_name: manifest.name,
232 package_version: manifest.version,
233 findings,
234 })
235}
236
237fn run_dynamic_detonation(package_dir: &Path, files: &[SourceFile]) -> Result<Vec<PackageFinding>> {
238 let module = jsdet_core::CompiledModule::new()
239 .context("failed to compile QuickJS WASM for npm detonation")?;
240 let config = jsdet_core::SandboxConfig::detonation();
241 let mut findings = Vec::new();
242
243 for source in files.iter().filter(|file| is_javascript(&file.path)) {
244 if source.text.len() > config.max_script_bytes {
245 continue;
246 }
247 let bridge = Arc::new(NpmDetonationBridge::new(package_dir, &source.path)?);
248 let result = module.execute(&[wrap_script(source)], bridge, &config)?;
249 findings.extend(dynamic_findings_for_script(source, &result.observations));
250 }
251
252 Ok(findings)
253}
254
/// Translates sandbox observations for one script into findings.
///
/// Only `ApiCall` observations are considered; every other observation kind is ignored.
/// Each recognised API name maps to a fixed rule id, severity and CWE. Some APIs always
/// produce a finding (eval, Function, child_process, net.connect); the others only do so
/// when their first argument or joined arguments pass the matching heuristic predicate.
fn dynamic_findings_for_script(
    source: &SourceFile,
    observations: &[jsdet_core::Observation],
) -> Vec<PackageFinding> {
    let mut findings = Vec::new();

    for observation in observations {
        let jsdet_core::Observation::ApiCall { api, args, .. } = observation else {
            continue;
        };
        match api.as_str() {
            // The `.tainted` variants are escalated from High to Critical.
            "eval" | "eval.tainted" => findings.push(build_finding(
                source,
                "npm-dynamic-eval-external-input",
                if api == "eval.tainted" {
                    Severity::Critical
                } else {
                    Severity::High
                },
                Some("CWE-95"),
                "Dynamic code execution via eval() with external or tainted input. Fix: remove eval() or validate and parse untrusted data before execution.",
                &first_non_empty(&args_json_strings(args).join(" "), "eval"),
            )),
            "Function" | "Function.tainted" => findings.push(build_finding(
                source,
                "npm-function-constructor-external-input",
                if api == "Function.tainted" {
                    Severity::Critical
                } else {
                    Severity::High
                },
                Some("CWE-95"),
                "Dynamic code execution via Function constructor with external or tainted input. Fix: replace string compilation with explicit functions.",
                &first_non_empty(&args_json_strings(args).join(" "), "Function"),
            )),
            "child_process.exec" | "child_process.execSync" => {
                let command = args_json_strings(args).join(" ");
                findings.push(build_finding(
                    source,
                    "npm-dynamic-command-exec",
                    Severity::Critical,
                    Some("CWE-78"),
                    // Only the description changes for reverse-shell-looking commands.
                    if looks_like_reverse_shell(&command) {
                        "Command execution with reverse-shell indicators. Fix: remove child_process execution from package install/runtime paths."
                    } else {
                        "Command execution via child_process. Fix: avoid shell execution or hardcode safe commands and arguments."
                    },
                    &first_non_empty(&command, "child_process.exec"),
                ));
            }
            "child_process.spawn" => findings.push(build_finding(
                source,
                // Here the rule id (not the description) depends on the reverse-shell check.
                if looks_like_reverse_shell(&args_json_strings(args).join(" ")) {
                    "npm-reverse-shell-spawn"
                } else {
                    "npm-child-process-spawn"
                },
                Severity::Critical,
                Some("CWE-78"),
                "Process spawning from package code. Fix: remove runtime process creation or whitelist safe subprocesses in trusted tooling only.",
                &first_non_empty(&args_json_strings(args).join(" "), "child_process.spawn"),
            )),
            "fs.readFileSync" => {
                let path = first_json_string(args).unwrap_or_default();
                if is_sensitive_path(&path) {
                    findings.push(build_finding(
                        source,
                        "npm-sensitive-file-read",
                        Severity::High,
                        Some("CWE-538"),
                        "Sensitive local file read from package code. Fix: remove host file access and consume explicit user-provided inputs instead.",
                        &path,
                    ));
                }
            }
            "process.env.get" => {
                let key = first_json_string(args).unwrap_or_default();
                if is_sensitive_env_key(&key) {
                    findings.push(build_finding(
                        source,
                        "npm-env-secret-access",
                        Severity::High,
                        Some("CWE-526"),
                        "Package reads sensitive environment variables. Fix: stop harvesting secrets from process.env and request explicit credentials through a secure interface.",
                        &key,
                    ));
                }
            }
            "dns.lookup" | "dns.resolveTxt" => {
                let host = first_json_string(args).unwrap_or_default();
                if is_suspicious_dns_query(&host) {
                    findings.push(build_finding(
                        source,
                        "npm-dns-exfiltration",
                        Severity::Critical,
                        Some("CWE-200"),
                        "Suspicious DNS lookup consistent with data exfiltration. Fix: remove DNS-based transport and avoid encoding host data into domain labels.",
                        &host,
                    ));
                }
            }
            "http.get" | "http.request" | "https.get" | "https.request" => {
                let target = first_json_string(args).unwrap_or_default();
                if is_external_target(&target) {
                    findings.push(build_finding(
                        source,
                        "npm-network-exfiltration",
                        Severity::High,
                        Some("CWE-319"),
                        "Outbound network request from package code. Fix: remove unsolicited network calls or gate them behind explicit user configuration.",
                        &target,
                    ));
                }
            }
            // Always reported; the host heuristic only raises the severity.
            "net.connect" | "net.createConnection" => findings.push(build_finding(
                source,
                "npm-net-socket-exfiltration",
                if is_suspicious_ip_or_host(&args_json_strings(args).join(" ")) {
                    Severity::Critical
                } else {
                    Severity::High
                },
                Some("CWE-200"),
                "Raw socket connection from package code. Fix: remove arbitrary outbound socket creation and reverse-shell patterns.",
                &first_non_empty(&args_json_strings(args).join(" "), "net.connect"),
            )),
            "net.write" => {
                let payload = args_json_strings(args).join(" ");
                if looks_like_secret_material(&payload) {
                    findings.push(build_finding(
                        source,
                        "npm-net-secret-exfiltration",
                        Severity::Critical,
                        Some("CWE-200"),
                        "Socket write contains secret or credential material. Fix: stop transmitting harvested data to remote peers.",
                        &first_non_empty(&payload, "net.write"),
                    ));
                }
            }
            "Buffer.from" => {
                let payload = args_json_strings(args).join(" ");
                if looks_like_memory_leak_pattern(&payload) {
                    findings.push(build_finding(
                        source,
                        "npm-buffer-memory-leak",
                        Severity::Medium,
                        Some("CWE-200"),
                        "Buffer operation suggests unsafe memory disclosure or encoding abuse. Fix: avoid exposing raw buffers derived from secrets or uninitialized memory.",
                        &first_non_empty(&payload, "Buffer.from"),
                    ));
                }
            }
            // Unrecognised API names produce no finding.
            _ => {}
        }
    }

    findings
}
413
414fn dedupe_findings(findings: &mut Vec<PackageFinding>) {
415 let mut deduped = BTreeMap::<(String, String, usize, String), PackageFinding>::new();
416 for finding in findings.drain(..) {
417 let key = (
418 finding.id.clone(),
419 finding.file.clone(),
420 finding.line,
421 finding.evidence.clone(),
422 );
423 deduped
424 .entry(key)
425 .and_modify(|current| {
426 if finding.severity > current.severity {
427 *current = finding.clone();
428 }
429 })
430 .or_insert(finding);
431 }
432 *findings = deduped.into_values().collect();
433}
434
435fn build_finding(
436 source: &SourceFile,
437 id: &str,
438 severity: Severity,
439 cwe: Option<&str>,
440 description: &str,
441 evidence_token: &str,
442) -> PackageFinding {
443 let (line, evidence) = find_evidence_location(&source.text, evidence_token);
444 PackageFinding {
445 id: id.to_string(),
446 severity,
447 cwe: cwe.map(ToOwned::to_owned),
448 file: source.path.clone(),
449 line,
450 description: description.to_string(),
451 evidence,
452 }
453}
454
455fn find_evidence_location(text: &str, token: &str) -> (usize, String) {
456 if token.is_empty() {
457 return (1, first_non_empty_line(text));
458 }
459 let normalized = token.trim_matches('"');
460 if let Some(index) = text.find(normalized) {
461 let line = text[..index].bytes().filter(|byte| *byte == b'\n').count() + 1;
462 let evidence = text
463 .lines()
464 .nth(line.saturating_sub(1))
465 .map(str::trim)
466 .filter(|line| !line.is_empty())
467 .map(ToOwned::to_owned)
468 .unwrap_or_else(|| normalized.to_string());
469 return (line, evidence);
470 }
471 (1, first_non_empty(normalized, &first_non_empty_line(text)))
472}
473
/// Returns `primary` unless it is blank (empty or whitespace only), in which case
/// `fallback` is returned. The chosen value is copied verbatim, without trimming.
fn first_non_empty(primary: &str, fallback: &str) -> String {
    let chosen = if primary.trim().is_empty() {
        fallback
    } else {
        primary
    };
    chosen.to_owned()
}
481
/// Returns the first line of `text` that is not blank, trimmed of surrounding whitespace.
///
/// Falls back to the literal `"package source"` when every line is blank.
fn first_non_empty_line(text: &str) -> String {
    for raw_line in text.lines() {
        let trimmed = raw_line.trim();
        if !trimmed.is_empty() {
            return trimmed.to_owned();
        }
    }
    "package source".to_string()
}
488
489fn args_json_strings(args: &[jsdet_core::Value]) -> Vec<String> {
490 args.iter().flat_map(value_to_strings).collect()
491}
492
493fn first_json_string(args: &[jsdet_core::Value]) -> Option<String> {
494 args_json_strings(args)
495 .into_iter()
496 .find(|value| !value.is_empty())
497}
498
499fn value_to_strings(value: &jsdet_core::Value) -> Vec<String> {
500 match value {
501 jsdet_core::Value::String(text, _) | jsdet_core::Value::Json(text, _) => {
502 if let Ok(parsed) = serde_json::from_str::<Vec<serde_json::Value>>(text) {
503 parsed
504 .into_iter()
505 .filter_map(|item| item.as_str().map(ToOwned::to_owned))
506 .collect()
507 } else {
508 vec![text.clone()]
509 }
510 }
511 jsdet_core::Value::Bytes(bytes) => vec![String::from_utf8_lossy(bytes).into_owned()],
512 jsdet_core::Value::Bool(value) => vec![value.to_string()],
513 jsdet_core::Value::Int(value) => vec![value.to_string()],
514 jsdet_core::Value::Float(value) => vec![value.to_string()],
515 jsdet_core::Value::Undefined | jsdet_core::Value::Null => Vec::new(),
516 }
517}
518
519fn wrap_script(source: &SourceFile) -> String {
520 let dirname = Path::new(&source.path).parent().map_or_else(
521 || ".".to_string(),
522 |path| path.to_string_lossy().replace('\\', "/"),
523 );
524 format!(
525 "var __filename = {filename}; var __dirname = {dirname};\n{script}",
526 filename = serde_json::to_string(&source.path).unwrap_or_else(|_| "\"index.js\"".into()),
527 dirname = serde_json::to_string(&dirname).unwrap_or_else(|_| "\".\"".into()),
528 script = source.text
529 )
530}
531
532fn workspace_rules_dir() -> PathBuf {
533 Path::new(env!("CARGO_MANIFEST_DIR"))
534 .join("..")
535 .join("..")
536 .join("rules")
537}
538
539fn load_rules(rules_dir: &Path) -> Result<Vec<Rule>> {
540 let mut entries = fs::read_dir(rules_dir)
541 .with_context(|| {
542 format!(
543 "failed to read rules directory {}. Fix: ensure the workspace rules/ directory exists",
544 rules_dir.display()
545 )
546 })?
547 .collect::<std::io::Result<Vec<_>>>()
548 .with_context(|| format!("failed to enumerate {}", rules_dir.display()))?;
549 entries.sort_by_key(|entry| entry.path());
550
551 let mut rules = Vec::new();
552 for entry in entries {
553 let path = entry.path();
554 if path.extension().and_then(std::ffi::OsStr::to_str) != Some("toml") {
555 continue;
556 }
557 let Some(file_name) = path.file_name().and_then(std::ffi::OsStr::to_str) else {
558 continue;
559 };
560 if !file_name.starts_with("npm-") {
561 continue;
562 }
563 let text = fs::read_to_string(&path)
564 .with_context(|| format!("failed to read rule file {}", path.display()))?;
565 let rule: Rule = toml::from_str(&text).with_context(|| {
566 format!(
567 "failed to parse {}. Fix: make the rule valid TOML matching the scanner schema",
568 path.display()
569 )
570 })?;
571 rules.push(rule);
572 }
573
574 if rules.is_empty() {
575 bail!(
576 "no npm rules loaded from {}. Fix: add TOML rule files under rules/",
577 rules_dir.display()
578 );
579 }
580
581 Ok(rules)
582}
583
584fn collect_scan_files(package_dir: &Path) -> Result<Vec<SourceFile>> {
585 let mut stack = vec![package_dir.to_path_buf()];
586 let mut files = Vec::new();
587 while let Some(dir) = stack.pop() {
588 for entry in fs::read_dir(&dir)
589 .with_context(|| format!("failed to read directory {}", dir.display()))?
590 {
591 let entry =
592 entry.with_context(|| format!("failed to read entry in {}", dir.display()))?;
593 let path = entry.path();
594 let file_type = entry
595 .file_type()
596 .with_context(|| format!("failed to inspect {}", path.display()))?;
597 if file_type.is_dir() {
598 let name = entry.file_name().to_string_lossy().into_owned();
599 if name == "node_modules" || name == ".git" || name == "target" {
600 continue;
601 }
602 stack.push(path);
603 continue;
604 }
605 if !file_type.is_file() {
606 continue;
607 }
608 let Some(ext) = path.extension().and_then(std::ffi::OsStr::to_str) else {
609 continue;
610 };
611 if !matches!(ext, "js" | "cjs" | "mjs" | "json") {
612 continue;
613 }
614 let text = fs::read_to_string(&path)
615 .with_context(|| format!("failed to read source file {}", path.display()))?;
616 let relative = path
617 .strip_prefix(package_dir)
618 .unwrap_or(path.as_path())
619 .to_string_lossy()
620 .replace('\\', "/");
621 files.push(SourceFile {
622 path: relative,
623 text,
624 });
625 }
626 }
627 files.sort_by(|left, right| left.path.cmp(&right.path));
628 Ok(files)
629}
630
/// Returns `true` when `path` ends in `.js`, `.cjs` or `.mjs` (case-sensitive).
fn is_javascript(path: &str) -> bool {
    [".js", ".cjs", ".mjs"]
        .iter()
        .any(|suffix| path.ends_with(suffix))
}
634
635fn package_provenance() -> Provenance {
636 Provenance::new(
637 Producer::new("jsdet-cli-npm", ProducerKind::Static)
638 .with_version(env!("CARGO_PKG_VERSION")),
639 )
640}
641
/// A text file collected from the package being scanned.
#[derive(Debug, Clone)]
struct SourceFile {
    /// Path relative to the package root, using `/` separators.
    path: String,
    /// Full file contents.
    text: String,
}
647
/// Pre-computed, lower-cased views of a package that static rules are matched against.
struct PackageCorpus {
    /// Parsed (or synthesized) package manifest.
    manifest: PackageManifest,
    /// ASCII-lower-cased raw `package.json` text.
    package_json_lower: String,
    /// ASCII-lower-cased concatenation of all JavaScript files, joined with newlines.
    javascript_lower: String,
    /// ASCII-lower-cased concatenation of all collected files, joined with newlines.
    package_lower: String,
    /// The collected files, kept with original casing for evidence lookup.
    files: Vec<SourceFile>,
    /// Names of the scripts declared in the manifest.
    script_names: Vec<String>,
}
656
657impl PackageCorpus {
658 fn new(package_json: &str, manifest: PackageManifest, files: &[SourceFile]) -> Self {
659 let javascript = files
660 .iter()
661 .filter(|file| is_javascript(&file.path))
662 .map(|file| file.text.as_str())
663 .collect::<Vec<_>>()
664 .join("\n");
665 let package_text = files
666 .iter()
667 .map(|file| file.text.as_str())
668 .collect::<Vec<_>>()
669 .join("\n");
670 let script_names = manifest.scripts.keys().cloned().collect();
671 Self {
672 manifest,
673 package_json_lower: package_json.to_ascii_lowercase(),
674 javascript_lower: javascript.to_ascii_lowercase(),
675 package_lower: package_text.to_ascii_lowercase(),
676 files: files.to_vec(),
677 script_names,
678 }
679 }
680
681 fn scope_text(&self, scope: RuleScope) -> &str {
682 match scope {
683 RuleScope::Package => &self.package_lower,
684 RuleScope::PackageJson => &self.package_json_lower,
685 RuleScope::Javascript => &self.javascript_lower,
686 }
687 }
688
689 fn find_evidence_location(&self, token: &str) -> (String, usize, String) {
690 let token = token.to_ascii_lowercase();
691 for file in &self.files {
692 if let Some((_, line, evidence)) = find_token(&file.text, &token) {
693 return (file.path.clone(), line, evidence);
694 }
695 }
696 ("package.json".to_string(), 1, "package.json".to_string())
697 }
698}
699
/// A static detection rule, deserialized from a TOML rule file.
#[derive(Debug, Clone, Deserialize)]
struct Rule {
    /// Rule identifier, reported as the finding id.
    id: String,
    /// Short title; combined with `description` in the finding text.
    title: String,
    /// Longer text that follows the title (after "Fix:") in the finding description.
    description: String,
    /// Severity assigned to findings from this rule.
    severity: Severity,
    /// Optional CWE identifier.
    #[serde(default)]
    cwe: Option<String>,
    /// Which part of the package the needles are searched in (defaults to the whole package).
    #[serde(default)]
    scope: RuleScope,
    /// Needles that must all occur in the scope text (case-insensitive).
    #[serde(default)]
    all: Vec<String>,
    /// Needles of which at least one must occur; an empty list imposes no constraint.
    #[serde(default)]
    any: Vec<String>,
    /// Package names the rule is restricted to; an empty list imposes no constraint.
    #[serde(default)]
    package_names: Vec<String>,
    /// Manifest script names of which one must be present; an empty list imposes no constraint.
    #[serde(default)]
    script_names: Vec<String>,
}
719
720impl Rule {
721 fn matches(&self, corpus: &PackageCorpus) -> bool {
722 let scope_text = corpus.scope_text(self.scope);
723 let has_all = self
724 .all
725 .iter()
726 .all(|needle| scope_text.contains(&needle.to_ascii_lowercase()));
727 let has_any = self.any.is_empty()
728 || self
729 .any
730 .iter()
731 .any(|needle| scope_text.contains(&needle.to_ascii_lowercase()));
732 let matches_package_name = self.package_names.is_empty()
733 || self
734 .package_names
735 .iter()
736 .any(|name| corpus.manifest.name.eq_ignore_ascii_case(name));
737 let matches_script_name = self.script_names.is_empty()
738 || self.script_names.iter().any(|name| {
739 corpus
740 .script_names
741 .iter()
742 .any(|script_name| script_name.eq_ignore_ascii_case(name))
743 });
744 has_all && has_any && matches_package_name && matches_script_name
745 }
746
747 fn primary_evidence_token(&self) -> &str {
748 self.all
749 .first()
750 .or_else(|| self.any.first())
751 .or_else(|| self.script_names.first())
752 .or_else(|| self.package_names.first())
753 .map_or("package.json", String::as_str)
754 }
755}
756
/// Portion of the package a rule's needles are searched in.
///
/// Deserialized from snake_case names (`package`, `package_json`, `javascript`).
#[derive(Debug, Clone, Copy, Default, Deserialize)]
#[serde(rename_all = "snake_case")]
enum RuleScope {
    /// All collected files (the default).
    #[default]
    Package,
    /// Only the raw `package.json` text.
    PackageJson,
    /// Only JavaScript files.
    Javascript,
}
765
/// The subset of `package.json` the scanner reads; every field defaults when absent.
#[derive(Debug, Clone, Default, Deserialize)]
struct PackageManifest {
    /// Package name.
    #[serde(default)]
    name: String,
    /// Package version.
    #[serde(default)]
    version: String,
    /// Declared scripts, keyed by script name.
    #[serde(default)]
    scripts: BTreeMap<String, String>,
}
775
776impl PackageManifest {
777 fn from_directory(package_dir: &Path, display_name: String) -> Self {
778 Self {
779 name: if display_name.is_empty() {
780 package_dir
781 .file_name()
782 .and_then(std::ffi::OsStr::to_str)
783 .map_or_else(|| "unknown-package".to_string(), ToOwned::to_owned)
784 } else {
785 display_name
786 },
787 version: "0.0.0".to_string(),
788 scripts: BTreeMap::new(),
789 }
790 }
791}
792
/// Searches `text` for `token`, ignoring ASCII case in `text`.
///
/// `token` is expected to be lower-case already. On a match, returns the byte offset, the
/// 1-based line number and the trimmed source line (or the token itself when that line is
/// blank).
fn find_token(text: &str, token: &str) -> Option<(usize, usize, String)> {
    // ASCII lower-casing keeps byte offsets identical between `haystack` and `text`.
    let haystack = text.to_ascii_lowercase();
    let offset = haystack.find(token)?;

    let newlines_before = text.as_bytes()[..offset]
        .iter()
        .filter(|&&byte| byte == b'\n')
        .count();
    let line_number = newlines_before + 1;

    let snippet = match text.lines().nth(newlines_before).map(str::trim) {
        Some(content) if !content.is_empty() => content.to_owned(),
        _ => token.to_owned(),
    };
    Some((offset, line_number, snippet))
}
807
/// A scan target resolved to a directory on disk.
struct ResolvedScanTarget {
    /// Temporary extraction directory, when the target was a tarball or registry download.
    /// Held only so it stays alive as long as `root` is in use (presumably removed on drop,
    /// per the `tempfile` crate's `TempDir`).
    _tempdir: Option<TempDir>,
    /// Directory that is scanned.
    root: PathBuf,
    /// Name used when no `package.json` supplies one; may be empty.
    display_name: String,
}
813
814fn resolve_scan_target(path: &Path) -> Result<ResolvedScanTarget> {
815 if path.exists() {
816 if path.is_dir() {
817 return Ok(ResolvedScanTarget {
818 _tempdir: None,
819 root: path.to_path_buf(),
820 display_name: path
821 .file_name()
822 .and_then(std::ffi::OsStr::to_str)
823 .map_or_else(String::new, ToOwned::to_owned),
824 });
825 }
826 if is_npm_tarball(path) {
827 let tempdir = extract_tarball(path)?;
828 let root = locate_package_root(tempdir.path())?;
829 return Ok(ResolvedScanTarget {
830 _tempdir: Some(tempdir),
831 root,
832 display_name: String::new(),
833 });
834 }
835 bail!(
836 "failed to scan {}. Fix: provide a package directory, tarball, or npm package name",
837 path.display()
838 );
839 }
840 download_registry_package(&path.to_string_lossy())
841}
842
/// Returns `true` when the path ends in `.tgz`, `.tar.gz` or `.tar` (case-sensitive).
fn is_npm_tarball(path: &Path) -> bool {
    let rendered = path.to_string_lossy();
    [".tgz", ".tar.gz", ".tar"]
        .iter()
        .any(|suffix| rendered.ends_with(suffix))
}
847
848fn download_registry_package(spec: &str) -> Result<ResolvedScanTarget> {
849 let (name, version) = split_package_spec(spec);
850 let client = Client::builder()
851 .timeout(std::time::Duration::from_secs(30))
852 .build()
853 .context("failed to build npm registry client")?;
854 let metadata: RegistryPackage = client
855 .get(format!(
856 "https://registry.npmjs.org/{}",
857 urlencoding::encode(&name)
858 ))
859 .header(reqwest::header::USER_AGENT, "jsdet/0.1.0")
860 .send()
861 .with_context(|| format!("failed to fetch npm metadata for {name}"))?
862 .error_for_status()
863 .with_context(|| format!("npm registry returned an error for {name}"))?
864 .json()
865 .with_context(|| format!("failed to parse npm metadata for {name}"))?;
866 let resolved_version = version
867 .or_else(|| metadata.dist_tags.latest.clone())
868 .ok_or_else(|| anyhow::anyhow!("npm package {name} did not expose a resolvable version"))?;
869 let manifest = metadata
870 .versions
871 .get(&resolved_version)
872 .ok_or_else(|| anyhow::anyhow!("npm version {resolved_version} not found for {name}"))?;
873 let tarball_url = manifest.dist.tarball.clone().ok_or_else(|| {
874 anyhow::anyhow!("npm metadata for {name}@{resolved_version} has no tarball URL")
875 })?;
876 let download_dir = tempfile::tempdir().context("failed to create npm download directory")?;
877 let archive_path = download_dir.path().join("package.tgz");
878 let mut response = client
879 .get(&tarball_url)
880 .header(reqwest::header::USER_AGENT, "jsdet/0.1.0")
881 .send()
882 .with_context(|| format!("failed to download npm tarball from {tarball_url}"))?
883 .error_for_status()
884 .with_context(|| format!("npm tarball download failed for {name}@{resolved_version}"))?;
885 let mut file = fs::File::create(&archive_path)
886 .with_context(|| format!("failed to create {}", archive_path.display()))?;
887 io::copy(&mut response, &mut file)
888 .with_context(|| format!("failed to persist {}", archive_path.display()))?;
889 let extracted = extract_tarball(&archive_path)?;
890 let root = locate_package_root(extracted.path())?;
891 Ok(ResolvedScanTarget {
892 _tempdir: Some(extracted),
893 root,
894 display_name: manifest.name.clone().unwrap_or(name),
895 })
896}
897
/// Splits an npm package spec into its name and optional version.
///
/// The version separator is the last `@`, ignoring a leading `@` that marks a scoped
/// package (`@scope/name@1.2.3`). An empty version is reported as `None`.
fn split_package_spec(spec: &str) -> (String, Option<String>) {
    // Skip the scope marker so it is never mistaken for the version separator.
    let search_from = usize::from(spec.starts_with('@'));
    match spec[search_from..].rfind('@') {
        Some(relative) => {
            let separator = search_from + relative;
            let version = &spec[separator + 1..];
            let version = if version.is_empty() {
                None
            } else {
                Some(version.to_owned())
            };
            (spec[..separator].to_owned(), version)
        }
        None => (spec.to_owned(), None),
    }
}
917
/// The parts of an npm registry package document the scanner reads.
#[derive(Debug, Deserialize)]
struct RegistryPackage {
    /// The `dist-tags` object; required in the response.
    #[serde(rename = "dist-tags")]
    dist_tags: RegistryDistTags,
    /// Published versions keyed by version string; defaults to empty when absent.
    #[serde(default)]
    versions: BTreeMap<String, RegistryVersion>,
}
925
/// The `dist-tags` object of a registry document; only `latest` is read.
#[derive(Debug, Default, Deserialize)]
struct RegistryDistTags {
    /// Version the `latest` tag points at, if present.
    latest: Option<String>,
}
930
/// Metadata for one published version in a registry document.
#[derive(Debug, Default, Deserialize)]
struct RegistryVersion {
    /// Package name recorded for this version, if present.
    name: Option<String>,
    /// Distribution information for this version.
    dist: RegistryDist,
}
936
/// The `dist` object of a published version; only the tarball URL is read.
#[derive(Debug, Default, Deserialize)]
struct RegistryDist {
    /// URL of the version's tarball, if present.
    tarball: Option<String>,
}
941
942fn extract_tarball(path: &Path) -> Result<TempDir> {
943 let file =
944 fs::File::open(path).with_context(|| format!("failed to read {}", path.display()))?;
945 let tempdir = tempfile::tempdir().context("failed to create extraction directory")?;
946 if matches!(
947 path.extension().and_then(|value| value.to_str()),
948 Some("tgz" | "gz")
949 ) {
950 unpack_tar(GzDecoder::new(file), tempdir.path())?;
951 } else {
952 unpack_tar(file, tempdir.path())?;
953 }
954 Ok(tempdir)
955}
956
957fn unpack_tar(reader: impl io::Read, destination: &Path) -> Result<()> {
958 let mut archive = tar::Archive::new(reader);
959 for entry in archive
960 .entries()
961 .context("failed to enumerate tar entries")?
962 {
963 let mut entry = entry.context("failed to read tar entry")?;
964 let raw_path = entry.path().context("failed to read tar entry path")?;
965 let path = sanitize_archive_path(&raw_path)?;
966 let target = destination.join(path);
967 if let Some(parent) = target.parent() {
968 fs::create_dir_all(parent)
969 .with_context(|| format!("failed to create {}", parent.display()))?;
970 }
971 entry
972 .unpack(&target)
973 .with_context(|| format!("failed to unpack {}", target.display()))?;
974 }
975 Ok(())
976}
977
978fn sanitize_archive_path(path: &Path) -> Result<PathBuf> {
979 let mut cleaned = PathBuf::new();
980 for component in path.components() {
981 match component {
982 Component::Normal(part) => cleaned.push(part),
983 Component::CurDir => {}
984 _ => bail!("archive contains unsafe path: {}", path.display()),
985 }
986 }
987 if cleaned.as_os_str().is_empty() {
988 bail!("archive contains empty path entry");
989 }
990 Ok(cleaned)
991}
992
993fn locate_package_root(extracted_root: &Path) -> Result<PathBuf> {
994 let direct = extracted_root.join("package");
995 if direct.join("package.json").exists() {
996 return Ok(direct);
997 }
998 for entry in fs::read_dir(extracted_root)
999 .with_context(|| format!("failed to read {}", extracted_root.display()))?
1000 {
1001 let entry = entry?;
1002 let path = entry.path();
1003 if path.is_dir() && path.join("package.json").exists() {
1004 return Ok(path);
1005 }
1006 }
1007 bail!("no extracted package root with package.json found")
1008}
1009
/// Returns `true` when the environment variable name contains a credential-like marker
/// (`token`, `secret`, `key`, `passwd`, `password`, `auth`), ignoring ASCII case.
fn is_sensitive_env_key(key: &str) -> bool {
    const MARKERS: [&str; 6] = ["token", "secret", "key", "passwd", "password", "auth"];
    let lowered = key.to_ascii_lowercase();
    for marker in MARKERS.iter() {
        if lowered.contains(marker) {
            return true;
        }
    }
    false
}
1016
/// Returns `true` when the path contains one of the known sensitive file fragments
/// (case-sensitive substring match).
fn is_sensitive_path(path: &str) -> bool {
    const FRAGMENTS: [&str; 7] = [
        ".npmrc",
        ".ssh/id_rsa",
        ".ssh/config",
        ".aws/credentials",
        "/etc/passwd",
        "/etc/hostname",
        ".git-credentials",
    ];
    for fragment in FRAGMENTS.iter() {
        if path.contains(fragment) {
            return true;
        }
    }
    false
}
1030
/// Heuristically decides whether a DNS query name looks like data exfiltration.
///
/// A name is suspicious when it is longer than 60 bytes, contains `token`, `secret`,
/// `passwd` or `evil` (ASCII case-insensitive), or has a purely alphanumeric label longer
/// than 20 characters.
fn is_suspicious_dns_query(host: &str) -> bool {
    let lowered = host.to_ascii_lowercase();
    if lowered.len() > 60 {
        return true;
    }

    let has_keyword = ["token", "secret", "passwd", "evil"]
        .iter()
        .any(|keyword| lowered.contains(keyword));
    if has_keyword {
        return true;
    }

    lowered.split('.').any(|label| {
        label.len() > 20 && label.bytes().all(|byte| byte.is_ascii_alphanumeric())
    })
}
1044
/// Returns `true` when the target starts with an `http://` or `https://` scheme
/// (case-sensitive).
fn is_external_target(target: &str) -> bool {
    ["http://", "https://"]
        .iter()
        .any(|scheme| target.starts_with(scheme))
}
1048
1049fn is_suspicious_ip_or_host(target: &str) -> bool {
1050 args_like_host_values(target)
1051 .into_iter()
1052 .any(|host| match host.parse::<IpAddr>() {
1053 Ok(ip) => !(ip.is_loopback() || ip.is_private()),
1054 Err(_) => {
1055 let lower = host.to_ascii_lowercase();
1056 lower.contains("evil")
1057 || lower.contains("attacker")
1058 || lower.ends_with(".example")
1059 || lower.ends_with(".invalid")
1060 }
1061 })
1062}
1063
/// Returns `true` when the payload mentions a known secret marker, ignoring ASCII case.
fn looks_like_secret_material(payload: &str) -> bool {
    const MARKERS: [&str; 6] = [
        "npm_token",
        "github_token",
        "secret",
        "private key",
        "authtoken",
        "passwd",
    ];
    let lowered = payload.to_ascii_lowercase();
    for marker in MARKERS.iter() {
        if lowered.contains(marker) {
            return true;
        }
    }
    false
}
1077
/// Returns `true` when the payload mentions `allocUnsafe` or `uninitialized`, ignoring
/// ASCII case.
fn looks_like_memory_leak_pattern(payload: &str) -> bool {
    let lowered = payload.to_ascii_lowercase();
    ["allocunsafe", "uninitialized"]
        .iter()
        .any(|marker| lowered.contains(marker))
}
1082
/// Returns `true` when a command names a shell (`/bin/sh`, `/bin/bash`, `powershell`) and
/// also a network transport indicator (`/dev/tcp`, `nc `, `netcat`, `socket`, `curl `),
/// ignoring ASCII case.
fn looks_like_reverse_shell(command: &str) -> bool {
    let lowered = command.to_ascii_lowercase();

    let names_shell = ["/bin/sh", "/bin/bash", "powershell"]
        .iter()
        .any(|shell| lowered.contains(shell));
    if !names_shell {
        return false;
    }

    ["/dev/tcp", "nc ", "netcat", "socket", "curl "]
        .iter()
        .any(|transport| lowered.contains(transport))
}
1092
1093fn args_like_host_values(target: &str) -> Vec<String> {
1094 if let Ok(values) = serde_json::from_str::<Vec<serde_json::Value>>(target) {
1095 return values
1096 .into_iter()
1097 .filter_map(|item| item.as_str().map(ToOwned::to_owned))
1098 .collect();
1099 }
1100 vec![target.to_string()]
1101}