use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use std::time::Duration;
use anyhow::{anyhow, Context};
use once_cell::sync::Lazy;
use regex::Regex;
use serde::Serialize;
use crate::engine::{Adjustments, Engine, Scope, Severity};
/// A scan target, as classified by [`Target::parse`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Target {
/// A path on the local filesystem.
LocalPath(PathBuf),
/// A GitHub repository URL, stored exactly as given
/// (`https://github.com/...` or `git@github.com:...`).
Github(String),
/// An npm package spec (the text after an optional `npm:` prefix).
Npm(String),
}
impl Target {
    /// Classifies a `--scan` argument into a [`Target`].
    ///
    /// Resolution order:
    /// 1. `npm:<spec>` is always an npm package.
    /// 2. `https://github.com/...` and `git@github.com:...` are GitHub repositories.
    /// 3. Any other `http://` / `https://` URL is rejected.
    /// 4. A string naming an existing filesystem entry is a local path.
    /// 5. A string that looks like a path (leading `.`, `/` or `~`) but does not
    ///    exist is rejected.
    /// 6. Everything else is treated as a bare npm package spec.
    ///
    /// # Errors
    ///
    /// Returns an error for empty (or whitespace-only) targets, an `npm:` prefix
    /// with no package after it, non-GitHub URLs, and path-like strings that do
    /// not exist.
    pub fn parse(s: &str) -> anyhow::Result<Self> {
        // An empty target would otherwise fall through to `Target::Npm("")`.
        if s.trim().is_empty() {
            anyhow::bail!("--scan target is empty");
        }

        if let Some(pkg) = s.strip_prefix("npm:") {
            if pkg.trim().is_empty() {
                anyhow::bail!("--scan target '{s}' does not name an npm package");
            }
            return Ok(Target::Npm(pkg.to_string()));
        }

        if s.starts_with("https://github.com/") || s.starts_with("git@github.com:") {
            return Ok(Target::Github(s.to_string()));
        }
        if s.starts_with("http://") || s.starts_with("https://") {
            anyhow::bail!("only GitHub URLs are supported for --scan (got '{s}')");
        }

        // Note: `~` is not expanded here, so `~/x` is looked up literally.
        let path = PathBuf::from(s);
        if path.exists() {
            return Ok(Target::LocalPath(path));
        }
        if s.starts_with('.') || s.starts_with('/') || s.starts_with('~') {
            anyhow::bail!("--scan path '{s}' does not exist");
        }

        Ok(Target::Npm(s.to_string()))
    }
}
/// A single issue reported by one of the scan passes.
#[derive(Debug, Clone, Serialize)]
pub struct Finding {
// `pass` names the pass that produced the finding (this file uses "static",
// "metadata" and "catalog"); `id` is the signature / rule identifier.
pub pass: &'static str, pub id: String,
/// Severity assigned by the signature or rule that matched.
pub severity: Severity,
/// Human-readable explanation of the finding.
pub detail: String,
/// Where the issue was found (`path:line` for static findings); left out of
/// the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub location: Option<String>, }
/// Aggregated result of one scan: the findings of every pass plus bookkeeping
/// about which passes ran.
#[derive(Debug, Serialize)]
pub struct Report {
/// The target string the scan was asked to examine.
pub target: String,
/// Findings from all passes, in the order the passes produced them.
pub findings: Vec<Finding>,
/// Names of the passes that completed.
pub passes_run: Vec<&'static str>,
/// Passes that did not run, each paired with the reason.
pub passes_skipped: Vec<(&'static str, String)>,
/// Overall outcome; recomputed from `findings` by `finalize`.
pub verdict: Verdict,
}
/// Overall outcome of a scan, derived from the most severe finding.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum Verdict {
/// No findings, or nothing worse than `Severity::Low` (exit code 0).
Pass,
/// The worst finding is `Severity::Medium` (exit code 1).
Caution,
/// At least one `Severity::High` or `Severity::Critical` finding (exit code 2).
Fail,
}
impl Report {
    /// Recomputes `verdict` from the most severe finding currently recorded.
    ///
    /// With no findings the verdict is `Pass`.
    fn finalize(&mut self) {
        let Some(worst) = self.findings.iter().map(|finding| finding.severity).max() else {
            self.verdict = Verdict::Pass;
            return;
        };
        self.verdict = match worst {
            Severity::Low => Verdict::Pass,
            Severity::Medium => Verdict::Caution,
            Severity::High | Severity::Critical => Verdict::Fail,
        };
    }

    /// Maps the verdict to a process exit code: 0 = pass, 1 = caution, 2 = fail.
    pub fn exit_code(&self) -> i32 {
        match self.verdict {
            Verdict::Fail => 2,
            Verdict::Caution => 1,
            Verdict::Pass => 0,
        }
    }

    /// Renders the report as plain text: target, passes (with skip reasons),
    /// one line per finding, and the final verdict.
    pub fn render_text(&self) -> String {
        // Build the optional "(skipped: ...)" suffix first so the header is a
        // single formatted line.
        let skipped_note = if self.passes_skipped.is_empty() {
            String::new()
        } else {
            let reasons: Vec<String> = self
                .passes_skipped
                .iter()
                .map(|(pass, reason)| format!("{pass} -- {reason}"))
                .collect();
            format!(" (skipped: {})", reasons.join("; "))
        };

        let mut text = format!("scan target: {}\n", self.target);
        text.push_str(&format!(
            "passes: {}{}\n",
            self.passes_run.join(", "),
            skipped_note
        ));

        match self.findings.len() {
            0 => text.push_str("findings: none\n"),
            total => {
                text.push_str(&format!("findings: {}\n", total));
                for finding in &self.findings {
                    let at = match &finding.location {
                        Some(loc) => format!(" @ {loc}"),
                        None => String::new(),
                    };
                    text.push_str(&format!(
                        " [{:?}] {} ({}): {}{}\n",
                        finding.severity, finding.id, finding.pass, finding.detail, at
                    ));
                }
            }
        }

        text.push_str(&format!("verdict: {:?}\n", self.verdict));
        text
    }
}
/// One static-analysis signature: a regex plus the metadata reported when it matches.
struct StaticSig {
// Identifier copied into `Finding::id`.
id: &'static str,
// Severity copied into `Finding::severity`.
severity: Severity,
// Human-readable description copied into `Finding::detail`.
detail: &'static str,
// Lowercase file extensions the signature applies to; an empty slice means every file.
exts: &'static [&'static str],
// Regex source; compiled once in `COMPILED_SIGS`.
re: &'static str,
}
// Extension sets used by the `exts` field of the signatures below.
// JavaScript / TypeScript source extensions.
const JS: &[&str] = &["js", "mjs", "cjs", "ts", "mts", "cts", "jsx", "tsx"];
// Python source extension.
const PY: &[&str] = &["py"];
// Empty set: the signature is applied to files of any extension.
const ANY: &[&str] = &[];
// Signature table for the static pass. `static_scan` runs each regex against the
// content of every file whose extension the signature accepts and reports at most
// one finding per (file, signature) pair.
static STATIC_SIGS: &[StaticSig] = &[
// --- credential / secret access ---
StaticSig { id: "scan.static.ssh_key_read", severity: Severity::Critical, exts: ANY,
detail: "reads SSH private key material",
re: r#"(?i)[~$./\\A-Za-z_]*\.ssh[/\\](id_[a-z0-9]+|authorized_keys|known_hosts)"# },
StaticSig { id: "scan.static.cloud_creds_read", severity: Severity::Critical, exts: ANY,
detail: "reads cloud credential files",
re: r#"(?i)\.(aws[/\\]credentials|kube[/\\]config|gnupg|netrc|docker[/\\]config\.json)"# },
StaticSig { id: "scan.static.browser_secrets", severity: Severity::Critical, exts: ANY,
detail: "touches browser credential / cookie stores",
re: r#"(?i)(Login Data|Cookies|Local State)['"].{0,40}(Chrome|Chromium|Brave|Edge)|keychain-db"# },
// --- environment exfiltration ---
StaticSig { id: "scan.static.env_exfil_js", severity: Severity::High, exts: JS,
detail: "serializes the entire process environment (pair with any network call = exfil)",
re: r#"JSON\.stringify\(\s*process\.env\s*\)|Object\.(entries|keys)\(\s*process\.env\s*\)"# },
StaticSig { id: "scan.static.env_exfil_py", severity: Severity::High, exts: PY,
detail: "serializes the entire process environment",
re: r#"(json\.dumps|str)\(\s*(dict\(\s*)?os\.environ"# },
// --- dynamic code execution / subprocesses ---
StaticSig { id: "scan.static.dynamic_eval_js", severity: Severity::High, exts: JS,
detail: "dynamic code execution (eval / new Function)",
re: r#"\beval\s*\(\s*[^'")\s]|new\s+Function\s*\("# },
StaticSig { id: "scan.static.child_process_js", severity: Severity::Medium, exts: JS,
detail: "spawns shell subprocesses (child_process)",
re: r#"require\(\s*['"]child_process['"]\s*\)|from\s+['"](node:)?child_process['"]"# },
StaticSig { id: "scan.static.dynamic_require", severity: Severity::High, exts: JS,
detail: "dynamic require/import of a computed module path",
re: r#"require\s*\(\s*[A-Za-z_$][\w$]*(\[|\.|\+| )|import\s*\(\s*[A-Za-z_$][\w$]*[\s+\[]"# },
StaticSig { id: "scan.static.dynamic_exec_py", severity: Severity::High, exts: PY,
detail: "dynamic code execution (exec/eval on non-literal)",
re: r#"\b(exec|eval)\s*\(\s*[A-Za-z_]"# },
StaticSig { id: "scan.static.shell_true_py", severity: Severity::Medium, exts: PY,
detail: "subprocess with shell=True",
re: r#"subprocess\.[A-Za-z_]+\([^)]*shell\s*=\s*True"# },
// --- obfuscation ---
StaticSig { id: "scan.static.b64_exec", severity: Severity::Critical, exts: ANY,
detail: "decodes base64 then executes it",
re: r#"(?i)(eval|exec|Function|spawn|system)\s*\(\s*[^)]{0,60}(atob|b64decode|from(?:_base64)?\s*\(\s*[^)]{0,40}['"]base64)"# },
StaticSig { id: "scan.static.charcode_assembly", severity: Severity::High, exts: JS,
detail: "assembles strings from character codes (classic obfuscation)",
re: r#"String\.fromCharCode\s*\((\s*\d+\s*,){8,}"# },
StaticSig { id: "scan.static.hex_blob_decode", severity: Severity::Medium, exts: ANY,
detail: "decodes a large embedded hex/base64 blob at runtime",
re: r#"(?i)(atob|b64decode|fromhex|Buffer\.from)\s*\(\s*['"][A-Za-z0-9+/=]{200,}"# },
// --- install hooks (`static_scan` additionally restricts this one to files named package.json) ---
StaticSig { id: "scan.static.install_script", severity: Severity::Medium, exts: &["json"],
detail: "package.json declares an install-time script hook",
re: r#""(pre|post)?install"\s*:"# },
];
/// Compiled form of [`STATIC_SIGS`], built on first use.
///
/// Each entry pairs the signature's index in `STATIC_SIGS` with its compiled
/// regex. Panics on first access if a signature's pattern fails to compile.
static COMPILED_SIGS: Lazy<Vec<(usize, Regex)>> = Lazy::new(|| {
    let mut compiled = Vec::with_capacity(STATIC_SIGS.len());
    for (idx, sig) in STATIC_SIGS.iter().enumerate() {
        let regex = Regex::new(sig.re).expect("static scan signature must compile");
        compiled.push((idx, regex));
    }
    compiled
});
/// Files larger than this many bytes are skipped by the static pass.
const MAX_FILE_BYTES: u64 = 2_000_000;

/// Directory names that are never descended into.
// NOTE(review): `dist` and `build` often hold the code an npm package actually
// ships; confirm that skipping them is intended.
const SKIP_DIRS: &[&str] = &["node_modules", ".git", "dist", "build", "target", "__pycache__", ".venv", "venv"];

/// Recursively collects the regular files under `root` into `files`.
///
/// Directories listed in [`SKIP_DIRS`] and directories whose name starts with
/// `.` are not descended into. Symbolic links are never followed or collected:
/// the tree being scanned is untrusted, and a link could otherwise lead the scan
/// outside the target or into a cycle. Unreadable directories and entries are
/// skipped silently.
fn walk(root: &Path, files: &mut Vec<PathBuf>) {
    let Ok(entries) = std::fs::read_dir(root) else { return };
    for entry in entries.flatten() {
        // `DirEntry::file_type` reports the entry itself, without following
        // symlinks (unlike `Path::is_dir` / `Path::is_file`).
        let Ok(kind) = entry.file_type() else { continue };
        if kind.is_symlink() {
            continue;
        }

        let path = entry.path();
        if kind.is_dir() {
            let name = entry.file_name();
            let name = name.to_string_lossy();
            let dir_name: &str = &name;
            if !SKIP_DIRS.contains(&dir_name) && !dir_name.starts_with('.') {
                walk(&path, files);
            }
        } else if kind.is_file() {
            files.push(path);
        }
    }
}
/// Runs every static signature over the files under `root` and returns the findings.
///
/// * Files are visited in sorted path order so the output (and which findings
///   survive the per-signature cap) is deterministic.
/// * Files larger than `MAX_FILE_BYTES` and files that cannot be read are skipped.
/// * Content is decoded lossily: bytes that are not valid UTF-8 are replaced
///   rather than causing the file to be skipped, so a stray invalid byte cannot
///   hide a file from the scan.
/// * Each signature reports at most one finding per file (the first match) and
///   at most five findings per scan.
///
/// The reported location is `path:line`, with `line` being the 1-based line of
/// the first match.
pub fn static_scan(root: &Path) -> Vec<Finding> {
    /// Maximum number of findings reported per signature across the whole scan.
    const MAX_FINDINGS_PER_SIG: usize = 5;

    let mut files = Vec::new();
    walk(root, &mut files);
    // Directory iteration order is platform-dependent; sort for stable output.
    files.sort();

    let mut findings = Vec::new();
    let mut per_sig: BTreeMap<&'static str, usize> = BTreeMap::new();

    for path in files {
        if let Ok(meta) = path.metadata() {
            if meta.len() > MAX_FILE_BYTES {
                continue;
            }
        }
        let Ok(bytes) = std::fs::read(&path) else { continue };
        let lossy = String::from_utf8_lossy(&bytes);
        let content: &str = &lossy;

        let ext = path
            .extension()
            .and_then(|e| e.to_str())
            .unwrap_or("")
            .to_ascii_lowercase();
        let is_package_json = path.file_name().and_then(|n| n.to_str()) == Some("package.json");

        for (idx, re) in COMPILED_SIGS.iter() {
            let sig = &STATIC_SIGS[*idx];
            if !sig.exts.is_empty() && !sig.exts.contains(&ext.as_str()) {
                continue;
            }
            // The install-hook signature only makes sense for package.json itself.
            if sig.id == "scan.static.install_script" && !is_package_json {
                continue;
            }
            let Some(hit) = re.find(content) else { continue };

            let count = per_sig.entry(sig.id).or_insert(0);
            *count += 1;
            if *count > MAX_FINDINGS_PER_SIG {
                continue;
            }

            let line = content[..hit.start()].matches('\n').count() + 1;
            findings.push(Finding {
                pass: "static",
                id: sig.id.to_string(),
                severity: sig.severity,
                detail: sig.detail.to_string(),
                location: Some(format!("{}:{}", path.display(), line)),
            });
        }
    }
    findings
}
const YOUNG_PACKAGE_DAYS: i64 = 30;
const LOW_DOWNLOADS_WEEKLY: u64 = 50;
pub async fn npm_metadata_scan(pkg: &str) -> anyhow::Result<Vec<Finding>> {
let client = reqwest::Client::builder()
.timeout(Duration::from_secs(10))
.user_agent("aperion-shield-scan")
.build()?;
let mut findings = Vec::new();
let meta: serde_json::Value = client
.get(format!("https://registry.npmjs.org/{}", pkg))
.send()
.await?
.error_for_status()
.context("npm registry lookup failed")?
.json()
.await?;
if let Some(created) = meta
.pointer("/time/created")
.and_then(|v| v.as_str())
.and_then(|s| chrono_lite_days_since(s))
{
if created < YOUNG_PACKAGE_DAYS {
findings.push(Finding {
pass: "metadata",
id: "scan.meta.young_package".into(),
severity: Severity::Medium,
detail: format!("package is only {created} days old"),
location: None,
});
}
}
let maintainers = meta
.get("maintainers")
.and_then(|v| v.as_array())
.map(|a| a.len())
.unwrap_or(0);
if maintainers <= 1 {
findings.push(Finding {
pass: "metadata",
id: "scan.meta.single_maintainer".into(),
severity: Severity::Low,
detail: format!("{maintainers} maintainer(s) on npm"),
location: None,
});
}
if let Ok(resp) = client
.get(format!("https://api.npmjs.org/downloads/point/last-week/{}", pkg))
.send()
.await
{
if let Ok(dl) = resp.json::<serde_json::Value>().await {
if let Some(n) = dl.get("downloads").and_then(|v| v.as_u64()) {
if n < LOW_DOWNLOADS_WEEKLY {
findings.push(Finding {
pass: "metadata",
id: "scan.meta.low_adoption".into(),
severity: Severity::Low,
detail: format!("{n} downloads in the last week"),
location: None,
});
}
}
}
}
let osv: serde_json::Value = client
.post("https://api.osv.dev/v1/query")
.json(&serde_json::json!({
"package": {"name": pkg, "ecosystem": "npm"}
}))
.send()
.await?
.json()
.await
.unwrap_or_else(|_| serde_json::json!({}));
if let Some(vulns) = osv.get("vulns").and_then(|v| v.as_array()) {
for v in vulns.iter().take(5) {
let id = v.get("id").and_then(|x| x.as_str()).unwrap_or("OSV-unknown");
let summary = v.get("summary").and_then(|x| x.as_str()).unwrap_or("");
findings.push(Finding {
pass: "metadata",
id: "scan.meta.known_vuln".into(),
severity: Severity::High,
detail: format!("{id}: {summary}"),
location: None,
});
}
}
Ok(findings)
}
/// Returns the number of whole days between the calendar date at the start of
/// `rfc3339` and today (UTC), or `None` if the date cannot be parsed.
///
/// Only the `YYYY-MM-DD` part before an optional `T` is used; the time of day is
/// ignored. The result is negative for dates in the future.
///
/// Returns `None` when a component is missing or non-numeric, when the year is
/// outside `0..=9999` (RFC 3339 years are four digits; this also keeps the
/// arithmetic far from `i64` overflow), when the month is outside `1..=12`, or
/// when the day is outside `1..=31`. Day-of-month is not checked against the
/// actual month length.
fn chrono_lite_days_since(rfc3339: &str) -> Option<i64> {
    /// Days from 1970-01-01 to the given proleptic Gregorian date.
    fn days_from_civil(y: i64, m: i64, d: i64) -> i64 {
        // Treat March as the first month so the leap day falls at year end.
        let y = if m <= 2 { y - 1 } else { y };
        let era = if y >= 0 { y } else { y - 399 } / 400;
        let yoe = y - era * 400;
        let doy = (153 * (if m > 2 { m - 3 } else { m + 9 }) + 2) / 5 + d - 1;
        let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;
        era * 146097 + doe - 719468
    }

    let date = rfc3339.split('T').next()?;
    let mut parts = date.split('-');
    let year: i64 = parts.next()?.parse().ok()?;
    let month: i64 = parts.next()?.parse().ok()?;
    let day: i64 = parts.next()?.parse().ok()?;

    // The input comes from a remote registry; reject values that are not a
    // plausible calendar date instead of computing with them.
    if !(0..=9999).contains(&year) || !(1..=12).contains(&month) || !(1..=31).contains(&day) {
        return None;
    }

    let now_secs = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .ok()?
        .as_secs();
    let now_days = i64::try_from(now_secs / 86_400).ok()?;
    Some(now_days - days_from_civil(year, month, day))
}
pub async fn catalog_audit(launch: &[String], engine: &Engine) -> anyhow::Result<Vec<Finding>> {
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
let (program, args) = launch
.split_first()
.ok_or_else(|| anyhow!("empty launch command for catalog audit"))?;
let mut child = tokio::process::Command::new(program)
.args(args)
.stdin(std::process::Stdio::piped())
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::null())
.kill_on_drop(true)
.spawn()
.with_context(|| format!("failed to launch '{program}' for catalog audit"))?;
let mut stdin = child.stdin.take().ok_or_else(|| anyhow!("no child stdin"))?;
let stdout = child.stdout.take().ok_or_else(|| anyhow!("no child stdout"))?;
let mut lines = BufReader::new(stdout).lines();
let send = |frame: serde_json::Value| {
let mut s = frame.to_string();
s.push('\n');
s
};
stdin
.write_all(
send(serde_json::json!({
"jsonrpc": "2.0", "id": 1, "method": "initialize",
"params": {"protocolVersion": "2025-03-26", "capabilities": {},
"clientInfo": {"name": "aperion-shield-scan", "version": env!("CARGO_PKG_VERSION")}}
}))
.as_bytes(),
)
.await?;
stdin
.write_all(send(serde_json::json!({"jsonrpc": "2.0", "method": "notifications/initialized"})).as_bytes())
.await?;
stdin
.write_all(send(serde_json::json!({"jsonrpc": "2.0", "id": 2, "method": "tools/list"})).as_bytes())
.await?;
stdin.flush().await?;
let tools = tokio::time::timeout(Duration::from_secs(20), async {
while let Some(line) = lines.next_line().await? {
let Ok(v) = serde_json::from_str::<serde_json::Value>(&line) else { continue };
if v.get("id").and_then(|i| i.as_i64()) == Some(2) {
return Ok::<_, anyhow::Error>(
v.pointer("/result/tools").cloned().unwrap_or(serde_json::json!([])),
);
}
}
anyhow::bail!("upstream closed stdout before answering tools/list")
})
.await
.context("timed out waiting for tools/list (20s)")??;
let _ = child.kill().await;
let mut findings = Vec::new();
let empty = vec![];
let tool_list = tools.as_array().unwrap_or(&empty);
for t in tool_list {
let name = t.get("name").and_then(|v| v.as_str()).unwrap_or("<unnamed>");
let desc = t.get("description").and_then(|v| v.as_str()).unwrap_or("");
let schema = t.get("inputSchema").map(|s| s.to_string()).unwrap_or_default();
let surface = format!("{desc}\n{schema}");
let eval = engine.evaluate_scoped_text(
Scope::ToolDescription,
Some(name),
&surface,
Adjustments::default(),
);
for m in eval.matches {
findings.push(Finding {
pass: "catalog",
id: m.rule_id,
severity: m.severity,
detail: format!("tool '{name}': {}", m.reason),
location: None,
});
}
}
if tool_list.is_empty() {
findings.push(Finding {
pass: "catalog",
id: "scan.catalog.empty".into(),
severity: Severity::Low,
detail: "server advertised zero tools (nothing to audit; suspicious for an MCP server)".into(),
location: None,
});
}
Ok(findings)
}
/// Inputs for `run_scan`.
pub struct ScanOptions {
/// Raw target string; classified with `Target::parse`.
pub target: String,
/// Command line (program followed by arguments) used to start the server for
/// the live catalog audit; when empty, the catalog pass is skipped.
pub launch: Vec<String>,
/// When `true`, the registry metadata pass is skipped.
pub offline: bool,
}
/// Materializes `target` on disk and returns the directory to scan.
///
/// * `LocalPath` — returned as-is; nothing is copied.
/// * `Github` — shallow-cloned (`git clone --depth 1`) into `workdir/repo`.
/// * `Npm` — downloaded with `npm pack` into `workdir`, unpacked with `tar`, and
///   the extracted `workdir/package` directory is returned.
///
/// This function runs external commands synchronously and blocks until they finish.
///
/// # Errors
///
/// Fails if `git`, `npm` or `tar` cannot be started or exits unsuccessfully, if
/// the npm spec begins with `-` (it would be read as a command-line option), if
/// `npm pack` prints no tarball name, or if the unpacked tarball has no
/// top-level `package` directory.
pub fn fetch_target(target: &Target, workdir: &Path) -> anyhow::Result<PathBuf> {
    match target {
        Target::LocalPath(p) => Ok(p.clone()),
        Target::Github(url) => {
            let dst = workdir.join("repo");
            let out = std::process::Command::new("git")
                // `--` ends option parsing so the URL is never read as a flag.
                .args(["clone", "--depth", "1", "--"])
                .arg(url)
                .arg(&dst)
                .output()
                .context("running git clone")?;
            if !out.status.success() {
                anyhow::bail!("git clone failed: {}", String::from_utf8_lossy(&out.stderr));
            }
            Ok(dst)
        }
        Target::Npm(pkg) => {
            if pkg.starts_with('-') {
                anyhow::bail!("npm package spec '{pkg}' looks like a command-line option");
            }
            let out = std::process::Command::new("npm")
                .args(["pack", pkg.as_str(), "--silent"])
                .current_dir(workdir)
                .output()
                .context("running npm pack (is npm installed?)")?;
            if !out.status.success() {
                anyhow::bail!("npm pack failed: {}", String::from_utf8_lossy(&out.stderr));
            }
            // The tarball file name is the last line npm prints on stdout.
            let tarball = String::from_utf8_lossy(&out.stdout)
                .trim()
                .lines()
                .last()
                .map(str::to_string)
                .ok_or_else(|| anyhow!("npm pack produced no tarball name"))?;
            let tar_out = std::process::Command::new("tar")
                .arg("xzf")
                .arg(&tarball)
                .current_dir(workdir)
                .output()
                .context("extracting npm tarball")?;
            if !tar_out.status.success() {
                anyhow::bail!("tar extract failed: {}", String::from_utf8_lossy(&tar_out.stderr));
            }
            // Without this check a tarball with a different layout would leave the
            // static pass with nothing to read and the scan would report a clean pass.
            let root = workdir.join("package");
            if !root.is_dir() {
                anyhow::bail!("npm tarball '{tarball}' did not unpack to a 'package' directory");
            }
            Ok(root)
        }
    }
}
/// Runs the full scan pipeline for `opts` and returns the finalized report.
///
/// Steps, in order: classify the target, fetch it into a temporary working
/// directory, run the static pass, run the npm metadata pass (npm targets only,
/// unless `opts.offline`), run the live catalog audit (only when `opts.launch`
/// is non-empty), then compute the verdict. A failing metadata or catalog pass
/// is recorded as skipped with the error text rather than aborting the scan.
///
/// # Errors
///
/// Fails if the target cannot be classified, the working directory cannot be
/// created, or the target cannot be fetched.
pub async fn run_scan(opts: &ScanOptions, engine: &Engine) -> anyhow::Result<Report> {
    let target = Target::parse(&opts.target)?;

    // The temporary directory lives until the end of this function.
    let workdir = tempfile::tempdir().context("creating scan workdir")?;
    let root = fetch_target(&target, workdir.path())?;

    let mut report = Report {
        target: opts.target.clone(),
        findings: static_scan(&root),
        passes_run: vec!["static"],
        passes_skipped: Vec::new(),
        verdict: Verdict::Pass,
    };

    // Registry metadata pass.
    match (&target, opts.offline) {
        (_, true) => {
            report.passes_skipped.push(("metadata", "--scan-offline".into()));
        }
        (Target::Npm(pkg), false) => match npm_metadata_scan(pkg).await {
            Ok(found) => {
                report.findings.extend(found);
                report.passes_run.push("metadata");
            }
            Err(e) => report.passes_skipped.push(("metadata", format!("{e:#}"))),
        },
        (_, false) => {
            report
                .passes_skipped
                .push(("metadata", "only npm targets have registry metadata today".into()));
        }
    }

    // Live catalog audit.
    if !opts.launch.is_empty() {
        match catalog_audit(&opts.launch, engine).await {
            Ok(found) => {
                report.findings.extend(found);
                report.passes_run.push("catalog");
            }
            Err(e) => report.passes_skipped.push(("catalog", format!("{e:#}"))),
        }
    } else {
        report.passes_skipped.push((
            "catalog",
            "no launch command given (append `-- <cmd...>` to run the live catalog audit)".into(),
        ));
    }

    report.finalize();
    Ok(report)
}
// Unit tests for target parsing, the static signatures, date arithmetic and
// verdict mapping. None of them touch the network or spawn processes.
#[cfg(test)]
mod tests {
use super::*;
// Forces the lazy regex table; a signature that fails to compile panics here.
#[test]
fn all_static_signatures_compile() {
assert_eq!(COMPILED_SIGS.len(), STATIC_SIGS.len());
}
// Covers each classification branch of `Target::parse`.
#[test]
fn target_parsing() {
assert_eq!(Target::parse("npm:foo").unwrap(), Target::Npm("foo".into()));
assert_eq!(
Target::parse("https://github.com/o/r").unwrap(),
Target::Github("https://github.com/o/r".into())
);
assert_eq!(Target::parse(".").unwrap(), Target::LocalPath(".".into()));
assert_eq!(Target::parse("some-package").unwrap(), Target::Npm("some-package".into()));
assert!(Target::parse("./does-not-exist-xyz").is_err());
assert!(Target::parse("https://gitlab.com/o/r").is_err());
}
// Helper: writes `content` to a single file called `name` in a fresh temporary
// directory, runs the static pass over it, and returns the finding ids.
fn scan_str(name: &str, content: &str) -> Vec<String> {
let dir = tempfile::tempdir().unwrap();
std::fs::write(dir.path().join(name), content).unwrap();
static_scan(dir.path()).into_iter().map(|f| f.id).collect()
}
#[test]
fn static_scan_catches_ssh_read() {
let ids = scan_str("index.js", r#"const k = fs.readFileSync(home + "/.ssh/id_rsa");"#);
assert!(ids.contains(&"scan.static.ssh_key_read".to_string()), "{ids:?}");
}
#[test]
fn static_scan_catches_env_exfil() {
let ids = scan_str("x.js", "fetch(url, {body: JSON.stringify(process.env)})");
assert!(ids.contains(&"scan.static.env_exfil_js".to_string()), "{ids:?}");
}
#[test]
fn static_scan_catches_b64_exec() {
let ids = scan_str("x.js", "eval(atob(payload))");
assert!(ids.contains(&"scan.static.b64_exec".to_string()), "{ids:?}");
}
// The install-hook signature must fire for package.json but not for other
// .json files with the same content.
#[test]
fn static_scan_install_hook_only_in_package_json() {
let ids = scan_str("package.json", r#"{"scripts": {"postinstall": "node evil.js"}}"#);
assert!(ids.contains(&"scan.static.install_script".to_string()), "{ids:?}");
let ids = scan_str("README.json", r#"{"scripts": {"postinstall": "node evil.js"}}"#);
assert!(!ids.contains(&"scan.static.install_script".to_string()), "{ids:?}");
}
// False-positive guard: an ordinary MCP server snippet must produce no findings.
#[test]
fn benign_source_is_clean() {
let ids = scan_str(
"server.js",
r#"
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
const server = new Server({ name: "weather", version: "1.0.0" });
server.setRequestHandler(ListToolsRequestSchema, async () => ({
tools: [{ name: "get_forecast", description: "Get weather forecast for a city" }],
}));
"#,
);
assert!(ids.is_empty(), "{ids:?}");
}
// A past date yields a positive day count; a future date yields a negative one.
#[test]
fn days_since_parses_rfc3339() {
let d = chrono_lite_days_since("2020-01-01T00:00:00.000Z").unwrap();
assert!(d > 2000, "{d}");
let recent = chrono_lite_days_since("2099-01-01T00:00:00Z").unwrap();
assert!(recent < 0);
}
// Walks the verdict up from Pass (no findings) through Caution (Medium) to
// Fail (Critical), checking the exit codes along the way.
#[test]
fn verdict_mapping() {
let mut r = Report {
target: "t".into(), findings: vec![], passes_run: vec![],
passes_skipped: vec![], verdict: Verdict::Pass,
};
r.finalize();
assert_eq!(r.verdict, Verdict::Pass);
r.findings.push(Finding {
pass: "static", id: "x".into(), severity: Severity::Medium,
detail: "".into(), location: None,
});
r.finalize();
assert_eq!(r.verdict, Verdict::Caution);
assert_eq!(r.exit_code(), 1);
r.findings.push(Finding {
pass: "static", id: "y".into(), severity: Severity::Critical,
detail: "".into(), location: None,
});
r.finalize();
assert_eq!(r.verdict, Verdict::Fail);
assert_eq!(r.exit_code(), 2);
}
}