use crate::core::{Finding, Severity};
use crate::plugins::traits::{PluginError, PluginReport, ScanContext, ScanPhase, SecurityPlugin};
use async_trait::async_trait;
use lazy_static::lazy_static;
use regex::Regex;
use std::path::Path;
use std::time::Instant;
// Detection rule tables, compiled once on first use.
//
// Every entry is a tuple of `(regex, finding title, severity, finding description)`.
// Rules are matched one source line at a time by `DeserializationScanner::apply_patterns`,
// so a regex can never span multiple lines.
lazy_static! {
    // Rules for Python sources.
    //
    // NOTE: `apply_patterns` suppresses any rule whose *title* contains the text
    // `yaml.load` when the matched line mentions `SafeLoader`/`BaseLoader`; keep that
    // substring in the `yaml.load()` title (and out of the other titles).
    static ref PYTHON_DESER_PATTERNS: Vec<(Regex, &'static str, Severity, &'static str)> = vec![
        (
            // `\b_?` anchors the match to the start of the module name so that
            // `cPickle.loads(` / `jsonpickle.loads(` are reported only by their own
            // dedicated rules instead of being double-counted here. `_pickle` (the C
            // accelerator module) is still covered.
            Regex::new(r"(?i)\b_?pickle\.(loads?|Unpickler)\s*\(").unwrap(),
            "Python pickle deserialization (RCE risk)",
            Severity::Critical,
            "pickle.load()/loads() executes arbitrary code during deserialization. \
             Never unpickle untrusted data. CWE-502.",
        ),
        (
            Regex::new(r"(?i)cPickle\.(loads?|Unpickler)\s*\(").unwrap(),
            "Python cPickle deserialization (RCE risk)",
            Severity::Critical,
            "cPickle is the C implementation of pickle and has the same RCE risk. CWE-502.",
        ),
        (
            // Pickle-compatible third-party serializers. `cloudpickle.loads(` used to be
            // caught by the (previously unanchored) pickle rule; this keeps it covered.
            Regex::new(r"(?i)\b(cloudpickle|dill)\.loads?\s*\(").unwrap(),
            "Python cloudpickle/dill deserialization (RCE risk)",
            Severity::Critical,
            "cloudpickle and dill build on pickle and have the same RCE risk for untrusted data. CWE-502.",
        ),
        (
            Regex::new(r"(?i)yaml\.load\s*\(").unwrap(),
            "Python yaml.load() without SafeLoader (RCE risk)",
            Severity::Critical,
            "yaml.load() without Loader=SafeLoader can execute arbitrary Python objects. \
             Use yaml.safe_load() or yaml.load(data, Loader=SafeLoader). CWE-502.",
        ),
        (
            Regex::new(r"(?i)yaml\.unsafe_load\s*\(").unwrap(),
            "Python yaml.unsafe_load() (RCE risk)",
            Severity::Critical,
            "yaml.unsafe_load() explicitly allows arbitrary code execution during YAML parsing. CWE-502.",
        ),
        (
            Regex::new(r"(?i)yaml\.full_load\s*\(").unwrap(),
            "Python yaml.full_load() (potential RCE)",
            Severity::High,
            "yaml.full_load() allows more types than safe_load. Use safe_load for untrusted input.",
        ),
        (
            Regex::new(r"(?i)marshal\.loads?\s*\(").unwrap(),
            "Python marshal deserialization",
            Severity::High,
            "marshal module is not safe for untrusted data. It can crash the interpreter.",
        ),
        (
            Regex::new(r"(?i)shelve\.open\s*\(").unwrap(),
            "Python shelve (uses pickle internally)",
            Severity::High,
            "shelve uses pickle internally and has the same RCE risk for untrusted data. CWE-502.",
        ),
        (
            Regex::new(r"(?i)jsonpickle\.(decode|loads?)\s*\(").unwrap(),
            "Python jsonpickle deserialization (RCE risk)",
            Severity::Critical,
            "jsonpickle can execute arbitrary code during deserialization. CWE-502.",
        ),
    ];

    // Rules for JVM-language sources (the scanner applies them to java/kt/scala/groovy).
    static ref JAVA_DESER_PATTERNS: Vec<(Regex, &'static str, Severity, &'static str)> = vec![
        (
            Regex::new(r"ObjectInputStream\s*\(").unwrap(),
            "Java ObjectInputStream deserialization (RCE risk)",
            Severity::Critical,
            "ObjectInputStream.readObject() can trigger arbitrary code execution \
             via gadget chains (e.g., Commons Collections, Spring). CWE-502.",
        ),
        (
            Regex::new(r"\.readObject\s*\(").unwrap(),
            "Java readObject() call",
            Severity::High,
            "readObject() deserializes Java objects. If the input is untrusted, this enables RCE. CWE-502.",
        ),
        (
            Regex::new(r"\.readUnshared\s*\(").unwrap(),
            "Java readUnshared() call",
            Severity::High,
            "readUnshared() is another deserialization entry point with the same risks as readObject().",
        ),
        (
            Regex::new(r"XMLDecoder\s*\(").unwrap(),
            "Java XMLDecoder deserialization (RCE risk)",
            Severity::Critical,
            "XMLDecoder can execute arbitrary code during XML deserialization. CWE-502.",
        ),
        (
            // Only the zero-argument constructor is flagged.
            Regex::new(r"(?i)XStream\s*\(\s*\)").unwrap(),
            "Java XStream default constructor (RCE risk)",
            Severity::Critical,
            "XStream with default settings allows arbitrary code execution. Configure security framework. CWE-502.",
        ),
        (
            Regex::new(r"(?i)Runtime\.getRuntime\(\)\.exec\s*\(").unwrap(),
            "Java Runtime.exec() call",
            Severity::High,
            "Direct command execution. Verify this is not reachable from deserialized input.",
        ),
    ];

    // Rules for XML-like content (external entities, inline DTDs, file:// payloads).
    static ref XXE_PATTERNS: Vec<(Regex, &'static str, Severity, &'static str)> = vec![
        (
            Regex::new(r"(?i)<!ENTITY\s+\w+\s+SYSTEM").unwrap(),
            "XML External Entity (XXE) declaration",
            Severity::Critical,
            "XXE SYSTEM entities can read local files (file://), make SSRF requests, or cause DoS. CWE-611.",
        ),
        (
            Regex::new(r"(?i)<!ENTITY\s+%\s+\w+\s+SYSTEM").unwrap(),
            "XML parameter entity (XXE/SSRF)",
            Severity::Critical,
            "Parameter entities can exfiltrate data via out-of-band channels. CWE-611.",
        ),
        (
            Regex::new(r"(?i)<!DOCTYPE\s+\w+\s+\[").unwrap(),
            "XML inline DTD (potential XXE vector)",
            Severity::Medium,
            "Inline DTDs can declare external entities. Disable DTD processing for untrusted XML.",
        ),
        (
            Regex::new(r"(?i)file:///").unwrap(),
            "Local file reference in XML (XXE payload)",
            Severity::High,
            "file:// URI in XML can read local files via XXE. CWE-611.",
        ),
        (
            // NOTE(review): this looks like a parser-configuration pattern that would
            // normally appear in program source, yet this table is only applied to
            // XML-like content by the scanner - confirm it is reachable as intended.
            Regex::new(r"(?i)FEATURE.*disallow-doctype-decl.*false").unwrap(),
            "XML parser DTD processing not disabled",
            Severity::High,
            "Disallow-doctype-decl set to false allows DTD processing and XXE attacks.",
        ),
    ];

    // Rules for PHP sources.
    static ref PHP_DESER_PATTERNS: Vec<(Regex, &'static str, Severity, &'static str)> = vec![
        (
            Regex::new(r"(?i)unserialize\s*\(").unwrap(),
            "PHP unserialize() (object injection risk)",
            Severity::Critical,
            "unserialize() on untrusted data enables PHP Object Injection via magic methods. CWE-502.",
        ),
    ];

    // Rules for Ruby sources. Only the parenthesised call form is matched.
    static ref RUBY_DESER_PATTERNS: Vec<(Regex, &'static str, Severity, &'static str)> = vec![
        (
            Regex::new(r"(?i)Marshal\.load\s*\(").unwrap(),
            "Ruby Marshal.load() (RCE risk)",
            Severity::Critical,
            "Marshal.load() on untrusted data can execute arbitrary code. CWE-502.",
        ),
        (
            Regex::new(r"(?i)YAML\.load\s*\(").unwrap(),
            "Ruby YAML.load() (RCE risk)",
            Severity::Critical,
            "Ruby YAML.load() can instantiate arbitrary objects. Use YAML.safe_load() instead. CWE-502.",
        ),
    ];

    // Rules for .NET sources (the scanner applies them to cs/vb).
    static ref DOTNET_DESER_PATTERNS: Vec<(Regex, &'static str, Severity, &'static str)> = vec![
        (
            Regex::new(r"(?i)BinaryFormatter\s*\(").unwrap(),
            ".NET BinaryFormatter deserialization (RCE risk)",
            Severity::Critical,
            "BinaryFormatter is insecure and deprecated. Use System.Text.Json or a safe alternative. CWE-502.",
        ),
        (
            Regex::new(r"(?i)TypeNameHandling\s*=\s*TypeNameHandling\.(All|Auto|Objects|Arrays)").unwrap(),
            ".NET Json.NET insecure TypeNameHandling",
            Severity::Critical,
            "TypeNameHandling != None allows type instantiation from JSON, enabling RCE. CWE-502.",
        ),
    ];
}
/// Plugin that detects unsafe deserialization and XXE vulnerabilities.
///
/// The scanner is a stateless unit type: all detection rules live in the
/// module-level pattern tables, so the derived `Default` value is the same
/// value that `DeserializationScanner::new()` returns.
#[derive(Default)]
pub struct DeserializationScanner;
impl DeserializationScanner {
pub fn new() -> Self {
Self
}
fn apply_patterns(
path: &Path,
content: &str,
patterns: &[(Regex, &'static str, Severity, &'static str)],
findings: &mut Vec<Finding>,
) {
for (line_num, line) in content.lines().enumerate() {
for (pattern, title, severity, description) in patterns.iter() {
if pattern.is_match(line) {
if title.contains("yaml.load")
&& (line.contains("SafeLoader") || line.contains("BaseLoader"))
{
continue;
}
findings.push(
Finding::new(
format!("DESER-{:03}", findings.len() + 1),
title.to_string(),
*severity,
)
.with_file(path.to_path_buf())
.with_line((line_num + 1) as u32)
.with_evidence(line.trim().to_string())
.with_description(description.to_string()),
);
}
}
}
}
}
#[async_trait]
impl SecurityPlugin for DeserializationScanner {
fn name(&self) -> &str {
"deserialization"
}
fn version(&self) -> &str {
"0.1.0"
}
fn description(&self) -> &str {
"Detect unsafe deserialization and XXE vulnerabilities"
}
fn scan_phase(&self) -> ScanPhase {
ScanPhase::All
}
async fn initialize(&mut self) -> Result<(), PluginError> {
Ok(())
}
async fn scan(&self, context: &ScanContext<'_>) -> Result<PluginReport, PluginError> {
let start = Instant::now();
let mut report = PluginReport::new(self.name().to_string());
if let Some(content) = context.file_content {
let content_str = String::from_utf8_lossy(content);
let ext = context
.path
.extension()
.and_then(|e| e.to_str())
.unwrap_or("");
match ext {
"py" | "pyw" => {
Self::apply_patterns(
context.path,
&content_str,
&PYTHON_DESER_PATTERNS,
&mut report.findings,
);
}
"java" | "kt" | "scala" | "groovy" => {
Self::apply_patterns(
context.path,
&content_str,
&JAVA_DESER_PATTERNS,
&mut report.findings,
);
}
"php" | "phtml" => {
Self::apply_patterns(
context.path,
&content_str,
&PHP_DESER_PATTERNS,
&mut report.findings,
);
}
"rb" | "erb" => {
Self::apply_patterns(
context.path,
&content_str,
&RUBY_DESER_PATTERNS,
&mut report.findings,
);
}
"cs" | "vb" => {
Self::apply_patterns(
context.path,
&content_str,
&DOTNET_DESER_PATTERNS,
&mut report.findings,
);
}
_ => {}
}
if ext == "xml"
|| ext == "xsl"
|| ext == "xslt"
|| ext == "svg"
|| ext == "xhtml"
|| content_str.trim_start().starts_with("<?xml")
|| content_str.contains("<!DOCTYPE")
{
Self::apply_patterns(
context.path,
&content_str,
&XXE_PATTERNS,
&mut report.findings,
);
}
if !report.findings.is_empty() {
report.scanned_files = 1;
}
}
report.duration_ms = start.elapsed().as_millis() as u64;
Ok(report)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::plugins::traits::ScanContext;
    use std::collections::HashMap;

    /// Scans `bytes` as if they were the content of a file called `file_name`
    /// (post-extract phase, no metadata) and returns the resulting report.
    async fn scan_bytes(file_name: &str, bytes: &[u8]) -> PluginReport {
        let context = ScanContext {
            path: Path::new(file_name),
            scan_phase: ScanPhase::PostExtract,
            file_content: Some(bytes),
            metadata: HashMap::new(),
        };
        DeserializationScanner::new()
            .scan(&context)
            .await
            .unwrap()
    }

    /// True when any finding in `report` has `needle` in its title.
    fn has_title(report: &PluginReport, needle: &str) -> bool {
        report.findings.iter().any(|f| f.title.contains(needle))
    }

    // `pickle.loads(...)` in a Python file must be reported.
    #[tokio::test]
    async fn test_pickle_load() {
        let report = scan_bytes("app.py", b"data = pickle.loads(request.data)").await;
        assert!(has_title(&report, "pickle"));
    }

    // `yaml.load(...)` without an explicit safe loader must be reported.
    #[tokio::test]
    async fn test_yaml_load_unsafe() {
        let report = scan_bytes("app.py", b"config = yaml.load(open('config.yml'))").await;
        assert!(has_title(&report, "yaml.load"));
    }

    // `yaml.load(..., Loader=SafeLoader)` must not be reported.
    #[tokio::test]
    async fn test_yaml_safe_load_ok() {
        let report = scan_bytes("app.py", b"config = yaml.load(data, Loader=SafeLoader)").await;
        assert!(!has_title(&report, "yaml.load"));
    }

    // An external SYSTEM entity inside an XML payload must be reported as XXE.
    #[tokio::test]
    async fn test_xxe_detection() {
        let payload =
            br#"<?xml version="1.0"?><!DOCTYPE foo [<!ENTITY xxe SYSTEM "file:///etc/passwd">]>"#;
        let report = scan_bytes("payload.xml", payload).await;
        assert!(has_title(&report, "XXE"));
    }

    // Constructing an `ObjectInputStream` in Java source must be reported.
    #[tokio::test]
    async fn test_java_object_input_stream() {
        let source = b"ObjectInputStream ois = new ObjectInputStream(socket.getInputStream());";
        let report = scan_bytes("Server.java", source).await;
        assert!(has_title(&report, "ObjectInputStream"));
    }
}