use std::path::Path;
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use streaming_iterator::StreamingIterator;
use tree_sitter::{Node, Query, QueryCursor, Tree};
use crate::callgraph::scanner::{ProjectScanner, ScanConfig};
use crate::error::{Result, BrrrError};
use crate::lang::LanguageRegistry;
use crate::util::format_query_error;
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Severity {
Info,
Low,
Medium,
High,
Critical,
}
impl std::fmt::Display for Severity {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Info => write!(f, "INFO"),
Self::Low => write!(f, "LOW"),
Self::Medium => write!(f, "MEDIUM"),
Self::High => write!(f, "HIGH"),
Self::Critical => write!(f, "CRITICAL"),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Confidence {
Low,
Medium,
High,
}
impl std::fmt::Display for Confidence {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Low => write!(f, "LOW"),
Self::Medium => write!(f, "MEDIUM"),
Self::High => write!(f, "HIGH"),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DeserializationMethod {
Pickle,
YamlUnsafe,
Marshal,
Shelve,
Dill,
JsonPickle,
JavaObjectStream,
XmlDecoder,
XStream,
Kryo,
Hessian,
RubyMarshal,
RubyYaml,
PhpUnserialize,
JsEval,
JsFunction,
NodeSerialize,
JsYaml,
Other,
}
impl std::fmt::Display for DeserializationMethod {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let name = match self {
Self::Pickle => "pickle",
Self::YamlUnsafe => "yaml.load (unsafe)",
Self::Marshal => "marshal",
Self::Shelve => "shelve",
Self::Dill => "dill",
Self::JsonPickle => "jsonpickle",
Self::JavaObjectStream => "ObjectInputStream",
Self::XmlDecoder => "XMLDecoder",
Self::XStream => "XStream",
Self::Kryo => "Kryo",
Self::Hessian => "Hessian",
Self::RubyMarshal => "Marshal.load",
Self::RubyYaml => "YAML.load",
Self::PhpUnserialize => "unserialize",
Self::JsEval => "eval",
Self::JsFunction => "Function constructor",
Self::NodeSerialize => "node-serialize",
Self::JsYaml => "js-yaml",
Self::Other => "deserialization",
};
write!(f, "{}", name)
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum InputSource {
HttpRequestBody,
HttpRequestParams,
FileUpload,
FileRead,
Database,
NetworkSocket,
MessageQueue,
EnvVar,
CmdLineArg,
StdIn,
InternalCache,
ConfigFile,
Unknown,
}
impl std::fmt::Display for InputSource {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let name = match self {
Self::HttpRequestBody => "HTTP request body",
Self::HttpRequestParams => "HTTP request parameters",
Self::FileUpload => "file upload",
Self::FileRead => "file read",
Self::Database => "database",
Self::NetworkSocket => "network socket",
Self::MessageQueue => "message queue",
Self::EnvVar => "environment variable",
Self::CmdLineArg => "command line argument",
Self::StdIn => "standard input",
Self::InternalCache => "internal cache",
Self::ConfigFile => "configuration file",
Self::Unknown => "unknown source",
};
write!(f, "{}", name)
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct SourceLocation {
pub file: String,
pub line: usize,
pub column: usize,
pub end_line: usize,
pub end_column: usize,
}
impl std::fmt::Display for SourceLocation {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}:{}:{}", self.file, self.line, self.column)
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeserializationFinding {
pub location: SourceLocation,
pub method: DeserializationMethod,
pub input_source: InputSource,
pub severity: Severity,
pub confidence: Confidence,
pub sink_function: String,
pub deserialized_data: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub code_snippet: Option<String>,
pub description: String,
pub remediation: String,
#[serde(skip_serializing_if = "std::ops::Not::not")]
pub possibly_safe: bool,
}
struct DeserializationSink {
language: &'static str,
module: Option<&'static str>,
function: &'static str,
method: DeserializationMethod,
severity: Severity,
unsafe_by_default: bool,
safe_alternative: Option<&'static str>,
description: &'static str,
}
const DESERIALIZATION_SINKS: &[DeserializationSink] = &[
DeserializationSink {
language: "python",
module: Some("pickle"),
function: "load",
method: DeserializationMethod::Pickle,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: None,
description: "Pickle can execute arbitrary code during deserialization via __reduce__",
},
DeserializationSink {
language: "python",
module: Some("pickle"),
function: "loads",
method: DeserializationMethod::Pickle,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: None,
description: "Pickle can execute arbitrary code during deserialization via __reduce__",
},
DeserializationSink {
language: "python",
module: Some("_pickle"),
function: "load",
method: DeserializationMethod::Pickle,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: None,
description: "C implementation of pickle - same risks as pickle",
},
DeserializationSink {
language: "python",
module: Some("_pickle"),
function: "loads",
method: DeserializationMethod::Pickle,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: None,
description: "C implementation of pickle - same risks as pickle",
},
DeserializationSink {
language: "python",
module: Some("cPickle"),
function: "load",
method: DeserializationMethod::Pickle,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: None,
description: "Legacy C pickle (Python 2) - same risks as pickle",
},
DeserializationSink {
language: "python",
module: Some("cPickle"),
function: "loads",
method: DeserializationMethod::Pickle,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: None,
description: "Legacy C pickle (Python 2) - same risks as pickle",
},
DeserializationSink {
language: "python",
module: Some("yaml"),
function: "load",
method: DeserializationMethod::YamlUnsafe,
severity: Severity::Critical,
unsafe_by_default: false, safe_alternative: Some("yaml.safe_load()"),
description: "yaml.load() can instantiate arbitrary objects via YAML tags",
},
DeserializationSink {
language: "python",
module: Some("yaml"),
function: "unsafe_load",
method: DeserializationMethod::YamlUnsafe,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: Some("yaml.safe_load()"),
description: "Explicitly unsafe YAML loading - can execute arbitrary code",
},
DeserializationSink {
language: "python",
module: Some("yaml"),
function: "full_load",
method: DeserializationMethod::YamlUnsafe,
severity: Severity::High,
unsafe_by_default: true,
safe_alternative: Some("yaml.safe_load()"),
description: "full_load allows Python object instantiation",
},
DeserializationSink {
language: "python",
module: Some("yaml"),
function: "load_all",
method: DeserializationMethod::YamlUnsafe,
severity: Severity::Critical,
unsafe_by_default: false, safe_alternative: Some("yaml.safe_load_all()"),
description: "yaml.load_all() can instantiate arbitrary objects",
},
DeserializationSink {
language: "python",
module: Some("marshal"),
function: "load",
method: DeserializationMethod::Marshal,
severity: Severity::High,
unsafe_by_default: true,
safe_alternative: None,
description: "Marshal can execute code with malformed data",
},
DeserializationSink {
language: "python",
module: Some("marshal"),
function: "loads",
method: DeserializationMethod::Marshal,
severity: Severity::High,
unsafe_by_default: true,
safe_alternative: None,
description: "Marshal can execute code with malformed data",
},
DeserializationSink {
language: "python",
module: Some("shelve"),
function: "open",
method: DeserializationMethod::Shelve,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: None,
description: "Shelve uses pickle internally - same RCE risks",
},
DeserializationSink {
language: "python",
module: Some("dill"),
function: "load",
method: DeserializationMethod::Dill,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: None,
description: "Dill extends pickle - can serialize/deserialize more object types",
},
DeserializationSink {
language: "python",
module: Some("dill"),
function: "loads",
method: DeserializationMethod::Dill,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: None,
description: "Dill extends pickle - can serialize/deserialize more object types",
},
DeserializationSink {
language: "python",
module: Some("cloudpickle"),
function: "load",
method: DeserializationMethod::Pickle,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: None,
description: "Cloudpickle extends pickle for distributed computing",
},
DeserializationSink {
language: "python",
module: Some("cloudpickle"),
function: "loads",
method: DeserializationMethod::Pickle,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: None,
description: "Cloudpickle extends pickle for distributed computing",
},
DeserializationSink {
language: "python",
module: Some("jsonpickle"),
function: "decode",
method: DeserializationMethod::JsonPickle,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: Some("json.loads()"),
description: "jsonpickle can instantiate arbitrary Python objects",
},
DeserializationSink {
language: "python",
module: Some("jsonpickle"),
function: "loads",
method: DeserializationMethod::JsonPickle,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: Some("json.loads()"),
description: "jsonpickle can instantiate arbitrary Python objects",
},
DeserializationSink {
language: "java",
module: Some("ObjectInputStream"),
function: "readObject",
method: DeserializationMethod::JavaObjectStream,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: Some("JSON/Jackson with type whitelisting"),
description: "Java deserialization can trigger gadget chains leading to RCE",
},
DeserializationSink {
language: "java",
module: Some("ObjectInputStream"),
function: "readUnshared",
method: DeserializationMethod::JavaObjectStream,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: Some("JSON/Jackson with type whitelisting"),
description: "Same risks as readObject",
},
DeserializationSink {
language: "java",
module: Some("XMLDecoder"),
function: "readObject",
method: DeserializationMethod::XmlDecoder,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: Some("Jackson XML with type whitelisting"),
description: "XMLDecoder can instantiate arbitrary classes",
},
DeserializationSink {
language: "java",
module: Some("XStream"),
function: "fromXML",
method: DeserializationMethod::XStream,
severity: Severity::Critical,
unsafe_by_default: false, safe_alternative: Some("XStream with allowTypesByWildcard/security framework"),
description: "XStream can trigger gadget chains without proper type restrictions",
},
DeserializationSink {
language: "java",
module: Some("Kryo"),
function: "readObject",
method: DeserializationMethod::Kryo,
severity: Severity::High,
unsafe_by_default: false,
safe_alternative: Some("Kryo with setRegistrationRequired(true)"),
description: "Kryo is safer but can still be dangerous without registration",
},
DeserializationSink {
language: "java",
module: Some("Kryo"),
function: "readClassAndObject",
method: DeserializationMethod::Kryo,
severity: Severity::High,
unsafe_by_default: true,
safe_alternative: Some("Kryo with setRegistrationRequired(true)"),
description: "Reading class info from untrusted input is dangerous",
},
DeserializationSink {
language: "java",
module: Some("Hessian"),
function: "readObject",
method: DeserializationMethod::Hessian,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: Some("JSON serialization"),
description: "Hessian can trigger gadget chains",
},
DeserializationSink {
language: "java",
module: Some("HessianInput"),
function: "readObject",
method: DeserializationMethod::Hessian,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: Some("JSON serialization"),
description: "Hessian can trigger gadget chains",
},
DeserializationSink {
language: "ruby",
module: Some("Marshal"),
function: "load",
method: DeserializationMethod::RubyMarshal,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: Some("JSON.parse()"),
description: "Marshal.load can execute arbitrary code",
},
DeserializationSink {
language: "ruby",
module: Some("Marshal"),
function: "restore",
method: DeserializationMethod::RubyMarshal,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: Some("JSON.parse()"),
description: "Marshal.restore is an alias for load",
},
DeserializationSink {
language: "ruby",
module: Some("YAML"),
function: "load",
method: DeserializationMethod::RubyYaml,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: Some("YAML.safe_load()"),
description: "YAML.load can instantiate arbitrary Ruby objects",
},
DeserializationSink {
language: "ruby",
module: Some("YAML"),
function: "unsafe_load",
method: DeserializationMethod::RubyYaml,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: Some("YAML.safe_load()"),
description: "Explicitly unsafe YAML loading",
},
DeserializationSink {
language: "ruby",
module: Some("Psych"),
function: "load",
method: DeserializationMethod::RubyYaml,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: Some("Psych.safe_load()"),
description: "Psych is the YAML parser - same risks as YAML.load",
},
DeserializationSink {
language: "ruby",
module: Some("Psych"),
function: "unsafe_load",
method: DeserializationMethod::RubyYaml,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: Some("Psych.safe_load()"),
description: "Explicitly unsafe Psych loading",
},
DeserializationSink {
language: "php",
module: None,
function: "unserialize",
method: DeserializationMethod::PhpUnserialize,
severity: Severity::Critical,
unsafe_by_default: false, safe_alternative: Some("json_decode()"),
description: "PHP unserialize can trigger magic methods (__wakeup, __destruct)",
},
DeserializationSink {
language: "typescript",
module: None,
function: "eval",
method: DeserializationMethod::JsEval,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: Some("JSON.parse()"),
description: "eval executes arbitrary JavaScript code",
},
DeserializationSink {
language: "javascript",
module: None,
function: "eval",
method: DeserializationMethod::JsEval,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: Some("JSON.parse()"),
description: "eval executes arbitrary JavaScript code",
},
DeserializationSink {
language: "typescript",
module: None,
function: "Function",
method: DeserializationMethod::JsFunction,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: Some("JSON.parse()"),
description: "Function constructor can execute arbitrary code",
},
DeserializationSink {
language: "javascript",
module: None,
function: "Function",
method: DeserializationMethod::JsFunction,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: Some("JSON.parse()"),
description: "Function constructor can execute arbitrary code",
},
DeserializationSink {
language: "typescript",
module: Some("serialize-javascript"),
function: "deserialize",
method: DeserializationMethod::NodeSerialize,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: Some("JSON.parse()"),
description: "serialize-javascript uses eval internally",
},
DeserializationSink {
language: "javascript",
module: Some("serialize-javascript"),
function: "deserialize",
method: DeserializationMethod::NodeSerialize,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: Some("JSON.parse()"),
description: "serialize-javascript uses eval internally",
},
DeserializationSink {
language: "typescript",
module: Some("node-serialize"),
function: "unserialize",
method: DeserializationMethod::NodeSerialize,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: Some("JSON.parse()"),
description: "node-serialize is notorious for RCE vulnerabilities",
},
DeserializationSink {
language: "javascript",
module: Some("node-serialize"),
function: "unserialize",
method: DeserializationMethod::NodeSerialize,
severity: Severity::Critical,
unsafe_by_default: true,
safe_alternative: Some("JSON.parse()"),
description: "node-serialize is notorious for RCE vulnerabilities",
},
DeserializationSink {
language: "typescript",
module: Some("js-yaml"),
function: "load",
method: DeserializationMethod::JsYaml,
severity: Severity::High,
unsafe_by_default: false, safe_alternative: Some("js-yaml with JSON_SCHEMA or FAILSAFE_SCHEMA"),
description: "js-yaml can instantiate objects with custom types",
},
DeserializationSink {
language: "javascript",
module: Some("js-yaml"),
function: "load",
method: DeserializationMethod::JsYaml,
severity: Severity::High,
unsafe_by_default: false,
safe_alternative: Some("js-yaml with JSON_SCHEMA or FAILSAFE_SCHEMA"),
description: "js-yaml can instantiate objects with custom types",
},
DeserializationSink {
language: "typescript",
module: Some("js-yaml"),
function: "loadAll",
method: DeserializationMethod::JsYaml,
severity: Severity::High,
unsafe_by_default: false,
safe_alternative: Some("js-yaml with JSON_SCHEMA or FAILSAFE_SCHEMA"),
description: "js-yaml loadAll iterates over multiple documents",
},
DeserializationSink {
language: "javascript",
module: Some("js-yaml"),
function: "loadAll",
method: DeserializationMethod::JsYaml,
severity: Severity::High,
unsafe_by_default: false,
safe_alternative: Some("js-yaml with JSON_SCHEMA or FAILSAFE_SCHEMA"),
description: "js-yaml loadAll iterates over multiple documents",
},
];
const PYTHON_DESER_QUERY: &str = r#"
; pickle.load/loads - method call on module
(call
function: (attribute
object: (identifier) @module
attribute: (identifier) @func)
arguments: (argument_list) @args
(#any-of? @module "pickle" "_pickle" "cPickle" "dill" "cloudpickle" "jsonpickle")
(#any-of? @func "load" "loads" "decode")
) @sink
; yaml.load/unsafe_load/full_load
(call
function: (attribute
object: (identifier) @module
attribute: (identifier) @func)
arguments: (argument_list) @args
(#eq? @module "yaml")
(#any-of? @func "load" "unsafe_load" "full_load" "load_all")
) @sink
; marshal.load/loads
(call
function: (attribute
object: (identifier) @module
attribute: (identifier) @func)
arguments: (argument_list) @args
(#eq? @module "marshal")
(#any-of? @func "load" "loads")
) @sink
; shelve.open
(call
function: (attribute
object: (identifier) @module
attribute: (identifier) @func)
arguments: (argument_list) @args
(#eq? @module "shelve")
(#eq? @func "open")
) @sink
; torch.load (PyTorch - uses pickle)
(call
function: (attribute
object: (identifier) @module
attribute: (identifier) @func)
arguments: (argument_list) @args
(#eq? @module "torch")
(#eq? @func "load")
) @sink
; pandas.read_pickle
(call
function: (attribute
object: (identifier) @module
attribute: (identifier) @func)
arguments: (argument_list) @args
(#any-of? @module "pd" "pandas")
(#eq? @func "read_pickle")
) @sink
; joblib.load (uses pickle)
(call
function: (attribute
object: (identifier) @module
attribute: (identifier) @func)
arguments: (argument_list) @args
(#eq? @module "joblib")
(#eq? @func "load")
) @sink
; numpy.load (can load pickled objects)
(call
function: (attribute
object: (identifier) @module
attribute: (identifier) @func)
arguments: (argument_list) @args
(#any-of? @module "np" "numpy")
(#eq? @func "load")
) @sink
"#;
const PYTHON_TAINT_QUERY: &str = r#"
; HTTP request data (Flask, Django, FastAPI)
(attribute
object: (identifier) @obj
attribute: (identifier) @attr
(#any-of? @obj "request" "req")
(#any-of? @attr "data" "body" "stream" "get_data" "get_json" "files" "content")
) @source
; request.GET/POST (Django)
(subscript
value: (attribute
object: (identifier) @obj
attribute: (identifier) @attr)
(#eq? @obj "request")
(#any-of? @attr "GET" "POST" "FILES" "body")
) @source
; File operations that could read untrusted data
(call
function: (identifier) @func
(#eq? @func "open")
) @source
(call
function: (attribute attribute: (identifier) @method)
(#any-of? @method "read" "readline" "readlines")
) @source
; socket.recv
(call
function: (attribute
object: (_) @obj
attribute: (identifier) @method)
(#any-of? @method "recv" "recvfrom" "recvmsg" "recv_into")
) @source
; stdin
(attribute
object: (identifier) @obj
attribute: (identifier) @attr
(#eq? @obj "sys")
(#eq? @attr "stdin")
) @source
"#;
const JAVA_DESER_QUERY: &str = r#"
; ObjectInputStream.readObject
(method_invocation
object: (identifier) @obj
name: (identifier) @method
arguments: (argument_list) @args
(#any-of? @method "readObject" "readUnshared")
) @sink
; XMLDecoder.readObject
(method_invocation
object: (identifier) @obj
name: (identifier) @method
(#eq? @method "readObject")
) @sink
; XStream.fromXML
(method_invocation
object: (identifier) @obj
name: (identifier) @method
(#eq? @method "fromXML")
) @sink
; Kryo.readObject/readClassAndObject
(method_invocation
object: (identifier) @obj
name: (identifier) @method
(#any-of? @method "readObject" "readClassAndObject")
) @sink
; ObjectMapper.readValue (Jackson - check for polymorphic type handling)
(method_invocation
object: (identifier) @obj
name: (identifier) @method
arguments: (argument_list) @args
(#eq? @method "readValue")
) @sink
"#;
const TYPESCRIPT_DESER_QUERY: &str = r#"
; eval() calls
(call_expression
function: (identifier) @func
arguments: (arguments) @args
(#eq? @func "eval")
) @sink
; new Function()
(new_expression
constructor: (identifier) @ctor
arguments: (arguments) @args
(#eq? @ctor "Function")
) @sink
; require('node-serialize').unserialize
(call_expression
function: (member_expression
object: (call_expression
function: (identifier) @req
arguments: (arguments (string) @mod))
property: (property_identifier) @method)
arguments: (arguments) @args
(#eq? @req "require")
(#match? @mod "node-serialize|serialize-javascript")
(#any-of? @method "unserialize" "deserialize")
) @sink
; import-based usage
(call_expression
function: (member_expression
object: (identifier) @obj
property: (property_identifier) @method)
arguments: (arguments) @args
(#any-of? @method "unserialize" "deserialize")
) @sink
; js-yaml load
(call_expression
function: (member_expression
object: (identifier) @obj
property: (property_identifier) @method)
arguments: (arguments) @args
(#any-of? @obj "yaml" "jsyaml" "YAML")
(#any-of? @method "load" "loadAll" "safeLoad")
) @sink
"#;
pub fn scan_deserialization(path: &Path, language: Option<&str>) -> Result<Vec<DeserializationFinding>> {
let path_str = path.to_str().ok_or_else(|| {
BrrrError::InvalidArgument("Invalid path encoding".to_string())
})?;
let scanner = ProjectScanner::new(path_str)?;
let config = match language {
Some(lang) => ScanConfig::for_language(lang),
None => ScanConfig::default(),
};
let scan_result = scanner.scan_with_config(&config)?;
let files = scan_result.files;
let findings: Vec<DeserializationFinding> = files
.par_iter()
.filter_map(|file| {
scan_file_deserialization(file, language).ok()
})
.flatten()
.collect();
Ok(findings)
}
pub fn scan_file_deserialization(file: &Path, language: Option<&str>) -> Result<Vec<DeserializationFinding>> {
let registry = LanguageRegistry::global();
let lang = match language {
Some(lang_name) => registry
.get_by_name(lang_name)
.ok_or_else(|| BrrrError::UnsupportedLanguage(lang_name.to_string()))?,
None => registry
.detect_language(file)
.ok_or_else(|| BrrrError::UnsupportedLanguage(
file.extension()
.and_then(|e| e.to_str())
.unwrap_or("unknown")
.to_string(),
))?,
};
let lang_name = lang.name();
let query_str = match lang_name {
"python" => Some(PYTHON_DESER_QUERY),
"java" => Some(JAVA_DESER_QUERY),
"typescript" | "javascript" => Some(TYPESCRIPT_DESER_QUERY),
_ => None,
};
let query_str = query_str.ok_or_else(|| {
BrrrError::UnsupportedLanguage(format!("{} (no deserialization query)", lang_name))
})?;
let source = std::fs::read(file).map_err(|e| BrrrError::io_with_path(e, file))?;
let mut parser = lang.parser_for_path(file)?;
let tree = parser.parse(&source, None).ok_or_else(|| BrrrError::Parse {
file: file.display().to_string(),
message: "Failed to parse file".to_string(),
})?;
let ts_lang = tree.language();
let file_path = file.display().to_string();
let findings = find_deserialization_sinks(
&tree,
&source,
&ts_lang,
query_str,
lang_name,
&file_path,
)?;
Ok(findings)
}
fn find_deserialization_sinks(
tree: &Tree,
source: &[u8],
ts_lang: &tree_sitter::Language,
query_str: &str,
lang_name: &str,
file_path: &str,
) -> Result<Vec<DeserializationFinding>> {
let query = Query::new(ts_lang, query_str)
.map_err(|e| BrrrError::TreeSitter(format_query_error(lang_name, "deserialization", query_str, &e)))?;
let mut cursor = QueryCursor::new();
let mut matches = cursor.matches(&query, tree.root_node(), source);
let sink_idx = query.capture_index_for_name("sink");
let module_idx = query.capture_index_for_name("module");
let func_idx = query.capture_index_for_name("func");
let method_idx = query.capture_index_for_name("method");
let args_idx = query.capture_index_for_name("args");
let mut findings = Vec::new();
while let Some(match_) = matches.next() {
let sink_node = sink_idx
.and_then(|idx| match_.captures.iter().find(|c| c.index == idx))
.map(|c| c.node);
let module_text = module_idx
.and_then(|idx| match_.captures.iter().find(|c| c.index == idx))
.map(|c| node_text(c.node, source));
let func_text = func_idx
.and_then(|idx| match_.captures.iter().find(|c| c.index == idx))
.map(|c| node_text(c.node, source));
let method_text = method_idx
.and_then(|idx| match_.captures.iter().find(|c| c.index == idx))
.map(|c| node_text(c.node, source));
let args_node = args_idx
.and_then(|idx| match_.captures.iter().find(|c| c.index == idx))
.map(|c| c.node);
let function_name = method_text.or(func_text).unwrap_or("unknown");
if let Some(sink_node) = sink_node {
let sink_def = find_sink_definition(lang_name, module_text, function_name);
let (is_safe, safety_reason) = check_safe_patterns(
sink_node,
args_node,
source,
lang_name,
module_text,
function_name,
);
let (input_source, deserialized_data) = analyze_input_source(
args_node,
source,
tree,
lang_name,
);
let (severity, confidence) = calculate_severity_confidence(
sink_def,
&input_source,
is_safe,
);
let location = SourceLocation {
file: file_path.to_string(),
line: sink_node.start_position().row + 1,
column: sink_node.start_position().column + 1,
end_line: sink_node.end_position().row + 1,
end_column: sink_node.end_position().column + 1,
};
let method = sink_def
.map(|s| s.method)
.unwrap_or(DeserializationMethod::Other);
let sink_function = if let Some(module) = module_text {
format!("{}.{}", module, function_name)
} else {
function_name.to_string()
};
let code_snippet = extract_code_snippet(source, &location);
let description = generate_description(
&method,
&input_source,
sink_def,
is_safe,
safety_reason.as_deref(),
);
let remediation = generate_remediation(&method, lang_name, sink_def);
if is_safe && severity <= Severity::Low {
continue;
}
findings.push(DeserializationFinding {
location,
method,
input_source,
severity,
confidence,
sink_function,
deserialized_data,
code_snippet,
description,
remediation,
possibly_safe: is_safe,
});
}
}
Ok(findings)
}
fn find_sink_definition(
lang: &str,
module: Option<&str>,
function: &str,
) -> Option<&'static DeserializationSink> {
DESERIALIZATION_SINKS.iter().find(|sink| {
sink.language == lang &&
sink.function == function &&
(sink.module.is_none() || sink.module == module)
})
}
fn check_safe_patterns(
sink_node: Node,
args_node: Option<Node>,
source: &[u8],
lang: &str,
module: Option<&str>,
function: &str,
) -> (bool, Option<String>) {
let sink_text = node_text(sink_node, source);
let args_text = args_node.map(|n| node_text(n, source)).unwrap_or("");
match lang {
"python" => {
if function == "safe_load" || function == "safe_load_all" {
return (true, Some("Using safe_load".to_string()));
}
if module == Some("yaml") && function == "load" {
if args_text.contains("SafeLoader") ||
args_text.contains("Loader=yaml.SafeLoader") ||
args_text.contains("Loader=SafeLoader")
{
return (true, Some("Using SafeLoader".to_string()));
}
}
if (module == Some("np") || module == Some("numpy")) && function == "load" {
if args_text.contains("allow_pickle=False") ||
args_text.contains("allow_pickle = False")
{
return (true, Some("allow_pickle=False".to_string()));
}
}
if args_text.contains("cache") && !args_text.contains("request") {
return (true, Some("Loading from cache (possibly internal)".to_string()));
}
if args_text.contains("/etc/") ||
args_text.contains("config") && !args_text.contains("request")
{
return (true, Some("Loading from config file (possibly trusted)".to_string()));
}
}
"java" => {
if sink_text.contains("Filter") || sink_text.contains("whitelist") {
return (true, Some("May have ObjectInputFilter".to_string()));
}
}
"typescript" | "javascript" => {
if args_text.contains("JSON_SCHEMA") ||
args_text.contains("FAILSAFE_SCHEMA") ||
args_text.contains("schema: ")
{
return (true, Some("Using safe YAML schema".to_string()));
}
if function == "eval" && sink_text.contains("JSON.parse") {
return (true, Some("eval of JSON.parse (safe)".to_string()));
}
}
"php" => {
if args_text.contains("allowed_classes") {
if args_text.contains("false") || args_text.contains("[]") {
return (true, Some("allowed_classes restriction".to_string()));
}
}
}
_ => {}
}
(false, None)
}
fn analyze_input_source(
args_node: Option<Node>,
source: &[u8],
_tree: &Tree,
_lang: &str,
) -> (InputSource, String) {
let args_text = args_node
.map(|n| node_text(n, source).to_string())
.unwrap_or_else(|| "unknown".to_string());
let lower_args = args_text.to_lowercase();
if lower_args.contains("request") {
if lower_args.contains("body") || lower_args.contains("data") ||
lower_args.contains("content") || lower_args.contains("stream")
{
return (InputSource::HttpRequestBody, args_text);
}
if lower_args.contains("param") || lower_args.contains("query") ||
lower_args.contains("get") || lower_args.contains("args")
{
return (InputSource::HttpRequestParams, args_text);
}
if lower_args.contains("file") {
return (InputSource::FileUpload, args_text);
}
return (InputSource::HttpRequestBody, args_text);
}
if lower_args.contains("open(") || lower_args.contains("file") ||
lower_args.contains("read(") || lower_args.contains("path")
{
if lower_args.contains("upload") || lower_args.contains("user") ||
lower_args.contains("input")
{
return (InputSource::FileUpload, args_text);
}
return (InputSource::FileRead, args_text);
}
if lower_args.contains("socket") || lower_args.contains("recv") ||
lower_args.contains("conn") || lower_args.contains("stream")
{
return (InputSource::NetworkSocket, args_text);
}
if lower_args.contains("cursor") || lower_args.contains("query") ||
lower_args.contains("db.") || lower_args.contains("row")
{
return (InputSource::Database, args_text);
}
if lower_args.contains("queue") || lower_args.contains("message") ||
lower_args.contains("kafka") || lower_args.contains("rabbit") ||
lower_args.contains("celery")
{
return (InputSource::MessageQueue, args_text);
}
if lower_args.contains("stdin") || lower_args.contains("input(") {
return (InputSource::StdIn, args_text);
}
if lower_args.contains("argv") || lower_args.contains("sys.argv") ||
lower_args.contains("args[")
{
return (InputSource::CmdLineArg, args_text);
}
if lower_args.contains("cache") && !lower_args.contains("request") {
return (InputSource::InternalCache, args_text);
}
if lower_args.contains("config") && !lower_args.contains("request") {
return (InputSource::ConfigFile, args_text);
}
(InputSource::Unknown, args_text)
}
fn calculate_severity_confidence(
sink_def: Option<&DeserializationSink>,
input_source: &InputSource,
is_safe: bool,
) -> (Severity, Confidence) {
let base_severity = sink_def.map(|s| s.severity).unwrap_or(Severity::High);
let severity = if is_safe {
match base_severity {
Severity::Critical => Severity::Low,
Severity::High => Severity::Info,
_ => Severity::Info,
}
} else {
match input_source {
InputSource::HttpRequestBody |
InputSource::HttpRequestParams |
InputSource::FileUpload |
InputSource::NetworkSocket |
InputSource::MessageQueue => base_severity,
InputSource::StdIn |
InputSource::CmdLineArg => {
match base_severity {
Severity::Critical => Severity::High,
s => s,
}
}
InputSource::FileRead |
InputSource::Database => {
base_severity
}
InputSource::InternalCache |
InputSource::ConfigFile => {
Severity::Low
}
InputSource::EnvVar |
InputSource::Unknown => {
match base_severity {
Severity::Critical => Severity::High,
s => s,
}
}
}
};
let confidence = match input_source {
InputSource::HttpRequestBody |
InputSource::HttpRequestParams |
InputSource::FileUpload |
InputSource::NetworkSocket => Confidence::High,
InputSource::MessageQueue |
InputSource::StdIn |
InputSource::CmdLineArg => Confidence::High,
InputSource::Database |
InputSource::FileRead => Confidence::Medium,
InputSource::InternalCache |
InputSource::ConfigFile => Confidence::Low,
InputSource::EnvVar |
InputSource::Unknown => Confidence::Low,
};
(severity, confidence)
}
fn extract_code_snippet(source: &[u8], location: &SourceLocation) -> Option<String> {
let source_str = std::str::from_utf8(source).ok()?;
let lines: Vec<&str> = source_str.lines().collect();
let start = location.line.saturating_sub(2);
let end = (location.end_line + 1).min(lines.len());
let snippet: Vec<String> = lines[start..end]
.iter()
.enumerate()
.map(|(i, line)| format!("{:4} | {}", start + i + 1, line))
.collect();
Some(snippet.join("\n"))
}
fn generate_description(
method: &DeserializationMethod,
input_source: &InputSource,
sink_def: Option<&DeserializationSink>,
is_safe: bool,
safety_reason: Option<&str>,
) -> String {
let sink_desc = sink_def
.map(|s| s.description)
.unwrap_or("Potential arbitrary code execution during deserialization");
if is_safe {
format!(
"Deserialization using {} from {}: {}. Note: {}",
method, input_source, sink_desc,
safety_reason.unwrap_or("May be using safe pattern")
)
} else {
format!(
"UNSAFE DESERIALIZATION: {} from {} - {}",
method, input_source, sink_desc
)
}
}
fn generate_remediation(
method: &DeserializationMethod,
lang: &str,
sink_def: Option<&DeserializationSink>,
) -> String {
let safe_alt = sink_def.and_then(|s| s.safe_alternative);
match (method, lang) {
(DeserializationMethod::Pickle, "python") |
(DeserializationMethod::Dill, "python") |
(DeserializationMethod::Shelve, "python") => {
"CRITICAL: Pickle-based deserialization allows arbitrary code execution.\n\
NEVER deserialize untrusted data with pickle.\n\n\
Alternatives:\n\
- Use JSON (json.loads) for simple data structures\n\
- Use Protocol Buffers or MessagePack for binary serialization\n\
- If pickle is required, only load from trusted sources with integrity verification\n\
- Consider using RestrictedPython or sandboxing if external data is unavoidable".to_string()
}
(DeserializationMethod::YamlUnsafe, "python") => {
"CRITICAL: yaml.load() can instantiate arbitrary Python objects.\n\n\
FIX: Use yaml.safe_load() instead:\n\
```python\n\
# UNSAFE:\n\
data = yaml.load(user_input) # RCE risk!\n\n\
# SAFE:\n\
data = yaml.safe_load(user_input) # Only loads basic types\n\
# Or with explicit SafeLoader:\n\
data = yaml.load(user_input, Loader=yaml.SafeLoader)\n\
```".to_string()
}
(DeserializationMethod::Marshal, "python") => {
"HIGH RISK: marshal module can execute code with malformed data.\n\n\
Alternatives:\n\
- Use JSON for data interchange\n\
- Use pickle only for trusted internal data\n\
- marshal is intended for .pyc files, not data serialization".to_string()
}
(DeserializationMethod::JavaObjectStream, "java") => {
"CRITICAL: Java ObjectInputStream can trigger gadget chain attacks.\n\n\
Mitigations:\n\
- Use ObjectInputFilter (Java 9+) to whitelist allowed classes\n\
- Use JSON/Jackson with @JsonTypeInfo whitelisting\n\
- Avoid deserializing untrusted data entirely\n\
- Remove vulnerable libraries (Commons Collections, etc.)\n\
- Use serialization libraries with type restrictions".to_string()
}
(DeserializationMethod::XmlDecoder, "java") => {
"CRITICAL: XMLDecoder can instantiate arbitrary classes.\n\n\
Fix: Avoid XMLDecoder for untrusted input.\n\
Alternatives:\n\
- Use Jackson XML with type whitelisting\n\
- Use JAXB with proper schema validation\n\
- Parse XML manually and construct objects explicitly".to_string()
}
(DeserializationMethod::XStream, "java") => {
"CRITICAL: XStream without security framework can trigger RCE.\n\n\
Fix: Configure XStream with security measures:\n\
```java\n\
XStream xstream = new XStream();\n\
xstream.addPermission(NoTypePermission.NONE);\n\
xstream.addPermission(PrimitiveTypePermission.PRIMITIVES);\n\
xstream.allowTypesByWildcard(new String[] {\n\
\"com.yourcompany.**\"\n\
});\n\
```".to_string()
}
(DeserializationMethod::RubyMarshal, "ruby") => {
"CRITICAL: Marshal.load can execute arbitrary code.\n\n\
Alternatives:\n\
- Use JSON.parse for simple data\n\
- Use Oj.safe_load for JSON with options\n\
- Never deserialize untrusted data with Marshal".to_string()
}
(DeserializationMethod::RubyYaml, "ruby") => {
"CRITICAL: YAML.load can instantiate arbitrary Ruby objects.\n\n\
FIX: Use YAML.safe_load instead:\n\
```ruby\n\
# UNSAFE:\n\
data = YAML.load(user_input) # RCE risk!\n\n\
# SAFE:\n\
data = YAML.safe_load(user_input) # Only basic types\n\
# Or with permitted classes:\n\
data = YAML.safe_load(user_input, permitted_classes: [Symbol])\n\
```".to_string()
}
(DeserializationMethod::PhpUnserialize, "php") => {
"CRITICAL: unserialize() can trigger magic methods.\n\n\
FIX: Use json_decode() or restrict allowed classes:\n\
```php\n\
// UNSAFE:\n\
$data = unserialize($user_input); // RCE risk!\n\n\
// SAFER (PHP 7+):\n\
$data = unserialize($user_input, ['allowed_classes' => false]);\n\n\
// BEST: Use JSON\n\
$data = json_decode($user_input);\n\
```".to_string()
}
(DeserializationMethod::JsEval, _) |
(DeserializationMethod::JsFunction, _) => {
"CRITICAL: eval/Function can execute arbitrary JavaScript.\n\n\
Alternatives:\n\
- Use JSON.parse() for JSON data\n\
- Use a proper template engine for dynamic content\n\
- If dynamic code is required, use vm2 with sandboxing\n\
- Consider WebAssembly for safe code execution".to_string()
}
(DeserializationMethod::NodeSerialize, _) => {
"CRITICAL: node-serialize/serialize-javascript can execute arbitrary code.\n\n\
These packages use eval internally and should NEVER be used with untrusted data.\n\n\
Alternatives:\n\
- Use JSON.parse()/JSON.stringify() for serialization\n\
- Use MessagePack or Protocol Buffers for binary serialization\n\
- Remove these packages from your dependencies".to_string()
}
(DeserializationMethod::JsYaml, _) => {
"HIGH RISK: js-yaml can execute code with unsafe types.\n\n\
FIX: Use safe schema options:\n\
```javascript\n\
const yaml = require('js-yaml');\n\n\
// UNSAFE (default in older versions):\n\
const data = yaml.load(userInput);\n\n\
// SAFE:\n\
const data = yaml.load(userInput, { schema: yaml.JSON_SCHEMA });\n\
// Or use safeLoad (deprecated but safe)\n\
const data = yaml.safeLoad(userInput);\n\
```".to_string()
}
_ => {
if let Some(alt) = safe_alt {
format!(
"Unsafe deserialization detected.\n\n\
Recommended alternative: {}\n\n\
General guidelines:\n\
- Never deserialize untrusted data\n\
- Use JSON or other safe formats when possible\n\
- If deserialization is required, whitelist allowed types",
alt
)
} else {
"Unsafe deserialization detected.\n\n\
General guidelines:\n\
- Never deserialize untrusted data\n\
- Use JSON or other safe formats when possible\n\
- If deserialization is required, whitelist allowed types".to_string()
}
}
}
}
fn node_text<'a>(node: Node<'a>, source: &'a [u8]) -> &'a str {
std::str::from_utf8(&source[node.start_byte()..node.end_byte()]).unwrap_or("")
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
use tempfile::NamedTempFile;
fn create_temp_file(content: &str, extension: &str) -> NamedTempFile {
let mut file = tempfile::Builder::new()
.suffix(extension)
.tempfile()
.expect("Failed to create temp file");
file.write_all(content.as_bytes()).expect("Failed to write");
file
}
#[test]
fn test_python_pickle_load_detection() {
let source = r#"
import pickle
def load_user_data(request):
data = pickle.load(request.body)
return data
"#;
let file = create_temp_file(source, ".py");
let findings = scan_file_deserialization(file.path(), Some("python"))
.expect("Scan should succeed");
assert!(!findings.is_empty(), "Should detect pickle.load vulnerability");
let finding = &findings[0];
assert_eq!(finding.method, DeserializationMethod::Pickle);
assert_eq!(finding.severity, Severity::Critical);
assert_eq!(finding.input_source, InputSource::HttpRequestBody);
}
#[test]
fn test_python_pickle_loads_detection() {
let source = r#"
import pickle
def process(data):
obj = pickle.loads(data)
return obj
"#;
let file = create_temp_file(source, ".py");
let findings = scan_file_deserialization(file.path(), Some("python"))
.expect("Scan should succeed");
assert!(!findings.is_empty(), "Should detect pickle.loads vulnerability");
}
#[test]
fn test_python_yaml_load_unsafe() {
let source = r#"
import yaml
def parse_config(config_str):
# Dangerous! Can execute arbitrary code
return yaml.load(config_str)
"#;
let file = create_temp_file(source, ".py");
let findings = scan_file_deserialization(file.path(), Some("python"))
.expect("Scan should succeed");
if !findings.is_empty() {
let finding = &findings[0];
assert!(
finding.method == DeserializationMethod::YamlUnsafe ||
finding.sink_function.contains("yaml"),
"yaml.load should be categorized as YAML-related"
);
}
}
#[test]
fn test_python_yaml_safe_load_not_flagged() {
let source = r#"
import yaml
def parse_config(config_str):
# This is safe
return yaml.safe_load(config_str)
"#;
let file = create_temp_file(source, ".py");
let findings = scan_file_deserialization(file.path(), Some("python"))
.expect("Scan should succeed");
assert!(
findings.is_empty() || findings.iter().all(|f| f.possibly_safe),
"safe_load should not be flagged as unsafe"
);
}
#[test]
fn test_python_yaml_with_safeloader() {
let source = r#"
import yaml
def parse_config(config_str):
return yaml.load(config_str, Loader=yaml.SafeLoader)
"#;
let file = create_temp_file(source, ".py");
let findings = scan_file_deserialization(file.path(), Some("python"))
.expect("Scan should succeed");
if !findings.is_empty() {
assert!(
findings[0].possibly_safe || findings[0].severity <= Severity::Low,
"yaml.load with SafeLoader should be marked as safe"
);
}
}
#[test]
fn test_python_marshal_detection() {
let source = r#"
import marshal
def load_bytecode(data):
return marshal.loads(data)
"#;
let file = create_temp_file(source, ".py");
let findings = scan_file_deserialization(file.path(), Some("python"))
.expect("Scan should succeed");
assert!(!findings.is_empty(), "Should detect marshal vulnerability");
assert_eq!(findings[0].method, DeserializationMethod::Marshal);
}
#[test]
fn test_python_shelve_detection() {
let source = r#"
import shelve
def load_session(session_file):
db = shelve.open(session_file)
return db['user']
"#;
let file = create_temp_file(source, ".py");
let findings = scan_file_deserialization(file.path(), Some("python"))
.expect("Scan should succeed");
assert!(!findings.is_empty(), "Should detect shelve vulnerability");
assert_eq!(findings[0].method, DeserializationMethod::Shelve);
}
#[test]
fn test_python_torch_load_detection() {
let source = r#"
import torch
def load_model(model_path):
return torch.load(model_path)
"#;
let file = create_temp_file(source, ".py");
let findings = scan_file_deserialization(file.path(), Some("python"))
.expect("Scan should succeed");
assert!(!findings.is_empty(), "Should detect torch.load vulnerability");
}
#[test]
fn test_python_numpy_load_allow_pickle_false() {
let source = r#"
import numpy as np
def load_array(file_path):
return np.load(file_path, allow_pickle=False)
"#;
let file = create_temp_file(source, ".py");
let findings = scan_file_deserialization(file.path(), Some("python"))
.expect("Scan should succeed");
if !findings.is_empty() {
assert!(
findings[0].possibly_safe,
"np.load with allow_pickle=False should be safe"
);
}
}
#[test]
fn test_typescript_eval_detection() {
let source = r#"
function processData(userInput: string) {
return eval(userInput);
}
"#;
let file = create_temp_file(source, ".ts");
let findings = scan_file_deserialization(file.path(), Some("typescript"))
.expect("Scan should succeed");
if !findings.is_empty() {
assert!(
findings[0].method == DeserializationMethod::JsEval ||
findings[0].sink_function.contains("eval"),
"eval should be categorized correctly"
);
assert!(
findings[0].severity >= Severity::High,
"eval should have high severity"
);
}
}
#[test]
fn test_typescript_function_constructor_detection() {
let source = r#"
function createDynamicFunction(code: string) {
return new Function(code);
}
"#;
let file = create_temp_file(source, ".ts");
let findings = scan_file_deserialization(file.path(), Some("typescript"))
.expect("Scan should succeed");
}
#[test]
fn test_javascript_yaml_load() {
let source = r#"
const yaml = require('js-yaml');
function parseConfig(input) {
return yaml.load(input);
}
"#;
let file = create_temp_file(source, ".js");
let findings = scan_file_deserialization(file.path(), Some("javascript"))
.expect("Scan should succeed");
assert!(!findings.is_empty(), "Should detect js-yaml vulnerability");
}
#[test]
fn test_java_objectinputstream_detection() {
let source = r#"
import java.io.*;
public class UserDeserializer {
public Object deserialize(InputStream input) throws Exception {
ObjectInputStream ois = new ObjectInputStream(input);
return ois.readObject();
}
}
"#;
let file = create_temp_file(source, ".java");
let findings = scan_file_deserialization(file.path(), Some("java"))
.expect("Scan should succeed");
if !findings.is_empty() {
assert!(
findings[0].method == DeserializationMethod::JavaObjectStream ||
findings[0].sink_function.contains("readObject") ||
findings[0].sink_function.contains("ObjectInputStream"),
"ObjectInputStream.readObject should be categorized correctly"
);
}
}
#[test]
fn test_java_xstream_detection() {
let source = r#"
import com.thoughtworks.xstream.XStream;
public class XmlParser {
public Object parse(String xml) {
XStream xstream = new XStream();
return xstream.fromXML(xml);
}
}
"#;
let file = create_temp_file(source, ".java");
let findings = scan_file_deserialization(file.path(), Some("java"))
.expect("Scan should succeed");
assert!(!findings.is_empty(), "Should detect XStream vulnerability");
}
#[test]
fn test_severity_ordering() {
assert!(Severity::Critical > Severity::High);
assert!(Severity::High > Severity::Medium);
assert!(Severity::Medium > Severity::Low);
assert!(Severity::Low > Severity::Info);
}
#[test]
fn test_confidence_ordering() {
assert!(Confidence::High > Confidence::Medium);
assert!(Confidence::Medium > Confidence::Low);
}
#[test]
fn test_input_source_display() {
assert_eq!(format!("{}", InputSource::HttpRequestBody), "HTTP request body");
assert_eq!(format!("{}", InputSource::FileUpload), "file upload");
assert_eq!(format!("{}", InputSource::Unknown), "unknown source");
}
#[test]
fn test_method_display() {
assert_eq!(format!("{}", DeserializationMethod::Pickle), "pickle");
assert_eq!(format!("{}", DeserializationMethod::YamlUnsafe), "yaml.load (unsafe)");
assert_eq!(format!("{}", DeserializationMethod::JavaObjectStream), "ObjectInputStream");
}
#[test]
fn test_source_location_display() {
let loc = SourceLocation {
file: "test.py".to_string(),
line: 10,
column: 5,
end_line: 10,
end_column: 20,
};
assert_eq!(format!("{}", loc), "test.py:10:5");
}
#[test]
fn test_safe_alternatives_exist() {
let critical_sinks: Vec<_> = DESERIALIZATION_SINKS.iter()
.filter(|s| s.severity == Severity::Critical)
.collect();
let with_alternatives: Vec<_> = critical_sinks.iter()
.filter(|s| s.safe_alternative.is_some())
.collect();
assert!(
with_alternatives.len() >= critical_sinks.len() / 2,
"Most critical sinks should have documented safe alternatives"
);
}
#[test]
fn test_deserialization_sink_count() {
let python_sinks: Vec<_> = DESERIALIZATION_SINKS.iter()
.filter(|s| s.language == "python")
.collect();
assert!(python_sinks.len() >= 10, "Should have comprehensive Python coverage");
let java_sinks: Vec<_> = DESERIALIZATION_SINKS.iter()
.filter(|s| s.language == "java")
.collect();
assert!(java_sinks.len() >= 5, "Should have comprehensive Java coverage");
}
}