rastray 0.15.0

Blazing-fast static analysis CLI for security, dependency, and performance audits.
use std::fs;
use std::sync::OnceLock;

use regex::Regex;

use crate::cli::Severity;
use crate::crawler::{CrawlSummary, FileKind};
use crate::reporter::{Category, Finding, Location};

use super::{Analyzer, AnalyzerError};

/// Analyzer that reports uses of unsafe deserialization APIs in crawled source files.
///
/// The type is a stateless unit struct; all pattern data lives in module-level tables.
#[derive(Debug, Default)]
pub struct DeserializationAnalyzer;

impl DeserializationAnalyzer {
    /// Creates a new analyzer. Equivalent to `DeserializationAnalyzer::default()`.
    pub fn new() -> Self {
        Self::default()
    }
}

impl Analyzer for DeserializationAnalyzer {
    /// Returns the analyzer's identifier, `"deserialization"`.
    fn name(&self) -> &'static str {
        "deserialization"
    }

    /// Scans crawled source files for the builtin unsafe-deserialization patterns.
    ///
    /// Only entries whose kind is `FileKind::Source` and whose path has a UTF-8
    /// extension are considered. The extension is lowercased and used to select
    /// the patterns that apply to the file; each regex match of an applicable
    /// pattern yields one `Finding` in `Category::Security` carrying the
    /// pattern's code, message, severity and help text, plus the match location.
    ///
    /// Files that fail to load via `fs::read_to_string` are skipped on a
    /// best-effort basis rather than aborting the whole analysis.
    ///
    /// # Errors
    ///
    /// Propagates the `AnalyzerError` from `compiled_patterns` when a builtin
    /// pattern fails to compile.
    fn analyze(&self, crawl: &CrawlSummary) -> Result<Vec<Finding>, AnalyzerError> {
        let patterns = compiled_patterns()?;
        let mut findings = Vec::new();
        for file in &crawl.files {
            if file.kind != FileKind::Source {
                continue;
            }
            let Some(ext) = file
                .path
                .extension()
                .and_then(|s| s.to_str())
                .map(|s| s.to_ascii_lowercase())
            else {
                continue;
            };
            // Decide which patterns apply before touching the disk: a file whose
            // extension no pattern targets can never produce a finding, so
            // reading it would be wasted I/O.
            let applicable: Vec<_> = patterns
                .iter()
                .filter(|pattern| pattern.extensions.iter().any(|e| *e == ext))
                .collect();
            if applicable.is_empty() {
                continue;
            }
            // Best effort: a file that cannot be loaded as a string is skipped.
            let Ok(contents) = fs::read_to_string(&file.path) else {
                continue;
            };
            for pattern in applicable {
                for m in pattern.regex.find_iter(&contents) {
                    let (line, column) = byte_offset_to_line_col(&contents, m.start());
                    // The span is passed as (match start byte offset, match byte length).
                    let location = Location::file(file.path.clone())
                        .with_span(m.start(), m.len())
                        .with_line(line, column);
                    findings.push(
                        Finding::new(
                            pattern.code,
                            pattern.message.to_string(),
                            pattern.severity,
                            Category::Security,
                        )
                        .with_help(pattern.help)
                        .with_location(location),
                    );
                }
            }
        }
        Ok(findings)
    }
}

/// Static description of one builtin detection rule, before its regex is compiled.
struct PatternSpec {
    /// Rule identifier reported with each finding (e.g. `RSTR-DES-001`).
    code: &'static str,
    /// Finding message shown for every match of this rule.
    message: &'static str,
    /// Severity assigned to findings produced by this rule.
    severity: Severity,
    /// Remediation hint attached to each finding.
    help: &'static str,
    /// Regex source text; compiled lazily by `compiled_patterns`.
    pattern: &'static str,
    /// Lowercase file extensions (without the dot) this rule applies to.
    extensions: &'static [&'static str],
}

/// A `PatternSpec` whose regex source has been compiled; the other fields are
/// copied over unchanged.
struct CompiledPattern {
    /// Rule identifier reported with each finding.
    code: &'static str,
    /// Finding message shown for every match of this rule.
    message: &'static str,
    /// Severity assigned to findings produced by this rule.
    severity: Severity,
    /// Remediation hint attached to each finding.
    help: &'static str,
    /// Compiled form of the rule's regex.
    regex: Regex,
    /// Lowercase file extensions (without the dot) this rule applies to.
    extensions: &'static [&'static str],
}

// Extension groups referenced by `PATTERN_SPECS`. Entries are lowercase and
// carry no leading dot, because they are compared against the lowercased
// result of `Path::extension()`.

/// Extensions scanned by the Python rules.
const PY_EXTENSIONS: &[&str] = &["py"];
/// Extensions scanned by the JavaScript/TypeScript rules.
const JS_EXTENSIONS: &[&str] = &["js", "jsx", "ts", "tsx", "mjs", "cjs"];
/// Extensions scanned by the Ruby rules.
const RB_EXTENSIONS: &[&str] = &["rb"];
/// Extensions scanned by the Java rule; Kotlin sources and scripts are included.
const JAVA_EXTENSIONS: &[&str] = &["java", "kt", "kts"];
/// Extensions scanned by the PHP rules.
const PHP_EXTENSIONS: &[&str] = &["php"];

/// Builtin unsafe-deserialization rules, in the order they are evaluated per file.
///
/// Each rule is a purely textual regex match: the patterns do not inspect call
/// arguments or determine whether the input is actually untrusted.
const PATTERN_SPECS: &[PatternSpec] = &[
    // Python: `pickle.load(`, `pickle.loads(` or `pickle.Unpickler(`.
    PatternSpec {
        code: "RSTR-DES-001",
        message: "pickle.loads on untrusted input is remote code execution",
        severity: Severity::Critical,
        help: "never deserialize untrusted pickle; switch to JSON or a schema-validated format",
        pattern: r"\bpickle\.(loads?|Unpickler)\s*\(",
        extensions: PY_EXTENSIONS,
    },
    // Python: any `yaml.load(` call. `yaml.safe_load(` does not match.
    // NOTE(review): the regex stops at the opening parenthesis, so calls that
    // already pass `Loader=yaml.SafeLoader` are reported as well.
    PatternSpec {
        code: "RSTR-DES-002",
        message: "yaml.load without an explicit SafeLoader can execute arbitrary Python",
        severity: Severity::High,
        help: "use yaml.safe_load(...) or yaml.load(stream, Loader=yaml.SafeLoader)",
        pattern: r"\byaml\.load\s*\(",
        extensions: PY_EXTENSIONS,
    },
    // Python: `marshal.load(` or `marshal.loads(`.
    PatternSpec {
        code: "RSTR-DES-003",
        message: "marshal.loads on untrusted input is remote code execution",
        severity: Severity::Critical,
        help: "never deserialize untrusted marshal data; switch to JSON",
        pattern: r"\bmarshal\.loads?\s*\(",
        extensions: PY_EXTENSIONS,
    },
    // JS/TS: a `serialize.unserialize(` call or a `require` of `node-serialize`.
    PatternSpec {
        code: "RSTR-DES-004",
        message: "node-serialize unserialize() is documented to allow RCE; do not use",
        severity: Severity::Critical,
        help: "remove node-serialize; use JSON.parse for trusted data, or a schema-validated parser",
        pattern: r#"\bserialize\.unserialize\s*\(|require\s*\(\s*['"]node-serialize['"]"#,
        extensions: JS_EXTENSIONS,
    },
    // Ruby: `Marshal.load(` or `Marshal.restore(`.
    PatternSpec {
        code: "RSTR-DES-005",
        message: "Ruby Marshal.load on untrusted input is remote code execution",
        severity: Severity::Critical,
        help: "never deserialize untrusted Marshal data; use JSON instead",
        pattern: r"\bMarshal\.(load|restore)\s*\(",
        extensions: RB_EXTENSIONS,
    },
    // Java/Kotlin: `new ObjectInputStream(` or a zero-argument `.readObject()` call.
    PatternSpec {
        code: "RSTR-DES-006",
        message: "ObjectInputStream readObject on untrusted input is a known RCE vector (e.g. CVE-2015-7501)",
        severity: Severity::Critical,
        help: "avoid native Java deserialization; use JSON or a schema-validated format",
        pattern: r"\bnew\s+ObjectInputStream\s*\(|\.readObject\s*\(\s*\)",
        extensions: JAVA_EXTENSIONS,
    },
    // PHP: any `unserialize(` call.
    PatternSpec {
        code: "RSTR-DES-007",
        message: "PHP unserialize() on untrusted input is a known RCE vector",
        severity: Severity::Critical,
        help: "avoid unserialize; use json_decode for trusted data or a typed schema parser",
        pattern: r"\bunserialize\s*\(",
        extensions: PHP_EXTENSIONS,
    },
];

static PATTERNS: OnceLock<Result<Vec<CompiledPattern>, regex::Error>> = OnceLock::new();

fn compiled_patterns() -> Result<&'static [CompiledPattern], AnalyzerError> {
    let cached = PATTERNS.get_or_init(|| {
        PATTERN_SPECS
            .iter()
            .map(|spec| {
                Regex::new(spec.pattern).map(|regex| CompiledPattern {
                    code: spec.code,
                    message: spec.message,
                    severity: spec.severity,
                    help: spec.help,
                    regex,
                    extensions: spec.extensions,
                })
            })
            .collect::<Result<Vec<_>, _>>()
    });
    match cached {
        Ok(v) => Ok(v.as_slice()),
        Err(e) => Err(AnalyzerError::Failed {
            name: "deserialization",
            message: format!("failed to compile a builtin deserialization pattern: {e}"),
        }),
    }
}

/// Converts a byte offset into `text` to a 1-based `(line, column)` pair.
///
/// Every character that starts before `offset` is consumed: a `'\n'` moves to
/// the next line and resets the column to 1, and any other character advances
/// the column by one. Columns therefore count characters, not bytes. An offset
/// at or past the end of `text` yields the position just after the last
/// character.
fn byte_offset_to_line_col(text: &str, offset: usize) -> (usize, usize) {
    text.char_indices()
        .take_while(|&(start, _)| start < offset)
        .fold((1usize, 1usize), |(line, col), (_, ch)| match ch {
            '\n' => (line + 1, 1),
            _ => (line, col + 1),
        })
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Looks up the compiled regex registered under `code`.
    ///
    /// Asserts that the builtin patterns compiled and that `code` exists, so a
    /// test can never pass vacuously because its pattern was missing. The
    /// `Option` return only lets callers use `let ... else` without unwrapping;
    /// a `None` is unreachable after the assertions.
    fn regex_for(code: &str) -> Option<&'static regex::Regex> {
        let patterns = compiled_patterns();
        assert!(
            patterns.is_ok(),
            "builtin patterns failed to compile: {:?}",
            patterns.as_ref().err()
        );
        let found = patterns
            .ok()?
            .iter()
            .find(|p| p.code == code)
            .map(|p| &p.regex);
        assert!(found.is_some(), "no builtin pattern registered under {code}");
        found
    }

    #[test]
    fn compiled_patterns_compile_cleanly() {
        let result = compiled_patterns();
        if let Err(e) = &result {
            eprintln!("pattern compile error: {e:?}");
        }
        assert!(result.is_ok());
    }

    #[test]
    fn pickle_loads_python_matches() {
        let Some(re) = regex_for("RSTR-DES-001") else { return };
        assert!(re.is_match("obj = pickle.loads(data)"));
        assert!(re.is_match("obj = pickle.load(f)"));
        assert!(re.is_match("u = pickle.Unpickler(f)"));
    }

    #[test]
    fn yaml_load_python_matches() {
        let Some(re) = regex_for("RSTR-DES-002") else { return };
        assert!(re.is_match("cfg = yaml.load(open('config.yml'))"));
        assert!(!re.is_match("cfg = yaml.safe_load(open('config.yml'))"));
    }

    #[test]
    fn marshal_python_matches() {
        let Some(re) = regex_for("RSTR-DES-003") else { return };
        assert!(re.is_match("code = marshal.loads(blob)"));
        assert!(re.is_match("code = marshal.load(f)"));
        assert!(!re.is_match("blob = marshal.dumps(code)"));
    }

    #[test]
    fn node_serialize_matches() {
        let Some(re) = regex_for("RSTR-DES-004") else { return };
        assert!(re.is_match("const o = serialize.unserialize(payload);"));
        assert!(re.is_match("const serialize = require('node-serialize');"));
        assert!(re.is_match("const serialize = require(\"node-serialize\");"));
        assert!(!re.is_match("const o = JSON.parse(payload);"));
    }

    #[test]
    fn marshal_ruby_matches() {
        let Some(re) = regex_for("RSTR-DES-005") else { return };
        assert!(re.is_match("Marshal.load(data)"));
        assert!(re.is_match("Marshal.restore(data)"));
        assert!(!re.is_match("JSON.parse(data)"));
    }

    #[test]
    fn java_readobject_matches() {
        let Some(re) = regex_for("RSTR-DES-006") else { return };
        assert!(re.is_match("ObjectInputStream ois = new ObjectInputStream(s);"));
        assert!(re.is_match("Object o = ois.readObject();"));
    }

    #[test]
    fn php_unserialize_matches() {
        let Some(re) = regex_for("RSTR-DES-007") else { return };
        assert!(re.is_match("$obj = unserialize($data);"));
        assert!(!re.is_match("$obj = json_decode($data);"));
    }

    #[test]
    fn byte_offset_maps_to_one_based_line_and_column() {
        assert_eq!(byte_offset_to_line_col("", 0), (1, 1));
        assert_eq!(byte_offset_to_line_col("abc", 2), (1, 3));
        assert_eq!(byte_offset_to_line_col("ab\ncd", 3), (2, 1));
        assert_eq!(byte_offset_to_line_col("ab\ncd", 4), (2, 2));
    }
}