// oletools_rs 0.1.0
//
// Rust port of oletools — analysis tools for Microsoft Office files (VBA macros, DDE, OLE objects, RTF exploits)
// Documentation
//! VBA code scanner for detecting suspicious patterns.
//!
//! Scans VBA source code for AutoExec triggers, suspicious keywords,
//! IOCs (URLs, IPs, emails, executables), encoded strings, and obfuscation.

use crate::common::patterns;
use crate::vba::keywords::{
    FindingType, AUTOEXEC_KEYWORDS, AUTOEXEC_REGEX, SUSPICIOUS_KEYWORDS, SUSPICIOUS_REGEX,
};

/// A single finding from scanning VBA code.
///
/// Every scan pass in this module reports its results as `Finding` values
/// appended to a shared `Vec<Finding>`.
#[derive(Debug, Clone)]
pub struct Finding {
    /// Type of finding (which scan pass produced it).
    pub finding_type: FindingType,
    /// The keyword or pattern that matched.
    ///
    /// For keyword-table checks this is the table keyword; for regex-based
    /// checks it is the matched text. Hex and base64 matches are shortened to
    /// at most 80 bytes followed by `...`.
    pub keyword: String,
    /// Human-readable description.
    pub description: String,
}

/// Scanner for VBA source code.
///
/// A field-less unit struct: all functionality is provided by associated
/// functions that take the code to scan as an argument, with
/// `VbaScanner::scan` as the public entry point.
pub struct VbaScanner;

impl VbaScanner {
    /// Run every detection pass over `code` and return the combined findings.
    ///
    /// The passes run in a fixed order (AutoExec, suspicious keywords, IOCs,
    /// hex strings, base64 strings, Dridex obfuscation), so findings appear in
    /// the returned vector grouped in that same order.
    pub fn scan(code: &str) -> Vec<Finding> {
        // Ordered table of passes; each one appends to the shared output vector.
        let passes: [fn(&str, &mut Vec<Finding>); 6] = [
            Self::scan_autoexec,
            Self::scan_suspicious,
            Self::scan_ioc,
            Self::scan_hex_strings,
            Self::scan_base64_strings,
            Self::scan_dridex,
        ];

        let mut collected = Vec::new();
        for pass in passes.iter() {
            pass(code, &mut collected);
        }
        collected
    }

    /// Detect AutoExec triggers and append them to `findings`.
    ///
    /// Two sources are consulted:
    /// * `AUTOEXEC_KEYWORDS` — a keyword is reported when it occurs in `code`
    ///   case-insensitively and passes the rough word-boundary check in
    ///   `contains_word_case_insensitive`.
    /// * `AUTOEXEC_REGEX` — each pattern is applied to the original (not
    ///   lowercased) code and only its first match is reported.
    fn scan_autoexec(code: &str, findings: &mut Vec<Finding>) {
        let haystack = code.to_lowercase();

        for entry in AUTOEXEC_KEYWORDS {
            let needle = entry.keyword.to_lowercase();
            // Cheap substring pre-filter first; the boundary check only runs on a hit.
            let is_trigger = haystack.contains(&needle)
                && Self::contains_word_case_insensitive(code, entry.keyword);
            if !is_trigger {
                continue;
            }
            findings.push(Finding {
                finding_type: FindingType::AutoExec,
                keyword: entry.keyword.to_string(),
                description: entry.description.to_string(),
            });
        }

        let regex_hits = AUTOEXEC_REGEX.iter().filter_map(|(regex, desc)| {
            regex.find(code).map(|hit| Finding {
                finding_type: FindingType::AutoExec,
                keyword: hit.as_str().to_string(),
                description: desc.to_string(),
            })
        });
        findings.extend(regex_hits);
    }

    /// Detect suspicious keywords and append them to `findings`.
    ///
    /// Entries of `SUSPICIOUS_KEYWORDS` are matched as case-insensitive
    /// substrings (no word-boundary check is applied here). Each pattern in
    /// `SUSPICIOUS_REGEX` is applied to the original code and contributes at
    /// most one finding, for its first match.
    fn scan_suspicious(code: &str, findings: &mut Vec<Finding>) {
        let haystack = code.to_lowercase();

        for entry in SUSPICIOUS_KEYWORDS {
            let needle = entry.keyword.to_lowercase();
            if !haystack.contains(&needle) {
                continue;
            }
            findings.push(Finding {
                finding_type: FindingType::Suspicious,
                keyword: entry.keyword.to_string(),
                description: entry.description.to_string(),
            });
        }

        let regex_hits = SUSPICIOUS_REGEX.iter().filter_map(|(regex, desc)| {
            regex.find(code).map(|hit| Finding {
                finding_type: FindingType::Suspicious,
                keyword: hit.as_str().to_string(),
                description: desc.to_string(),
            })
        });
        findings.extend(regex_hits);
    }

    /// Collect indicators of compromise from `code`.
    ///
    /// Every match of the URL, IPv4, email and executable-name patterns is
    /// reported (in that order) with the matched text as the keyword.
    fn scan_ioc(code: &str, findings: &mut Vec<Finding>) {
        // Shared constructor: only the matched text and the description vary.
        let ioc = |text: &str, description: &str| Finding {
            finding_type: FindingType::Ioc,
            keyword: text.to_string(),
            description: description.to_string(),
        };

        findings.extend(
            patterns::RE_URL
                .find_iter(code)
                .map(|hit| ioc(hit.as_str(), "URL found")),
        );
        findings.extend(
            patterns::RE_IPV4
                .find_iter(code)
                .map(|hit| ioc(hit.as_str(), "IPv4 address found")),
        );
        findings.extend(
            patterns::RE_EMAIL
                .find_iter(code)
                .map(|hit| ioc(hit.as_str(), "Email address found")),
        );
        findings.extend(
            patterns::RE_EXECUTABLE
                .find_iter(code)
                .map(|hit| ioc(hit.as_str(), "Executable file name found")),
        );
    }

    /// Report every match of `patterns::RE_HEX_STRING` in `code`.
    ///
    /// The stored keyword is the matched text passed through
    /// `Self::truncate` with a limit of 80.
    fn scan_hex_strings(code: &str, findings: &mut Vec<Finding>) {
        let hits = patterns::RE_HEX_STRING.find_iter(code).map(|hit| Finding {
            finding_type: FindingType::HexString,
            keyword: Self::truncate(hit.as_str(), 80),
            description: "Hex-encoded string (may be obfuscated payload)".to_string(),
        });
        findings.extend(hits);
    }

    /// Report every match of `patterns::RE_BASE64_STRING` in `code`.
    ///
    /// The stored keyword is the matched text passed through
    /// `Self::truncate` with a limit of 80.
    fn scan_base64_strings(code: &str, findings: &mut Vec<Finding>) {
        let hits = patterns::RE_BASE64_STRING.find_iter(code).map(|hit| Finding {
            finding_type: FindingType::Base64String,
            keyword: Self::truncate(hit.as_str(), 80),
            description: "Base64-encoded string (may be obfuscated payload)".to_string(),
        });
        findings.extend(hits);
    }

    /// Report every match of `patterns::RE_DRIDEX` in `code`, keeping the
    /// full matched text as the keyword.
    fn scan_dridex(code: &str, findings: &mut Vec<Finding>) {
        let hits = patterns::RE_DRIDEX.find_iter(code).map(|hit| Finding {
            finding_type: FindingType::Dridex,
            keyword: hit.as_str().to_string(),
            description: "Dridex-style string obfuscation".to_string(),
        });
        findings.extend(hits);
    }

    /// Check if `code` contains `word` as a whole word, ignoring case.
    ///
    /// Both inputs are lowercased and every occurrence of the lowercased word
    /// is examined. An occurrence counts when the byte immediately before it
    /// and the byte immediately after it are either absent (start/end of the
    /// text) or not ASCII alphanumeric. This is a rough boundary test: `_` and
    /// all non-ASCII characters are treated as separators.
    ///
    /// Returns `true` as soon as one occurrence satisfies the boundary test.
    fn contains_word_case_insensitive(code: &str, word: &str) -> bool {
        let code_lower = code.to_lowercase();
        let word_lower = word.to_lowercase();

        // Lowercasing may change byte lengths (e.g. U+0130 grows from two to
        // three bytes), so every offset found in `code_lower` must be used
        // against `code_lower` itself, never against the original `code`.
        let bytes = code_lower.as_bytes();

        code_lower
            .match_indices(word_lower.as_str())
            .any(|(start, _)| {
                let end = start + word_lower.len();
                let before_ok = bytes[..start]
                    .last()
                    .map_or(true, |b| !b.is_ascii_alphanumeric());
                let after_ok = bytes
                    .get(end)
                    .map_or(true, |b| !b.is_ascii_alphanumeric());
                before_ok && after_ok
            })
    }

    /// Truncate a string to a maximum length, appending "..." if truncated.
    ///
    /// `max_len` is measured in bytes. Strings of at most `max_len` bytes are
    /// returned unchanged. Longer strings are cut at the largest UTF-8
    /// character boundary that does not exceed `max_len`, so the kept prefix
    /// may be slightly shorter than `max_len` when the limit falls inside a
    /// multi-byte character. The `...` suffix is not counted against
    /// `max_len`.
    fn truncate(s: &str, max_len: usize) -> String {
        if s.len() <= max_len {
            return s.to_string();
        }

        // Slicing at a non-boundary offset would panic, so back off to the
        // nearest boundary. Offset 0 is always a boundary, so this terminates.
        let mut cut = max_len;
        while !s.is_char_boundary(cut) {
            cut -= 1;
        }
        format!("{}...", &s[..cut])
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the full scan over `code` and counts the findings of the given kind.
    fn count_of(code: &str, kind: FindingType) -> usize {
        VbaScanner::scan(code)
            .iter()
            .filter(|f| f.finding_type == kind)
            .count()
    }

    #[test]
    fn test_scan_autoexec() {
        let hits = count_of(
            "Sub AutoOpen()\n  MsgBox \"Hello\"\nEnd Sub",
            FindingType::AutoExec,
        );
        assert!(hits > 0, "Should detect AutoOpen");
    }

    #[test]
    fn test_scan_suspicious() {
        let hits = count_of(
            r#"Set obj = CreateObject("WScript.Shell")"#,
            FindingType::Suspicious,
        );
        assert!(hits > 0, "Should detect suspicious keywords");
    }

    #[test]
    fn test_scan_url_ioc() {
        let hits = count_of(r#"url = "http://evil.com/payload.exe""#, FindingType::Ioc);
        assert!(hits > 0, "Should detect URL IOC");
    }

    #[test]
    fn test_scan_clean_code() {
        // Benign code may still trip a few generic keywords (e.g. "Mid"),
        // but it must stay well below the level of a malicious sample.
        let hits = count_of(
            "Sub MyFunction()\n  Dim x As Integer\n  x = 1 + 2\nEnd Sub",
            FindingType::Suspicious,
        );
        assert!(
            hits < 5,
            "Clean code should not have many suspicious findings"
        );
    }

    #[test]
    fn test_scan_dridex() {
        let hits = count_of(
            r#"s = Chr(65) & Chr(66) & StrReverse("tset")"#,
            FindingType::Dridex,
        );
        assert!(hits > 0, "Should detect Dridex-style obfuscation");
    }

    #[test]
    fn test_truncate() {
        // Within the limit: returned unchanged.
        assert_eq!(VbaScanner::truncate("short", 10), "short");
        // Over the limit: cut to the limit and suffixed with "...".
        assert_eq!(
            VbaScanner::truncate("a very long string", 10),
            "a very lon..."
        );
    }
}