// oletools_rs 0.1.0
//
// Rust port of oletools — analysis tools for Microsoft Office files (VBA macros, DDE, OLE objects, RTF exploits).
// Documentation:
//! Regex patterns for detecting IOCs (Indicators of Compromise) in VBA code.
//!
//! Patterns are compiled lazily on first use via `std::sync::LazyLock`.

use std::sync::LazyLock;

use regex::Regex;

/// Matches URLs that use the `http`, `https` or `ftp` scheme.
///
/// The scheme is matched case-insensitively and captured in group 1. It must
/// be followed by one or more characters that are not whitespace, a quote
/// (`"` or `'`), `>`, `]` or `)`.
///
/// # Panics
///
/// Panics on first access if the pattern fails to compile.
pub static RE_URL: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str = r#"(?i)(https?://|ftp://)[^\s"'>\])]+"#;
    Regex::new(PATTERN).expect("invalid URL regex")
});

/// IPv4 address pattern: four dot-separated octets, each no larger than 255,
/// delimited by word boundaries.
///
/// Octets are spelled with the explicit ASCII class `[0-9]` rather than `\d`.
/// In the `regex` crate `\d` is Unicode-aware by default and would also accept
/// non-ASCII decimal digits (for example Arabic-Indic numerals), which are
/// never part of a real IPv4 address.
///
/// # Panics
///
/// Panics on first access if the pattern fails to compile.
pub static RE_IPV4: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b",
    )
    .expect("invalid IPv4 regex")
});

/// Matches e-mail addresses of the form `local@domain.tld`.
///
/// Matching is case-insensitive. The local part may contain letters, digits
/// and `. _ % + -`; the domain may contain letters, digits, `.` and `-`, and
/// must end in a dot followed by at least two letters. Both ends of the match
/// sit on a word boundary.
///
/// # Panics
///
/// Panics on first access if the pattern fails to compile.
pub static RE_EMAIL: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r"(?i)\b[a-z0-9._%+\-]+@[a-z0-9.\-]+\.[a-z]{2,}\b";
    Regex::new(pattern).expect("invalid email regex")
});

/// Matches file names that carry an executable or script extension.
///
/// A match is one or more word characters, a dot, and one of the listed
/// extensions (case-insensitive), bounded by word boundaries. The extension
/// itself is captured in group 1.
///
/// # Panics
///
/// Panics on first access if the pattern fails to compile.
pub static RE_EXECUTABLE: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str =
        r"(?i)\b\w+\.(exe|dll|scr|bat|cmd|com|pif|vbs|vbe|js|jse|wsf|wsh|ps1|msi)\b";
    Regex::new(PATTERN).expect("invalid executable regex")
});

/// Matches a double-quoted string literal made up purely of hex byte pairs.
///
/// At least eight pairs (16 hex digits) are required between the quotes, and
/// the digit count is always even. Both upper- and lower-case digits are
/// accepted.
///
/// # Panics
///
/// Panics on first access if the pattern fails to compile.
pub static RE_HEX_STRING: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r#""(?:[0-9A-Fa-f]{2}){8,}""#;
    Regex::new(pattern).expect("invalid hex string regex")
});

/// Matches a double-quoted string literal that looks like base64 data.
///
/// The quoted body must hold at least five 4-character groups drawn from the
/// standard base64 alphabet (20+ characters), optionally followed by one
/// padded final group (`xx==` or `xxx=`).
///
/// # Panics
///
/// Panics on first access if the pattern fails to compile.
pub static RE_BASE64_STRING: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str =
        r#""(?:[A-Za-z0-9+/]{4}){5,}(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?""#;
    Regex::new(PATTERN).expect("invalid base64 regex")
});

/// Matches calls to string-building functions used in Dridex-style
/// obfuscation.
///
/// Recognised names are `StrReverse`, `Chr`, `ChrB`, `ChrW` and `Asc`; the
/// last four may carry a `$` suffix. The name must start on a word boundary
/// and be followed by optional whitespace and an opening parenthesis.
/// Matching is case-insensitive.
///
/// # Panics
///
/// Panics on first access if the pattern fails to compile.
pub static RE_DRIDEX: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r"(?i)\b(?:StrReverse|Chr\$?|ChrB\$?|ChrW\$?|Asc\$?)\s*\(";
    Regex::new(pattern).expect("invalid Dridex regex")
});

#[cfg(test)]
mod tests {
    //! Smoke tests: each pattern is checked against a few samples that must
    //! match and, where applicable, a few that must not.

    use super::*;

    #[test]
    fn test_url_match() {
        for sample in ["http://evil.com/payload.exe", "https://example.org/test"] {
            assert!(RE_URL.is_match(sample), "URL pattern should match {sample:?}");
        }
        for sample in ["not a url"] {
            assert!(!RE_URL.is_match(sample), "URL pattern should reject {sample:?}");
        }
    }

    #[test]
    fn test_ipv4_match() {
        for sample in ["192.168.1.1", "10.0.0.1"] {
            assert!(RE_IPV4.is_match(sample), "IPv4 pattern should match {sample:?}");
        }
        // Every octet is out of range, so no sub-span may match either.
        for sample in ["999.999.999.999"] {
            assert!(!RE_IPV4.is_match(sample), "IPv4 pattern should reject {sample:?}");
        }
    }

    #[test]
    fn test_email_match() {
        for sample in ["test@example.com"] {
            assert!(RE_EMAIL.is_match(sample), "email pattern should match {sample:?}");
        }
        for sample in ["not-an-email"] {
            assert!(!RE_EMAIL.is_match(sample), "email pattern should reject {sample:?}");
        }
    }

    #[test]
    fn test_executable_match() {
        for sample in ["payload.exe", "script.ps1"] {
            assert!(
                RE_EXECUTABLE.is_match(sample),
                "executable pattern should match {sample:?}"
            );
        }
        for sample in ["document.txt"] {
            assert!(
                !RE_EXECUTABLE.is_match(sample),
                "executable pattern should reject {sample:?}"
            );
        }
    }

    #[test]
    fn test_hex_string() {
        let long_enough = r#""4D5A90000300000004000000FFFF0000""#;
        let too_short = r#""ABCD""#;
        assert!(RE_HEX_STRING.is_match(long_enough));
        assert!(!RE_HEX_STRING.is_match(too_short));
    }

    #[test]
    fn test_base64_string() {
        let padded = r#""TWFuIGlzIGRpc3Rpbmd1aXNoZWQ=""#;
        assert!(RE_BASE64_STRING.is_match(padded));
    }

    #[test]
    fn test_dridex_pattern() {
        for sample in ["Chr(65)", "StrReverse(x)"] {
            assert!(RE_DRIDEX.is_match(sample), "Dridex pattern should match {sample:?}");
        }
    }
}