oletools_rs 0.1.0

Rust port of oletools — analysis tools for Microsoft Office files (VBA macros, DDE, OLE objects, RTF exploits)
Documentation
//! MacroRaptor analyzer implementation.
//!
//! Port of oletools/mraptor.py — heuristic detection of AutoExec (A),
//! Write (W), and Execute (X) patterns in VBA source code.

use std::sync::LazyLock;

use regex::Regex;

use crate::error::Result;
use crate::vba::parser::VbaParser;

// ---------------------------------------------------------------------------
// Regex patterns ported from mraptor.py
// ---------------------------------------------------------------------------

/// AutoExec triggers — declarations of procedures that run automatically.
///
/// Case-insensitive (`(?i)`). A match starts at the beginning of the text or
/// right after a newline (the `m` flag is not set, so `^` alone only matches
/// at the start of the text), allows an optional `Private`/`Public` modifier,
/// then `Sub` or `Function`, one of the listed trigger names, and finally the
/// opening parenthesis of the parameter list.
///
/// Compiled on first access; `unwrap` panics only if the pattern itself is
/// invalid.
static RE_AUTOEXEC: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r"(?i)(?:^|\n)\s*(?:(?:Private|Public)\s+)?(?:Sub|Function)\s+(?:Auto(?:Open|Close|Exec|Exit|_Open|_Close)|Document_?(?:Open|Close|BeforeClose|BeforeSave|Change|ContentControlOnEnter)|Workbook_(?:Open|Activate|BeforeClose|BeforeSave|Deactivate)|(?:This)?(?:Workbook|Document)_(?:Open|Close|Activate|BeforeClose|BeforeSave|Deactivate)|Sheet_(?:Activate|Change|Calculate|Deactivate|BeforeDelete|FollowHyperlink|SelectionChange)|CommandButton\d*_Click|UserForm_(?:Initialize|Activate|Click))\s*\("
    ).unwrap()
});

/// Additional AutoExec pattern for module-level attributes.
///
/// Case-insensitive and multi-line (`(?im)`): matches a line of the form
/// `Attribute <name>.VB_ProcData.VB_Invoke_Func = "<value>` where the quoted
/// value contains an `a`/`A` before any closing quote.
///
/// The pattern needs the opening double quote of the attribute value, so it
/// can only match text in which string literals are still present.
static RE_AUTOEXEC_ATTR: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?im)^\s*Attribute\s+\S+\.VB_ProcData\.VB_Invoke_Func\s*=\s*"[^"]*[aA]"#
    ).unwrap()
});

/// Write operations — file system and registry modifications.
///
/// Case-insensitive alternation covering the `Open … For Output|Append|Binary`
/// statement, `Write #`/`Put #`/`Print #`, file and folder copy/create/delete
/// keywords, save methods, registry setters, and the `FileSystemObject` /
/// `ADODB.Stream` object names.
///
/// The leading `\b` applies to every alternative, including the ones that
/// start with `\.` (e.g. `\.Save`): those only match when a word character
/// sits directly before the dot.
static RE_WRITE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r"(?i)\b(?:Open\s+.+\s+For\s+(?:Output|Append|Binary)\b|Write\s*#|Put\s*#|Print\s*#|FileCopy\b|CopyFile\b|CopyFolder\b|CreateTextFile\b|CreateFolder\b|SaveAs\b|SaveToFile\b|\.Save\b|\.Write\b|\.WriteText\b|\.SaveAs\b|MkDir\b|RmDir\b|Kill\b|SetStringValue\b|SetValueEx\b|RegWrite\b|FileSystemObject|ADODB\.Stream|Scripting\.FileSystemObject)"
    ).unwrap()
});

/// Execute operations — process creation, shell commands.
///
/// Case-insensitive alternation covering `Shell`, object creation and dynamic
/// invocation keywords (`CreateObject`, `GetObject`, `CallByName`, `Eval`,
/// `Run`, …), script-host and interpreter names (`PowerShell`, `cmd.exe`,
/// `wscript`, `cscript`, `mshta`), and `ExecuteExcel4Macro` /
/// `Application.OnTime`.
///
/// The leading `\b` applies to every alternative, including the ones that
/// start with `\.` (`\.Exec`, `\.Run`): those only match when a word
/// character sits directly before the dot.
static RE_EXECUTE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r"(?i)\b(?:Shell\b|CreateObject\b|GetObject\b|CallByName\b|Eval\b|Run\b|MacScript\b|WScript\.Shell|ShellExecute\b|\.Exec\b|\.Run\b|Environ\b|PowerShell|cmd\.exe|cmd\s*/c|wscript|cscript|mshta|ExecuteExcel4Macro\b|Application\.OnTime\b)"
    ).unwrap()
});

// ---------------------------------------------------------------------------
// Flags and result types
// ---------------------------------------------------------------------------

/// Flags indicating which MacroRaptor categories were detected.
///
/// Each category is a plain `bool`. The [`Default`] value has every flag
/// cleared, which is the "nothing detected" state.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct MRaptorFlags {
    /// AutoExec trigger detected (category A).
    pub autoexec: bool,
    /// Write operation detected (category W).
    pub write: bool,
    /// Execute operation detected (category X).
    pub execute: bool,
}

impl MRaptorFlags {
    /// Check if the macro is suspicious: A AND (W OR X).
    ///
    /// Returns `true` only when an AutoExec trigger was seen together with at
    /// least one write or execute operation.
    #[must_use]
    pub fn is_suspicious(&self) -> bool {
        self.autoexec && (self.write || self.execute)
    }
}

/// Overall result of macro analysis, as returned by `MacroRaptor::scan_file`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MRaptorResult {
    /// No macros found in the document: either macro detection reported
    /// none, or extraction yielded an empty list of modules.
    NoMacro,
    /// Macros found but not suspicious.
    Clean,
    /// Macros flagged as suspicious (A AND (W OR X)).
    Suspicious,
}

// ---------------------------------------------------------------------------
// MacroRaptor
// ---------------------------------------------------------------------------

/// MacroRaptor analyzer.
///
/// A field-less unit struct: it holds no state and only groups the scanning
/// routines, which are associated functions that take no `self`.
pub struct MacroRaptor;

impl MacroRaptor {
    /// Scan VBA source code for A/W/X patterns.
    ///
    /// Procedure declarations and write/execute keywords are matched against
    /// a copy of `code` from which comments and the contents of string
    /// literals have been stripped, to reduce false positives. The
    /// `VB_Invoke_Func` attribute pattern is matched against the raw `code`,
    /// because it needs the quoted attribute value that stripping discards.
    ///
    /// Returns one flag per category; each flag is `true` when at least one
    /// pattern of that category matched.
    pub fn scan_code(code: &str) -> MRaptorFlags {
        let cleaned = Self::strip_comments_and_strings(code);

        // RE_AUTOEXEC_ATTR requires the opening `"` followed by the attribute
        // value. Stripping discards the contents of string literals, so that
        // value is never present in `cleaned` and the pattern could not match
        // there. The pattern is anchored on a line that starts with
        // `Attribute`, so comment lines in the raw source do not trigger it.
        let autoexec = RE_AUTOEXEC.is_match(&cleaned) || RE_AUTOEXEC_ATTR.is_match(code);
        let write = RE_WRITE.is_match(&cleaned);
        let execute = RE_EXECUTE.is_match(&cleaned);

        MRaptorFlags {
            autoexec,
            write,
            execute,
        }
    }

    /// Scan a file (OLE or OOXML) for suspicious macros.
    ///
    /// Parses `data`, extracts every VBA module and feeds each module's code
    /// to `scan_code`, OR-ing the per-module flags together.
    ///
    /// Returns the verdict together with the combined flags:
    /// `NoMacro` (all flags cleared) when macro detection reports none or
    /// extraction yields no modules, `Suspicious` when the combined flags
    /// satisfy A AND (W OR X), and `Clean` otherwise.
    ///
    /// # Errors
    ///
    /// Propagates any error from constructing the parser, detecting macros,
    /// or extracting them.
    pub fn scan_file(data: &[u8]) -> Result<(MRaptorResult, MRaptorFlags)> {
        // Shared "nothing detected" value; `MRaptorFlags` is `Copy`.
        let nothing_detected = MRaptorFlags {
            autoexec: false,
            write: false,
            execute: false,
        };

        let parser = VbaParser::from_bytes(data)?;

        if !parser.detect_vba_macros()? {
            return Ok((MRaptorResult::NoMacro, nothing_detected));
        }

        let modules = parser.extract_macros()?;
        if modules.is_empty() {
            return Ok((MRaptorResult::NoMacro, nothing_detected));
        }

        // Accumulate the union of the flags raised by each module.
        let mut combined = nothing_detected;
        for module in &modules {
            let found = Self::scan_code(&module.code);
            combined = MRaptorFlags {
                autoexec: combined.autoexec || found.autoexec,
                write: combined.write || found.write,
                execute: combined.execute || found.execute,
            };
        }

        let verdict = match combined.is_suspicious() {
            true => MRaptorResult::Suspicious,
            false => MRaptorResult::Clean,
        };

        Ok((verdict, combined))
    }

    /// Strip VBA comments and the contents of string literals, to avoid
    /// false positive matches inside comments/strings.
    ///
    /// Works line by line; every input line produces exactly one output line
    /// terminated by `\n`:
    ///
    /// * A line whose first non-blank character is `'`, or whose first word
    ///   is `Rem` in any letter case (followed by whitespace or the end of
    ///   the line), becomes an empty line.
    /// * Everything from an unquoted `'` to the end of the line is dropped.
    /// * Each string literal is replaced by an empty `""` placeholder. The
    ///   placeholder keeps the surrounding statement intact, so a statement
    ///   such as `Open "file" For Output` still has an operand between
    ///   `Open` and `For` after stripping. A doubled quote (`""`) inside a
    ///   literal is treated as an escaped quote, not as the end of the
    ///   literal.
    fn strip_comments_and_strings(code: &str) -> String {
        let mut result = String::with_capacity(code.len());

        for line in code.lines() {
            let trimmed = line.trim();

            // VBA keywords are case-insensitive, so `REM`/`rem` start a
            // comment too. `get(..3)` returns `None` for short lines or when
            // byte 3 is not a char boundary, which keeps the slice below safe.
            let is_rem_comment = trimmed
                .get(..3)
                .is_some_and(|keyword| keyword.eq_ignore_ascii_case("rem"))
                && trimmed[3..]
                    .chars()
                    .next()
                    .map_or(true, char::is_whitespace);

            // Skip full-line comments, keeping the line break.
            if trimmed.starts_with('\'') || is_rem_comment {
                result.push('\n');
                continue;
            }

            // Process character by character to handle inline comments and strings
            let mut chars = line.chars().peekable();
            let mut in_string = false;

            while let Some(ch) = chars.next() {
                if in_string {
                    if ch == '"' {
                        // Check for escaped double-quote ""
                        if chars.peek() == Some(&'"') {
                            chars.next();
                        } else {
                            in_string = false;
                        }
                    }
                    // String content is dropped.
                } else if ch == '"' {
                    in_string = true;
                    // Emit the placeholder up front so that an unterminated
                    // literal still leaves a balanced pair of quotes.
                    result.push_str("\"\"");
                } else if ch == '\'' {
                    // Inline comment — skip rest of line
                    break;
                } else {
                    result.push(ch);
                }
            }
            result.push('\n');
        }

        result
    }
}

// Unit tests for `MacroRaptor::scan_code` and the flag/result types.
// Each fixture is a small VBA snippet embedded as a string.
#[cfg(test)]
mod tests {
    use super::*;

    // An AutoOpen procedure that calls Shell must raise A and X and be
    // reported as suspicious.
    #[test]
    fn test_autoexec_and_shell_suspicious() {
        let code = r#"
Sub AutoOpen()
    Shell "cmd.exe /c calc"
End Sub
"#;
        let flags = MacroRaptor::scan_code(code);
        assert!(flags.autoexec);
        assert!(flags.execute);
        assert!(flags.is_suspicious());
    }

    // A Document_Open procedure that opens a file for output and prints to
    // it must raise A and W and be reported as suspicious.
    #[test]
    fn test_autoexec_and_write_suspicious() {
        let code = r#"
Sub Document_Open()
    Open "C:\test.txt" For Output As #1
    Print #1, "malicious"
    Close #1
End Sub
"#;
        let flags = MacroRaptor::scan_code(code);
        assert!(flags.autoexec);
        assert!(flags.write);
        assert!(flags.is_suspicious());
    }

    // An AutoExec trigger on its own (only a MsgBox in the body) is not
    // suspicious: neither W nor X is raised.
    #[test]
    fn test_autoexec_only_clean() {
        let code = r#"
Sub AutoOpen()
    MsgBox "Hello World"
End Sub
"#;
        let flags = MacroRaptor::scan_code(code);
        assert!(flags.autoexec);
        assert!(!flags.write);
        assert!(!flags.execute);
        assert!(!flags.is_suspicious());
    }

    // Empty source raises no flag at all.
    #[test]
    fn test_no_macro_empty() {
        let code = "";
        let flags = MacroRaptor::scan_code(code);
        assert!(!flags.autoexec);
        assert!(!flags.write);
        assert!(!flags.execute);
        assert!(!flags.is_suspicious());
    }

    // W and X without an AutoExec trigger: both flags are raised, but the
    // A AND (W OR X) rule keeps the verdict non-suspicious.
    #[test]
    fn test_write_and_execute_without_autoexec() {
        let code = r#"
Sub MyFunc()
    Shell "cmd.exe"
    FileCopy src, dst
End Sub
"#;
        let flags = MacroRaptor::scan_code(code);
        assert!(!flags.autoexec);
        assert!(flags.write);
        assert!(flags.execute);
        assert!(!flags.is_suspicious(), "Without AutoExec, not suspicious");
    }

    // Keywords that appear only inside apostrophe comments must be ignored.
    #[test]
    fn test_strip_comments() {
        let code = r#"
' This is a comment with Shell in it
Sub AutoOpen()
    ' Shell "cmd.exe" - this should not match
    MsgBox "hello"
End Sub
"#;
        let flags = MacroRaptor::scan_code(code);
        assert!(flags.autoexec);
        assert!(!flags.execute, "Shell in comment should not match");
    }

    // Keywords that appear only inside a string literal must be ignored.
    #[test]
    fn test_strip_string_literals() {
        let code = r#"
Sub AutoOpen()
    x = "Shell cmd.exe CreateObject"
    MsgBox x
End Sub
"#;
        let flags = MacroRaptor::scan_code(code);
        assert!(flags.autoexec);
        assert!(!flags.execute, "Shell inside string literal should not match");
    }

    // Workbook_Open is an AutoExec trigger; CreateObject(...).Run raises X.
    #[test]
    fn test_workbook_open() {
        let code = r#"
Sub Workbook_Open()
    CreateObject("WScript.Shell").Run "calc"
End Sub
"#;
        let flags = MacroRaptor::scan_code(code);
        assert!(flags.autoexec);
        assert!(flags.execute);
        assert!(flags.is_suspicious());
    }

    // Document_Close with a CreateObject call and a CopyFile call must raise
    // all three flags.
    #[test]
    fn test_document_close_with_filesystemobject() {
        let code = r#"
Sub Document_Close()
    Set fso = CreateObject("Scripting.FileSystemObject")
    fso.CopyFile "a.txt", "b.txt"
End Sub
"#;
        let flags = MacroRaptor::scan_code(code);
        assert!(flags.autoexec);
        assert!(flags.write);
        assert!(flags.execute);
        assert!(flags.is_suspicious());
    }

    // The result variants compare as distinct values.
    #[test]
    fn test_mraptor_result_enum() {
        assert_ne!(MRaptorResult::NoMacro, MRaptorResult::Clean);
        assert_ne!(MRaptorResult::Clean, MRaptorResult::Suspicious);
    }

    // Flags with every field cleared are not suspicious.
    #[test]
    fn test_flags_default_not_suspicious() {
        let flags = MRaptorFlags {
            autoexec: false,
            write: false,
            execute: false,
        };
        assert!(!flags.is_suspicious());
    }

    // Keywords on a full-line `Rem` comment must be ignored, while the
    // AutoOpen declaration on the next line is still detected.
    #[test]
    fn test_rem_comment_stripped() {
        let code = "Rem Shell CreateObject\nSub AutoOpen()\nEnd Sub\n";
        let flags = MacroRaptor::scan_code(code);
        assert!(flags.autoexec);
        assert!(!flags.execute, "Rem comment content should be stripped");
    }
}