// oletools_rs 0.1.0
//
// Rust port of oletools — analysis tools for Microsoft Office files (VBA macros, DDE, OLE objects, RTF exploits).
// Documentation:
//! RTF object parser — extracts OLE objects from RTF documents.
//!
//! Uses `RtfParser` to find `objdata` destinations, decodes hex data,
//! and parses `OleObject` / `OleNativeStream` from the binary content.

use crate::error::{Error, Result};
use crate::oleobj::native_stream::OleNativeStream;
use crate::oleobj::ole_object::OleObject;
use crate::rtfobj::cve::CveDetection;
use crate::rtfobj::parser::RtfParser;

/// One OLE object recovered from an `objdata` destination of an RTF document.
///
/// The field descriptions below describe how [`RtfObjParser::extract`] fills
/// the struct. `raw_data` is always present; the parsed views are `None`
/// whenever the corresponding parser did not succeed.
#[derive(Debug, Clone)]
pub struct RtfObject {
    /// 0-based position of this object among the objects returned by
    /// `extract`. `objdata` destinations that are skipped (hex decoding
    /// failed or produced no bytes) do not consume an index.
    pub index: usize,
    /// Bytes obtained by hex-decoding the `objdata` destination. `extract`
    /// never emits an object whose `raw_data` is empty.
    pub raw_data: Vec<u8>,
    /// Parsed OLE 1.0 object: the result of `OleObject::parse` over
    /// `raw_data`, or `None` if that parse returned an error.
    pub ole_object: Option<OleObject>,
    /// Parsed OLE Native Stream. Parsing is attempted only when `ole_object`
    /// is present, reports `is_package()`, and carries non-empty data; it is
    /// `None` otherwise, or when `OleNativeStream::parse` fails.
    pub native_stream: Option<OleNativeStream>,
    /// CVE detections reported for `ole_object`; empty when no OLE object
    /// could be parsed.
    pub cve_detections: Vec<CveDetection>,
}

/// RTF OLE object extractor.
pub struct RtfObjParser;

impl RtfObjParser {
    /// Extract all OLE objects from an RTF document.
    pub fn extract(data: &[u8]) -> Result<Vec<RtfObject>> {
        let parse_result = RtfParser::parse(data)?;

        let mut objects = Vec::new();
        let mut index = 0;

        for dest in &parse_result.destinations {
            if dest.name != "objdata" {
                continue;
            }

            // Decode hex data
            let raw_data = match Self::decode_hex(&dest.hex_data) {
                Ok(d) => d,
                Err(e) => {
                    log::debug!("Failed to decode objdata hex: {}", e);
                    continue;
                }
            };

            if raw_data.is_empty() {
                continue;
            }

            // Try to parse as OLE 1.0 object
            let ole_object = OleObject::parse(&raw_data).ok();

            // Check for CVE patterns
            let mut cve_detections = Vec::new();
            if let Some(ref obj) = ole_object {
                cve_detections = crate::rtfobj::cve::detect_cves(obj);
            }

            // If class_name is "Package", parse OleNativeStream from the OLE data
            let native_stream = ole_object.as_ref().and_then(|obj| {
                if obj.is_package() && !obj.data.is_empty() {
                    OleNativeStream::parse(&obj.data).ok()
                } else {
                    None
                }
            });

            objects.push(RtfObject {
                index,
                raw_data,
                ole_object,
                native_stream,
                cve_detections,
            });

            index += 1;
        }

        Ok(objects)
    }

    /// Decode hex string to bytes.
    ///
    /// Filters out whitespace and non-hex characters.
    /// If the result has odd length, drops the last nibble.
    fn decode_hex(hex_str: &str) -> Result<Vec<u8>> {
        // Filter only hex characters
        let clean: String = hex_str
            .chars()
            .filter(|c| c.is_ascii_hexdigit())
            .collect();

        if clean.is_empty() {
            return Ok(Vec::new());
        }

        // If odd length, drop last nibble
        let end = if !clean.len().is_multiple_of(2) {
            clean.len() - 1
        } else {
            clean.len()
        };

        hex::decode(&clean[..end]).map_err(|e| Error::RtfParsing(format!("Hex decode error: {e}")))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Serialise a minimal embedded OLE 1.0 object with the given class name.
    ///
    /// Layout (all integers little-endian u32): ole_version = 0x501,
    /// format_id = 2 (embedded), class name length including the NUL
    /// terminator, the NUL-terminated class name, then zero-length topic
    /// name, zero-length item name and data_size = 0.
    fn ole1_object_bytes(class_name: &str) -> Vec<u8> {
        let name_len = u32::try_from(class_name.len() + 1).expect("class name fits in u32");

        let mut bytes = Vec::new();
        bytes.extend_from_slice(&0x0501u32.to_le_bytes()); // ole_version
        bytes.extend_from_slice(&2u32.to_le_bytes()); // format_id (embedded)
        bytes.extend_from_slice(&name_len.to_le_bytes()); // class_name_len
        bytes.extend_from_slice(class_name.as_bytes());
        bytes.push(0x00); // NUL terminator
        bytes.extend_from_slice(&[0x00; 12]); // topic_name_len, item_name_len, data_size
        bytes
    }

    /// Wrap `payload` as the hex-encoded `objdata` of a minimal RTF document.
    fn rtf_with_objdata(payload: &[u8]) -> String {
        let hex_str = hex::encode(payload);
        format!(r"{{\rtf1 {{\object {{\objdata {}}}}}}}", hex_str)
    }

    #[test]
    fn test_decode_hex_valid() {
        let decoded = RtfObjParser::decode_hex("48656c6c6f").unwrap();
        assert_eq!(decoded, b"Hello");
    }

    #[test]
    fn test_decode_hex_with_whitespace() {
        let decoded = RtfObjParser::decode_hex("48 65 6c 6c 6f").unwrap();
        assert_eq!(decoded, b"Hello");
    }

    #[test]
    fn test_decode_hex_odd_length() {
        // The trailing "0" has no partner nibble and is dropped,
        // leaving "48656c6c6f" = "Hello".
        let decoded = RtfObjParser::decode_hex("48656c6c6f0").unwrap();
        assert_eq!(decoded, b"Hello");
    }

    #[test]
    fn test_decode_hex_empty() {
        let decoded = RtfObjParser::decode_hex("").unwrap();
        assert!(decoded.is_empty());
    }

    #[test]
    fn test_extract_not_rtf() {
        assert!(RtfObjParser::extract(b"Not RTF").is_err());
    }

    #[test]
    fn test_extract_no_objects() {
        let extracted = RtfObjParser::extract(br"{\rtf1 Hello World}").unwrap();
        assert!(extracted.is_empty());
    }

    #[test]
    fn test_extract_with_objdata() {
        // Minimal RTF whose objdata holds an OLE 1.0 object of class "Test".
        let rtf = rtf_with_objdata(&ole1_object_bytes("Test"));

        let extracted = RtfObjParser::extract(rtf.as_bytes()).unwrap();
        assert_eq!(extracted.len(), 1);

        let parsed = extracted[0].ole_object.as_ref();
        assert!(parsed.is_some());
        assert_eq!(parsed.unwrap().class_name, "Test");
    }

    #[test]
    fn test_extract_ole2link_cve() {
        // An object of class "OLE2Link" must be flagged as CVE-2017-0199.
        let rtf = rtf_with_objdata(&ole1_object_bytes("OLE2Link"));

        let extracted = RtfObjParser::extract(rtf.as_bytes()).unwrap();
        assert_eq!(extracted.len(), 1);

        let detections = &extracted[0].cve_detections;
        assert!(!detections.is_empty());
        assert!(detections.iter().any(|c| c.cve_id == "CVE-2017-0199"));
    }
}