// oletools_rs 0.1.0
//
// Rust port of oletools — analysis tools for Microsoft Office files (VBA macros, DDE, OLE objects, RTF exploits).
// Documentation
//! OLE 1.0 Object parser (MS-OLEDS 2.2).
//!
//! Parses OLE 1.0 embedded objects which use a length-prefixed string format
//! for class name, topic name, and item name, followed by embedded data.

use crate::error::{Error, Result};

/// Parsed OLE 1.0 Object.
///
/// Produced by [`OleObject::parse`]. Fields are declared in the same order in
/// which they are read from the binary stream.
#[derive(Debug, Clone)]
pub struct OleObject {
    /// OLE version (typically 0x00000501 for OLE 1.0).
    pub ole_version: u32,
    /// Raw format ID (1 = linked, 2 = embedded, 3 = static).
    ///
    /// Use [`OleObject::format`] for the typed view of this value.
    pub format_id: u32,
    /// Class name (e.g., "Package", "OLE2Link", "Equation.3"), with a single
    /// trailing NUL terminator removed if one was present.
    pub class_name: String,
    /// Topic name (often the original filename).
    pub topic_name: String,
    /// Item name.
    pub item_name: String,
    /// Size of the embedded data as declared in the stream.
    ///
    /// This is the raw header value; it can be larger than `data.len()` when
    /// the input ended before the declared number of bytes.
    pub data_size: u32,
    /// The embedded data: at most `data_size` bytes, fewer if the input was
    /// truncated.
    pub data: Vec<u8>,
}

/// Format IDs for OLE 1.0 objects.
///
/// Typed view of the raw `u32` format ID; build one with `OleFormatId::from`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OleFormatId {
    /// Linked object (raw value 1).
    Linked,
    /// Embedded object (raw value 2).
    Embedded,
    /// Static, presentation only (raw value 3).
    Static,
    /// Unknown format ID; carries the raw value unchanged.
    Unknown(u32),
}

impl From<u32> for OleFormatId {
    fn from(val: u32) -> Self {
        match val {
            1 => OleFormatId::Linked,
            2 => OleFormatId::Embedded,
            3 => OleFormatId::Static,
            other => OleFormatId::Unknown(other),
        }
    }
}

impl OleObject {
    /// Parse an OLE 1.0 object from raw binary data.
    ///
    /// Binary layout:
    /// - ole_version: u32 LE
    /// - format_id: u32 LE
    /// - class_name_len: u32 LE, then class_name bytes (ANSI, includes null)
    /// - topic_name_len: u32 LE, then topic_name bytes
    /// - item_name_len: u32 LE, then item_name bytes
    /// - data_size: u32 LE
    /// - data: [u8; data_size]
    ///
    /// The payload is read leniently: if fewer than `data_size` bytes remain,
    /// every remaining byte is returned in `data` while `data_size` keeps the
    /// declared value. Bytes after the declared payload are ignored.
    ///
    /// # Errors
    ///
    /// Returns [`Error::OleObjectParsing`] when `data` is shorter than the two
    /// leading `u32` fields, or when it ends inside a length prefix, inside a
    /// string, or inside the `data_size` field.
    pub fn parse(data: &[u8]) -> Result<Self> {
        if data.len() < 8 {
            return Err(Error::OleObjectParsing("OLE object data too short".into()));
        }

        let mut pos = 0;

        let ole_version = read_u32_le(data, &mut pos)?;
        let format_id = read_u32_le(data, &mut pos)?;

        // Three length-prefixed strings, in stream order.
        let class_name = read_length_prefixed_string(data, &mut pos)?;
        let topic_name = read_length_prefixed_string(data, &mut pos)?;
        let item_name = read_length_prefixed_string(data, &mut pos)?;

        let data_size = read_u32_le(data, &mut pos)?;

        // Clamp the declared size to what is actually left instead of
        // computing `pos + data_size`: that sum can overflow `usize` on
        // 32-bit targets when `data_size` comes from a hostile file.
        // `pos <= data.len()` holds here because the read above succeeded.
        let remaining = &data[pos..];
        let take = (data_size as usize).min(remaining.len());
        let obj_data = remaining[..take].to_vec();

        Ok(Self {
            ole_version,
            format_id,
            class_name,
            topic_name,
            item_name,
            data_size,
            data: obj_data,
        })
    }

    /// Get the format ID as a typed enum.
    pub fn format(&self) -> OleFormatId {
        OleFormatId::from(self.format_id)
    }

    /// Check if the class name indicates a "Package" object
    /// (which wraps an OleNativeStream with an embedded file).
    ///
    /// The comparison ignores ASCII case.
    pub fn is_package(&self) -> bool {
        self.class_name.eq_ignore_ascii_case("Package")
    }

    /// Check if the class name indicates an OLE2Link
    /// (potentially CVE-2017-0199).
    ///
    /// The comparison ignores ASCII case.
    pub fn is_ole2link(&self) -> bool {
        self.class_name.eq_ignore_ascii_case("OLE2Link")
    }

    /// Check if the class name indicates an Equation Editor object
    /// (potentially CVE-2017-11882).
    ///
    /// Matches any class name that starts with "equation." regardless of
    /// ASCII case, e.g. "Equation.3".
    pub fn is_equation(&self) -> bool {
        self.class_name
            .to_ascii_lowercase()
            .starts_with("equation.")
    }
}

// ---------------------------------------------------------------------------
// Helper functions
// ---------------------------------------------------------------------------

fn read_u32_le(data: &[u8], pos: &mut usize) -> Result<u32> {
    if *pos + 4 > data.len() {
        return Err(Error::OleObjectParsing(
            "Unexpected end of data reading u32".into(),
        ));
    }
    let val = u32::from_le_bytes([data[*pos], data[*pos + 1], data[*pos + 2], data[*pos + 3]]);
    *pos += 4;
    Ok(val)
}

fn read_length_prefixed_string(data: &[u8], pos: &mut usize) -> Result<String> {
    let len = read_u32_le(data, pos)? as usize;
    if len == 0 {
        return Ok(String::new());
    }
    if *pos + len > data.len() {
        return Err(Error::OleObjectParsing(
            "Unexpected end of data reading length-prefixed string".into(),
        ));
    }

    // The string includes a null terminator — strip it
    let end = if len > 0 && data[*pos + len - 1] == 0 {
        *pos + len - 1
    } else {
        *pos + len
    };

    let s = String::from_utf8_lossy(&data[*pos..end]).to_string();
    *pos += len;
    Ok(s)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Appends `text` as a length-prefixed string: a `u32` LE byte count that
    /// includes the terminator, the bytes themselves, then a NUL.
    fn push_prefixed(out: &mut Vec<u8>, text: &str) {
        let len_with_nul = (text.len() + 1) as u32;
        out.extend_from_slice(&len_with_nul.to_le_bytes());
        out.extend_from_slice(text.as_bytes());
        out.push(0);
    }

    /// Serializes an embedded (format ID 2) OLE 1.0 object with version
    /// 0x00000501, the three given names and `payload` as its data.
    fn build_ole_object(
        class_name: &str,
        topic_name: &str,
        item_name: &str,
        payload: &[u8],
    ) -> Vec<u8> {
        let mut out = Vec::new();

        // Fixed header: ole_version, then format_id.
        out.extend_from_slice(&0x00000501u32.to_le_bytes());
        out.extend_from_slice(&2u32.to_le_bytes());

        // class_name, topic_name, item_name — in stream order.
        for name in [class_name, topic_name, item_name].iter() {
            push_prefixed(&mut out, name);
        }

        // data_size followed by the data itself.
        out.extend_from_slice(&(payload.len() as u32).to_le_bytes());
        out.extend_from_slice(payload);

        out
    }

    #[test]
    fn test_parse_valid_object() {
        let payload = [0x01u8, 0x02, 0x03, 0x04];
        let raw = build_ole_object("Package", "test.doc", "item1", &payload);

        let parsed = OleObject::parse(&raw).unwrap();
        assert_eq!(parsed.ole_version, 0x00000501);
        assert_eq!(parsed.format_id, 2);
        assert_eq!(parsed.class_name, "Package");
        assert_eq!(parsed.topic_name, "test.doc");
        assert_eq!(parsed.item_name, "item1");
        assert_eq!(parsed.data_size, 4);
        assert_eq!(parsed.data, payload);
    }

    #[test]
    fn test_is_package() {
        let raw = build_ole_object("Package", "", "", &[]);
        assert!(OleObject::parse(&raw).unwrap().is_package());
    }

    #[test]
    fn test_is_ole2link() {
        let raw = build_ole_object("OLE2Link", "", "", &[]);
        assert!(OleObject::parse(&raw).unwrap().is_ole2link());
    }

    #[test]
    fn test_is_equation() {
        let raw = build_ole_object("Equation.3", "", "", &[]);
        assert!(OleObject::parse(&raw).unwrap().is_equation());
    }

    #[test]
    fn test_format_id() {
        let raw = build_ole_object("Test", "", "", &[]);
        let parsed = OleObject::parse(&raw).unwrap();
        assert_eq!(parsed.format(), OleFormatId::Embedded);
    }

    #[test]
    fn test_parse_too_short() {
        assert!(OleObject::parse(&[0x01, 0x00]).is_err());
    }

    #[test]
    fn test_parse_empty() {
        assert!(OleObject::parse(&[]).is_err());
    }

    #[test]
    fn test_empty_names() {
        // ole_version, format_id, three zero-length names, data_size = 0.
        let raw: Vec<u8> = [0x00000501u32, 2, 0, 0, 0, 0]
            .iter()
            .flat_map(|word| word.to_le_bytes().to_vec())
            .collect();

        let parsed = OleObject::parse(&raw).unwrap();
        assert_eq!(parsed.class_name, "");
        assert_eq!(parsed.data_size, 0);
    }
}