oletools_rs 0.1.0

Rust port of oletools — analysis tools for Microsoft Office files (VBA macros, DDE, OLE objects, RTF exploits)
Documentation
//! OLE metadata extraction (olemeta).
//!
//! Extracts document properties from OLE SummaryInformation and
//! DocumentSummaryInformation streams.

use crate::error::Result;
use crate::ole::container::OleFile;

/// Document metadata extracted from an OLE file.
///
/// Every field is optional: a field is `None` when no value was recovered for
/// it, and [`Default`] yields a record with every field unset. Records can be
/// compared with `==`, which makes them convenient to assert on in tests.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct OleMetadata {
    /// Document title.
    pub title: Option<String>,
    /// Document subject.
    pub subject: Option<String>,
    /// Author of the document.
    pub author: Option<String>,
    /// Keywords attached to the document.
    pub keywords: Option<String>,
    /// Free-form comments attached to the document.
    pub comments: Option<String>,
    /// Name of the user who last saved the document.
    pub last_saved_by: Option<String>,
    /// Revision number, kept as text.
    pub revision_number: Option<String>,
    /// Name of the application associated with the document.
    pub application: Option<String>,
    /// Creation timestamp as text, when available.
    pub creation_date: Option<String>,
    /// Last-saved timestamp as text, when available.
    pub last_saved_date: Option<String>,
    /// Page count, when available.
    pub num_pages: Option<u32>,
    /// Word count, when available.
    pub num_words: Option<u32>,
    /// Character count, when available.
    pub num_chars: Option<u32>,
    /// Security value recorded for the document, when available.
    pub security: Option<u32>,
}

/// Path of the `SummaryInformation` property-set stream.
///
/// The stream name itself starts with the control byte `0x05`.
const SUMMARY_INFORMATION: &str = "/\x05SummaryInformation";
/// Path of the `DocumentSummaryInformation` property-set stream.
///
/// The stream name itself starts with the control byte `0x05`.
const DOC_SUMMARY_INFORMATION: &str = "/\x05DocumentSummaryInformation";

impl OleMetadata {
    /// Extract metadata from an OLE file.
    pub fn extract(ole: &mut OleFile) -> Result<Self> {
        let mut meta = Self::default();

        // Extract from SummaryInformation stream
        if ole.is_stream(SUMMARY_INFORMATION)
            && let Ok(data) = ole.open_stream(SUMMARY_INFORMATION) {
                meta.parse_summary_information(&data);
            }

        // Extract from DocumentSummaryInformation stream
        if ole.is_stream(DOC_SUMMARY_INFORMATION)
            && let Ok(data) = ole.open_stream(DOC_SUMMARY_INFORMATION) {
                meta.parse_doc_summary_information(&data);
            }

        Ok(meta)
    }

    /// Parse SummaryInformation property set stream.
    ///
    /// Format: MS-OLEPS (Office Logical Property Set) specification.
    fn parse_summary_information(&mut self, data: &[u8]) {
        // Validate minimum size: byte order (2) + version (2) + OS version (4) +
        // CLSID (16) + num sections (4) = 28 bytes
        if data.len() < 28 {
            return;
        }

        // Check byte order mark
        let byte_order = u16::from_le_bytes([data[0], data[1]]);
        if byte_order != 0xFFFE {
            return;
        }

        let num_sections = u32::from_le_bytes([data[24], data[25], data[26], data[27]]) as usize;
        if num_sections == 0 {
            return;
        }

        // First section offset is at byte 48 (after FMTID)
        if data.len() < 48 {
            return;
        }
        let section_offset =
            u32::from_le_bytes([data[44], data[45], data[46], data[47]]) as usize;

        self.parse_property_section(data, section_offset, false);
    }

    /// Parse DocumentSummaryInformation property set stream.
    fn parse_doc_summary_information(&mut self, data: &[u8]) {
        if data.len() < 28 {
            return;
        }

        let byte_order = u16::from_le_bytes([data[0], data[1]]);
        if byte_order != 0xFFFE {
            return;
        }

        let num_sections = u32::from_le_bytes([data[24], data[25], data[26], data[27]]) as usize;
        if num_sections == 0 {
            return;
        }

        if data.len() < 48 {
            return;
        }
        let section_offset =
            u32::from_le_bytes([data[44], data[45], data[46], data[47]]) as usize;

        self.parse_property_section(data, section_offset, true);
    }

    fn parse_property_section(&mut self, data: &[u8], offset: usize, _is_doc_summary: bool) {
        if offset + 8 > data.len() {
            return;
        }

        let num_properties =
            u32::from_le_bytes([data[offset + 4], data[offset + 5], data[offset + 6], data[offset + 7]])
                as usize;

        for i in 0..num_properties {
            let entry_offset = offset + 8 + i * 8;
            if entry_offset + 8 > data.len() {
                break;
            }

            let prop_id = u32::from_le_bytes([
                data[entry_offset],
                data[entry_offset + 1],
                data[entry_offset + 2],
                data[entry_offset + 3],
            ]);
            let prop_offset = u32::from_le_bytes([
                data[entry_offset + 4],
                data[entry_offset + 5],
                data[entry_offset + 6],
                data[entry_offset + 7],
            ]) as usize;

            let abs_offset = offset + prop_offset;
            if abs_offset + 4 > data.len() {
                continue;
            }

            let value = self.read_property_value(data, abs_offset);

            // SummaryInformation property IDs
            match prop_id {
                0x02 => self.title = value,         // PIDSI_TITLE
                0x03 => self.subject = value,       // PIDSI_SUBJECT
                0x04 => self.author = value,        // PIDSI_AUTHOR
                0x05 => self.keywords = value,      // PIDSI_KEYWORDS
                0x06 => self.comments = value,      // PIDSI_COMMENTS
                0x08 => self.last_saved_by = value,  // PIDSI_LASTAUTHOR
                0x09 => self.revision_number = value, // PIDSI_REVNUMBER
                0x12 => self.application = value,    // PIDSI_APPNAME
                _ => {}
            }
        }
    }

    /// Read a property value from the given offset.
    /// Returns a string representation for string types, None otherwise.
    fn read_property_value(&self, data: &[u8], offset: usize) -> Option<String> {
        if offset + 8 > data.len() {
            return None;
        }

        let vt_type = u32::from_le_bytes([
            data[offset],
            data[offset + 1],
            data[offset + 2],
            data[offset + 3],
        ]);

        match vt_type {
            // VT_LPSTR (0x1E)
            0x1E => {
                let str_len = u32::from_le_bytes([
                    data[offset + 4],
                    data[offset + 5],
                    data[offset + 6],
                    data[offset + 7],
                ]) as usize;

                if offset + 8 + str_len > data.len() {
                    return None;
                }

                let bytes = &data[offset + 8..offset + 8 + str_len];
                // Trim trailing null bytes
                let trimmed = bytes.split(|&b| b == 0).next().unwrap_or(bytes);
                let (decoded, _, _) = encoding_rs::WINDOWS_1252.decode(trimmed);
                Some(decoded.into_owned())
            }
            // VT_LPWSTR (0x1F)
            0x1F => {
                let char_count = u32::from_le_bytes([
                    data[offset + 4],
                    data[offset + 5],
                    data[offset + 6],
                    data[offset + 7],
                ]) as usize;

                let byte_len = char_count * 2;
                if offset + 8 + byte_len > data.len() {
                    return None;
                }

                let bytes = &data[offset + 8..offset + 8 + byte_len];
                let u16s: Vec<u16> = bytes
                    .chunks_exact(2)
                    .map(|c| u16::from_le_bytes([c[0], c[1]]))
                    .collect();
                String::from_utf16(&u16s).ok().map(|s| {
                    s.trim_end_matches('\0').to_string()
                })
            }
            _ => None,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::OleMetadata;

    /// A default-constructed record has neither a title nor an author.
    #[test]
    fn test_default_metadata() {
        let OleMetadata { title, author, .. } = OleMetadata::default();
        assert_eq!(title.as_deref(), None);
        assert_eq!(author.as_deref(), None);
    }
}