// oletools_rs 0.1.0
//
// Rust port of oletools — analysis tools for Microsoft Office files (VBA macros, DDE, OLE objects, RTF exploits).
// Documentation
//! OLE2 Compound Document container.
//!
//! Wraps the `cfb` crate to provide a simplified API for reading OLE files,
//! listing streams, and extracting CLSID and metadata.

use std::io::{Cursor, Read};
use std::path::Path;

use uuid::Uuid;

use crate::error::{Error, Result};
use crate::ole::clsid;

/// Represents an open OLE2 Compound Document.
///
/// Wraps a `cfb::CompoundFile` whose backing store is an in-memory
/// `Cursor<Vec<u8>>`, so the container's bytes are owned by this value.
pub struct OleFile {
    // Underlying `cfb` compound-file reader over the in-memory bytes.
    inner: cfb::CompoundFile<Cursor<Vec<u8>>>,
}

/// Information about an OLE entry including timestamps.
///
/// Carries the same fields as [`OleEntry`] plus the creation and
/// modification times reported for the entry.
#[derive(Debug, Clone)]
pub struct OleEntryWithTimestamp {
    /// Full path within the OLE container, produced by a lossy UTF-8
    /// conversion of the entry's path.
    pub path: String,
    /// Whether this is a stream (data) or storage (directory).
    pub is_stream: bool,
    /// Size in bytes (0 for storages).
    pub size: u64,
    /// CLSID of this entry.
    pub clsid: Uuid,
    /// Creation timestamp. The listing code in this file always fills this
    /// with `Some(..)` holding the value reported by the entry.
    pub created: Option<std::time::SystemTime>,
    /// Modification timestamp. The listing code in this file always fills
    /// this with `Some(..)` holding the value reported by the entry.
    pub modified: Option<std::time::SystemTime>,
}

/// Information about an OLE stream or storage entry.
///
/// One value is produced per child visited while walking the container tree.
#[derive(Debug, Clone)]
pub struct OleEntry {
    /// Full path within the OLE container (e.g., "/Macros/VBA/ThisDocument"),
    /// produced by a lossy UTF-8 conversion of the entry's path.
    pub path: String,
    /// Whether this is a stream (data) or storage (directory).
    pub is_stream: bool,
    /// Size in bytes (0 for storages).
    pub size: u64,
    /// CLSID of this entry (usually only set on storages).
    pub clsid: Uuid,
}

impl OleFile {
    /// Magic signature found in the first eight bytes of an OLE2 compound file.
    const MAGIC: [u8; 8] = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1];

    /// Open an OLE file from a filesystem path.
    ///
    /// The whole file is read into memory before it is parsed.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be read, or `Error::InvalidOle`
    /// if its contents cannot be opened as a compound file.
    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
        // Hand the freshly read buffer over by value; no second copy is made.
        Self::from_vec(std::fs::read(path)?)
    }

    /// Open an OLE file from a byte slice.
    ///
    /// The slice is copied so the returned value owns its data.
    ///
    /// # Errors
    ///
    /// Returns `Error::InvalidOle` if the bytes cannot be opened as a
    /// compound file.
    pub fn from_bytes(data: &[u8]) -> Result<Self> {
        Self::from_vec(data.to_vec())
    }

    /// Open an OLE file from a reader.
    ///
    /// The reader is drained to its end before parsing starts.
    ///
    /// # Errors
    ///
    /// Returns an error if reading fails, or `Error::InvalidOle` if the
    /// bytes read cannot be opened as a compound file.
    pub fn from_reader<R: Read>(mut reader: R) -> Result<Self> {
        let mut data = Vec::new();
        reader.read_to_end(&mut data)?;
        Self::from_vec(data)
    }

    /// Build an `OleFile` from an owned buffer without copying it.
    ///
    /// Shared by all public constructors so the error mapping lives in one
    /// place.
    fn from_vec(data: Vec<u8>) -> Result<Self> {
        let inner = cfb::CompoundFile::open(Cursor::new(data))
            .map_err(|e| Error::InvalidOle(format!("Failed to open OLE file: {e}")))?;
        Ok(Self { inner })
    }

    /// Check if a byte slice starts with the OLE magic signature.
    ///
    /// Only the first eight bytes are inspected; slices shorter than the
    /// signature yield `false`.
    pub fn is_ole(data: &[u8]) -> bool {
        data.starts_with(&Self::MAGIC)
    }

    /// List all streams and storages in the OLE container.
    ///
    /// Entries are returned in depth-first order starting from the root
    /// storage: each storage is followed by its descendants. Storages that
    /// cannot be read are skipped.
    pub fn list_entries(&self) -> Vec<OleEntry> {
        let mut entries = Vec::new();
        Self::collect_entries(&self.inner, Path::new("/"), &mut entries);
        entries
    }

    /// List only stream paths (no storages).
    pub fn list_streams(&self) -> Vec<String> {
        self.list_entries()
            .into_iter()
            .filter(|e| e.is_stream)
            .map(|e| e.path)
            .collect()
    }

    /// Check whether a stream or a storage exists at the given path.
    pub fn exists(&self, path: &str) -> bool {
        self.inner.is_stream(path) || self.inner.is_storage(path)
    }

    /// Check if the given path is a stream (data, not a directory).
    pub fn is_stream(&self, path: &str) -> bool {
        self.inner.is_stream(path)
    }

    /// Read the full contents of a stream.
    ///
    /// # Errors
    ///
    /// Returns `Error::InvalidOle` if the stream cannot be opened, or an
    /// error converted from the I/O failure if reading it fails.
    pub fn open_stream(&mut self, path: &str) -> Result<Vec<u8>> {
        let mut stream = self.inner.open_stream(path).map_err(|e| {
            Error::InvalidOle(format!("Failed to open stream '{path}': {e}"))
        })?;
        let mut buf = Vec::new();
        stream.read_to_end(&mut buf)?;
        Ok(buf)
    }

    /// Get the CLSID of the root entry.
    pub fn root_clsid(&self) -> Uuid {
        self.inner.root_entry().clsid().to_owned()
    }

    /// Look up the root CLSID description, if known.
    pub fn root_clsid_description(&self) -> Option<&'static str> {
        clsid::lookup_clsid(&self.root_clsid())
    }

    /// Get the CLSID of an entry at the given path.
    ///
    /// Returns `None` if the entry lookup fails.
    pub fn entry_clsid(&self, path: &str) -> Option<Uuid> {
        self.inner.entry(path).ok().map(|e| e.clsid().to_owned())
    }

    /// List all entries with creation/modification timestamps.
    ///
    /// Traversal order and error handling match [`OleFile::list_entries`].
    pub fn list_entries_with_timestamps(&self) -> Vec<OleEntryWithTimestamp> {
        let mut entries = Vec::new();
        Self::collect_entries_with_timestamps(&self.inner, Path::new("/"), &mut entries);
        entries
    }

    /// Recursively collect entries with timestamps from the OLE tree.
    ///
    /// Appends every child of `dir` to `entries`, descending into each
    /// storage right after it has been recorded. A storage that cannot be
    /// read contributes nothing: the listing is best-effort because the
    /// public API returns a plain `Vec`.
    fn collect_entries_with_timestamps(
        cf: &cfb::CompoundFile<Cursor<Vec<u8>>>,
        dir: &Path,
        entries: &mut Vec<OleEntryWithTimestamp>,
    ) {
        let Ok(iter) = cf.read_storage(dir) else {
            return;
        };
        // Materialize the children first so the directory iterator is not
        // held across the recursive calls below.
        let children: Vec<_> = iter.collect();
        for entry in children {
            let is_stream = entry.is_stream();

            entries.push(OleEntryWithTimestamp {
                path: entry.path().to_string_lossy().into_owned(),
                is_stream,
                size: entry.len(),
                clsid: entry.clsid().to_owned(),
                created: Some(entry.created()),
                modified: Some(entry.modified()),
            });

            if !is_stream {
                Self::collect_entries_with_timestamps(cf, entry.path(), entries);
            }
        }
    }

    /// Recursively collect entries from the OLE tree.
    ///
    /// Appends every child of `dir` to `entries`, descending into each
    /// storage right after it has been recorded. A storage that cannot be
    /// read contributes nothing: the listing is best-effort because the
    /// public API returns a plain `Vec`.
    fn collect_entries(
        cf: &cfb::CompoundFile<Cursor<Vec<u8>>>,
        dir: &Path,
        entries: &mut Vec<OleEntry>,
    ) {
        let Ok(iter) = cf.read_storage(dir) else {
            return;
        };
        // Materialize the children first so the directory iterator is not
        // held across the recursive calls below.
        let children: Vec<_> = iter.collect();
        for entry in children {
            let is_stream = entry.is_stream();

            entries.push(OleEntry {
                path: entry.path().to_string_lossy().into_owned(),
                is_stream,
                size: entry.len(),
                clsid: entry.clsid().to_owned(),
            });

            if !is_stream {
                Self::collect_entries(cf, entry.path(), entries);
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The eight-byte OLE2 signature, spelled out independently of the
    /// implementation so the tests pin the expected byte values.
    const OLE_MAGIC: [u8; 8] = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1];

    #[test]
    fn test_is_ole_magic() {
        assert!(OleFile::is_ole(&OLE_MAGIC));
        assert!(!OleFile::is_ole(&[0x50, 0x4B, 0x03, 0x04])); // ZIP
        assert!(!OleFile::is_ole(&[0x00, 0x01]));
    }

    /// Exercises the edges of the signature check: too short, exactly one
    /// byte short, trailing payload, and a single corrupted byte.
    #[test]
    fn test_is_ole_boundaries() {
        assert!(!OleFile::is_ole(&[]));
        assert!(!OleFile::is_ole(&OLE_MAGIC[..7]));

        let mut with_payload = OLE_MAGIC.to_vec();
        with_payload.extend_from_slice(&[0u8; 16]);
        assert!(OleFile::is_ole(&with_payload));

        let mut corrupted = OLE_MAGIC;
        corrupted[7] ^= 0xFF;
        assert!(!OleFile::is_ole(&corrupted));
    }

    #[test]
    fn test_invalid_ole_data() {
        let result = OleFile::from_bytes(&[0x00, 0x01, 0x02, 0x03]);
        assert!(result.is_err());
    }

    #[test]
    fn test_empty_data() {
        let result = OleFile::from_bytes(&[]);
        assert!(result.is_err());
    }

    /// The reader-based constructor must reject the same garbage input as
    /// the slice-based one.
    #[test]
    fn test_invalid_reader_data() {
        let bytes: &[u8] = &[0x00, 0x01, 0x02, 0x03];
        assert!(OleFile::from_reader(bytes).is_err());
    }
}