// oletools_rs 0.1.0
//
// Rust port of oletools — analysis tools for Microsoft Office files (VBA macros, DDE, OLE objects, RTF exploits)
// Documentation
//! OLE object extractor — high-level API for extracting embedded objects.
//!
//! Automatically detects OLE vs OOXML format, finds `\x01Ole10Native` streams,
//! external relationships, and custom UI parts.

use std::io::{Cursor, Read};

use crate::error::{Error, Result};
use crate::ole::container::OleFile;
use crate::oleobj::native_stream::OleNativeStream;
use crate::ooxml::relationships::{self, Relationship};

/// An embedded object extracted from a document.
///
/// The metadata fields are plain strings: when the extractor has no value
/// for one of them (for example a raw stream or raw archive entry that was
/// not parsed as an `OleNativeStream`) it stores an empty string.
///
/// Implements `PartialEq`/`Eq` so extraction results can be compared
/// directly, e.g. with `assert_eq!` in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EmbeddedObject {
    /// Source stream or file path within the container.
    pub source: String,
    /// Original filename (empty if not available).
    pub filename: String,
    /// Source path from OleNativeStream (empty if not available).
    pub src_path: String,
    /// Temp path from OleNativeStream (empty if not available).
    pub temp_path: String,
    /// The embedded data.
    pub data: Vec<u8>,
}

/// Information about a custom UI relationship found in the document.
///
/// Implements `PartialEq`/`Eq` so results can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CustomUiInfo {
    /// Target of the matching relationship, copied verbatim from the
    /// relationship entry (it is not resolved against the archive root).
    pub part_name: String,
    /// Relationship type string of the matching relationship.
    pub rel_type: String,
}

/// High-level extractor for embedded OLE objects.
///
/// Holds its own copy of the document bytes together with the result of the
/// format sniffing performed in `from_bytes`.
pub struct OleObjExtractor {
    /// Private copy of the raw document bytes.
    data: Vec<u8>,
    /// Result of `OleFile::is_ole` on the input.
    is_ole: bool,
    /// Whether the input starts with the ZIP signature `PK\x03\x04`.
    is_ooxml: bool,
}

impl OleObjExtractor {
    /// Create an extractor from raw file data.
    /// Automatically detects OLE vs OOXML format.
    pub fn from_bytes(data: &[u8]) -> Result<Self> {
        let is_ole = OleFile::is_ole(data);
        let is_ooxml = data.len() >= 4 && data[0..4] == [0x50, 0x4B, 0x03, 0x04];

        if !is_ole && !is_ooxml {
            return Err(Error::UnsupportedFormat(
                "Not an OLE or OOXML file".into(),
            ));
        }

        Ok(Self {
            data: data.to_vec(),
            is_ole,
            is_ooxml,
        })
    }

    /// Extract every embedded object the document exposes.
    ///
    /// Dispatches to the OLE or the OOXML extraction routine depending on
    /// the format detected at construction time; the OLE flag is checked
    /// first. If neither flag is set an empty list is returned.
    pub fn extract_objects(&self) -> Result<Vec<EmbeddedObject>> {
        match (self.is_ole, self.is_ooxml) {
            (true, _) => self.extract_objects_ole(),
            (false, true) => self.extract_objects_ooxml(),
            (false, false) => Ok(Vec::new()),
        }
    }

    /// Find external relationships in the document (OOXML only).
    ///
    /// External relationships may point to remote resources and are
    /// used in various exploit techniques.
    ///
    /// Every archive entry whose name ends in `.rels` is inspected, in
    /// archive order. Entries are visited by index rather than looked up by
    /// name, so two entries that share a name are both examined instead of
    /// one of them being read twice.
    ///
    /// Returns an empty list for non-OOXML input. Entries that cannot be
    /// opened, are empty, or fail relationship parsing are skipped.
    ///
    /// # Errors
    ///
    /// Returns [`Error::InvalidOoxml`] if the data cannot be opened as a ZIP
    /// archive, and propagates I/O errors raised while reading a `.rels`
    /// entry.
    pub fn find_external_relationships(&self) -> Result<Vec<Relationship>> {
        if !self.is_ooxml {
            return Ok(Vec::new());
        }

        let cursor = Cursor::new(&self.data);
        let mut archive = zip::ZipArchive::new(cursor)
            .map_err(|e| Error::InvalidOoxml(format!("Invalid ZIP: {e}")))?;

        let mut all_external = Vec::new();

        for index in 0..archive.len() {
            // An entry that cannot be opened is skipped, not treated as fatal.
            let Ok(mut entry) = archive.by_index(index) else {
                continue;
            };
            if !entry.name().ends_with(".rels") {
                continue;
            }

            let mut rels_data = Vec::new();
            entry.read_to_end(&mut rels_data)?;
            if rels_data.is_empty() {
                continue;
            }

            // Malformed relationship XML is ignored so the remaining parts
            // are still scanned.
            if let Ok(rels) = relationships::parse_relationships(&rels_data) {
                let external = relationships::find_external_relationships(&rels);
                for rel in external {
                    all_external.push(rel.clone());
                }
            }
        }

        Ok(all_external)
    }

    /// Find custom UI parts (potential CVE-2021-42292 indicator).
    ///
    /// Scans every `.rels` entry of an OOXML document, in archive order, for
    /// relationships whose type identifies a custom UI part. The type is
    /// compared case-insensitively and matches when it contains either
    /// `customui` or `ui/extensibility` — the latter is the suffix of the
    /// relationship types Office uses for ribbon customisation parts
    /// (`.../relationships/ui/extensibility`).
    ///
    /// Entries are visited by index rather than looked up by name, so two
    /// entries that share a name are both examined.
    ///
    /// Returns an empty list for non-OOXML input. Entries that cannot be
    /// opened, are empty, or fail relationship parsing are skipped.
    ///
    /// # Errors
    ///
    /// Returns [`Error::InvalidOoxml`] if the data cannot be opened as a ZIP
    /// archive, and propagates I/O errors raised while reading a `.rels`
    /// entry.
    pub fn find_custom_ui(&self) -> Result<Vec<CustomUiInfo>> {
        if !self.is_ooxml {
            return Ok(Vec::new());
        }

        let cursor = Cursor::new(&self.data);
        let mut archive = zip::ZipArchive::new(cursor)
            .map_err(|e| Error::InvalidOoxml(format!("Invalid ZIP: {e}")))?;

        let mut custom_ui = Vec::new();

        for index in 0..archive.len() {
            // An entry that cannot be opened is skipped, not treated as fatal.
            let Ok(mut entry) = archive.by_index(index) else {
                continue;
            };
            if !entry.name().ends_with(".rels") {
                continue;
            }

            let mut rels_data = Vec::new();
            entry.read_to_end(&mut rels_data)?;
            if rels_data.is_empty() {
                continue;
            }

            if let Ok(rels) = relationships::parse_relationships(&rels_data) {
                for rel in &rels {
                    // Compare on a lower-cased copy so casing variants such
                    // as "CustomUI" are not missed.
                    let kind = rel.rel_type.to_ascii_lowercase();
                    if kind.contains("customui") || kind.contains("ui/extensibility") {
                        custom_ui.push(CustomUiInfo {
                            part_name: rel.target.clone(),
                            rel_type: rel.rel_type.clone(),
                        });
                    }
                }
            }
        }

        Ok(custom_ui)
    }

    // -----------------------------------------------------------------------
    // OLE extraction
    // -----------------------------------------------------------------------

    /// Extract embedded objects from an OLE compound file.
    ///
    /// Two passes are made over the stream list, and their results are
    /// appended in this order:
    ///
    /// 1. Every stream whose path contains `ole10native` (case-insensitive)
    ///    is parsed as an `OleNativeStream`. Streams that fail to parse are
    ///    logged at debug level and skipped.
    /// 2. If an `ObjectPool` storage exists, every other non-empty stream
    ///    under it is returned as raw data with empty metadata. Streams that
    ///    cannot be opened in this pass are skipped.
    ///
    /// # Errors
    ///
    /// Propagates errors from opening the container and from opening an
    /// `Ole10Native` stream in the first pass.
    fn extract_objects_ole(&self) -> Result<Vec<EmbeddedObject>> {
        let mut ole = OleFile::from_bytes(&self.data)?;
        let streams = ole.list_streams();
        let mut objects = Vec::new();

        // Pass 1: look for \x01Ole10Native streams.
        for stream_path in &streams {
            if !stream_path.to_lowercase().contains("ole10native") {
                continue;
            }

            let data = ole.open_stream(stream_path)?;
            match OleNativeStream::parse(&data) {
                // `native` is owned, so its fields are moved out directly
                // instead of being cloned.
                Ok(native) => objects.push(EmbeddedObject {
                    source: stream_path.clone(),
                    filename: native.filename,
                    src_path: native.src_path,
                    temp_path: native.temp_path,
                    data: native.data,
                }),
                Err(e) => {
                    log::debug!(
                        "Failed to parse OleNativeStream from {}: {}",
                        stream_path,
                        e
                    );
                }
            }
        }

        // Pass 2: raw streams stored under the ObjectPool storage.
        if ole.exists("/ObjectPool") || ole.exists("ObjectPool") {
            for stream_path in &streams {
                let lower = stream_path.to_lowercase();
                let in_pool =
                    lower.starts_with("/objectpool") || lower.starts_with("objectpool");
                // Ole10Native streams were already handled in pass 1.
                if !in_pool || lower.contains("ole10native") {
                    continue;
                }

                // Try to read the raw stream; failures here are best-effort.
                if ole.is_stream(stream_path)
                    && let Ok(data) = ole.open_stream(stream_path)
                    && !data.is_empty()
                {
                    objects.push(EmbeddedObject {
                        source: stream_path.clone(),
                        filename: String::new(),
                        src_path: String::new(),
                        temp_path: String::new(),
                        data,
                    });
                }
            }
        }

        Ok(objects)
    }

    // -----------------------------------------------------------------------
    // OOXML extraction
    // -----------------------------------------------------------------------

    /// Extract embedded objects from an OOXML (ZIP) package.
    ///
    /// Considers every archive entry whose lower-cased name contains
    /// `embeddings/` and ends with `.bin` (typically `word/embeddings/` or
    /// `xl/embeddings/`), in archive order:
    ///
    /// * If the entry is an OLE container, each stream whose path contains
    ///   `ole10native` is parsed as an `OleNativeStream`; parse failures are
    ///   logged at debug level and skipped.
    /// * Otherwise the entry's bytes are returned as a raw object with empty
    ///   metadata.
    ///
    /// Entries are visited by index rather than looked up by name, so two
    /// entries that share a name are both examined. Entries that cannot be
    /// opened or are empty are skipped.
    ///
    /// # Errors
    ///
    /// Returns [`Error::InvalidOoxml`] if the data cannot be opened as a ZIP
    /// archive, and propagates errors from reading an entry, opening an
    /// embedded OLE container, or opening one of its `Ole10Native` streams.
    fn extract_objects_ooxml(&self) -> Result<Vec<EmbeddedObject>> {
        let cursor = Cursor::new(&self.data);
        let mut archive = zip::ZipArchive::new(cursor)
            .map_err(|e| Error::InvalidOoxml(format!("Invalid ZIP: {e}")))?;

        let mut objects = Vec::new();

        for index in 0..archive.len() {
            // An entry that cannot be opened is skipped, not treated as fatal.
            let Ok(mut entry) = archive.by_index(index) else {
                continue;
            };

            let lower = entry.name().to_lowercase();
            if !(lower.contains("embeddings/") && lower.ends_with(".bin")) {
                continue;
            }
            let entry_name = entry.name().to_string();

            let mut entry_data = Vec::new();
            entry.read_to_end(&mut entry_data)?;
            if entry_data.is_empty() {
                continue;
            }

            if !OleFile::is_ole(&entry_data) {
                // Raw embedded data (not OLE)
                objects.push(EmbeddedObject {
                    source: entry_name,
                    filename: String::new(),
                    src_path: String::new(),
                    temp_path: String::new(),
                    data: entry_data,
                });
                continue;
            }

            // The entry is itself an OLE container: look inside it.
            let mut ole = OleFile::from_bytes(&entry_data)?;
            let streams = ole.list_streams();

            for stream_path in &streams {
                if !stream_path.to_lowercase().contains("ole10native") {
                    continue;
                }

                let data = ole.open_stream(stream_path)?;
                match OleNativeStream::parse(&data) {
                    // `native` is owned, so its fields are moved out
                    // directly instead of being cloned.
                    Ok(native) => objects.push(EmbeddedObject {
                        source: format!("{}//{}", entry_name, stream_path),
                        filename: native.filename,
                        src_path: native.src_path,
                        temp_path: native.temp_path,
                        data: native.data,
                    }),
                    Err(e) => {
                        log::debug!(
                            "Failed to parse OleNativeStream from {}/{}: {}",
                            entry_name,
                            stream_path,
                            e
                        );
                    }
                }
            }
        }

        Ok(objects)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The 8-byte OLE compound-file signature, zero-padded to 512 bytes.
    ///
    /// This is only a header-shaped buffer, not a parseable compound file.
    fn fake_ole_header() -> Vec<u8> {
        let mut bytes = vec![0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1];
        bytes.resize(512, 0);
        bytes
    }

    #[test]
    fn test_from_bytes_invalid() {
        // Three arbitrary bytes match neither the OLE nor the ZIP signature.
        assert!(OleObjExtractor::from_bytes(&[0x00, 0x01, 0x02]).is_err());
    }

    #[test]
    fn test_from_bytes_empty() {
        assert!(OleObjExtractor::from_bytes(&[]).is_err());
    }

    #[test]
    fn test_external_relationships_non_ooxml() {
        let bytes = fake_ole_header();

        // Full OLE parsing of this buffer would fail, but the relationship
        // lookup short-circuits to an empty list for non-OOXML input.
        // NOTE(review): if construction itself is rejected, this test passes
        // without asserting anything.
        if let Ok(extractor) = OleObjExtractor::from_bytes(&bytes) {
            let found = extractor.find_external_relationships().unwrap();
            assert!(found.is_empty());
        }
    }

    #[test]
    fn test_custom_ui_non_ooxml() {
        let bytes = fake_ole_header();

        // NOTE(review): same caveat as above — nothing is asserted when
        // construction is rejected.
        if let Ok(extractor) = OleObjExtractor::from_bytes(&bytes) {
            let found = extractor.find_custom_ui().unwrap();
            assert!(found.is_empty());
        }
    }

    #[test]
    fn test_embedded_object_debug() {
        let sample = EmbeddedObject {
            source: "test".into(),
            filename: "test.bin".into(),
            src_path: String::new(),
            temp_path: String::new(),
            data: vec![1, 2, 3],
        };
        let rendered = format!("{:?}", sample);
        assert!(rendered.contains("test.bin"));
    }

    #[test]
    fn test_target_mode_filter() {
        use crate::ooxml::relationships::TargetMode;

        // Only checks that a relationship built as External compares equal
        // to `TargetMode::External`; no filtering function is exercised.
        let external_rel = Relationship {
            id: "rId1".into(),
            rel_type: "test".into(),
            target: "http://evil.com".into(),
            target_mode: TargetMode::External,
        };
        assert_eq!(external_rel.target_mode, TargetMode::External);
    }
}