oletools_rs 0.1.0

Rust port of oletools — analysis tools for Microsoft Office files (VBA macros, DDE, OLE objects, RTF exploits)
Documentation
//! OleNativeStream parser (MS-OLEDS 2.3.6).
//!
//! Parses the binary format of `\x01Ole10Native` streams embedded in OLE
//! documents, extracting the original filename, paths, and embedded data.

use crate::error::{Error, Result};

/// Parsed OLE Native Stream.
///
/// Holds the fields extracted from an `\x01Ole10Native` stream. All string
/// fields are decoded from the raw bytes with lossy UTF-8 conversion, so bytes
/// that are not valid UTF-8 appear as U+FFFD.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OleNativeStream {
    /// Total size of native data, as declared by the leading u32 of the
    /// stream. Stored as read; the parser does not validate it.
    pub native_data_size: u32,
    /// Flags (2 bytes, little-endian).
    pub flags: u16,
    /// Original filename (null-terminated string from stream).
    pub filename: String,
    /// Source path (null-terminated string from stream).
    pub src_path: String,
    /// Temporary path (null-terminated string from stream).
    pub temp_path: String,
    /// Size of the embedded data as declared in the stream. May exceed
    /// `data.len()` when the stream was truncated.
    pub actual_size: u32,
    /// The embedded data itself (at most `actual_size` bytes).
    pub data: Vec<u8>,
}

impl OleNativeStream {
    /// Parse an OleNativeStream from raw bytes.
    ///
    /// Binary layout:
    /// - native_data_size: u32 LE
    /// - flags: u16 LE
    /// - filename: null-terminated ANSI string
    /// - src_path: null-terminated ANSI string
    /// - unknown_long_1: u32 LE
    /// - unknown_long_2: u32 LE
    /// - temp_path: null-terminated ANSI string
    /// - actual_size: u32 LE
    /// - data: [u8; actual_size]
    ///
    /// A payload shorter than `actual_size` is tolerated: `data` then holds
    /// whatever bytes remain, while `actual_size` keeps the declared value.
    /// Bytes following the declared payload are ignored. `native_data_size`
    /// is stored as read and is not cross-checked against the input length.
    ///
    /// # Errors
    ///
    /// Returns [`Error::OleObjectParsing`] when the input is shorter than the
    /// 6-byte fixed header, or ends before the two unknown longs or before
    /// the `actual_size` field.
    pub fn parse(data: &[u8]) -> Result<Self> {
        // The fixed header (u32 size + u16 flags) must be present.
        if data.len() < 6 {
            return Err(Error::OleObjectParsing(
                "OleNativeStream data too short".into(),
            ));
        }

        let mut pos = 0;

        let native_data_size = read_u32_le(data, &mut pos)?;
        let flags = read_u16_le(data, &mut pos)?;
        let filename = read_null_terminated_string(data, &mut pos)?;
        let src_path = read_null_terminated_string(data, &mut pos)?;

        // unknown_long_1 and unknown_long_2 (skip 8 bytes). The readers never
        // advance `pos` past the end, so the subtraction cannot underflow.
        if data.len() - pos < 8 {
            return Err(Error::OleObjectParsing(
                "OleNativeStream truncated at unknown longs".into(),
            ));
        }
        pos += 8;

        let temp_path = read_null_terminated_string(data, &mut pos)?;
        let actual_size = read_u32_le(data, &mut pos)?;

        // Clamp the declared size to what is really available instead of
        // computing `pos + actual_size`, which could overflow `usize` on
        // 32-bit targets for a hostile `actual_size`.
        let remaining = &data[pos..];
        let take = remaining.len().min(actual_size as usize);

        Ok(Self {
            native_data_size,
            flags,
            filename,
            src_path,
            temp_path,
            actual_size,
            data: remaining[..take].to_vec(),
        })
    }

    /// Get the file extension from the filename, if any.
    ///
    /// Returns the text after the last `.` in `filename`, or `None` when the
    /// filename contains no `.` at all. A trailing dot yields `Some("")`.
    pub fn extension(&self) -> Option<&str> {
        self.filename.rsplit_once('.').map(|(_, ext)| ext)
    }
}

// ---------------------------------------------------------------------------
// Helper functions
// ---------------------------------------------------------------------------

fn read_u32_le(data: &[u8], pos: &mut usize) -> Result<u32> {
    if *pos + 4 > data.len() {
        return Err(Error::OleObjectParsing("Unexpected end of data reading u32".into()));
    }
    let val = u32::from_le_bytes([data[*pos], data[*pos + 1], data[*pos + 2], data[*pos + 3]]);
    *pos += 4;
    Ok(val)
}

fn read_u16_le(data: &[u8], pos: &mut usize) -> Result<u16> {
    if *pos + 2 > data.len() {
        return Err(Error::OleObjectParsing("Unexpected end of data reading u16".into()));
    }
    let val = u16::from_le_bytes([data[*pos], data[*pos + 1]]);
    *pos += 2;
    Ok(val)
}

/// Read a NUL-terminated string starting at `*pos`.
///
/// The bytes up to (not including) the first zero byte are decoded with lossy
/// UTF-8 conversion and `*pos` is moved just past the terminator. When no
/// terminator exists, the rest of `data` is returned and `*pos` ends up at
/// `data.len()`. This function never returns `Err`.
fn read_null_terminated_string(data: &[u8], pos: &mut usize) -> Result<String> {
    let start = *pos;
    let tail = &data[start..];

    // (bytes belonging to the string, bytes consumed including the NUL)
    let (text_len, consumed) = match tail.iter().position(|&byte| byte == 0) {
        Some(nul) => (nul, nul + 1),
        None => (tail.len(), tail.len()),
    };

    *pos = start + consumed;
    Ok(String::from_utf8_lossy(&tail[..text_len]).into_owned())
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Build a well-formed native stream with flags `0x0002`, zeroed unknown
    /// longs, and `native_data_size` patched to the byte count that follows
    /// the size field itself.
    fn build_native_stream(
        filename: &str,
        src_path: &str,
        temp_path: &str,
        payload: &[u8],
    ) -> Vec<u8> {
        let mut buf = Vec::new();

        // native_data_size (placeholder, patched below)
        buf.extend_from_slice(&0u32.to_le_bytes());
        // flags
        buf.extend_from_slice(&0x0002u16.to_le_bytes());
        // filename + null
        buf.extend_from_slice(filename.as_bytes());
        buf.push(0);
        // src_path + null
        buf.extend_from_slice(src_path.as_bytes());
        buf.push(0);
        // unknown_long_1, unknown_long_2
        buf.extend_from_slice(&0u32.to_le_bytes());
        buf.extend_from_slice(&0u32.to_le_bytes());
        // temp_path + null
        buf.extend_from_slice(temp_path.as_bytes());
        buf.push(0);
        // actual_size
        let sz = payload.len() as u32;
        buf.extend_from_slice(&sz.to_le_bytes());
        // data
        buf.extend_from_slice(payload);

        // Patch native_data_size
        let total = (buf.len() - 4) as u32;
        buf[0..4].copy_from_slice(&total.to_le_bytes());

        buf
    }

    #[test]
    fn test_parse_valid_stream() {
        let payload = b"Hello, embedded world!";
        let data = build_native_stream("test.doc", "C:\\test.doc", "C:\\tmp\\test.doc", payload);

        let stream = OleNativeStream::parse(&data).unwrap();
        // Header fields are passed through exactly as written by the builder.
        assert_eq!(stream.native_data_size as usize, data.len() - 4);
        assert_eq!(stream.flags, 0x0002);
        assert_eq!(stream.filename, "test.doc");
        assert_eq!(stream.src_path, "C:\\test.doc");
        assert_eq!(stream.temp_path, "C:\\tmp\\test.doc");
        assert_eq!(stream.actual_size, payload.len() as u32);
        assert_eq!(stream.data, payload);
    }

    #[test]
    fn test_parse_empty_payload() {
        let data = build_native_stream("empty.bin", "", "", &[]);

        let stream = OleNativeStream::parse(&data).unwrap();
        assert_eq!(stream.filename, "empty.bin");
        assert_eq!(stream.src_path, "");
        assert_eq!(stream.temp_path, "");
        assert_eq!(stream.actual_size, 0);
        assert!(stream.data.is_empty());
    }

    #[test]
    fn test_parse_truncated_data() {
        let result = OleNativeStream::parse(&[0x01, 0x00]);
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_too_short() {
        let result = OleNativeStream::parse(&[]);
        assert!(result.is_err());
    }

    #[test]
    fn test_extension() {
        let data = build_native_stream("payload.exe", "", "", b"MZ");
        let stream = OleNativeStream::parse(&data).unwrap();
        assert_eq!(stream.extension(), Some("exe"));
    }

    #[test]
    fn test_truncated_payload() {
        let payload = b"Full data here";
        let dropped = 5;
        let mut data = build_native_stream("test.bin", "", "", payload);
        // Truncate: remove trailing bytes so actual_size > available
        data.truncate(data.len() - dropped);

        let stream = OleNativeStream::parse(&data).unwrap();
        // The declared size is preserved even though the payload is short.
        assert_eq!(stream.actual_size, payload.len() as u32);
        // Exactly the surviving prefix of the payload is returned.
        assert_eq!(stream.data, &payload[..payload.len() - dropped]);
    }
}