oletools_rs 0.1.0

Rust port of oletools — analysis tools for Microsoft Office files (VBA macros, DDE, OLE objects, RTF exploits)
Documentation
//! DDE detection in Word OOXML (.docx) files.
//!
//! Scans XML for:
//! - `w:fldChar` + `w:instrText` combinations
//! - `w:fldSimple` elements with `w:instr` attribute

use std::io::{Cursor, Read};

use quick_xml::events::Event;
use quick_xml::Reader;

use crate::error::{Error, Result};
use crate::msodde::field_parser::{self, DdeField};

/// Scan a Word OOXML (.docx) file for DDE fields.
///
/// `data` holds the raw bytes of the `.docx` container. Every archive entry whose
/// lower-cased name contains `word/document`, `word/header` or `word/footer` and ends
/// with `.xml` is read and handed to [`extract_fields_from_xml`]; the fields found in
/// all of those parts are returned in archive order.
///
/// # Errors
///
/// Returns [`Error::InvalidOoxml`] when `data` cannot be opened as a ZIP archive.
/// Failures while reading a selected entry and errors reported by
/// [`extract_fields_from_xml`] are propagated to the caller.
pub fn process_docx(data: &[u8]) -> Result<Vec<DdeField>> {
    // Name fragments that identify the document body, headers and footers.
    const PART_MARKERS: [&str; 3] = ["word/document", "word/header", "word/footer"];

    let mut archive = zip::ZipArchive::new(Cursor::new(data))
        .map_err(|e| Error::InvalidOoxml(format!("Invalid ZIP: {e}")))?;

    // First pass: remember the names of the parts worth scanning. Entries that cannot
    // be opened by index are skipped.
    let mut part_names: Vec<String> = Vec::new();
    for index in 0..archive.len() {
        let Ok(entry) = archive.by_index(index) else {
            continue;
        };
        let lowered = entry.name().to_lowercase();
        let is_word_part = PART_MARKERS.iter().any(|marker| lowered.contains(*marker));
        if is_word_part && lowered.ends_with(".xml") {
            part_names.push(entry.name().to_string());
        }
    }

    // Second pass: read each selected part and collect its DDE fields.
    let mut found = Vec::new();
    for name in &part_names {
        // A part that can no longer be opened by name is skipped.
        let Ok(mut entry) = archive.by_name(name) else {
            continue;
        };
        let mut content = Vec::new();
        entry.read_to_end(&mut content)?;
        if content.is_empty() {
            continue;
        }

        found.extend(extract_fields_from_xml(&content)?);
    }

    Ok(found)
}

/// Extract DDE fields from Word XML content.
fn extract_fields_from_xml(xml_data: &[u8]) -> Result<Vec<DdeField>> {
    let mut reader = Reader::from_reader(Cursor::new(xml_data));
    reader.config_mut().trim_text(true);

    let mut fields = Vec::new();
    let mut buf = Vec::new();
    let mut in_field = false;
    let mut current_instruction = String::new();

    loop {
        match reader.read_event_into(&mut buf) {
            Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => {
                let local_name =
                    String::from_utf8_lossy(e.local_name().as_ref()).to_string();

                match local_name.as_str() {
                    "fldChar" => {
                        // Check fldCharType attribute
                        for attr in e.attributes().flatten() {
                            let key = String::from_utf8_lossy(attr.key.local_name().as_ref())
                                .to_string();
                            if key == "fldCharType" {
                                let value =
                                    String::from_utf8_lossy(&attr.value).to_string();
                                match value.as_str() {
                                    "begin" => {
                                        in_field = true;
                                        current_instruction.clear();
                                    }
                                    "end" => {
                                        if in_field {
                                            process_instruction(
                                                &current_instruction,
                                                &mut fields,
                                            );
                                            in_field = false;
                                            current_instruction.clear();
                                        }
                                    }
                                    _ => {}
                                }
                            }
                        }
                    }
                    "fldSimple" => {
                        // Check w:instr attribute
                        for attr in e.attributes().flatten() {
                            let key = String::from_utf8_lossy(attr.key.local_name().as_ref())
                                .to_string();
                            if key == "instr" {
                                let value =
                                    String::from_utf8_lossy(&attr.value).to_string();
                                process_instruction(&value, &mut fields);
                            }
                        }
                    }
                    _ => {}
                }
            }
            Ok(Event::Text(ref e)) => {
                if in_field {
                    current_instruction
                        .push_str(&e.unescape().unwrap_or_default());
                }
            }
            Ok(Event::Eof) => break,
            Err(e) => {
                return Err(Error::XmlParsing(format!("Error parsing Word XML: {e}")));
            }
            _ => {}
        }
        buf.clear();
    }

    Ok(fields)
}

/// Evaluate one field instruction and append any DDE field it yields to `fields`.
///
/// The instruction is trimmed and then checked twice: once as written, and once after
/// `field_parser::decode_quote_field` has decoded it. A field found through the
/// decoded form records the decoded text in `quote_decoded`. Both checks run
/// independently, so a single instruction can contribute up to two entries.
fn process_instruction(instruction: &str, fields: &mut Vec<DdeField>) {
    let text = instruction.trim();

    // Plain DDE instruction.
    let direct = field_parser::is_dde_field(text)
        .then(|| field_parser::parse_dde_field(text))
        .flatten();
    fields.extend(direct);

    // Check for QUOTE-encoded DDE
    let Some(decoded) = field_parser::decode_quote_field(text) else {
        return;
    };
    if !field_parser::is_dde_field(&decoded) {
        return;
    }
    if let Some(mut field) = field_parser::parse_dde_field(&decoded) {
        field.quote_decoded = Some(decoded);
        fields.push(field);
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Run the extractor on `xml`, panicking if it reports an error.
    fn scan(xml: &[u8]) -> Vec<DdeField> {
        extract_fields_from_xml(xml).unwrap()
    }

    /// A `w:fldSimple` element whose `w:instr` attribute holds a DDEAUTO instruction
    /// yields exactly one field.
    #[test]
    fn test_extract_fldsimple_dde() {
        let document = br#"<?xml version="1.0"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
  <w:body>
    <w:p>
      <w:fldSimple w:instr=" DDEAUTO cmd.exe /c calc ">
        <w:r><w:t>result</w:t></w:r>
      </w:fldSimple>
    </w:p>
  </w:body>
</w:document>"#;

        let found = scan(document);
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].source, "cmd.exe");
    }

    /// A begin / instrText / end sequence of `w:fldChar` runs yields exactly one field.
    #[test]
    fn test_extract_fldchar_dde() {
        let document = br#"<?xml version="1.0"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
  <w:body>
    <w:p>
      <w:r><w:fldChar w:fldCharType="begin"/></w:r>
      <w:r><w:instrText> DDEAUTO Excel Sheet1!R1C1 </w:instrText></w:r>
      <w:r><w:fldChar w:fldCharType="end"/></w:r>
    </w:p>
  </w:body>
</w:document>"#;

        let found = scan(document);
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].source, "Excel");
    }

    /// A non-DDE field (here a DATE field) produces no results.
    #[test]
    fn test_extract_no_dde() {
        let document = br#"<?xml version="1.0"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
  <w:body>
    <w:p>
      <w:fldSimple w:instr=" DATE \@ &quot;yyyy-MM-dd&quot; ">
        <w:r><w:t>2024-01-01</w:t></w:r>
      </w:fldSimple>
    </w:p>
  </w:body>
</w:document>"#;

        assert!(scan(document).is_empty());
    }

    /// A document consisting of a single empty root element produces no results.
    #[test]
    fn test_extract_empty_xml() {
        let document = br#"<?xml version="1.0"?><w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"/>"#;
        assert!(scan(document).is_empty());
    }
}