use std::io::{Cursor, Read};
use std::path::Path;
use base64::Engine as _;
use crate::error::{Error, Result};
use crate::ole::container::OleFile;
use crate::vba::module::VbaModule;
use crate::vba::project::{ModuleType, VbaProject};
use crate::vba::scanner::{Finding, VbaScanner};
/// One VBA module recovered from a document, together with where it was found.
#[derive(Debug, Clone)]
pub struct MacroInfo {
/// Name of the parsed input, prefixed with a container marker when the module
/// came from an embedded OLE file (the archive entry path followed by `//`
/// for OOXML, or `FlatOPC//` for XML-embedded projects).
pub filename: String,
/// Path of the OLE stream the module was read from.
pub stream_path: String,
/// Module name taken from the VBA project's module descriptor.
pub name: String,
/// Source text returned by `VbaModule::extract_source` for this module.
pub code: String,
/// Module kind copied from the VBA project's module descriptor.
pub module_type: ModuleType,
}
/// Aggregated outcome of scanning every extracted macro with `VbaScanner`.
#[derive(Debug, Clone)]
pub struct AnalysisResults {
/// Findings of all macros, concatenated in extraction order.
pub findings: Vec<Finding>,
/// Number of macros that were extracted and scanned.
pub macro_count: usize,
/// `true` when at least one finding has type `FindingType::AutoExec`.
pub has_autoexec: bool,
/// `true` when at least one finding has type `FindingType::Suspicious`.
pub has_suspicious: bool,
/// `true` when at least one finding has type `FindingType::Ioc`.
pub has_ioc: bool,
}
/// Container format of the input, as guessed by `VbaParser::detect_format`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SourceFormat {
/// Input accepted by `OleFile::is_ole`.
Ole,
/// Input starting with the ZIP local-file-header signature `PK\x03\x04`.
Ooxml,
/// Text whose leading bytes contain `<?mso-application` or `pkg:package`.
FlatOpc,
/// XML whose leading bytes carry the Word namespace URN or a
/// `w:wordDocument` / `w:document` marker.
Word2003Xml,
}
/// Holds one input document in memory and extracts or analyzes its VBA macros.
pub struct VbaParser {
/// Raw bytes of the whole input.
data: Vec<u8>,
/// File name of the input, or `<bytes>` when built from an in-memory buffer.
filename: String,
/// Detected container format; `None` when the input was not recognised.
format: Option<SourceFormat>,
}
impl VbaParser {
/// Reads the file at `path` into memory and sniffs its container format.
///
/// The stored file name is the final path component (lossily converted to
/// UTF-8), or an empty string when the path has no final component.
///
/// # Errors
///
/// Returns the converted I/O error when the file cannot be read. An
/// unrecognised format is not an error at this point.
pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
    let source = path.as_ref();
    let data = std::fs::read(source)?;
    let format = Self::detect_format(&data);
    let filename = match source.file_name() {
        Some(name) => name.to_string_lossy().into_owned(),
        None => String::new(),
    };
    Ok(Self {
        data,
        filename,
        format,
    })
}
/// Builds a parser from an in-memory buffer, copying the bytes.
///
/// The file name is set to the placeholder `<bytes>`. This constructor never
/// fails; an unrecognised format only surfaces when macros are requested.
pub fn from_bytes(data: &[u8]) -> Result<Self> {
    Ok(Self {
        format: Self::detect_format(data),
        filename: String::from("<bytes>"),
        data: data.to_vec(),
    })
}
pub fn detect_vba_macros(&self) -> Result<bool> {
match self.format {
Some(SourceFormat::Ole) => self.detect_vba_ole(),
Some(SourceFormat::Ooxml) => self.detect_vba_ooxml(),
Some(SourceFormat::FlatOpc) => self.detect_vba_flatopc(),
Some(SourceFormat::Word2003Xml) => self.detect_vba_word2003xml(),
None => Err(Error::UnsupportedFormat(
"Cannot determine file format".into(),
)),
}
}
pub fn extract_macros(&self) -> Result<Vec<MacroInfo>> {
match self.format {
Some(SourceFormat::Ole) => self.extract_macros_ole(),
Some(SourceFormat::Ooxml) => self.extract_macros_ooxml(),
Some(SourceFormat::FlatOpc) => self.extract_macros_flatopc(),
Some(SourceFormat::Word2003Xml) => self.extract_macros_word2003xml(),
None => Err(Error::UnsupportedFormat(
"Cannot determine file format".into(),
)),
}
}
/// Extracts every macro, scans each one with `VbaScanner`, and summarises
/// the combined findings.
///
/// # Errors
///
/// Propagates any error returned by [`Self::extract_macros`].
pub fn analyze(&self) -> Result<AnalysisResults> {
    use crate::vba::keywords::FindingType;

    let modules = self.extract_macros()?;

    // Findings are kept in extraction order, one module after another.
    let mut findings = Vec::new();
    for module in modules.iter() {
        findings.extend(VbaScanner::scan(&module.code));
    }

    // One shared predicate instead of three copies of the same scan.
    let has_kind = |kind: FindingType| findings.iter().any(|f| f.finding_type == kind);
    let has_autoexec = has_kind(FindingType::AutoExec);
    let has_suspicious = has_kind(FindingType::Suspicious);
    let has_ioc = has_kind(FindingType::Ioc);

    Ok(AnalysisResults {
        findings,
        macro_count: modules.len(),
        has_autoexec,
        has_suspicious,
        has_ioc,
    })
}
/// Guesses the container format from the leading bytes of `data`.
///
/// Checks are applied in this order: OLE (via `OleFile::is_ole`), ZIP
/// signature, Flat OPC markers, then Word 2003 XML markers. The XML checks
/// only look at the first 1000 bytes and require them to be UTF-8 text.
///
/// Returns `None` when nothing matches.
fn detect_format(data: &[u8]) -> Option<SourceFormat> {
    /// ZIP local-file-header signature (`PK\x03\x04`).
    const ZIP_LOCAL_HEADER: [u8; 4] = [0x50, 0x4B, 0x03, 0x04];
    /// Number of leading bytes inspected for XML markers.
    const SNIFF_LEN: usize = 1000;

    if OleFile::is_ole(data) {
        return Some(SourceFormat::Ole);
    }
    if data.starts_with(&ZIP_LOCAL_HEADER) {
        return Some(SourceFormat::Ooxml);
    }

    let head = &data[..data.len().min(SNIFF_LEN)];
    let text = match std::str::from_utf8(head) {
        Ok(text) => text,
        // The fixed-size window may cut a multi-byte character in half. That
        // shows up as an "unexpected end of input" error (`error_len()` is
        // `None`) on a window that is shorter than the data. The bytes before
        // the cut are valid, so sniff those instead of rejecting the file.
        Err(err) if data.len() > head.len() && err.error_len().is_none() => {
            std::str::from_utf8(&head[..err.valid_up_to()]).ok()?
        }
        // Genuinely invalid UTF-8: not one of the XML-based formats.
        Err(_) => return None,
    };

    if text.contains("<?mso-application") || text.contains("pkg:package") {
        return Some(SourceFormat::FlatOpc);
    }

    let is_word_xml = text.contains("<?xml")
        && (text.contains("urn:schemas-microsoft-com:office:word")
            || text.contains("w:wordDocument")
            || text.contains("w:document"));
    is_word_xml.then_some(SourceFormat::Word2003Xml)
}
/// Reports whether the OLE container holds a VBA `dir` stream.
///
/// A stream counts when its lower-cased path ends with `/dir` and contains
/// `vba` anywhere.
///
/// # Errors
///
/// Propagates the error from `OleFile::from_bytes` when the container
/// cannot be parsed.
fn detect_vba_ole(&self) -> Result<bool> {
    let container = OleFile::from_bytes(&self.data)?;
    let stream_paths = container.list_streams();
    let has_vba_dir = stream_paths.iter().any(|path| {
        let path = path.to_lowercase();
        path.ends_with("/dir") && path.contains("vba")
    });
    Ok(has_vba_dir)
}
/// Extracts macros from an input that is itself an OLE container.
///
/// No prefix is added to the reported file name.
///
/// # Errors
///
/// Propagates container-parsing and extraction errors.
fn extract_macros_ole(&self) -> Result<Vec<MacroInfo>> {
    let mut container = OleFile::from_bytes(&self.data)?;
    self.extract_macros_from_ole(&mut container, "")
}
/// Extracts every non-empty VBA module from an already opened OLE container.
///
/// The VBA project is located through the first stream whose lower-cased
/// path ends with `/dir` and contains `vba`; module streams are looked up
/// in the same storage. `prefix` is prepended to the parser's file name in
/// each returned [`MacroInfo`].
///
/// Returns an empty vector when no such `dir` stream exists. Modules whose
/// stream cannot be opened are skipped. A module whose source cannot be
/// extracted is treated as empty, and empty or whitespace-only modules are
/// not reported.
///
/// # Errors
///
/// Propagates failures to open or parse the `dir` stream.
fn extract_macros_from_ole(
    &self,
    ole: &mut OleFile,
    prefix: &str,
) -> Result<Vec<MacroInfo>> {
    let streams = ole.list_streams();
    let located = streams
        .iter()
        .find(|path| {
            let path = path.to_lowercase();
            path.ends_with("/dir") && path.contains("vba")
        })
        .cloned();
    let Some(dir_path) = located else {
        return Ok(Vec::new());
    };

    // Storage that contains the `dir` stream; empty when the path has no parent.
    let vba_storage = match dir_path.rsplit_once('/') {
        Some((parent, _)) => parent.to_string(),
        None => String::new(),
    };

    let dir_data = ole.open_stream(&dir_path)?;
    let project = VbaProject::from_dir_stream(&dir_data)?;

    let mut macros = Vec::new();
    for module_desc in &project.modules {
        let stream_path = if vba_storage.is_empty() {
            module_desc.stream_name.clone()
        } else {
            format!("{}/{}", vba_storage, module_desc.stream_name)
        };

        // Best effort: a missing or unreadable module stream is not fatal.
        let Ok(stream_data) = ole.open_stream(&stream_path) else {
            continue;
        };

        let source = VbaModule::extract_source(
            &stream_data,
            module_desc.text_offset,
            project.codepage,
        )
        .unwrap_or_default();
        if source.trim().is_empty() {
            continue;
        }

        macros.push(MacroInfo {
            filename: format!("{}{}", prefix, self.filename),
            stream_path,
            name: module_desc.name.clone(),
            code: source,
            module_type: module_desc.module_type,
        });
    }
    Ok(macros)
}
/// Reports whether the ZIP archive has an entry whose lower-cased name ends
/// with `vbaproject.bin`.
///
/// Entries that cannot be opened are ignored.
///
/// # Errors
///
/// Returns `Error::InvalidOoxml` when the data is not a readable ZIP archive.
fn detect_vba_ooxml(&self) -> Result<bool> {
    let mut archive = zip::ZipArchive::new(Cursor::new(&self.data))
        .map_err(|e| Error::InvalidOoxml(format!("Invalid ZIP: {e}")))?;
    let has_project = (0..archive.len()).any(|index| {
        archive
            .by_index(index)
            .is_ok_and(|entry| entry.name().to_lowercase().ends_with("vbaproject.bin"))
    });
    Ok(has_project)
}
fn extract_macros_ooxml(&self) -> Result<Vec<MacroInfo>> {
let cursor = Cursor::new(&self.data);
let mut archive = zip::ZipArchive::new(cursor)
.map_err(|e| Error::InvalidOoxml(format!("Invalid ZIP: {e}")))?;
let mut macros = Vec::new();
let vba_entries: Vec<String> = (0..archive.len())
.filter_map(|i| {
archive
.by_index(i)
.ok()
.filter(|e| e.name().to_lowercase().ends_with("vbaproject.bin"))
.map(|e| e.name().to_string())
})
.collect();
for vba_path in vba_entries {
let mut vba_data = Vec::new();
{
let mut entry = archive
.by_name(&vba_path)
.map_err(|e| Error::InvalidOoxml(format!("Cannot read {vba_path}: {e}")))?;
entry.read_to_end(&mut vba_data)?;
}
let mut vba_ole = OleFile::from_bytes(&vba_data)?;
let prefix = format!("{}//", vba_path);
let extracted = self.extract_macros_from_ole(&mut vba_ole, &prefix)?;
macros.extend(extracted);
}
Ok(macros)
}
/// Reports whether the document text mentions `vbaProject` or `vbaData`.
///
/// This is a plain substring check on the lossily decoded input; it does
/// not verify that an embedded project can actually be decoded.
fn detect_vba_flatopc(&self) -> Result<bool> {
    let text = String::from_utf8_lossy(&self.data);
    let has_marker = ["vbaProject", "vbaData"]
        .iter()
        .any(|marker| text.contains(*marker));
    Ok(has_marker)
}
/// Extracts macros from OLE containers embedded as base64 in the XML text.
///
/// Every `<pkg:binaryData>` and `<w:binData …>` element is examined, not
/// just the first of each kind: a document may carry several binary parts,
/// and the VBA project is not necessarily the first one. The text between
/// the tag's closing `>` and the next `<` is stripped of whitespace and
/// base64-decoded. Payloads that fail to decode or that `OleFile::is_ole`
/// rejects are skipped. Reported file names are prefixed with `FlatOPC//`.
///
/// # Errors
///
/// Propagates OLE-parsing and extraction errors for payloads that were
/// accepted by `OleFile::is_ole`.
fn extract_macros_flatopc(&self) -> Result<Vec<MacroInfo>> {
    let text = String::from_utf8_lossy(&self.data);
    let mut macros = Vec::new();

    for pattern in ["<pkg:binaryData>", "<w:binData"] {
        let mut search_from = 0;
        while let Some(offset) = text[search_from..].find(pattern) {
            let tag_start = search_from + offset;

            // Payload starts after the `>` closing the opening tag and ends
            // at the next `<`. If either delimiter is missing, no later
            // element can be complete, so stop scanning for this pattern.
            let Some(content_start) = text[tag_start..].find('>').map(|i| tag_start + i + 1)
            else {
                break;
            };
            let Some(content_end) = text[content_start..].find('<').map(|i| content_start + i)
            else {
                break;
            };
            // Always past `tag_start`, so the scan makes progress.
            search_from = content_end;

            let b64_content: String = text[content_start..content_end]
                .chars()
                .filter(|c| !c.is_whitespace())
                .collect();
            let Ok(vba_data) = base64::engine::general_purpose::STANDARD.decode(&b64_content)
            else {
                continue;
            };
            // NOTE(review): payloads that are not raw OLE (for instance a
            // compressed wrapper around the project) are skipped here —
            // confirm whether such inputs need to be supported.
            if !OleFile::is_ole(&vba_data) {
                continue;
            }

            let mut vba_ole = OleFile::from_bytes(&vba_data)?;
            macros.extend(self.extract_macros_from_ole(&mut vba_ole, "FlatOPC//")?);
        }
    }
    Ok(macros)
}
/// Reports whether the document text mentions `Microsoft.VBA` or
/// `w:binData`.
///
/// This is a plain substring check on the lossily decoded input.
fn detect_vba_word2003xml(&self) -> Result<bool> {
    let text = String::from_utf8_lossy(&self.data);
    let has_marker = ["Microsoft.VBA", "w:binData"]
        .iter()
        .any(|marker| text.contains(*marker));
    Ok(has_marker)
}
fn extract_macros_word2003xml(&self) -> Result<Vec<MacroInfo>> {
self.extract_macros_flatopc()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The OLE compound-file magic is recognised as `SourceFormat::Ole`.
    #[test]
    fn test_detect_format_ole() {
        let header: [u8; 10] = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1, 0x00, 0x00];
        let detected = VbaParser::detect_format(&header);
        assert_eq!(detected, Some(SourceFormat::Ole));
    }

    /// A ZIP local-file-header signature is recognised as `SourceFormat::Ooxml`.
    #[test]
    fn test_detect_format_zip() {
        let header: [u8; 6] = [0x50, 0x4B, 0x03, 0x04, 0x00, 0x00];
        let detected = VbaParser::detect_format(&header);
        assert_eq!(detected, Some(SourceFormat::Ooxml));
    }

    /// Bytes matching no known signature yield no format.
    #[test]
    fn test_detect_format_unknown() {
        let bytes: [u8; 4] = [0x00, 0x01, 0x02, 0x03];
        let detected = VbaParser::detect_format(&bytes);
        assert_eq!(detected, None);
    }

    /// Construction succeeds on arbitrary bytes; the unsupported format only
    /// surfaces once macros are requested.
    #[test]
    fn test_parser_invalid_data() {
        let parser = VbaParser::from_bytes(&[0x00, 0x01, 0x02, 0x03])
            .expect("from_bytes accepts arbitrary input");
        assert!(parser.detect_vba_macros().is_err());
    }
}