use crate::common::codepages;
use crate::error::Result;
use crate::vba::decompressor;
/// Project-level information collected from a VBA `dir` stream by
/// `VbaProject::parse_dir`.
#[derive(Debug, Clone)]
pub struct VbaProject {
/// Code page taken from the `PROJECTCODEPAGE` record; the parser starts from
/// 1252 and keeps that value when no such record is seen. It is passed to
/// `decode_bytes` for every byte string decoded afterwards.
pub codepage: u16,
/// Project name decoded from the `PROJECTNAME` record (empty if absent).
pub name: String,
/// Project description decoded from the `PROJECTDOCSTRING` record (empty if absent).
pub description: String,
/// Little-endian `u32` read from the `PROJECTHELPCONTEXT` record (0 if absent).
pub help_context: u32,
/// One descriptor per module record group, in the order they were parsed.
pub modules: Vec<ModuleDescriptor>,
/// One entry per reference record encountered, in the order they were parsed.
pub references: Vec<VbaReference>,
}
/// Description of a single module as listed in the `dir` stream.
#[derive(Debug, Clone)]
pub struct ModuleDescriptor {
/// Module name from the `MODULENAME` record; replaced by the
/// `MODULENAMEUNICODE` payload when that record is present and is valid UTF-16.
pub name: String,
/// Name decoded from the `MODULESTREAMNAME` record (empty if absent).
pub stream_name: String,
/// Little-endian `u32` read from the `MODULEOFFSET` record (0 if absent).
/// Presumably the offset of the compressed source inside the module
/// stream - confirm against the code that consumes it.
pub text_offset: u32,
/// Kind of module; see [`ModuleType`] for which values the parser assigns.
pub module_type: ModuleType,
/// `true` when a `MODULEPRIVATE` record was seen for this module.
pub is_private: bool,
}
/// Kind of a VBA module.
///
/// The `dir` parser in this file only ever assigns `Standard` and `Document`;
/// `Class` and `Form` are not produced by any code visible here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ModuleType {
/// Default value; also assigned when a `MODULETYPEPROCEDURAL` (0x0021) record is seen.
Standard,
/// Not assigned by the parser in this file.
Class,
/// Assigned when a `MODULETYPEDOCUMENT` (0x0022) record is seen.
Document,
/// Not assigned by the parser in this file.
Form,
}
/// A reference entry found in the `dir` stream.
#[derive(Debug, Clone)]
pub struct VbaReference {
/// Copy of the most recently decoded `REFERENCENAME` payload at the time the
/// reference record was encountered (empty if none had been seen yet).
pub name: String,
/// Which reference record produced this entry.
pub ref_type: ReferenceType,
}
/// Kind of reference record that produced a [`VbaReference`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReferenceType {
/// Produced by a `REFERENCEREGISTERED` (0x000D) record.
Registered,
/// Produced by a `REFERENCEPROJECT` (0x000E) record.
Project,
/// Produced by a `REFERENCECONTROL` (0x002F) record.
Control,
/// Produced by a `REFERENCEORIGINAL` (0x0033) record.
Original,
}
// Record identifiers matched by the `dir` stream parser below. Each record is
// read as a little-endian u16 id followed by a little-endian u32 field.
// The names presumably follow the MS-OVBA specification - confirm there
// before relying on the exact meaning of an individual id.

// Project information records.
const PROJECTSYSKIND: u16 = 0x0001;
const PROJECTLCID: u16 = 0x0002;
const PROJECTLCIDINVOKE: u16 = 0x0014;
const PROJECTCODEPAGE: u16 = 0x0003;
const PROJECTNAME: u16 = 0x0004;
const PROJECTDOCSTRING: u16 = 0x0005;
const PROJECTHELPFILEPATH: u16 = 0x0006;
const PROJECTHELPCONTEXT: u16 = 0x0007;
const PROJECTLIBFLAGS: u16 = 0x0008;
const PROJECTVERSION: u16 = 0x0009;
const PROJECTCONSTANTS: u16 = 0x000C;
// Reference records; REFERENCENAME supplies the name used for the reference
// records that follow it.
const REFERENCEREGISTERED: u16 = 0x000D;
const REFERENCEPROJECT: u16 = 0x000E;
const REFERENCECONTROL: u16 = 0x002F;
const REFERENCEORIGINAL: u16 = 0x0033;
const REFERENCENAME: u16 = 0x0016;
// PROJECTMODULES ends the project/reference section; an optional
// PROJECTCOOKIE record is skipped right after it.
const PROJECTMODULES: u16 = 0x000F;
const PROJECTCOOKIE: u16 = 0x0013;
// Per-module records; a module starts at MODULENAME and ends at MODULEEND.
const MODULENAME: u16 = 0x0019;
const MODULENAMEUNICODE: u16 = 0x0047;
const MODULESTREAMNAME: u16 = 0x001A;
const MODULEDOCSTRING: u16 = 0x001C;
const MODULEOFFSET: u16 = 0x0031;
const MODULEHELPCONTEXT: u16 = 0x001E;
const MODULECOOKIE: u16 = 0x002C;
const MODULETYPEPROCEDURAL: u16 = 0x0021;
const MODULETYPEDOCUMENT: u16 = 0x0022;
const MODULEPRIVATE: u16 = 0x0028;
const MODULEEND: u16 = 0x002B;
impl VbaProject {
pub fn from_dir_stream(compressed_dir: &[u8]) -> Result<Self> {
let dir_data = decompressor::decompress_stream(compressed_dir)?;
Self::parse_dir(&dir_data)
}
pub fn parse_dir(data: &[u8]) -> Result<Self> {
let mut pos = 0;
let mut codepage = 1252u16;
let mut name = String::new();
let mut description = String::new();
let mut help_context = 0u32;
let mut modules = Vec::new();
let mut references = Vec::new();
let mut current_ref_name = String::new();
while pos + 6 <= data.len() {
let record_id = read_u16(data, pos);
let record_size = read_u32(data, pos + 2) as usize;
let record_data_start = pos + 6;
let record_data_end = record_data_start + record_size;
if record_data_end > data.len() {
break;
}
match record_id {
PROJECTCODEPAGE => {
if record_size >= 2 {
codepage = read_u16(data, record_data_start);
}
pos = record_data_end;
}
PROJECTNAME => {
name = decode_bytes(data, record_data_start, record_size, codepage);
pos = record_data_end;
}
PROJECTDOCSTRING => {
description = decode_bytes(data, record_data_start, record_size, codepage);
pos = record_data_end;
if pos + 6 <= data.len() {
let _unicode_id = read_u16(data, pos);
let unicode_size = read_u32(data, pos + 2) as usize;
pos = pos + 6 + unicode_size;
}
}
PROJECTHELPFILEPATH => {
pos = record_data_end;
if pos + 6 <= data.len() {
let _unicode_id = read_u16(data, pos);
let unicode_size = read_u32(data, pos + 2) as usize;
pos = pos + 6 + unicode_size;
}
}
PROJECTHELPCONTEXT => {
if record_size >= 4 {
help_context = read_u32(data, record_data_start);
}
pos = record_data_end;
}
PROJECTVERSION => {
pos = record_data_start + 4 + 2;
}
PROJECTCONSTANTS => {
pos = record_data_end;
if pos + 6 <= data.len() {
let _unicode_id = read_u16(data, pos);
let unicode_size = read_u32(data, pos + 2) as usize;
pos = pos + 6 + unicode_size;
}
}
REFERENCENAME => {
current_ref_name = decode_bytes(data, record_data_start, record_size, codepage);
pos = record_data_end;
if pos + 6 <= data.len() {
let next_id = read_u16(data, pos);
if next_id == 0x003E {
let unicode_size = read_u32(data, pos + 2) as usize;
pos = pos + 6 + unicode_size;
}
}
}
REFERENCEREGISTERED => {
references.push(VbaReference {
name: current_ref_name.clone(),
ref_type: ReferenceType::Registered,
});
pos = record_data_end;
}
REFERENCEPROJECT => {
references.push(VbaReference {
name: current_ref_name.clone(),
ref_type: ReferenceType::Project,
});
pos = record_data_end;
}
REFERENCECONTROL => {
references.push(VbaReference {
name: current_ref_name.clone(),
ref_type: ReferenceType::Control,
});
pos = record_data_end;
Self::skip_control_reference(data, &mut pos);
}
REFERENCEORIGINAL => {
references.push(VbaReference {
name: current_ref_name.clone(),
ref_type: ReferenceType::Original,
});
pos = record_data_end;
}
PROJECTMODULES => {
pos = record_data_end;
break;
}
PROJECTSYSKIND | PROJECTLCID | PROJECTLCIDINVOKE | PROJECTLIBFLAGS => {
pos = record_data_end;
}
_ => {
pos = record_data_end;
}
}
}
if pos + 6 <= data.len() {
let id = read_u16(data, pos);
if id == PROJECTCOOKIE {
let size = read_u32(data, pos + 2) as usize;
pos = pos + 6 + size;
}
}
while pos + 6 <= data.len() {
let record_id = read_u16(data, pos);
let record_size = read_u32(data, pos + 2) as usize;
let record_data_start = pos + 6;
let record_data_end = record_data_start + record_size;
if record_data_end > data.len() {
break;
}
if record_id == MODULENAME {
let module = Self::parse_module(data, &mut pos, codepage)?;
modules.push(module);
} else {
pos = record_data_end;
}
}
Ok(VbaProject {
codepage,
name,
description,
help_context,
modules,
references,
})
}
fn parse_module(
data: &[u8],
pos: &mut usize,
codepage: u16,
) -> Result<ModuleDescriptor> {
let mut name = String::new();
let mut stream_name = String::new();
let mut text_offset = 0u32;
let mut module_type = ModuleType::Standard;
let mut is_private = false;
while *pos + 6 <= data.len() {
let record_id = read_u16(data, *pos);
let record_size = read_u32(data, *pos + 2) as usize;
let record_data_start = *pos + 6;
let record_data_end = record_data_start + record_size;
if record_data_end > data.len() && record_id != MODULEEND {
break;
}
match record_id {
MODULENAME => {
name = decode_bytes(data, record_data_start, record_size, codepage);
*pos = record_data_end;
}
MODULENAMEUNICODE => {
if record_size >= 2 {
let u16s: Vec<u16> = data[record_data_start..record_data_end]
.chunks_exact(2)
.map(|c| u16::from_le_bytes([c[0], c[1]]))
.collect();
if let Ok(s) = String::from_utf16(&u16s) {
name = s.trim_end_matches('\0').to_string();
}
}
*pos = record_data_end;
}
MODULESTREAMNAME => {
stream_name = decode_bytes(data, record_data_start, record_size, codepage);
*pos = record_data_end;
if *pos + 6 <= data.len() {
let _uid = read_u16(data, *pos);
let usize_ = read_u32(data, *pos + 2) as usize;
*pos = *pos + 6 + usize_;
}
}
MODULEDOCSTRING => {
*pos = record_data_end;
if *pos + 6 <= data.len() {
let _uid = read_u16(data, *pos);
let usize_ = read_u32(data, *pos + 2) as usize;
*pos = *pos + 6 + usize_;
}
}
MODULEOFFSET => {
if record_size >= 4 {
text_offset = read_u32(data, record_data_start);
}
*pos = record_data_end;
}
MODULEHELPCONTEXT | MODULECOOKIE => {
*pos = record_data_end;
}
MODULETYPEPROCEDURAL => {
module_type = ModuleType::Standard;
*pos = record_data_end;
}
MODULETYPEDOCUMENT => {
module_type = ModuleType::Document;
*pos = record_data_end;
}
MODULEPRIVATE => {
is_private = true;
*pos = record_data_end;
}
MODULEEND => {
*pos += 6; break;
}
_ => {
*pos = record_data_end;
}
}
}
Ok(ModuleDescriptor {
name,
stream_name,
text_offset,
module_type,
is_private,
})
}
fn skip_control_reference(data: &[u8], pos: &mut usize) {
while *pos + 6 <= data.len() {
let id = read_u16(data, *pos);
match id {
0x0030 => {
let size = read_u32(data, *pos + 2) as usize;
*pos = *pos + 6 + size;
}
0x0016 | 0x003E => {
let size = read_u32(data, *pos + 2) as usize;
*pos = *pos + 6 + size;
}
_ => break,
}
}
}
}
/// Reads a little-endian `u16` from `data` at `offset`.
///
/// Returns 0 when the two bytes are not fully inside `data`, including when
/// `offset + 2` would overflow `usize`.
fn read_u16(data: &[u8], offset: usize) -> u16 {
    offset
        .checked_add(2)
        .and_then(|end| data.get(offset..end))
        .map_or(0, |bytes| u16::from_le_bytes([bytes[0], bytes[1]]))
}
/// Reads a little-endian `u32` from `data` at `offset`.
///
/// Returns 0 when the four bytes are not fully inside `data`, including when
/// `offset + 4` would overflow `usize`.
fn read_u32(data: &[u8], offset: usize) -> u32 {
    offset
        .checked_add(4)
        .and_then(|end| data.get(offset..end))
        .map_or(0, |bytes| {
            u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])
        })
}
/// Decodes `len` bytes of `data` starting at `offset` into a `String`.
///
/// The bytes are decoded with the encoding that
/// `codepages::codepage_to_encoding` returns for `codepage`; when it returns
/// `None` the bytes are interpreted as UTF-8 with lossy replacement.
///
/// Returns an empty string when the requested range is not fully inside
/// `data`, including when `offset + len` would overflow `usize`.
fn decode_bytes(data: &[u8], offset: usize, len: usize, codepage: u16) -> String {
    let bytes = match offset
        .checked_add(len)
        .and_then(|end| data.get(offset..end))
    {
        Some(bytes) => bytes,
        None => return String::new(),
    };
    match codepages::codepage_to_encoding(codepage) {
        Some(encoding) => {
            let (decoded, _, _) = encoding.decode(bytes);
            decoded.into_owned()
        }
        None => String::from_utf8_lossy(bytes).into_owned(),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds one `dir` record: little-endian id, little-endian payload
    /// length, then the payload bytes.
    fn record(id: u16, payload: &[u8]) -> Vec<u8> {
        let mut out = Vec::with_capacity(6 + payload.len());
        out.extend_from_slice(&id.to_le_bytes());
        out.extend_from_slice(&(payload.len() as u32).to_le_bytes());
        out.extend_from_slice(payload);
        out
    }

    #[test]
    fn test_read_u16() {
        let data = [0x03, 0x00, 0xFF, 0x7F];
        assert_eq!(read_u16(&data, 0), 3);
        assert_eq!(read_u16(&data, 2), 0x7FFF);
        // A read that would run past the end yields 0 instead of panicking.
        assert_eq!(read_u16(&data, 3), 0);
    }

    #[test]
    fn test_read_u32() {
        let data = [0x01, 0x00, 0x00, 0x00];
        assert_eq!(read_u32(&data, 0), 1);
        // A read that would run past the end yields 0 instead of panicking.
        assert_eq!(read_u32(&data, 1), 0);
    }

    #[test]
    fn test_decode_bytes_utf8() {
        let data = b"Hello";
        let result = decode_bytes(data, 0, 5, 65001);
        assert_eq!(result, "Hello");
    }

    #[test]
    fn test_decode_bytes_out_of_range() {
        let data = b"Hello";
        assert_eq!(decode_bytes(data, 3, 5, 65001), "");
    }

    #[test]
    fn test_decode_bytes_windows_1252() {
        let data = [0xC9, 0x6C, 0xE8, 0x76, 0x65];
        let result = decode_bytes(&data, 0, 5, 1252);
        assert!(!result.is_empty());
    }

    #[test]
    fn test_module_type() {
        assert_ne!(ModuleType::Standard, ModuleType::Class);
        assert_ne!(ModuleType::Document, ModuleType::Form);
    }

    #[test]
    fn test_parse_dir_minimal() {
        let mut data = Vec::new();
        data.extend(record(PROJECTCODEPAGE, &1252u16.to_le_bytes()));
        data.extend(record(PROJECTNAME, b"Foo"));
        data.extend(record(PROJECTMODULES, &1u16.to_le_bytes()));
        data.extend(record(PROJECTCOOKIE, &[0xFF, 0xFF]));
        data.extend(record(MODULENAME, b"Mod1"));
        data.extend(record(MODULEOFFSET, &0x10u32.to_le_bytes()));
        data.extend(record(MODULETYPEDOCUMENT, &[]));
        data.extend(record(MODULEPRIVATE, &[]));
        data.extend(record(MODULEEND, &[]));

        let project = match VbaProject::parse_dir(&data) {
            Ok(project) => project,
            Err(_) => panic!("parse_dir failed on a well-formed stream"),
        };

        assert_eq!(project.codepage, 1252);
        assert_eq!(project.name, "Foo");
        assert!(project.references.is_empty());
        assert_eq!(project.modules.len(), 1);

        let module = &project.modules[0];
        assert_eq!(module.name, "Mod1");
        assert_eq!(module.text_offset, 0x10);
        assert_eq!(module.module_type, ModuleType::Document);
        assert!(module.is_private);
    }
}