use std::collections::HashMap;
use std::io::Read;
use std::path::PathBuf;
use byteorder::{LittleEndian, ReadBytesExt};
use log::Level;
use cfb::{Cfb, XlsEncoding};
use utils::read_u16;
/// Errors that can be produced while reading a VBA project.
#[derive(Debug)]
pub enum VbaError {
    /// Error coming from the compound-file (`cfb`) layer.
    Cfb(::cfb::CfbError),
    /// Underlying I/O error.
    Io(::std::io::Error),
    /// No module is stored under the requested name; the payload is that name.
    ModuleNotFound(String),
    /// A value was read that the parser does not recognise.
    Unknown {
        /// Short description of the kind of value being read.
        typ: &'static str,
        /// The unrecognised value.
        val: u16,
    },
    /// A libid string did not split into at least two `#`-separated parts.
    LibId,
    /// A record identifier differed from the one the parser required.
    InvalidRecordId {
        /// Identifier the parser required at this position.
        expected: u16,
        /// Identifier actually read from the stream.
        found: u16,
    },
}
// NOTE(review): `from_err!` is defined outside this section. Presumably each
// invocation generates a `From<source error>` impl that wraps the source error
// in the named `VbaError` variant (the `?` conversions in this file rely on
// such impls) — confirm against the macro definition.
from_err!(::cfb::CfbError, VbaError, Cfb);
from_err!(::std::io::Error, VbaError, Io);
impl std::fmt::Display for VbaError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
VbaError::Io(e) => write!(f, "I/O error: {}", e),
VbaError::Cfb(e) => write!(f, "Cfb error: {}", e),
VbaError::ModuleNotFound(e) => write!(f, "Cannot find module '{}'", e),
VbaError::Unknown { typ, val } => write!(f, "Unknown {} '{:X}'", typ, val),
VbaError::LibId => write!(f, "Unexpected libid format"),
VbaError::InvalidRecordId { expected, found } => write!(
f,
"Invalid record id: expecting {:X} found {:X}",
expected, found
),
}
}
}
impl std::error::Error for VbaError {
    /// Returns the wrapped error for the `Io` and `Cfb` variants, `None` otherwise.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match *self {
            VbaError::Cfb(ref inner) => Some(inner),
            VbaError::Io(ref inner) => Some(inner),
            // These variants do not wrap another error.
            VbaError::ModuleNotFound(..)
            | VbaError::Unknown { .. }
            | VbaError::LibId
            | VbaError::InvalidRecordId { .. } => None,
        }
    }
}
/// An in-memory view of a VBA project: its references and its module contents.
#[allow(dead_code)]
#[derive(Clone)]
pub struct VbaProject {
    // References parsed from the `dir` stream.
    references: Vec<Reference>,
    // Decompressed module contents, keyed by module name.
    modules: HashMap<String, Vec<u8>>,
    // Encoding used to decode module bytes into a `String`.
    encoding: XlsEncoding,
}
impl VbaProject {
pub fn new<R: Read>(r: &mut R, len: usize) -> Result<VbaProject, VbaError> {
let mut cfb = Cfb::new(r, len)?;
VbaProject::from_cfb(r, &mut cfb)
}
pub fn from_cfb<R: Read>(r: &mut R, cfb: &mut Cfb) -> Result<VbaProject, VbaError> {
let stream = cfb.get_stream("dir", r)?;
let stream = ::cfb::decompress_stream(&*stream)?;
let stream = &mut &*stream;
let encoding = read_dir_information(stream)?;
let refs = Reference::from_stream(stream, &encoding)?;
let mods: Vec<Module> = read_modules(stream, &encoding)?;
let modules: HashMap<String, Vec<u8>> = mods
.into_iter()
.map(|m| {
cfb.get_stream(&m.stream_name, r).and_then(|s| {
::cfb::decompress_stream(&s[m.text_offset..]).map(move |s| (m.name, s))
})
})
.collect::<Result<HashMap<_, _>, _>>()?;
Ok(VbaProject {
references: refs,
modules: modules,
encoding: encoding,
})
}
pub fn get_references(&self) -> &[Reference] {
&self.references
}
pub fn get_module_names(&self) -> Vec<&str> {
self.modules.keys().map(|k| &**k).collect()
}
pub fn get_module(&self, name: &str) -> Result<String, VbaError> {
debug!("read module {}", name);
let data = self.get_module_raw(name)?;
Ok(self.encoding.decode_all(data))
}
pub fn get_module_raw(&self, name: &str) -> Result<&[u8], VbaError> {
match self.modules.get(name) {
Some(m) => Ok(&**m),
None => return Err(VbaError::ModuleNotFound(name.into())),
}
}
}
/// A reference declared by a VBA project.
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct Reference {
    /// Name of the reference, decoded from the stream.
    pub name: String,
    /// Description: starts as a copy of `name` and is replaced by the last
    /// `#`-separated part of a libid when one is parsed.
    pub description: String,
    /// Path of the referenced library or project; empty until a libid or a
    /// project record supplies one.
    pub path: PathBuf,
}
impl Reference {
pub fn is_missing(&self) -> bool {
!self.path.exists()
}
fn from_stream(stream: &mut &[u8], encoding: &XlsEncoding) -> Result<Vec<Reference>, VbaError> {
debug!("read all references metadata");
let mut references = Vec::new();
let mut reference = Reference {
name: "".to_string(),
description: "".to_string(),
path: "".into(),
};
loop {
let check = stream.read_u16::<LittleEndian>();
match check? {
0x000F => {
if !reference.name.is_empty() {
references.push(reference);
}
break;
}
0x0016 => {
if !reference.name.is_empty() {
references.push(reference);
}
let name = read_variable_record(stream, 1)?;
let name = encoding.decode_all(name);
reference = Reference {
name: name.clone(),
description: name,
path: "".into(),
};
check_variable_record(0x003E, stream)?;
}
0x0033 => {
reference.set_libid(stream, encoding)?;
}
0x002F => {
*stream = &stream[4..];
reference.set_libid(stream, encoding)?;
*stream = &stream[6..];
match stream.read_u16::<LittleEndian>()? {
0x0016 => {
read_variable_record(stream, 1)?;
check_variable_record(0x003E, stream)?;
check_record(0x0030, stream)?;
}
0x0030 => (),
e => {
return Err(VbaError::Unknown {
typ: "token in reference control",
val: e,
});
}
}
*stream = &stream[4..];
reference.set_libid(stream, encoding)?;
*stream = &stream[26..];
}
0x000D => {
*stream = &stream[4..];
reference.set_libid(stream, encoding)?;
*stream = &stream[6..];
}
0x000E => {
*stream = &stream[4..];
let absolute = read_variable_record(stream, 1)?;
{
let absolute = encoding.decode_all(absolute);
reference.path = if absolute.starts_with("*\\C") {
absolute[3..].into()
} else {
absolute.into()
};
}
read_variable_record(stream, 1)?;
*stream = &stream[6..];
}
c => {
return Err(VbaError::Unknown {
typ: "check id",
val: c,
});
}
}
}
debug!("references: {:#?}", references);
Ok(references)
}
fn set_libid(&mut self, stream: &mut &[u8], encoding: &XlsEncoding) -> Result<(), VbaError> {
let libid = read_variable_record(stream, 1)?;
if libid.is_empty() || libid.ends_with(b"##") {
return Ok(());
}
let libid = encoding.decode_all(libid);
let mut parts = libid.rsplit('#');
match (parts.next(), parts.next()) {
(Some(desc), Some(path)) => {
self.description = desc.into();
if !path.is_empty() && self.path.as_os_str().is_empty() {
self.path = path.into();
}
Ok(())
}
_ => return Err(VbaError::LibId),
}
}
}
/// Metadata describing one module, as read from the `dir` stream.
#[derive(Debug, Clone, Default)]
struct Module {
    // Module name; used as the key under which the module's contents are stored.
    name: String,
    // Name of the compound-file stream that holds the module.
    stream_name: String,
    // Byte offset within that stream from which the data is decompressed.
    text_offset: usize,
}
fn read_dir_information(stream: &mut &[u8]) -> Result<XlsEncoding, VbaError> {
debug!("read dir header");
*stream = &stream[30..];
let encoding = XlsEncoding::from_codepage(read_u16(&stream[6..8]))?;
*stream = &stream[8..];
check_variable_record(0x0004, stream)?;
check_variable_record(0x0005, stream)?;
check_variable_record(0x0040, stream)?;
check_variable_record(0x0006, stream)?;
check_variable_record(0x003D, stream)?;
*stream = &stream[32..];
check_variable_record(0x000C, stream)?;
check_variable_record(0x003C, stream)?;
Ok(encoding)
}
fn read_modules(stream: &mut &[u8], encoding: &XlsEncoding) -> Result<Vec<Module>, VbaError> {
debug!("read all modules metadata");
*stream = &stream[4..];
let module_len = stream.read_u16::<LittleEndian>()? as usize;
*stream = &stream[8..];
let mut modules = Vec::with_capacity(module_len);
for _ in 0..module_len {
let name = check_variable_record(0x0019, stream)?;
let name = encoding.decode_all(name);
check_variable_record(0x0047, stream)?;
let stream_name = check_variable_record(0x001A, stream)?;
let stream_name = encoding.decode_all(stream_name);
check_variable_record(0x0032, stream)?;
check_variable_record(0x001C, stream)?;
check_variable_record(0x0048, stream)?;
check_record(0x0031, stream)?;
*stream = &stream[4..];
let offset = stream.read_u32::<LittleEndian>()? as usize;
check_record(0x001E, stream)?;
*stream = &stream[8..];
check_record(0x002C, stream)?;
*stream = &stream[6..];
match stream.read_u16::<LittleEndian>()? {
0x0021 |
0x0022 => (),
e => return Err(VbaError::Unknown { typ: "module typ", val: e }),
}
loop {
*stream = &stream[4..];
match stream.read_u16::<LittleEndian>() {
Ok(0x0025) | Ok(0x0028) => (),
Ok(0x002B) => break,
Ok(e) => return Err(VbaError::Unknown { typ: "record id", val: e }),
Err(e) => return Err(VbaError::Io(e)),
}
}
*stream = &stream[4..];
modules.push(Module {
name: name,
stream_name: stream_name,
text_offset: offset,
});
}
Ok(modules)
}
fn read_variable_record<'a>(r: &mut &'a [u8], mult: usize) -> Result<&'a [u8], VbaError> {
let len = r.read_u32::<LittleEndian>()? as usize * mult;
let (read, next) = r.split_at(len);
*r = next;
Ok(read)
}
/// Validates that the next record id equals `id`, then reads the record's
/// length-prefixed payload and returns it.
///
/// A warning is logged (when the `Warn` level is enabled) for payloads larger
/// than 100 000 bytes; the payload is still returned in that case.
fn check_variable_record<'a>(id: u16, r: &mut &'a [u8]) -> Result<&'a [u8], VbaError> {
    check_record(id, r)?;
    let payload = read_variable_record(r, 1)?;
    let size = payload.len();
    if log_enabled!(Level::Warn) && size > 100_000 {
        warn!(
            "record id {} as a suspicious huge length of {} (hex: {:x})",
            id, size, size as u32
        );
    }
    Ok(payload)
}
/// Reads a little-endian `u16` record id from `r` and checks it against `id`.
///
/// Returns `VbaError::InvalidRecordId` when the ids differ; a failure to read
/// the id is propagated with `?`.
fn check_record(id: u16, r: &mut &[u8]) -> Result<(), VbaError> {
    debug!("check record {:x}", id);
    match r.read_u16::<LittleEndian>()? {
        found if found == id => Ok(()),
        found => Err(VbaError::InvalidRecordId {
            expected: id,
            found: found,
        }),
    }
}