use std::collections::HashMap;
use std::io::Read;
use std::path::PathBuf;
use byteorder::{LittleEndian, ReadBytesExt};
use log::Level;
use cfb::{Cfb, XlsEncoding};
use utils::read_u16;
/// Errors that can occur while reading a VBA project.
#[derive(Debug, Fail)]
pub enum VbaError {
/// An error coming from the `cfb` module, displayed using the inner error's message.
#[fail(display = "{}", _0)]
Cfb(#[cause] ::cfb::CfbError),
/// A standard I/O error, displayed using the inner error's message.
#[fail(display = "{}", _0)]
Io(#[cause] ::std::io::Error),
/// No module with the requested name is stored in the project; carries that name.
#[fail(display = "Cannot find module '{}'", _0)]
ModuleNotFound(String),
/// A 16-bit value read from the stream was not one of the values the parser accepts.
#[fail(display = "Unknown {} '{:X}'", typ, val)]
Unknown {
/// Short label for the kind of value that was being read.
typ: &'static str,
/// The unrecognised value (displayed in upper-case hexadecimal).
val: u16,
},
/// A libid string did not contain at least two `#`-separated parts.
#[fail(display = "Unexpected libid format")]
LibId,
/// A record started with a different id than the one the parser required at that position.
#[fail(display = "Invalid record id: expecting {:X} found {:X}", expected, found)]
InvalidRecordId {
/// The record id the parser required.
expected: u16,
/// The record id actually read from the stream.
found: u16,
},
}
// NOTE(review): `from_err!` is defined elsewhere in the crate; presumably it generates
// `From<source error> for VbaError` impls wrapping the error in the named variant, which is
// what allows `?` on `CfbError` / `io::Error` results in this file — confirm against the macro.
from_err!(::cfb::CfbError, VbaError, Cfb);
from_err!(::std::io::Error, VbaError, Io);
/// A VBA project held in memory: its references plus the decompressed bytes of every module.
// NOTE(review): `dead_code` is allowed without a stated reason — confirm which item needs it.
#[allow(dead_code)]
#[derive(Clone)]
pub struct VbaProject {
// References parsed from the project's `dir` stream.
references: Vec<Reference>,
// Module name -> decompressed module bytes (not yet decoded to text; see `get_module`).
modules: HashMap<String, Vec<u8>>,
// Encoding obtained from the `dir` stream, used to decode names and module text.
encoding: XlsEncoding,
}
impl VbaProject {
pub fn new<R: Read>(r: &mut R, len: usize) -> Result<VbaProject, VbaError> {
let mut cfb = Cfb::new(r, len)?;
VbaProject::from_cfb(r, &mut cfb)
}
pub fn from_cfb<R: Read>(r: &mut R, cfb: &mut Cfb) -> Result<VbaProject, VbaError> {
let stream = cfb.get_stream("dir", r)?;
let stream = ::cfb::decompress_stream(&*stream)?;
let stream = &mut &*stream;
let encoding = read_dir_information(stream)?;
let refs = Reference::from_stream(stream, &encoding)?;
let mods: Vec<Module> = read_modules(stream, &encoding)?;
let modules: HashMap<String, Vec<u8>> = mods
.into_iter()
.map(|m| {
cfb.get_stream(&m.stream_name, r).and_then(|s| {
::cfb::decompress_stream(&s[m.text_offset..]).map(move |s| (m.name, s))
})
})
.collect::<Result<HashMap<_, _>, _>>()?;
Ok(VbaProject {
references: refs,
modules: modules,
encoding: encoding,
})
}
pub fn get_references(&self) -> &[Reference] {
&self.references
}
pub fn get_module_names(&self) -> Vec<&str> {
self.modules.keys().map(|k| &**k).collect()
}
pub fn get_module(&self, name: &str) -> Result<String, VbaError> {
debug!("read module {}", name);
let data = self.get_module_raw(name)?;
Ok(self.encoding.decode_all(data))
}
pub fn get_module_raw(&self, name: &str) -> Result<&[u8], VbaError> {
match self.modules.get(name) {
Some(m) => Ok(&**m),
None => return Err(VbaError::ModuleNotFound(name.into())),
}
}
}
/// A reference entry of a VBA project, as read from the project's `dir` stream.
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct Reference {
/// Name of the reference, decoded from its name record.
pub name: String,
/// Description of the reference: starts as a copy of `name` and is replaced by the last
/// `#`-separated part of a libid when one is parsed.
pub description: String,
/// Path of the referenced file, taken from a libid or from a project reference's
/// absolute path; empty when none was found.
pub path: PathBuf,
}
impl Reference {
pub fn is_missing(&self) -> bool {
!self.path.exists()
}
fn from_stream(stream: &mut &[u8], encoding: &XlsEncoding) -> Result<Vec<Reference>, VbaError> {
debug!("read all references metadata");
let mut references = Vec::new();
let mut reference = Reference {
name: "".to_string(),
description: "".to_string(),
path: "".into(),
};
loop {
let check = stream.read_u16::<LittleEndian>();
match check? {
0x000F => {
if !reference.name.is_empty() {
references.push(reference);
}
break;
}
0x0016 => {
if !reference.name.is_empty() {
references.push(reference);
}
let name = read_variable_record(stream, 1)?;
let name = encoding.decode_all(name);
reference = Reference {
name: name.clone(),
description: name,
path: "".into(),
};
check_variable_record(0x003E, stream)?; }
0x0033 => {
reference.set_libid(stream, encoding)?;
}
0x002F => {
*stream = &stream[4..]; reference.set_libid(stream, encoding)?;
*stream = &stream[6..];
match stream.read_u16::<LittleEndian>()? {
0x0016 => {
read_variable_record(stream, 1)?; check_variable_record(0x003E, stream)?; check_record(0x0030, stream)?;
}
0x0030 => (),
e => {
return Err(VbaError::Unknown {
typ: "token in reference control",
val: e,
})
}
}
*stream = &stream[4..];
reference.set_libid(stream, encoding)?;
*stream = &stream[26..];
}
0x000D => {
*stream = &stream[4..];
reference.set_libid(stream, encoding)?;
*stream = &stream[6..];
}
0x000E => {
*stream = &stream[4..];
let absolute = read_variable_record(stream, 1)?; {
let absolute = encoding.decode_all(absolute);
reference.path = if absolute.starts_with("*\\C") {
absolute[3..].into()
} else {
absolute.into()
};
}
read_variable_record(stream, 1)?; *stream = &stream[6..];
}
c => {
return Err(VbaError::Unknown {
typ: "check id",
val: c,
})
}
}
}
debug!("references: {:#?}", references);
Ok(references)
}
fn set_libid(&mut self, stream: &mut &[u8], encoding: &XlsEncoding) -> Result<(), VbaError> {
let libid = read_variable_record(stream, 1)?; if libid.is_empty() || libid.ends_with(b"##") {
return Ok(());
}
let libid = encoding.decode_all(libid);
let mut parts = libid.rsplit('#');
match (parts.next(), parts.next()) {
(Some(desc), Some(path)) => {
self.description = desc.into();
if !path.is_empty() && self.path.as_os_str().is_empty() {
self.path = path.into();
}
Ok(())
}
_ => return Err(VbaError::LibId),
}
}
}
/// Metadata of one module, as listed in the project's `dir` stream.
#[derive(Debug, Clone, Default)]
struct Module {
// Decoded module name; used as the key under which the module is stored.
name: String,
// Decoded name of the stream that holds the module's data.
stream_name: String,
// Offset inside that stream at which the compressed module text starts.
text_offset: usize,
}
fn read_dir_information(stream: &mut &[u8]) -> Result<XlsEncoding, VbaError> {
debug!("read dir header");
*stream = &stream[30..];
let encoding = XlsEncoding::from_codepage(read_u16(&stream[6..8]))?;
*stream = &stream[8..];
check_variable_record(0x0004, stream)?;
check_variable_record(0x0005, stream)?;
check_variable_record(0x0040, stream)?;
check_variable_record(0x0006, stream)?;
check_variable_record(0x003D, stream)?;
*stream = &stream[32..];
check_variable_record(0x000C, stream)?;
check_variable_record(0x003C, stream)?;
Ok(encoding)
}
fn read_modules(stream: &mut &[u8], encoding: &XlsEncoding) -> Result<Vec<Module>, VbaError> {
debug!("read all modules metadata");
*stream = &stream[4..];
let module_len = stream.read_u16::<LittleEndian>()? as usize;
*stream = &stream[8..]; let mut modules = Vec::with_capacity(module_len);
for _ in 0..module_len {
let name = check_variable_record(0x0019, stream)?;
let name = encoding.decode_all(name);
check_variable_record(0x0047, stream)?;
let stream_name = check_variable_record(0x001A, stream)?; let stream_name = encoding.decode_all(stream_name);
check_variable_record(0x0032, stream)?; check_variable_record(0x001C, stream)?; check_variable_record(0x0048, stream)?;
check_record(0x0031, stream)?;
*stream = &stream[4..];
let offset = stream.read_u32::<LittleEndian>()? as usize;
check_record(0x001E, stream)?;
*stream = &stream[8..];
check_record(0x002C, stream)?;
*stream = &stream[6..];
match stream.read_u16::<LittleEndian>()? {
0x0021 |
0x0022 => (),
e => return Err(VbaError::Unknown { typ: "module typ", val: e }),
}
loop {
*stream = &stream[4..]; match stream.read_u16::<LittleEndian>() {
Ok(0x0025) | Ok(0x0028) => (),
Ok(0x002B) => break,
Ok(e) => return Err(VbaError::Unknown { typ: "record id", val: e }),
Err(e) => return Err(VbaError::Io(e)),
}
}
*stream = &stream[4..];
modules.push(Module {
name: name,
stream_name: stream_name,
text_offset: offset,
});
}
Ok(modules)
}
fn read_variable_record<'a>(r: &mut &'a [u8], mult: usize) -> Result<&'a [u8], VbaError> {
let len = r.read_u32::<LittleEndian>()? as usize * mult;
let (read, next) = r.split_at(len);
*r = next;
Ok(read)
}
/// Checks that the next record has the id `id`, then reads and returns its
/// length-prefixed payload, advancing `r` past the whole record.
///
/// A warning is logged when the payload is longer than 100 000 bytes.
///
/// # Errors
///
/// Returns `VbaError::InvalidRecordId` when the id does not match, and any error raised
/// while reading the id or the payload.
fn check_variable_record<'a>(id: u16, r: &mut &'a [u8]) -> Result<&'a [u8], VbaError> {
    check_record(id, r)?;
    let record = read_variable_record(r, 1)?;
    if log_enabled!(Level::Warn) && record.len() > 100_000 {
        // The id is printed in hexadecimal, like every record id in this file.
        warn!(
            "record id {:x} has a suspiciously huge length of {} (hex: {:x})",
            id,
            record.len(),
            record.len()
        );
    }
    Ok(record)
}
/// Consumes a little-endian `u16` record id from `r` and verifies that it equals `id`.
///
/// # Errors
///
/// Returns `VbaError::InvalidRecordId` carrying both ids on a mismatch, or the I/O error
/// raised when the id cannot be read.
fn check_record(id: u16, r: &mut &[u8]) -> Result<(), VbaError> {
    debug!("check record {:x}", id);
    match r.read_u16::<LittleEndian>()? {
        found if found == id => Ok(()),
        found => Err(VbaError::InvalidRecordId {
            expected: id,
            found: found,
        }),
    }
}