1use std::collections::BTreeMap;
12use std::io::Read;
13use std::path::PathBuf;
14
15use byteorder::{LittleEndian, ReadBytesExt};
16use log::{debug, log_enabled, warn, Level};
17
18use crate::cfb::{Cfb, XlsEncoding};
19use crate::utils::read_u16;
20
/// Errors that can occur while reading a VBA project.
#[derive(Debug)]
pub enum VbaError {
    /// Error reported by the compound-file (CFB) layer.
    Cfb(crate::cfb::CfbError),
    /// Underlying I/O error, e.g. a read that ran past the end of a stream.
    Io(std::io::Error),

    /// No module with the requested name exists in the project.
    ModuleNotFound(String),
    /// A value the parser does not know how to handle.
    Unknown {
        /// Short description of what kind of value was being read.
        typ: &'static str,
        /// The unexpected value.
        val: u16,
    },
    /// A reference libid did not contain the expected `#`-separated parts.
    LibId,
    /// A record started with a different id than the one required at that position.
    InvalidRecordId {
        /// Id the parser was expecting.
        expected: u16,
        /// Id actually read from the stream.
        found: u16,
    },
}
48
// Conversions relied on by `?` throughout this module: a `CfbError` or a
// `std::io::Error` ends up in the matching `VbaError` variant.
// NOTE(review): `from_err!` is defined elsewhere in the crate; presumably it
// expands to `impl From<$from> for VbaError` — confirm against the macro.
from_err!(crate::cfb::CfbError, VbaError, Cfb);
from_err!(std::io::Error, VbaError, Io);
51
52impl std::fmt::Display for VbaError {
53 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54 match self {
55 VbaError::Io(e) => write!(f, "I/O error: {e}"),
56 VbaError::Cfb(e) => write!(f, "Cfb error: {e}"),
57
58 VbaError::ModuleNotFound(e) => write!(f, "Cannot find module '{e}'"),
59 VbaError::Unknown { typ, val } => write!(f, "Unknown {typ} '{val:X}'"),
60 VbaError::LibId => write!(f, "Unexpected libid format"),
61 VbaError::InvalidRecordId { expected, found } => write!(
62 f,
63 "Invalid record id: expecting {expected:X} found {found:X}"
64 ),
65 }
66 }
67}
68
69impl std::error::Error for VbaError {
70 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
71 match self {
72 VbaError::Io(e) => Some(e),
73 VbaError::Cfb(e) => Some(e),
74 _ => None,
75 }
76 }
77}
78
/// An in-memory view of a VBA project: its references and module sources.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct VbaProject {
    /// References listed in the project's `dir` stream.
    references: Vec<Reference>,
    /// Module name -> decompressed module source, still in the project's
    /// byte encoding (decoded on demand by `get_module`).
    modules: BTreeMap<String, Vec<u8>>,
    /// Encoding derived from the code page found in the `dir` stream.
    encoding: XlsEncoding,
}
86
87impl VbaProject {
88 pub fn new<R: Read>(r: &mut R, len: usize) -> Result<VbaProject, VbaError> {
92 let mut cfb = Cfb::new(r, len)?;
93 VbaProject::from_cfb(r, &mut cfb)
94 }
95
96 pub(crate) fn from_cfb<R: Read>(r: &mut R, cfb: &mut Cfb) -> Result<VbaProject, VbaError> {
98 let stream = cfb.get_stream("dir", r)?;
100 let stream = crate::cfb::decompress_stream(&stream)?;
101 let stream = &mut &*stream;
102
103 let encoding = read_dir_information(stream)?;
105
106 let refs = Reference::from_stream(stream, &encoding)?;
108
109 let mods: Vec<Module> = read_modules(stream, &encoding)?;
111
112 let modules: BTreeMap<String, Vec<u8>> = mods
114 .into_iter()
115 .map(|m| {
116 cfb.get_stream(&m.stream_name, r).and_then(|s| {
117 crate::cfb::decompress_stream(&s[m.text_offset..]).map(move |s| (m.name, s))
118 })
119 })
120 .collect::<Result<_, _>>()?;
121
122 Ok(VbaProject {
123 references: refs,
124 modules,
125 encoding,
126 })
127 }
128
129 pub fn get_references(&self) -> &[Reference] {
131 &self.references
132 }
133
134 pub fn get_module_names(&self) -> Vec<&str> {
136 self.modules.keys().map(|k| &**k).collect()
137 }
138
139 pub fn get_module(&self, name: &str) -> Result<String, VbaError> {
162 debug!("read module {name}");
163 let data = self.get_module_raw(name)?;
164 Ok(self.encoding.decode_all(data))
165 }
166
167 pub fn get_module_raw(&self, name: &str) -> Result<&[u8], VbaError> {
169 match self.modules.get(name) {
170 Some(m) => Ok(&**m),
171 None => Err(VbaError::ModuleNotFound(name.into())),
172 }
173 }
174}
175
/// A reference listed in the VBA project's `dir` stream.
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct Reference {
    /// Name of the reference.
    pub name: String,
    /// Description of the reference; starts out equal to `name` and is
    /// replaced by the last `#`-separated part of a libid when one is parsed.
    pub description: String,
    /// Location of the referenced library, taken from a libid.
    pub path: PathBuf,
}
186
187impl Reference {
188 pub fn is_missing(&self) -> bool {
190 !self.path.exists()
191 }
192
193 fn from_stream(stream: &mut &[u8], encoding: &XlsEncoding) -> Result<Vec<Reference>, VbaError> {
195 debug!("read all references metadata");
196
197 let mut references = Vec::new();
198 let mut reference = Reference {
199 name: "".to_string(),
200 description: "".to_string(),
201 path: "".into(),
202 };
203
204 loop {
205 let check = stream.read_u16::<LittleEndian>();
206 match check? {
207 0x000F => {
208 if !reference.name.is_empty() {
210 references.push(reference);
211 }
212 break;
213 }
214 0x0016 => {
215 if !reference.name.is_empty() {
217 references.push(reference);
218 }
219 let name = read_variable_record(stream, 1)?;
220 let name = encoding.decode_all(name);
221 reference = Reference {
222 name: name.clone(),
223 description: name,
224 path: "".into(),
225 };
226 check_variable_record(0x003E, stream)?; }
228 0x0033 => {
229 reference.set_libid(stream, encoding)?;
231 }
232 0x002F => {
233 *stream = &stream[4..]; reference.set_libid(stream, encoding)?;
236
237 *stream = &stream[6..];
238 match stream.read_u16::<LittleEndian>()? {
239 0x0016 => {
240 read_variable_record(stream, 1)?; check_variable_record(0x003E, stream)?; check_record(0x0030, stream)?;
244 }
245 0x0030 => (),
246 e => {
247 return Err(VbaError::Unknown {
248 typ: "token in reference control",
249 val: e,
250 });
251 }
252 }
253 *stream = &stream[4..];
254 reference.set_libid(stream, encoding)?;
255 *stream = &stream[26..];
256 }
257 0x000D => {
258 *stream = &stream[4..];
260 reference.set_libid(stream, encoding)?;
261 *stream = &stream[6..];
262 }
263 0x000E => {
264 *stream = &stream[4..];
266 let absolute = read_variable_record(stream, 1)?; {
268 let absolute = encoding.decode_all(absolute);
269 reference.path = if let Some(stripped) = absolute.strip_prefix("*\\C") {
270 stripped.into()
271 } else {
272 absolute.into()
273 };
274 }
275 read_variable_record(stream, 1)?; *stream = &stream[6..];
277 }
278 c => {
279 return Err(VbaError::Unknown {
280 typ: "check id",
281 val: c,
282 });
283 }
284 }
285 }
286
287 debug!("references: {references:#?}");
288 Ok(references)
289 }
290
291 fn set_libid(&mut self, stream: &mut &[u8], encoding: &XlsEncoding) -> Result<(), VbaError> {
292 let libid = read_variable_record(stream, 1)?; if libid.is_empty() || libid.ends_with(b"##") {
294 return Ok(());
295 }
296 let libid = encoding.decode_all(libid);
297 let mut parts = libid.rsplit('#');
298 match (parts.next(), parts.next()) {
299 (Some(desc), Some(path)) => {
300 self.description = desc.into();
301 if !path.is_empty() && self.path.as_os_str().is_empty() {
303 self.path = path.into();
304 }
305 Ok(())
306 }
307 _ => Err(VbaError::LibId),
308 }
309 }
310}
311
/// Metadata of one module as listed in the `dir` stream.
#[derive(Debug, Clone, Default)]
struct Module {
    /// Module name, used as the key of the project's module map.
    name: String,
    /// Name of the compound-file stream that holds the module.
    stream_name: String,
    /// Offset inside that stream at which the compressed source begins.
    text_offset: usize,
}
320
321fn read_dir_information(stream: &mut &[u8]) -> Result<XlsEncoding, VbaError> {
322 debug!("read dir header");
323
324 *stream = &stream[10..];
326
327 if read_u16(&stream[0..2]) == 0x004A {
329 *stream = &stream[10..];
330 }
331
332 *stream = &stream[20..];
334
335 let encoding = XlsEncoding::from_codepage(read_u16(&stream[6..8]))?;
337 *stream = &stream[8..];
338
339 check_variable_record(0x0004, stream)?;
341
342 check_variable_record(0x0005, stream)?;
344 check_variable_record(0x0040, stream)?; check_variable_record(0x0006, stream)?;
348 check_variable_record(0x003D, stream)?;
349
350 *stream = &stream[32..];
352
353 check_variable_record(0x000C, stream)?;
355 check_variable_record(0x003C, stream)?; Ok(encoding)
358}
359
360fn read_modules(stream: &mut &[u8], encoding: &XlsEncoding) -> Result<Vec<Module>, VbaError> {
361 debug!("read all modules metadata");
362 *stream = &stream[4..];
363
364 let module_len = stream.read_u16::<LittleEndian>()? as usize;
365
366 *stream = &stream[8..]; let mut modules = Vec::with_capacity(module_len);
368
369 for _ in 0..module_len {
370 let name = check_variable_record(0x0019, stream)?;
372 let name = encoding.decode_all(name);
373
374 check_variable_record(0x0047, stream)?; let stream_name = check_variable_record(0x001A, stream)?; let stream_name = encoding.decode_all(stream_name);
378
379 check_variable_record(0x0032, stream)?; check_variable_record(0x001C, stream)?; check_variable_record(0x0048, stream)?; check_record(0x0031, stream)?;
385 *stream = &stream[4..];
386 let offset = stream.read_u32::<LittleEndian>()? as usize;
387
388 check_record(0x001E, stream)?;
390 *stream = &stream[8..];
391
392 check_record(0x002C, stream)?;
394 *stream = &stream[6..];
395
396 match stream.read_u16::<LittleEndian>()? {
397 0x0021 |
398 0x0022 => (),
399 e => return Err(VbaError::Unknown { typ: "module typ", val: e }),
400 }
401
402 loop {
403 *stream = &stream[4..]; match stream.read_u16::<LittleEndian>() {
405 Ok(0x0025 | 0x0028 ) => (),
406 Ok(0x002B) => break,
407 Ok(e) => return Err(VbaError::Unknown { typ: "record id", val: e }),
408 Err(e) => return Err(VbaError::Io(e)),
409 }
410 }
411 *stream = &stream[4..]; modules.push(Module {
414 name,
415 stream_name,
416 text_offset: offset,
417 });
418 }
419
420 Ok(modules)
421}
422
423fn read_variable_record<'a>(r: &mut &'a [u8], mult: usize) -> Result<&'a [u8], VbaError> {
427 let len = r.read_u32::<LittleEndian>()? as usize * mult;
428 let (read, next) = r.split_at(len);
429 *r = next;
430 Ok(read)
431}
432
433fn check_variable_record<'a>(id: u16, r: &mut &'a [u8]) -> Result<&'a [u8], VbaError> {
435 check_record(id, r)?;
436 let record = read_variable_record(r, 1)?;
437 if log_enabled!(Level::Warn) && record.len() > 100_000 {
438 warn!(
439 "record id {} as a suspicious huge length of {} (hex: {:x})",
440 id,
441 record.len(),
442 record.len() as u32
443 );
444 }
445 Ok(record)
446}
447
448fn check_record(id: u16, r: &mut &[u8]) -> Result<(), VbaError> {
450 debug!("check record {id:x}");
451 let record_id = r.read_u16::<LittleEndian>()?;
452 if record_id == id {
453 Ok(())
454 } else {
455 Err(VbaError::InvalidRecordId {
456 expected: id,
457 found: record_id,
458 })
459 }
460}