Skip to main content

calamine_styles/
vba.rs

1// SPDX-License-Identifier: MIT
2//
3// Copyright 2016-2025, Johann Tuffe.
4
5//! Parse vbaProject.bin file
6//!
7//! Retranscription from [`OfficeParser`].
8//!
9//! [`OfficeParser`]: https://github.com/unixfreak0037/officeparser/blob/master/officeparser.py
10
11use std::collections::BTreeMap;
12use std::io::Read;
13use std::path::PathBuf;
14
15use byteorder::{LittleEndian, ReadBytesExt};
16use log::{debug, log_enabled, warn, Level};
17
18use crate::cfb::{Cfb, XlsEncoding};
19use crate::utils::read_u16;
20
/// A VBA specific error enum
///
/// Returned by every fallible function of this module. The `Cfb` and `Io`
/// variants wrap an underlying error; the remaining variants describe
/// malformed or unexpected content found while parsing the VBA project.
#[derive(Debug)]
pub enum VbaError {
    /// Error comes from a cfb parsing
    Cfb(crate::cfb::CfbError),
    /// Io error
    Io(std::io::Error),

    /// Cannot find module (carries the requested module name)
    ModuleNotFound(String),
    /// Generic unknown u16 value
    Unknown {
        /// error type (short label of what was being parsed)
        typ: &'static str,
        /// value found
        val: u16,
    },
    /// Invalid libid format
    LibId,
    /// Invalid record id
    InvalidRecordId {
        /// expected record id
        expected: u16,
        /// record id found
        found: u16,
    },
}

// NOTE(review): `from_err!` is defined elsewhere in the crate; it presumably
// generates `From<$source> for VbaError` wrapping the value in the named
// variant, which is what lets `?` convert these errors below - confirm.
from_err!(crate::cfb::CfbError, VbaError, Cfb);
from_err!(std::io::Error, VbaError, Io);
51
52impl std::fmt::Display for VbaError {
53    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54        match self {
55            VbaError::Io(e) => write!(f, "I/O error: {e}"),
56            VbaError::Cfb(e) => write!(f, "Cfb error: {e}"),
57
58            VbaError::ModuleNotFound(e) => write!(f, "Cannot find module '{e}'"),
59            VbaError::Unknown { typ, val } => write!(f, "Unknown {typ} '{val:X}'"),
60            VbaError::LibId => write!(f, "Unexpected libid format"),
61            VbaError::InvalidRecordId { expected, found } => write!(
62                f,
63                "Invalid record id: expecting {expected:X} found {found:X}"
64            ),
65        }
66    }
67}
68
69impl std::error::Error for VbaError {
70    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
71        match self {
72            VbaError::Io(e) => Some(e),
73            VbaError::Cfb(e) => Some(e),
74            _ => None,
75        }
76    }
77}
78
79/// A struct for managing VBA reading
80#[derive(Clone, Debug, PartialEq, Eq)]
81pub struct VbaProject {
82    references: Vec<Reference>,
83    modules: BTreeMap<String, Vec<u8>>,
84    encoding: XlsEncoding,
85}
86
87impl VbaProject {
88    /// Create a new `VbaProject` out of the vbaProject.bin `ZipFile` or xls file
89    ///
90    /// Starts reading project metadata (header, directories, sectors and minisectors).
91    pub fn new<R: Read>(r: &mut R, len: usize) -> Result<VbaProject, VbaError> {
92        let mut cfb = Cfb::new(r, len)?;
93        VbaProject::from_cfb(r, &mut cfb)
94    }
95
96    /// Creates a new `VbaProject` out of a Compound File Binary and the corresponding reader
97    pub(crate) fn from_cfb<R: Read>(r: &mut R, cfb: &mut Cfb) -> Result<VbaProject, VbaError> {
98        // dir stream
99        let stream = cfb.get_stream("dir", r)?;
100        let stream = crate::cfb::decompress_stream(&stream)?;
101        let stream = &mut &*stream;
102
103        // read dir information record (not used)
104        let encoding = read_dir_information(stream)?;
105
106        // array of REFERENCE records
107        let refs = Reference::from_stream(stream, &encoding)?;
108
109        // modules
110        let mods: Vec<Module> = read_modules(stream, &encoding)?;
111
112        // read all modules
113        let modules: BTreeMap<String, Vec<u8>> = mods
114            .into_iter()
115            .map(|m| {
116                cfb.get_stream(&m.stream_name, r).and_then(|s| {
117                    crate::cfb::decompress_stream(&s[m.text_offset..]).map(move |s| (m.name, s))
118                })
119            })
120            .collect::<Result<_, _>>()?;
121
122        Ok(VbaProject {
123            references: refs,
124            modules,
125            encoding,
126        })
127    }
128
129    /// Gets the list of `Reference`s
130    pub fn get_references(&self) -> &[Reference] {
131        &self.references
132    }
133
134    /// Gets the list of `Module` names
135    pub fn get_module_names(&self) -> Vec<&str> {
136        self.modules.keys().map(|k| &**k).collect()
137    }
138
139    /// Reads module content and tries to convert to utf8
140    ///
141    /// While it works most of the time, the modules are MBCS encoding and the conversion
142    /// may fail. If this is the case you should revert to `read_module_raw` as there is
143    /// no built in decoding provided in this crate
144    ///
145    /// # Examples
146    /// ```
147    /// use calamine::{Reader, open_workbook, Xlsx};
148    ///
149    /// # let path = format!("{}/tests/vba.xlsm", env!("CARGO_MANIFEST_DIR"));
150    /// let mut xl: Xlsx<_> = open_workbook(path).expect("Cannot find excel file");
151    /// if let Ok(Some(vba)) = xl.vba_project() {
152    ///     let modules = vba.get_module_names().into_iter()
153    ///                      .map(|s| s.to_string()).collect::<Vec<_>>();
154    ///     for m in modules {
155    ///         println!("Module {m}:");
156    ///         println!("{}", vba.get_module(&m)
157    ///                           .unwrap_or_else(|_| panic!("cannot read {m:?} module")));
158    ///     }
159    /// }
160    /// ```
161    pub fn get_module(&self, name: &str) -> Result<String, VbaError> {
162        debug!("read module {name}");
163        let data = self.get_module_raw(name)?;
164        Ok(self.encoding.decode_all(data))
165    }
166
167    /// Reads module content (MBCS encoded) and output it as-is (binary output)
168    pub fn get_module_raw(&self, name: &str) -> Result<&[u8], VbaError> {
169        match self.modules.get(name) {
170            Some(m) => Ok(&**m),
171            None => Err(VbaError::ModuleNotFound(name.into())),
172        }
173    }
174}
175
176/// A vba reference
177#[derive(Debug, Clone, Hash, Eq, PartialEq)]
178pub struct Reference {
179    /// name
180    pub name: String,
181    /// description
182    pub description: String,
183    /// location of the reference
184    pub path: PathBuf,
185}
186
187impl Reference {
188    /// Check if the reference location is accessible
189    pub fn is_missing(&self) -> bool {
190        !self.path.exists()
191    }
192
193    /// Gets the list of references from the `dir_stream` relevant part
194    fn from_stream(stream: &mut &[u8], encoding: &XlsEncoding) -> Result<Vec<Reference>, VbaError> {
195        debug!("read all references metadata");
196
197        let mut references = Vec::new();
198        let mut reference = Reference {
199            name: "".to_string(),
200            description: "".to_string(),
201            path: "".into(),
202        };
203
204        loop {
205            let check = stream.read_u16::<LittleEndian>();
206            match check? {
207                0x000F => {
208                    // termination of references array
209                    if !reference.name.is_empty() {
210                        references.push(reference);
211                    }
212                    break;
213                }
214                0x0016 => {
215                    // REFERENCENAME
216                    if !reference.name.is_empty() {
217                        references.push(reference);
218                    }
219                    let name = read_variable_record(stream, 1)?;
220                    let name = encoding.decode_all(name);
221                    reference = Reference {
222                        name: name.clone(),
223                        description: name,
224                        path: "".into(),
225                    };
226                    check_variable_record(0x003E, stream)?; // unicode
227                }
228                0x0033 => {
229                    // REFERENCEORIGINAL (followed by REFERENCECONTROL)
230                    reference.set_libid(stream, encoding)?;
231                }
232                0x002F => {
233                    // REFERENCECONTROL
234                    *stream = &stream[4..]; // SizeTwiddled: len of total ref control
235                    reference.set_libid(stream, encoding)?;
236
237                    *stream = &stream[6..];
238                    match stream.read_u16::<LittleEndian>()? {
239                        0x0016 => {
240                            // optional name record extended
241                            read_variable_record(stream, 1)?; // name extended
242                            check_variable_record(0x003E, stream)?; // name extended unicode
243                            check_record(0x0030, stream)?;
244                        }
245                        0x0030 => (),
246                        e => {
247                            return Err(VbaError::Unknown {
248                                typ: "token in reference control",
249                                val: e,
250                            });
251                        }
252                    }
253                    *stream = &stream[4..];
254                    reference.set_libid(stream, encoding)?;
255                    *stream = &stream[26..];
256                }
257                0x000D => {
258                    // REFERENCEREGISTERED
259                    *stream = &stream[4..];
260                    reference.set_libid(stream, encoding)?;
261                    *stream = &stream[6..];
262                }
263                0x000E => {
264                    // REFERENCEPROJECT
265                    *stream = &stream[4..];
266                    let absolute = read_variable_record(stream, 1)?; // project libid absolute
267                    {
268                        let absolute = encoding.decode_all(absolute);
269                        reference.path = if let Some(stripped) = absolute.strip_prefix("*\\C") {
270                            stripped.into()
271                        } else {
272                            absolute.into()
273                        };
274                    }
275                    read_variable_record(stream, 1)?; // project libid relative
276                    *stream = &stream[6..];
277                }
278                c => {
279                    return Err(VbaError::Unknown {
280                        typ: "check id",
281                        val: c,
282                    });
283                }
284            }
285        }
286
287        debug!("references: {references:#?}");
288        Ok(references)
289    }
290
291    fn set_libid(&mut self, stream: &mut &[u8], encoding: &XlsEncoding) -> Result<(), VbaError> {
292        let libid = read_variable_record(stream, 1)?; //libid twiddled
293        if libid.is_empty() || libid.ends_with(b"##") {
294            return Ok(());
295        }
296        let libid = encoding.decode_all(libid);
297        let mut parts = libid.rsplit('#');
298        match (parts.next(), parts.next()) {
299            (Some(desc), Some(path)) => {
300                self.description = desc.into();
301                // use original path if already set
302                if !path.is_empty() && self.path.as_os_str().is_empty() {
303                    self.path = path.into();
304                }
305                Ok(())
306            }
307            _ => Err(VbaError::LibId),
308        }
309    }
310}
311
/// A vba module
///
/// Metadata read from the `dir` stream; the module source itself is loaded
/// later from the stream named `stream_name`.
#[derive(Debug, Clone, Default)]
struct Module {
    /// module name as it appears in vba project
    name: String,
    /// name of the compound file stream holding the module source
    stream_name: String,
    /// byte offset inside that stream at which the compressed source starts
    text_offset: usize,
}
320
321fn read_dir_information(stream: &mut &[u8]) -> Result<XlsEncoding, VbaError> {
322    debug!("read dir header");
323
324    // PROJECTSYSKIND
325    *stream = &stream[10..];
326
327    // PROJECTCOMPATVERSION (optional)
328    if read_u16(&stream[0..2]) == 0x004A {
329        *stream = &stream[10..];
330    }
331
332    // PROJECTLCID and PROJECTLCIDINVOKE Records
333    *stream = &stream[20..];
334
335    // PROJECT Codepage
336    let encoding = XlsEncoding::from_codepage(read_u16(&stream[6..8]))?;
337    *stream = &stream[8..];
338
339    // PROJECTNAME Record
340    check_variable_record(0x0004, stream)?;
341
342    // PROJECTDOCSTRING Record
343    check_variable_record(0x0005, stream)?;
344    check_variable_record(0x0040, stream)?; // unicode
345
346    // PROJECTHELPFILEPATH Record - MS-OVBA 2.3.4.2.1.7
347    check_variable_record(0x0006, stream)?;
348    check_variable_record(0x003D, stream)?;
349
350    // PROJECTHELPCONTEXT PROJECTLIBFLAGS and PROJECTVERSION Records
351    *stream = &stream[32..];
352
353    // PROJECTCONSTANTS Record
354    check_variable_record(0x000C, stream)?;
355    check_variable_record(0x003C, stream)?; // unicode
356
357    Ok(encoding)
358}
359
360fn read_modules(stream: &mut &[u8], encoding: &XlsEncoding) -> Result<Vec<Module>, VbaError> {
361    debug!("read all modules metadata");
362    *stream = &stream[4..];
363
364    let module_len = stream.read_u16::<LittleEndian>()? as usize;
365
366    *stream = &stream[8..]; // PROJECTCOOKIE record
367    let mut modules = Vec::with_capacity(module_len);
368
369    for _ in 0..module_len {
370        // name
371        let name = check_variable_record(0x0019, stream)?;
372        let name = encoding.decode_all(name);
373
374        check_variable_record(0x0047, stream)?; // unicode
375
376        let stream_name = check_variable_record(0x001A, stream)?; // stream name
377        let stream_name = encoding.decode_all(stream_name);
378
379        check_variable_record(0x0032, stream)?; // stream name unicode
380        check_variable_record(0x001C, stream)?; // doc string
381        check_variable_record(0x0048, stream)?; // doc string unicode
382
383        // offset
384        check_record(0x0031, stream)?;
385        *stream = &stream[4..];
386        let offset = stream.read_u32::<LittleEndian>()? as usize;
387
388        // help context
389        check_record(0x001E, stream)?;
390        *stream = &stream[8..];
391
392        // cookie
393        check_record(0x002C, stream)?;
394        *stream = &stream[6..];
395
396        match stream.read_u16::<LittleEndian>()? {
397            0x0021 /* procedural module */ |
398            0x0022 /* document, class or designer module */ => (),
399            e => return Err(VbaError::Unknown { typ: "module typ", val: e }),
400        }
401
402        loop {
403            *stream = &stream[4..]; // reserved
404            match stream.read_u16::<LittleEndian>() {
405                Ok(0x0025 /* readonly */ | 0x0028 /* private */) => (),
406                Ok(0x002B) => break,
407                Ok(e) => return Err(VbaError::Unknown { typ: "record id", val: e }),
408                Err(e) => return Err(VbaError::Io(e)),
409            }
410        }
411        *stream = &stream[4..]; // reserved
412
413        modules.push(Module {
414            name,
415            stream_name,
416            text_offset: offset,
417        });
418    }
419
420    Ok(modules)
421}
422
423/// Reads a variable length record
424///
425/// `mult` is a multiplier of the length (e.g 2 when parsing `XLWideString`)
426fn read_variable_record<'a>(r: &mut &'a [u8], mult: usize) -> Result<&'a [u8], VbaError> {
427    let len = r.read_u32::<LittleEndian>()? as usize * mult;
428    let (read, next) = r.split_at(len);
429    *r = next;
430    Ok(read)
431}
432
433/// Check that next record matches `id` and returns a variable length record
434fn check_variable_record<'a>(id: u16, r: &mut &'a [u8]) -> Result<&'a [u8], VbaError> {
435    check_record(id, r)?;
436    let record = read_variable_record(r, 1)?;
437    if log_enabled!(Level::Warn) && record.len() > 100_000 {
438        warn!(
439            "record id {} as a suspicious huge length of {} (hex: {:x})",
440            id,
441            record.len(),
442            record.len() as u32
443        );
444    }
445    Ok(record)
446}
447
448/// Check that next record matches `id`
449fn check_record(id: u16, r: &mut &[u8]) -> Result<(), VbaError> {
450    debug!("check record {id:x}");
451    let record_id = r.read_u16::<LittleEndian>()?;
452    if record_id == id {
453        Ok(())
454    } else {
455        Err(VbaError::InvalidRecordId {
456            expected: id,
457            found: record_id,
458        })
459    }
460}