// xlsbye-biff12 0.1.0
//
// BIFF12 binary record parser for XLSB files
// Documentation
use xlsbye_core::error::{Result, XlsByeError};
use xlsbye_core::types::{DefinedName, SheetMeta, SheetType, SheetVisibility, WorkbookMeta};
use xlsbye_formula::decompile::decompile_formula;

use super::cursor::RecordCursor;
use super::header::RecordIter;
use super::ids::{BRT_BEGIN_EXTERN_SHEET, BRT_BUNDLE_SH, BRT_DEFINED_NAME, BRT_WB_PROP};

/// `itab` value for which `parse_defined_name` reports no sheet index
/// (`sheet_index: None`); any other value is passed through as `Some(itab)`.
const GLOBAL_DEFINED_NAME_SCOPE: u32 = u32::MAX;

pub fn parse_workbook(data: &[u8]) -> Result<WorkbookMeta> {
    let mut sheets = Vec::new();
    let mut formula_sheet_names = Vec::new();
    let mut defined_names = Vec::new();
    let mut date1904 = false;

    for record in RecordIter::new(data) {
        let (record_type, payload) = record?;
        match record_type {
            ty if ty == BRT_WB_PROP.as_u16() => {
                date1904 = parse_workbook_properties(payload)?;
            }
            ty if ty == BRT_BUNDLE_SH.as_u16() => {
                sheets.push(parse_bundle_sheet(payload)?);
            }
            ty if ty == BRT_BEGIN_EXTERN_SHEET.as_u16() => {
                formula_sheet_names = parse_extern_sheet_names(payload, &sheets);
            }
            ty if ty == BRT_DEFINED_NAME.as_u16() => {
                let names = defined_names
                    .iter()
                    .map(|name: &DefinedName| (name.name.clone(), name.formula.clone()))
                    .collect::<Vec<_>>();
                let sheet_names = if formula_sheet_names.is_empty() {
                    sheets.iter().map(|sheet| sheet.name.clone()).collect::<Vec<_>>()
                } else {
                    formula_sheet_names.clone()
                };
                defined_names.push(parse_defined_name(payload, &sheet_names, &names)?);
            }
            _ => {}
        }
    }

    if formula_sheet_names.is_empty() {
        formula_sheet_names = sheets.iter().map(|sheet| sheet.name.clone()).collect();
    }

    Ok(WorkbookMeta {
        sheets,
        formula_sheet_names,
        defined_names,
        has_vba: false,
        date1904,
    })
}

/// Builds the sheet-name table referenced by formulas from an extern-sheet payload.
///
/// The payload starts with a little-endian `u32` entry count followed by
/// 12-byte entries of three little-endian `i32` values (supporting-book index,
/// first tab, last tab). Only the first-tab value is used: it is looked up as
/// an index into `sheets` and the matching sheet name is emitted.
///
/// The result has one element per entry that is fully present in the payload,
/// up to the declared count. An entry whose first-tab value is negative or
/// does not index `sheets` yields an empty string so positions stay aligned
/// with the entries. A payload shorter than the 4-byte count yields an empty
/// vector.
///
/// The declared count comes from the file and is never used to size an
/// allocation; the work is bounded by the bytes actually available.
fn parse_extern_sheet_names(payload: &[u8], sheets: &[SheetMeta]) -> Vec<String> {
    const COUNT_SIZE: usize = 4;
    const ENTRY_SIZE: usize = 12;

    if payload.len() < COUNT_SIZE {
        return Vec::new();
    }

    let (header, entries) = payload.split_at(COUNT_SIZE);
    let declared =
        usize::try_from(u32::from_le_bytes([header[0], header[1], header[2], header[3]]))
            .unwrap_or(0);

    entries
        // Trailing bytes that do not form a whole entry are ignored.
        .chunks_exact(ENTRY_SIZE)
        .take(declared)
        .map(|entry| {
            // Bytes 4..8 of the entry hold the first-tab index.
            let first_tab = i32::from_le_bytes([entry[4], entry[5], entry[6], entry[7]]);
            usize::try_from(first_tab)
                .ok()
                .and_then(|sheet_index| sheets.get(sheet_index))
                .map(|sheet| sheet.name.clone())
                .unwrap_or_default()
        })
        .collect()
}

/// Reads the leading `u32` flags field of a workbook-properties payload and
/// returns whether its lowest bit is set.
///
/// `parse_workbook` stores the returned value as `date1904`.
///
/// # Errors
///
/// Propagates the cursor error if the flags field cannot be read.
fn parse_workbook_properties(payload: &[u8]) -> Result<bool> {
    const DATE_1904_MASK: u32 = 0x1;

    let mut reader = RecordCursor::new(payload);
    let flag_bits = reader.read_u32()?;
    let date1904 = (flag_bits & DATE_1904_MASK) == DATE_1904_MASK;
    Ok(date1904)
}

/// Decodes one bundle-sheet payload into a [`SheetMeta`].
///
/// Fields are read in order: a `u32` visibility code, a `u32` tab id, then two
/// wide strings (relationship id and sheet name). All four fields are read
/// before the visibility code is validated, so a truncated payload reports the
/// cursor error rather than the visibility error.
///
/// The sheet type is always set to [`SheetType::Worksheet`].
///
/// # Errors
///
/// Propagates cursor read errors, and returns [`XlsByeError::Biff12`] when the
/// visibility code is not 0, 1 or 2.
fn parse_bundle_sheet(payload: &[u8]) -> Result<SheetMeta> {
    let mut reader = RecordCursor::new(payload);
    let visibility_code = reader.read_u32()?;
    let sheet_id = reader.read_u32()?;
    let rel_id = reader.read_wide_string()?;
    let name = reader.read_wide_string()?;

    let state = match visibility_code {
        0 => Ok(SheetVisibility::Visible),
        1 => Ok(SheetVisibility::Hidden),
        2 => Ok(SheetVisibility::VeryHidden),
        value => Err(XlsByeError::Biff12(format!(
            "invalid sheet visibility value: {value}"
        ))),
    }?;

    Ok(SheetMeta {
        name,
        sheet_id,
        rel_id,
        state,
        sheet_type: SheetType::Worksheet,
    })
}

/// Decodes one defined-name payload and decompiles its formula.
///
/// Fields are read in order: `u32` flags, one byte that is read and ignored,
/// `u32` `itab`, the name as a wide string, a `u32` formula length, and that
/// many formula bytes.
///
/// * `sheets` — sheet names handed to the formula decompiler.
/// * `names` — `(name, formula)` pairs handed to the formula decompiler.
///
/// In the result, `hidden` reflects bit 0 of the flags, and `sheet_index` is
/// `None` when `itab` equals `GLOBAL_DEFINED_NAME_SCOPE`, otherwise `Some(itab)`.
///
/// # Errors
///
/// Propagates cursor read errors; returns [`XlsByeError::Biff12`] if the
/// formula length does not fit in `usize`, and [`XlsByeError::Formula`]
/// (prefixed with the defined name) if decompilation fails.
fn parse_defined_name(
    payload: &[u8],
    sheets: &[String],
    names: &[(String, String)],
) -> Result<DefinedName> {
    let mut reader = RecordCursor::new(payload);

    let flag_bits = reader.read_u32()?;
    let _shortcut_key = reader.read_u8()?;
    let scope = reader.read_u32()?;
    let name = reader.read_wide_string()?;

    let formula_len = match usize::try_from(reader.read_u32()?) {
        Ok(len) => len,
        Err(_) => {
            return Err(XlsByeError::Biff12(
                "defined name formula length out of range".to_string(),
            ))
        }
    };
    let formula_bytes = reader.read_bytes(formula_len)?;

    let formula = match decompile_formula(formula_bytes, sheets, names) {
        Ok(text) => text,
        Err(err) => {
            return Err(XlsByeError::Formula(format!(
                "defined name '{name}': {err}"
            )))
        }
    };

    Ok(DefinedName {
        name,
        formula,
        sheet_index: match scope {
            GLOBAL_DEFINED_NAME_SCOPE => None,
            index => Some(index),
        },
        hidden: (flag_bits & 0x1) != 0,
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Appends `value` as a `u32` count of UTF-16 code units followed by the
    /// code units themselves, all little-endian.
    fn push_wide_string(buf: &mut Vec<u8>, value: &str) {
        let units: Vec<u16> = value.encode_utf16().collect();
        buf.extend_from_slice(&(units.len() as u32).to_le_bytes());
        buf.extend(units.iter().flat_map(|unit| unit.to_le_bytes()));
    }

    /// Encodes `value` in 7-bit groups, low group first, setting the high bit
    /// on every byte except the last.
    fn encode_varint(value: u32) -> Vec<u8> {
        let mut remaining = value;
        let mut out = Vec::new();
        while remaining >= 0x80 {
            out.push((remaining & 0x7F) as u8 | 0x80);
            remaining >>= 7;
        }
        out.push(remaining as u8);
        out
    }

    /// Frames `payload` as a record: encoded type, encoded length, then the bytes.
    fn encode_record(record_type: u16, payload: &[u8]) -> Vec<u8> {
        let mut record = encode_varint(u32::from(record_type));
        record.extend(encode_varint(payload.len() as u32));
        record.extend_from_slice(payload);
        record
    }

    /// Builds a complete bundle-sheet record from its four fields.
    fn bundle_sheet_record(state: u32, tab_id: u32, rel_id: &str, name: &str) -> Vec<u8> {
        let mut payload = Vec::new();
        payload.extend_from_slice(&state.to_le_bytes());
        payload.extend_from_slice(&tab_id.to_le_bytes());
        push_wide_string(&mut payload, rel_id);
        push_wide_string(&mut payload, name);
        encode_record(BRT_BUNDLE_SH.as_u16(), &payload)
    }

    #[test]
    fn parses_workbook_properties_sheets_and_defined_names() {
        let wb_prop = encode_record(BRT_WB_PROP.as_u16(), &1u32.to_le_bytes());
        let bundle = bundle_sheet_record(2, 7, "rId5", "Config");

        let formula = [0x1E, 0x01, 0x00];
        let mut name_payload = Vec::new();
        name_payload.extend_from_slice(&1u32.to_le_bytes());
        name_payload.push(0);
        name_payload.extend_from_slice(&GLOBAL_DEFINED_NAME_SCOPE.to_le_bytes());
        push_wide_string(&mut name_payload, "MyName");
        name_payload.extend_from_slice(&(formula.len() as u32).to_le_bytes());
        name_payload.extend_from_slice(&formula);
        let defined = encode_record(BRT_DEFINED_NAME.as_u16(), &name_payload);

        let data = [wb_prop, bundle, defined].concat();

        let meta = parse_workbook(&data).expect("workbook should parse");
        assert!(meta.date1904);
        assert!(!meta.has_vba);

        assert_eq!(meta.sheets.len(), 1);
        let sheet = &meta.sheets[0];
        assert_eq!(sheet.name, "Config");
        assert_eq!(sheet.sheet_id, 7);
        assert_eq!(sheet.rel_id, "rId5");
        assert_eq!(sheet.state, SheetVisibility::VeryHidden);
        assert_eq!(sheet.sheet_type, SheetType::Worksheet);
        assert_eq!(meta.formula_sheet_names, vec!["Config".to_string()]);

        assert_eq!(meta.defined_names.len(), 1);
        let defined_name = &meta.defined_names[0];
        assert_eq!(defined_name.name, "MyName");
        assert_eq!(defined_name.formula, "1");
        assert_eq!(defined_name.sheet_index, None);
        assert!(defined_name.hidden);
    }

    #[test]
    fn parses_extern_sheet_name_mapping() {
        // Two entries of three i32 values each; the middle value of each
        // entry selects the sheet (1 -> SheetB, then 0 -> SheetA).
        let entries: [i32; 6] = [0, 1, 1, 0, 0, 0];
        let mut extern_sheet = Vec::new();
        extern_sheet.extend_from_slice(&2u32.to_le_bytes());
        for value in entries.iter() {
            extern_sheet.extend_from_slice(&value.to_le_bytes());
        }

        let data = [
            bundle_sheet_record(0, 0, "rId1", "SheetA"),
            bundle_sheet_record(0, 1, "rId2", "SheetB"),
            encode_record(BRT_BEGIN_EXTERN_SHEET.as_u16(), &extern_sheet),
        ]
        .concat();

        let meta = parse_workbook(&data).expect("extern sheet mapping should parse");
        assert_eq!(meta.formula_sheet_names, vec!["SheetB".to_string(), "SheetA".to_string()]);
    }

    #[test]
    fn skips_unknown_records() {
        let unknown = encode_record(0x0123, &[1, 2, 3, 4]);
        let meta = parse_workbook(&unknown).expect("unknown records should be skipped");
        assert_eq!(meta.sheets.len(), 0);
        assert_eq!(meta.defined_names.len(), 0);
        assert!(!meta.date1904);
    }

    #[test]
    fn rejects_invalid_sheet_visibility() {
        let data = bundle_sheet_record(99, 1, "rId1", "Sheet1");
        let err = parse_workbook(&data).expect_err("invalid visibility should fail");
        assert!(format!("{err}").contains("invalid sheet visibility"));
    }
}