xlsbye-biff12 0.1.0

BIFF12 binary record parser for XLSB files
Documentation
use xlsbye_core::error::{Result, XlsByeError};
use xlsbye_core::types::{ParsedTable, RangeRef, TableColumn};

use crate::record::cursor::RecordCursor;
use crate::record::header::RecordIter;
use crate::record::ids::{BRT_BEGIN_LIST, BRT_BEGIN_LIST_COL, BRT_END_LIST};

// Record type ids matched by this parser in addition to the ids imported from
// `crate::record::ids`.
// NOTE(review): the origin of the `_ALT` values is not visible in this file — confirm
// against the producer(s) that emit them.

/// Accepted by `is_begin_list` as an alternative to `BRT_BEGIN_LIST`.
const BRT_BEGIN_LIST_ALT: u16 = 0x0191;
/// Accepted by `is_end_list` as an alternative to `BRT_END_LIST`.
const BRT_END_LIST_ALT: u16 = 0x0192;
/// Accepted by `is_list_column` as an alternative to `BRT_BEGIN_LIST_COL`.
const BRT_LIST_COLUMN_ALT: u16 = 0x019B;
/// Record type that makes `parse_table` set `has_auto_filter` on the current table.
const BRT_BEGIN_AFILTER: u16 = 0x00A1;

/// Parses the record stream of a `table*.bin` part into a [`ParsedTable`].
///
/// Records are scanned in order:
/// - a table-begin record (re)starts the table from its payload;
/// - list-column records seen after a begin record append one column each;
/// - an auto-filter begin record seen after a begin record sets `has_auto_filter`;
/// - the first table-end record stops the scan;
/// - every other record is ignored.
///
/// After the scan, an empty `name` becomes `Table{id}` and an empty `display_name`
/// becomes a copy of `name`. When no column records were found, columns named
/// `Column1..ColumnN` are synthesized, where `N` is the width of the table range.
///
/// # Errors
///
/// Returns an error when a record or payload fails to decode, when the stream contains
/// no table-begin record, or when columns would have to be synthesized for a range wider
/// than 16,384 columns.
pub fn parse_table(data: &[u8]) -> Result<ParsedTable> {
    // A worksheet has at most 16,384 columns (A..XFD). The range comes straight from
    // untrusted file bytes, so without this bound a crafted header could request
    // billions of synthesized columns and exhaust memory.
    const MAX_SYNTHESIZED_COLUMNS: u32 = 16_384;

    let mut table: Option<ParsedTable> = None;

    for record in RecordIter::new(data) {
        let (record_type, payload) = record?;
        if is_begin_list(record_type) {
            table = Some(parse_begin_list(payload)?);
            continue;
        }

        if is_end_list(record_type) {
            break;
        }

        // Column and auto-filter records only matter once a table has been opened.
        if let Some(current) = table.as_mut() {
            if is_list_column(record_type) {
                current.columns.push(parse_list_column(payload)?);
            } else if record_type == BRT_BEGIN_AFILTER {
                current.has_auto_filter = true;
            }
        }
    }

    let mut table = table
        .ok_or_else(|| XlsByeError::Biff12("missing table begin record in table*.bin".to_string()))?;

    if table.name.is_empty() {
        table.name = format!("Table{}", table.id);
    }
    if table.display_name.is_empty() {
        table.display_name = table.name.clone();
    }

    if table.columns.is_empty() {
        // Width of the range; saturating so an inverted range yields one column
        // instead of wrapping around.
        let count = table
            .range
            .last_col
            .saturating_sub(table.range.first_col)
            .saturating_add(1);
        if count > MAX_SYNTHESIZED_COLUMNS {
            return Err(XlsByeError::Biff12(format!(
                "table range spans {} columns, more than the {} that can be synthesized",
                count, MAX_SYNTHESIZED_COLUMNS
            )));
        }
        table.columns.extend((1..=count).map(|id| TableColumn {
            id,
            name: format!("Column{id}"),
        }));
    }

    Ok(table)
}

fn parse_begin_list(payload: &[u8]) -> Result<ParsedTable> {
    let mut cursor = RecordCursor::new(payload);

    let range = RangeRef {
        first_row: one_based(cursor.read_u32()?, "table first row")?,
        last_row: one_based(cursor.read_u32()?, "table last row")?,
        first_col: one_based(cursor.read_u32()?, "table first column")?,
        last_col: one_based(cursor.read_u32()?, "table last column")?,
    };

    let id = if cursor.remaining() >= 4 {
        cursor.read_u32()?
    } else {
        1
    };
    let header_row_count = if cursor.remaining() >= 4 {
        cursor.read_u32()?
    } else {
        1
    };
    let totals_row_count = if cursor.remaining() >= 4 {
        cursor.read_u32()?
    } else {
        0
    };

    let mut strings = Vec::new();
    while !cursor.is_empty() {
        let before = cursor.remaining();
        match read_string_with_resync(&mut cursor)? {
            Some(value) => strings.push(value),
            None => {
                if cursor.remaining() == before {
                    break;
                }
            }
        }
    }

    let name = strings.first().cloned().unwrap_or_default();
    let display_name = strings.get(1).cloned().unwrap_or_else(|| name.clone());
    let style_name = strings.get(2).cloned().filter(|value| !value.is_empty());

    Ok(ParsedTable {
        id,
        name,
        display_name,
        range,
        columns: Vec::new(),
        has_auto_filter: false,
        style_name,
        header_row_count,
        totals_row_count,
    })
}

/// Decodes the payload of a list-column record into a [`TableColumn`].
///
/// The payload starts with the `u32` column id. The remainder is scanned for the first
/// wide string that decodes, which becomes the column name. When no string is found, or
/// the string found is empty, the name falls back to `Column{id}`.
///
/// # Errors
///
/// Returns an error when the id cannot be read or the cursor fails while skipping.
fn parse_list_column(payload: &[u8]) -> Result<TableColumn> {
    let mut cursor = RecordCursor::new(payload);
    let id = cursor.read_u32()?;

    let mut decoded: Option<String> = None;
    while !cursor.is_empty() {
        let remaining_before = cursor.remaining();
        decoded = read_string_with_resync(&mut cursor)?;
        // Stop on the first decoded string, or when a failed attempt made no progress.
        if decoded.is_some() || cursor.remaining() == remaining_before {
            break;
        }
    }

    let name = decoded
        .filter(|text| !text.is_empty())
        .unwrap_or_else(|| format!("Column{id}"));

    Ok(TableColumn { id, name })
}

/// Tries to read one wide string at the cursor position.
///
/// Returns `Ok(Some(text))` when a string decodes. Returns `Ok(None)` when the cursor is
/// already empty or the read fails. After a failed read the cursor is restored to where
/// it was, then advanced by four bytes if at least four remain, so the caller can retry
/// further into the payload. With fewer than four bytes left the cursor stays in place.
///
/// # Errors
///
/// Only a failure of the four-byte skip is propagated; string decoding errors are
/// swallowed on purpose.
fn read_string_with_resync(cursor: &mut RecordCursor<'_>) -> Result<Option<String>> {
    if cursor.is_empty() {
        return Ok(None);
    }

    let saved = *cursor;
    if let Ok(text) = cursor.read_wide_string() {
        return Ok(Some(text));
    }

    // Undo whatever the failed read consumed before attempting to step forward.
    *cursor = saved;
    if cursor.remaining() >= 4 {
        cursor.skip(4)?;
    }
    Ok(None)
}

/// Returns `true` when `record_type` is one of the two ids treated as a table-begin record.
fn is_begin_list(record_type: u16) -> bool {
    [BRT_BEGIN_LIST.as_u16(), BRT_BEGIN_LIST_ALT].contains(&record_type)
}

/// Returns `true` when `record_type` is one of the two ids treated as a table-end record.
fn is_end_list(record_type: u16) -> bool {
    [BRT_END_LIST.as_u16(), BRT_END_LIST_ALT].contains(&record_type)
}

/// Returns `true` when `record_type` is one of the two ids treated as a list-column record.
fn is_list_column(record_type: u16) -> bool {
    [BRT_BEGIN_LIST_COL.as_u16(), BRT_LIST_COLUMN_ALT].contains(&record_type)
}

/// Converts a 0-based index to a 1-based one.
///
/// # Errors
///
/// Returns a `Biff12` error naming `field` when `value + 1` does not fit in a `u32`.
fn one_based(value: u32, field: &str) -> Result<u32> {
    match value.checked_add(1) {
        Some(index) => Ok(index),
        None => Err(XlsByeError::Biff12(format!(
            "{field} value {value} overflows when converting to 1-based index"
        ))),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `value` as a base-128 varint: seven data bits per byte, least significant
    /// group first, with the high bit set on every byte except the last.
    fn encode_varint(mut value: u32) -> Vec<u8> {
        let mut bytes = Vec::new();
        while value >= 0x80 {
            bytes.push((value & 0x7F) as u8 | 0x80);
            value >>= 7;
        }
        bytes.push(value as u8);
        bytes
    }

    /// Frames `payload` as one record: varint type, varint payload length, payload bytes.
    fn encode_record(record_type: u16, payload: &[u8]) -> Vec<u8> {
        let mut bytes = encode_varint(u32::from(record_type));
        bytes.extend(encode_varint(payload.len() as u32));
        bytes.extend_from_slice(payload);
        bytes
    }

    /// Encodes `value` as a little-endian `u32` count of UTF-16 units followed by the
    /// units themselves in little-endian order.
    fn encode_wide_string(value: &str) -> Vec<u8> {
        let units: Vec<u16> = value.encode_utf16().collect();
        let mut bytes = (units.len() as u32).to_le_bytes().to_vec();
        bytes.extend(units.iter().flat_map(|unit| unit.to_le_bytes()));
        bytes
    }

    /// Concatenates the little-endian encodings of `words`.
    fn le_words(words: &[u32]) -> Vec<u8> {
        words.iter().flat_map(|word| word.to_le_bytes()).collect()
    }

    /// Builds a list-column payload: the column id followed by its name.
    fn column_payload(id: u32, name: &str) -> Vec<u8> {
        let mut bytes = le_words(&[id]);
        bytes.extend(encode_wide_string(name));
        bytes
    }

    #[test]
    fn parses_table_with_columns_autofilter_and_style() {
        // Bounds (rows 0..=9, columns 0..=3), then id 1, one header row, no totals row.
        let mut begin = le_words(&[0, 9, 0, 3, 1, 1, 0]);
        for text in ["SalesTable", "SalesTable", "TableStyleMedium2"] {
            begin.extend(encode_wide_string(text));
        }

        let data = [
            encode_record(BRT_BEGIN_LIST_ALT, &begin),
            encode_record(BRT_BEGIN_AFILTER, &[]),
            encode_record(BRT_LIST_COLUMN_ALT, &column_payload(1, "Region")),
            encode_record(BRT_LIST_COLUMN_ALT, &column_payload(2, "Amount")),
            encode_record(BRT_END_LIST_ALT, &[]),
        ]
        .concat();

        let table = parse_table(&data).expect("table should parse");

        assert_eq!(table.id, 1);
        assert_eq!(table.name, "SalesTable");
        assert_eq!(table.display_name, "SalesTable");
        let expected_range = RangeRef {
            first_row: 1,
            last_row: 10,
            first_col: 1,
            last_col: 4,
        };
        assert_eq!(table.range, expected_range);
        assert!(table.has_auto_filter);
        assert_eq!(table.style_name, Some("TableStyleMedium2".to_string()));
        assert_eq!(table.header_row_count, 1);
        assert_eq!(table.totals_row_count, 0);

        let names: Vec<&str> = table
            .columns
            .iter()
            .map(|column| column.name.as_str())
            .collect();
        assert_eq!(names, ["Region", "Amount"]);
    }

    #[test]
    fn synthesizes_columns_when_list_columns_missing() {
        // Single row, columns 0..=2, id 7, one header row, no totals row, empty name.
        let mut begin = le_words(&[0, 0, 0, 2, 7, 1, 0]);
        begin.extend(encode_wide_string(""));

        let data = [
            encode_record(BRT_BEGIN_LIST.as_u16(), &begin),
            encode_record(BRT_END_LIST.as_u16(), &[]),
        ]
        .concat();

        let table = parse_table(&data).expect("table should parse");

        assert_eq!(table.name, "Table7");
        assert_eq!(table.display_name, "Table7");
        assert_eq!(table.columns.len(), 3);
        assert_eq!(table.columns[0].name, "Column1");
        assert_eq!(table.columns[2].id, 3);
    }
}