use xlsbye_core::error::{Result, XlsByeError};
use xlsbye_core::types::{DefinedName, SheetMeta, SheetType, SheetVisibility, WorkbookMeta};
use xlsbye_formula::decompile::decompile_formula;
use super::cursor::RecordCursor;
use super::header::RecordIter;
use super::ids::{BRT_BEGIN_EXTERN_SHEET, BRT_BUNDLE_SH, BRT_DEFINED_NAME, BRT_WB_PROP};
/// Sentinel `itab` value of a defined-name record: `parse_defined_name` reports
/// `sheet_index = None` when the record carries this value, and `Some(itab)` otherwise.
const GLOBAL_DEFINED_NAME_SCOPE: u32 = u32::MAX;
pub fn parse_workbook(data: &[u8]) -> Result<WorkbookMeta> {
let mut sheets = Vec::new();
let mut formula_sheet_names = Vec::new();
let mut defined_names = Vec::new();
let mut date1904 = false;
for record in RecordIter::new(data) {
let (record_type, payload) = record?;
match record_type {
ty if ty == BRT_WB_PROP.as_u16() => {
date1904 = parse_workbook_properties(payload)?;
}
ty if ty == BRT_BUNDLE_SH.as_u16() => {
sheets.push(parse_bundle_sheet(payload)?);
}
ty if ty == BRT_BEGIN_EXTERN_SHEET.as_u16() => {
formula_sheet_names = parse_extern_sheet_names(payload, &sheets);
}
ty if ty == BRT_DEFINED_NAME.as_u16() => {
let names = defined_names
.iter()
.map(|name: &DefinedName| (name.name.clone(), name.formula.clone()))
.collect::<Vec<_>>();
let sheet_names = if formula_sheet_names.is_empty() {
sheets.iter().map(|sheet| sheet.name.clone()).collect::<Vec<_>>()
} else {
formula_sheet_names.clone()
};
defined_names.push(parse_defined_name(payload, &sheet_names, &names)?);
}
_ => {}
}
}
if formula_sheet_names.is_empty() {
formula_sheet_names = sheets.iter().map(|sheet| sheet.name.clone()).collect();
}
Ok(WorkbookMeta {
sheets,
formula_sheet_names,
defined_names,
has_vba: false,
date1904,
})
}
fn parse_extern_sheet_names(payload: &[u8], sheets: &[SheetMeta]) -> Vec<String> {
if payload.len() < 4 {
return Vec::new();
}
let count = usize::try_from(u32::from_le_bytes(payload[0..4].try_into().unwrap())).unwrap_or(0);
let mut names = Vec::with_capacity(count);
for index in 0..count {
let offset = 4 + index * 12;
if offset + 12 > payload.len() {
break;
}
let _sup_book = i32::from_le_bytes(payload[offset..offset + 4].try_into().unwrap());
let first_tab = i32::from_le_bytes(payload[offset + 4..offset + 8].try_into().unwrap());
let _last_tab = i32::from_le_bytes(payload[offset + 8..offset + 12].try_into().unwrap());
let name = usize::try_from(first_tab)
.ok()
.and_then(|sheet_index| sheets.get(sheet_index))
.map(|sheet| sheet.name.clone())
.unwrap_or_default();
names.push(name);
}
names
}
/// Reads the leading 32-bit flags word of a workbook-properties payload and returns
/// whether its lowest bit is set (the caller stores the result as `date1904`).
///
/// # Errors
///
/// Returns the cursor's error when the flags word cannot be read.
fn parse_workbook_properties(payload: &[u8]) -> Result<bool> {
    const LOW_BIT: u32 = 0x1;

    let mut reader = RecordCursor::new(payload);
    let flag_word = reader.read_u32()?;
    let low_bit_set = (flag_word & LOW_BIT) == LOW_BIT;
    Ok(low_bit_set)
}
/// Decodes one bundle-sheet payload into a [`SheetMeta`].
///
/// Fields are read in order: visibility state (`u32`), tab id (`u32`), relationship id
/// (wide string), sheet name (wide string). The visibility value is validated only after
/// all four fields have been read. `sheet_type` is always set to
/// [`SheetType::Worksheet`] here.
///
/// # Errors
///
/// Returns a cursor error when a field cannot be read, or `XlsByeError::Biff12` when
/// the visibility value is not 0, 1 or 2.
fn parse_bundle_sheet(payload: &[u8]) -> Result<SheetMeta> {
    let mut reader = RecordCursor::new(payload);
    let raw_state = reader.read_u32()?;
    let sheet_id = reader.read_u32()?;
    let rel_id = reader.read_wide_string()?;
    let name = reader.read_wide_string()?;

    let visibility = match raw_state {
        0 => Ok(SheetVisibility::Visible),
        1 => Ok(SheetVisibility::Hidden),
        2 => Ok(SheetVisibility::VeryHidden),
        value => Err(XlsByeError::Biff12(format!(
            "invalid sheet visibility value: {value}"
        ))),
    };
    let state = visibility?;

    Ok(SheetMeta {
        name,
        sheet_id,
        rel_id,
        state,
        sheet_type: SheetType::Worksheet,
    })
}
/// Decodes one defined-name payload into a [`DefinedName`].
///
/// Fields are read in order: flags (`u32`, lowest bit = hidden), a one-byte keyboard
/// shortcut (read and discarded), `itab` (`u32`), the name (wide string), the formula
/// byte length (`u32`) and that many formula bytes. The formula bytes are decompiled to
/// text using `sheets` and `names` as lookup tables.
///
/// `sheet_index` is `None` when `itab` equals `GLOBAL_DEFINED_NAME_SCOPE`, otherwise
/// `Some(itab)`.
///
/// # Errors
///
/// Returns a cursor error when a field cannot be read, `XlsByeError::Biff12` when the
/// formula length does not fit in `usize`, or `XlsByeError::Formula` (prefixed with the
/// defined name) when decompilation fails.
fn parse_defined_name(
    payload: &[u8],
    sheets: &[String],
    names: &[(String, String)],
) -> Result<DefinedName> {
    const HIDDEN_BIT: u32 = 0x1;

    let mut reader = RecordCursor::new(payload);

    let hidden = (reader.read_u32()? & HIDDEN_BIT) == HIDDEN_BIT;
    // The shortcut byte is not kept, but it must be consumed to reach `itab`.
    reader.read_u8()?;
    let itab = reader.read_u32()?;
    let name = reader.read_wide_string()?;

    let raw_len = reader.read_u32()?;
    let formula_len = match usize::try_from(raw_len) {
        Ok(len) => len,
        Err(_) => {
            return Err(XlsByeError::Biff12(
                "defined name formula length out of range".to_string(),
            ))
        }
    };
    let formula_bytes = reader.read_bytes(formula_len)?;

    let formula = match decompile_formula(formula_bytes, sheets, names) {
        Ok(text) => text,
        Err(err) => {
            return Err(XlsByeError::Formula(format!(
                "defined name '{name}': {err}"
            )))
        }
    };

    let sheet_index = match itab {
        GLOBAL_DEFINED_NAME_SCOPE => None,
        scoped => Some(scoped),
    };

    Ok(DefinedName {
        name,
        formula,
        sheet_index,
        hidden,
    })
}
// Unit tests for the workbook parser. Each test hand-assembles a record stream from
// raw bytes and feeds it to `parse_workbook`.
#[cfg(test)]
mod tests {
use super::*;
// Appends `value` to `buf` as a wide string: a little-endian u32 count of UTF-16 code
// units followed by the code units themselves, each little-endian.
fn push_wide_string(buf: &mut Vec<u8>, value: &str) {
let chars = value.encode_utf16().collect::<Vec<_>>();
buf.extend_from_slice(&(chars.len() as u32).to_le_bytes());
for ch in chars {
buf.extend_from_slice(&ch.to_le_bytes());
}
}
// Encodes `value` as a variable-length integer: 7 bits per byte, least-significant
// group first, with the high bit set on every byte except the last.
fn encode_varint(mut value: u32) -> Vec<u8> {
let mut out = Vec::new();
loop {
let mut byte = (value & 0x7F) as u8;
value >>= 7;
if value != 0 {
// More groups follow: mark this byte as a continuation byte.
byte |= 0x80;
}
out.push(byte);
if value == 0 {
break;
}
}
out
}
// Frames `payload` as one record: varint record type, varint payload length, payload.
fn encode_record(record_type: u16, payload: &[u8]) -> Vec<u8> {
let mut out = Vec::new();
out.extend_from_slice(&encode_varint(u32::from(record_type)));
out.extend_from_slice(&encode_varint(payload.len() as u32));
out.extend_from_slice(payload);
out
}
// End-to-end check: one properties record, one sheet and one defined name.
#[test]
fn parses_workbook_properties_sheets_and_defined_names() {
// Flags word with the lowest bit set, which the parser reports as `date1904`.
let wb_prop = encode_record(BRT_WB_PROP.as_u16(), &1u32.to_le_bytes());
// Bundle sheet: visibility 2 (VeryHidden), tab id 7, relationship id, sheet name.
let mut bundle_payload = Vec::new();
bundle_payload.extend_from_slice(&2u32.to_le_bytes());
bundle_payload.extend_from_slice(&7u32.to_le_bytes());
push_wide_string(&mut bundle_payload, "rId5");
push_wide_string(&mut bundle_payload, "Config");
let bundle = encode_record(BRT_BUNDLE_SH.as_u16(), &bundle_payload);
// Defined name: flags 1 (hidden), shortcut byte 0, global scope, name, then the
// length-prefixed formula bytes.
let mut name_payload = Vec::new();
name_payload.extend_from_slice(&1u32.to_le_bytes());
name_payload.push(0);
name_payload.extend_from_slice(&GLOBAL_DEFINED_NAME_SCOPE.to_le_bytes());
push_wide_string(&mut name_payload, "MyName");
// Formula bytes that the assertions below expect to decompile to "1".
let formula = [0x1E, 0x01, 0x00];
name_payload.extend_from_slice(&(formula.len() as u32).to_le_bytes());
name_payload.extend_from_slice(&formula);
let defined = encode_record(BRT_DEFINED_NAME.as_u16(), &name_payload);
let mut data = Vec::new();
data.extend_from_slice(&wb_prop);
data.extend_from_slice(&bundle);
data.extend_from_slice(&defined);
let meta = parse_workbook(&data).expect("workbook should parse");
assert!(meta.date1904);
assert!(!meta.has_vba);
assert_eq!(meta.sheets.len(), 1);
assert_eq!(meta.sheets[0].name, "Config");
assert_eq!(meta.sheets[0].sheet_id, 7);
assert_eq!(meta.sheets[0].rel_id, "rId5");
assert_eq!(meta.sheets[0].state, SheetVisibility::VeryHidden);
assert_eq!(meta.sheets[0].sheet_type, SheetType::Worksheet);
// No extern-sheet record was supplied, so the sheet list is the fallback.
assert_eq!(meta.formula_sheet_names, vec!["Config".to_string()]);
assert_eq!(meta.defined_names.len(), 1);
assert_eq!(meta.defined_names[0].name, "MyName");
assert_eq!(meta.defined_names[0].formula, "1");
assert_eq!(meta.defined_names[0].sheet_index, None);
assert!(meta.defined_names[0].hidden);
}
// An extern-sheet record reorders the formula sheet names by its first-tab indices.
#[test]
fn parses_extern_sheet_name_mapping() {
let mut first_sheet = Vec::new();
first_sheet.extend_from_slice(&0u32.to_le_bytes());
first_sheet.extend_from_slice(&0u32.to_le_bytes());
push_wide_string(&mut first_sheet, "rId1");
push_wide_string(&mut first_sheet, "SheetA");
let mut second_sheet = Vec::new();
second_sheet.extend_from_slice(&0u32.to_le_bytes());
second_sheet.extend_from_slice(&1u32.to_le_bytes());
push_wide_string(&mut second_sheet, "rId2");
push_wide_string(&mut second_sheet, "SheetB");
// Entry count 2, then two 12-byte entries of three i32 values each:
// (0, 1, 1) resolves to the second sheet, (0, 0, 0) to the first.
let mut extern_sheet = Vec::new();
extern_sheet.extend_from_slice(&2u32.to_le_bytes());
extern_sheet.extend_from_slice(&0i32.to_le_bytes());
extern_sheet.extend_from_slice(&1i32.to_le_bytes());
extern_sheet.extend_from_slice(&1i32.to_le_bytes());
extern_sheet.extend_from_slice(&0i32.to_le_bytes());
extern_sheet.extend_from_slice(&0i32.to_le_bytes());
extern_sheet.extend_from_slice(&0i32.to_le_bytes());
let mut data = Vec::new();
data.extend_from_slice(&encode_record(BRT_BUNDLE_SH.as_u16(), &first_sheet));
data.extend_from_slice(&encode_record(BRT_BUNDLE_SH.as_u16(), &second_sheet));
data.extend_from_slice(&encode_record(BRT_BEGIN_EXTERN_SHEET.as_u16(), &extern_sheet));
let meta = parse_workbook(&data).expect("extern sheet mapping should parse");
assert_eq!(meta.formula_sheet_names, vec!["SheetB".to_string(), "SheetA".to_string()]);
}
// A record type the parser does not handle must be ignored, not rejected.
#[test]
fn skips_unknown_records() {
let unknown = encode_record(0x0123, &[1, 2, 3, 4]);
let meta = parse_workbook(&unknown).expect("unknown records should be skipped");
assert_eq!(meta.sheets.len(), 0);
assert_eq!(meta.defined_names.len(), 0);
assert!(!meta.date1904);
}
// A visibility value outside 0..=2 must surface as a parse error.
#[test]
fn rejects_invalid_sheet_visibility() {
let mut payload = Vec::new();
payload.extend_from_slice(&99u32.to_le_bytes());
payload.extend_from_slice(&1u32.to_le_bytes());
push_wide_string(&mut payload, "rId1");
push_wide_string(&mut payload, "Sheet1");
let data = encode_record(BRT_BUNDLE_SH.as_u16(), &payload);
let err = parse_workbook(&data).expect_err("invalid visibility should fail");
assert!(format!("{err}").contains("invalid sheet visibility"));
}
}