xlsbye-biff12 0.1.0

BIFF12 binary record parser for XLSB files
Documentation
use std::collections::HashSet;

use xlsbye_core::error::Result;
use xlsbye_core::types::ParsedExternalLink;

use crate::record::header::RecordIter;
use crate::record::ids::{BRT_BEGIN_EXTERN_SHEET, BRT_BEGIN_SUP_BOOK};
use crate::strings::{decode_short_string, decode_short_string_u16, decode_wide_string};

/// Parses the binary record stream of an external-link part.
///
/// Every record produced by [`RecordIter`] over `data` is inspected:
///
/// * For `BRT_BEGIN_SUP_BOOK` records, the payload is searched for a
///   relationship id (`rId` followed by digits). Only the first id found is
///   kept; later sup-book records cannot overwrite it.
/// * For `BRT_BEGIN_EXTERN_SHEET` records, every string candidate in the
///   payload that is non-empty and is not itself a relationship id is recorded
///   as a sheet name. Names are de-duplicated while preserving first-seen order.
///
/// All other record types are ignored.
///
/// # Errors
///
/// Returns the first error yielded by the record iterator.
pub fn parse_external_link(data: &[u8]) -> Result<ParsedExternalLink> {
    let sup_book_type = BRT_BEGIN_SUP_BOOK.as_u16();
    let extern_sheet_type = BRT_BEGIN_EXTERN_SHEET.as_u16();

    let mut rel_id: Option<String> = None;
    let mut sheet_names = Vec::new();
    // Tracks names already emitted so `sheet_names` stays duplicate-free.
    let mut already_listed = HashSet::new();

    for record in RecordIter::new(data) {
        let (record_type, payload) = record?;

        if record_type == sup_book_type {
            // First match wins: the payload is only scanned while no id is known.
            rel_id = rel_id.or_else(|| extract_rel_id(payload));
        } else if record_type == extern_sheet_type {
            for name in extract_string_candidates(payload) {
                let usable = !name.is_empty() && !is_rid(&name);
                if usable && already_listed.insert(name.clone()) {
                    sheet_names.push(name);
                }
            }
        }
    }

    Ok(ParsedExternalLink {
        rel_id,
        sheet_names,
    })
}

/// Returns the first string candidate in `payload` that looks like a
/// relationship id (see [`is_rid`]), or `None` when no candidate qualifies.
fn extract_rel_id(payload: &[u8]) -> Option<String> {
    for candidate in extract_string_candidates(payload) {
        if is_rid(&candidate) {
            return Some(candidate);
        }
    }
    None
}

/// Reports whether `value` has the exact shape `rId<digits>`: the literal,
/// case-sensitive prefix `rId` followed by one or more ASCII digits and
/// nothing else.
fn is_rid(value: &str) -> bool {
    match value.strip_prefix("rId") {
        // Any byte of a non-ASCII character is >= 0x80, so a byte-wise digit
        // check accepts exactly the same strings as a char-wise one.
        Some(digits) if !digits.is_empty() => digits.bytes().all(|b| b.is_ascii_digit()),
        _ => false,
    }
}

/// Brute-force scan of `payload` for anything that decodes as a string.
///
/// At every byte offset the remaining bytes are offered, in this order, to
/// `decode_wide_string`, `decode_short_string` and `decode_short_string_u16`.
/// Each successful decode whose text is non-empty and contains no NUL
/// character is appended to the result; the number of bytes the decoder
/// reports as consumed is ignored, so overlapping candidates are all kept.
///
/// The returned vector is ordered by offset, then by decoder order, and may
/// contain duplicates.
fn extract_string_candidates(payload: &[u8]) -> Vec<String> {
    /// A decoded string is kept only if it is non-empty and NUL-free.
    fn plausible(text: &str) -> bool {
        !(text.is_empty() || text.contains('\0'))
    }

    let mut found = Vec::new();
    for start in 0..payload.len() {
        let tail = &payload[start..];

        let wide = decode_wide_string(tail).map(|(text, _)| text);
        let short = decode_short_string(tail).map(|(text, _)| text);
        let short_u16 = decode_short_string_u16(tail).map(|(text, _)| text);

        found.extend(
            wide.into_iter()
                .chain(short)
                .chain(short_u16)
                .filter(|text| plausible(text)),
        );
    }
    found
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `value` as a base-128 varint, least-significant group first:
    /// seven payload bits per byte, with the high bit set on every byte
    /// except the last.
    fn encode_varint(mut value: u32) -> Vec<u8> {
        let mut out = Vec::new();
        while value >= 0x80 {
            out.push((value & 0x7F) as u8 | 0x80);
            value >>= 7;
        }
        // Final group: fits in seven bits, so no continuation flag.
        out.push(value as u8);
        out
    }

    /// Serializes one record as varint type, varint payload length, payload.
    fn encode_record(record_type: u16, payload: &[u8]) -> Vec<u8> {
        let type_bytes = encode_varint(u32::from(record_type));
        let len_bytes = encode_varint(payload.len() as u32);
        [type_bytes.as_slice(), len_bytes.as_slice(), payload].concat()
    }

    /// Serializes `value` as a little-endian `u32` count of UTF-16 code units
    /// followed by the code units themselves, each little-endian.
    fn encode_wide_string(value: &str) -> Vec<u8> {
        let units: Vec<u16> = value.encode_utf16().collect();
        let count = (units.len() as u32).to_le_bytes();
        count
            .iter()
            .copied()
            .chain(units.iter().flat_map(|unit| unit.to_le_bytes()))
            .collect()
    }

    #[test]
    fn parses_external_link_rel_id_and_sheet_names() {
        // Sup-book payload: a two-byte zero field, then the relationship id.
        let sup_book = [
            &0u16.to_le_bytes()[..],
            &encode_wide_string("rId7")[..],
        ]
        .concat();

        // Extern-sheet payload: a four-byte field holding 5, then one sheet name.
        let extern_sheet = [
            &5u32.to_le_bytes()[..],
            &encode_wide_string("ExternalSheet")[..],
        ]
        .concat();

        let data = [
            encode_record(BRT_BEGIN_SUP_BOOK.as_u16(), &sup_book),
            encode_record(BRT_BEGIN_EXTERN_SHEET.as_u16(), &extern_sheet),
        ]
        .concat();

        let parsed = parse_external_link(&data).expect("external link should parse");
        assert_eq!(parsed.rel_id.as_deref(), Some("rId7"));
        assert_eq!(parsed.sheet_names, vec![String::from("ExternalSheet")]);
    }
}