// xlsbye-biff12 0.1.0
//
// BIFF12 binary record parser for XLSB files.
use xlsbye_core::error::{Result, XlsByeError};

use crate::strings::{decode_short_string, decode_short_string_u16, decode_wide_string};

/// Read position over a borrowed record payload.
///
/// The cursor never owns the bytes; it pairs the payload slice with the
/// offset of the next unread byte.
#[derive(Clone, Copy, Debug)]
pub struct RecordCursor<'a> {
    /// The complete payload being read.
    data: &'a [u8],
    /// Offset into `data` of the next unread byte.
    pos: usize,
}

impl<'a> RecordCursor<'a> {
    pub fn new(data: &'a [u8]) -> Self {
        Self { data, pos: 0 }
    }

    pub fn remaining(&self) -> usize {
        self.data.len().saturating_sub(self.pos)
    }

    pub fn is_empty(&self) -> bool {
        self.remaining() == 0
    }

    pub fn read_bytes(&mut self, n: usize) -> Result<&'a [u8]> {
        let end = self.pos.checked_add(n).ok_or_else(|| {
            XlsByeError::Biff12("cursor overflow while advancing payload position".to_string())
        })?;

        if end > self.data.len() {
            return Err(XlsByeError::Biff12(format!(
                "record payload underrun: requested {n} byte(s), {} remaining",
                self.remaining()
            )));
        }

        let out = &self.data[self.pos..end];
        self.pos = end;
        Ok(out)
    }

    pub fn skip(&mut self, n: usize) -> Result<()> {
        let _ = self.read_bytes(n)?;
        Ok(())
    }

    pub fn read_u8(&mut self) -> Result<u8> {
        Ok(self.read_bytes(1)?[0])
    }

    pub fn read_u16(&mut self) -> Result<u16> {
        let bytes = self.read_bytes(2)?;
        Ok(u16::from_le_bytes([bytes[0], bytes[1]]))
    }

    pub fn read_u32(&mut self) -> Result<u32> {
        let bytes = self.read_bytes(4)?;
        Ok(u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]))
    }

    pub fn read_i32(&mut self) -> Result<i32> {
        let bytes = self.read_bytes(4)?;
        Ok(i32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]))
    }

    pub fn read_f64(&mut self) -> Result<f64> {
        let bytes = self.read_bytes(8)?;
        Ok(f64::from_le_bytes([
            bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
        ]))
    }

    pub fn read_bool(&mut self) -> Result<bool> {
        match self.read_u8()? {
            0 => Ok(false),
            1 => Ok(true),
            value => Err(XlsByeError::Biff12(format!(
                "invalid boolean value {value}; expected 0 or 1"
            ))),
        }
    }

    pub fn read_wide_string(&mut self) -> Result<String> {
        let data = &self.data[self.pos..];
        let (value, consumed) = decode_wide_string(data)
            .ok_or_else(|| XlsByeError::Biff12("invalid or truncated XLWideString".to_string()))?;
        self.pos += consumed;
        Ok(value)
    }

    pub fn read_short_string(&mut self) -> Result<String> {
        let data = &self.data[self.pos..];
        if data.len() >= 2 && data[1] == 0 {
            if let Some((value, consumed)) = decode_short_string_u16(data) {
                self.pos += consumed;
                return Ok(value);
            }
        }

        if let Some((value, consumed)) = decode_short_string(data) {
            self.pos += consumed;
            return Ok(value);
        }

        if let Some((value, consumed)) = decode_short_string_u16(data) {
            self.pos += consumed;
            return Ok(value);
        }

        Err(XlsByeError::Biff12(
            "invalid or truncated short XLWideString".to_string(),
        ))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `text` as UTF-16 little-endian bytes, without any length prefix.
    fn utf16le(text: &str) -> Vec<u8> {
        let mut bytes = Vec::new();
        for unit in text.encode_utf16() {
            bytes.extend_from_slice(&unit.to_le_bytes());
        }
        bytes
    }

    /// Builds `prefix` followed by the UTF-16LE encoding of `text`.
    fn prefixed(prefix: &[u8], text: &str) -> Vec<u8> {
        let mut bytes = prefix.to_vec();
        bytes.extend_from_slice(&utf16le(text));
        bytes
    }

    /// Every primitive reader consumes its own width and decodes little-endian.
    #[test]
    fn reads_primitives_in_sequence() {
        let mut payload = vec![0xABu8];
        payload.extend_from_slice(&0x1234u16.to_le_bytes());
        payload.extend_from_slice(&0x89ABCDEFu32.to_le_bytes());
        payload.extend_from_slice(&(-7i32).to_le_bytes());
        payload.extend_from_slice(&42.5f64.to_le_bytes());
        payload.push(1);

        let mut cursor = RecordCursor::new(&payload);

        let byte = cursor.read_u8().unwrap();
        let half = cursor.read_u16().unwrap();
        let word = cursor.read_u32().unwrap();
        let signed = cursor.read_i32().unwrap();
        let float = cursor.read_f64().unwrap();
        let flag = cursor.read_bool().unwrap();

        assert_eq!(byte, 0xAB);
        assert_eq!(half, 0x1234);
        assert_eq!(word, 0x89ABCDEF);
        assert_eq!(signed, -7);
        assert_eq!(float, 42.5);
        assert!(flag);
        assert!(cursor.is_empty());
    }

    /// Reads and skips shrink `remaining`, and reading past the end is an error.
    #[test]
    fn read_bytes_skip_and_remaining_are_bounded() {
        let payload = [1u8, 2, 3, 4, 5];
        let mut cursor = RecordCursor::new(&payload);

        assert_eq!(cursor.remaining(), 5);

        let head = cursor.read_bytes(2).unwrap();
        assert_eq!(head, &[1, 2]);

        cursor.skip(2).unwrap();
        assert_eq!(cursor.remaining(), 1);

        let last = cursor.read_u8().unwrap();
        assert_eq!(last, 5);
        assert!(cursor.is_empty());

        let past_end = cursor.read_u8();
        assert!(past_end.is_err());
    }

    /// Only 0 and 1 are accepted as boolean bytes.
    #[test]
    fn read_bool_rejects_invalid_values() {
        let payload = [2u8];
        let outcome = RecordCursor::new(&payload).read_bool();
        assert!(outcome.is_err());
    }

    /// A u32-prefixed wide string and both short-string prefix widths decode.
    #[test]
    fn reads_wide_string_and_short_string_forms() {
        let wide = prefixed(&2u32.to_le_bytes(), "Hi");
        let short_u8 = prefixed(&[2u8], "Yo");
        let short_u16 = prefixed(&2u16.to_le_bytes(), "Ok");

        let wide_text = RecordCursor::new(&wide).read_wide_string().unwrap();
        let short_u8_text = RecordCursor::new(&short_u8).read_short_string().unwrap();
        let short_u16_text = RecordCursor::new(&short_u16).read_short_string().unwrap();

        assert_eq!(wide_text, "Hi");
        assert_eq!(short_u8_text, "Yo");
        assert_eq!(short_u16_text, "Ok");
    }

    /// Payloads shorter than their declared length are rejected.
    #[test]
    fn read_string_fails_on_truncated_data() {
        let truncated_short = [2u8, b'A', 0];
        let short_outcome = RecordCursor::new(&truncated_short).read_short_string();
        assert!(short_outcome.is_err());

        let truncated_wide = [1u8, 0, 0, 0, b'X'];
        let wide_outcome = RecordCursor::new(&truncated_wide).read_wide_string();
        assert!(wide_outcome.is_err());
    }
}