// dxfscan 0.1.0
//
// Binary DXF parser with typed entity data and lookup indices.
// Documentation
// SPDX-License-Identifier: ISC
use crate::error::Error;
use crate::group_code::{ValueType, value_type_of};
use crate::value::GroupValue;

/// Byte sequence that [`BinaryReader::new`] requires at the very start of its
/// input: the ASCII text `AutoCAD Binary DXF` followed by CR, LF, 0x1A and a
/// NUL byte (22 bytes in total).
const SENTINEL: &[u8] = b"AutoCAD Binary DXF\r\n\x1a\0";

/// Reads (group_code, value) pairs from a binary DXF byte slice.
///
/// Strings and binary chunks borrow directly from the input,
/// so the reader and its values share the input's lifetime.
///
/// The reader is only a shared slice plus a byte offset, so cloning it is
/// cheap and yields an independent cursor over the same input. This is handy
/// for lookahead: clone, read ahead on the clone, and keep or discard it.
#[derive(Debug, Clone)]
pub struct BinaryReader<'a> {
    /// The complete input, including the leading sentinel bytes.
    data: &'a [u8],
    /// Offset into `data` of the next unread byte.
    pos: usize,
}

impl<'a> BinaryReader<'a> {
    /// Creates a new reader, validating the 22-byte binary DXF sentinel.
    pub fn new(data: &'a [u8]) -> Result<Self, Error> {
        if data.len() < SENTINEL.len() || &data[..SENTINEL.len()] != SENTINEL {
            return Err(Error::InvalidSentinel);
        }
        Ok(Self {
            data,
            pos: SENTINEL.len(),
        })
    }

    /// Returns the current byte offset in the input.
    pub fn pos(&self) -> usize {
        self.pos
    }

    /// Reads the next (group_code, value) pair, or `None` at end of input.
    pub fn next_pair(&mut self) -> Result<Option<(u16, GroupValue<'a>)>, Error> {
        if self.pos >= self.data.len() {
            return Ok(None);
        }

        let code = self.read_u16()?;
        let value = match value_type_of(code) {
            ValueType::String => {
                let start = self.pos;
                let null_pos = self.data[start..]
                    .iter()
                    .position(|&b| b == 0)
                    .ok_or(Error::UnterminatedString)?;
                self.pos = start + null_pos + 1; // skip past the null
                GroupValue::String(&self.data[start..start + null_pos])
            }
            ValueType::Boolean => {
                let b = self.read_bytes(1)?;
                GroupValue::Bool(b[0] != 0)
            }
            ValueType::Int16 => {
                let b = self.read_bytes(2)?;
                GroupValue::Int16(i16::from_le_bytes([b[0], b[1]]))
            }
            ValueType::Int32 => {
                let b = self.read_bytes(4)?;
                GroupValue::Int32(i32::from_le_bytes([b[0], b[1], b[2], b[3]]))
            }
            ValueType::Int64 => {
                let b = self.read_bytes(8)?;
                GroupValue::Int64(i64::from_le_bytes([
                    b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7],
                ]))
            }
            ValueType::Double => {
                let b = self.read_bytes(8)?;
                GroupValue::Double(f64::from_le_bytes([
                    b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7],
                ]))
            }
            ValueType::BinaryChunk => {
                let len_byte = self.read_bytes(1)?[0] as usize;
                let chunk = self.read_bytes(len_byte)?;
                GroupValue::BinaryChunk(chunk)
            }
        };

        Ok(Some((code, value)))
    }

    fn read_u16(&mut self) -> Result<u16, Error> {
        let b = self.read_bytes(2)?;
        Ok(u16::from_le_bytes([b[0], b[1]]))
    }

    fn read_bytes(&mut self, n: usize) -> Result<&'a [u8], Error> {
        if self.pos + n > self.data.len() {
            return Err(Error::UnexpectedEof);
        }
        let slice = &self.data[self.pos..self.pos + n];
        self.pos += n;
        Ok(slice)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    extern crate alloc;
    use alloc::vec::Vec;

    /// Assembles a binary DXF image: the sentinel, then for every pair its
    /// group code as a little-endian `u16` followed by the given raw value
    /// bytes.
    fn build_binary_dxf(pairs: &[(u16, &[u8])]) -> Vec<u8> {
        let mut image = Vec::new();
        image.extend_from_slice(SENTINEL);
        for pair in pairs {
            let (code, payload) = *pair;
            image.extend_from_slice(&code.to_le_bytes());
            image.extend_from_slice(payload);
        }
        image
    }

    /// Encodes `s` as a string value: its bytes plus a trailing NUL.
    fn null_str(s: &str) -> Vec<u8> {
        let mut encoded = Vec::with_capacity(s.len() + 1);
        encoded.extend_from_slice(s.as_bytes());
        encoded.push(0);
        encoded
    }

    /// Opens `image` and returns its first pair, panicking if the image is
    /// rejected, the read fails, or there is no pair at all.
    fn first_pair(image: &[u8]) -> (u16, GroupValue<'_>) {
        let mut reader = BinaryReader::new(image).unwrap();
        reader.next_pair().unwrap().unwrap()
    }

    #[test]
    fn sentinel_validation() {
        // Both an empty input and a wrong header of sufficient length are rejected.
        let rejected: [&[u8]; 2] = [b"", b"not a dxf file at all!!"];
        for input in rejected.iter() {
            assert_eq!(
                BinaryReader::new(*input).unwrap_err(),
                Error::InvalidSentinel
            );
        }
        assert!(BinaryReader::new(SENTINEL).is_ok());
    }

    #[test]
    fn empty_after_sentinel() {
        let mut reader = BinaryReader::new(SENTINEL).unwrap();
        assert!(reader.next_pair().unwrap().is_none());
    }

    #[test]
    fn string_value() {
        let text = null_str("SECTION");
        let image = build_binary_dxf(&[(0, text.as_slice())]);
        let mut reader = BinaryReader::new(&image).unwrap();

        let (code, value) = reader.next_pair().unwrap().unwrap();
        assert_eq!(code, 0);
        assert_eq!(value.as_str_bytes().unwrap(), b"SECTION");

        // The single pair is followed by end of input.
        assert!(reader.next_pair().unwrap().is_none());
    }

    #[test]
    fn double_value() {
        let raw = 1.5f64.to_le_bytes();
        let image = build_binary_dxf(&[(10, &raw[..])]);
        let (code, value) = first_pair(&image);
        assert_eq!(code, 10);
        assert_eq!(value.as_f64().unwrap(), 1.5);
    }

    #[test]
    fn int16_value() {
        let raw = 42i16.to_le_bytes();
        let image = build_binary_dxf(&[(70, &raw[..])]);
        let (code, value) = first_pair(&image);
        assert_eq!(code, 70);
        assert_eq!(value.as_i16().unwrap(), 42);
    }

    #[test]
    fn int32_value() {
        let raw = 100_000i32.to_le_bytes();
        let image = build_binary_dxf(&[(90, &raw[..])]);
        let (code, value) = first_pair(&image);
        assert_eq!(code, 90);
        assert_eq!(value.as_i32().unwrap(), 100_000);
    }

    #[test]
    fn bool_value() {
        let raw = [1u8];
        let image = build_binary_dxf(&[(290, &raw[..])]);
        let (code, value) = first_pair(&image);
        assert_eq!(code, 290);
        assert!(value.as_bool().unwrap());
    }

    #[test]
    fn multiple_pairs() {
        let section = null_str("SECTION");
        let header = null_str("HEADER");
        let flag = 1i16.to_le_bytes();
        let image = build_binary_dxf(&[
            (0, section.as_slice()),
            (2, header.as_slice()),
            (70, &flag[..]),
        ]);
        let mut reader = BinaryReader::new(&image).unwrap();

        let (code, value) = reader.next_pair().unwrap().unwrap();
        assert_eq!(code, 0);
        assert_eq!(value.as_str_bytes().unwrap(), b"SECTION");

        let (code, value) = reader.next_pair().unwrap().unwrap();
        assert_eq!(code, 2);
        assert_eq!(value.as_str_bytes().unwrap(), b"HEADER");

        let (code, value) = reader.next_pair().unwrap().unwrap();
        assert_eq!(code, 70);
        assert_eq!(value.as_i16().unwrap(), 1);

        assert!(reader.next_pair().unwrap().is_none());
    }

    #[test]
    fn truncated_data() {
        // Group code 10 announces an 8-byte double, but only 3 value bytes follow.
        let short_value = [0u8; 3];
        let image = build_binary_dxf(&[(10, &short_value[..])]);
        let mut reader = BinaryReader::new(&image).unwrap();
        assert_eq!(reader.next_pair().unwrap_err(), Error::UnexpectedEof);
    }

    #[test]
    fn unterminated_string() {
        // Group code 0 announces a string, but the bytes never reach a NUL.
        let image = build_binary_dxf(&[(0, &b"no null"[..])]);
        let mut reader = BinaryReader::new(&image).unwrap();
        assert_eq!(reader.next_pair().unwrap_err(), Error::UnterminatedString);
    }
}