dxfbin 0.1.0

Streaming text DXF to binary DXF converter
Documentation
// SPDX-License-Identifier: ISC
use crate::error::Error;
use crate::group_code::{ValueType, value_type_of};
use crate::sink::Sink;

/// Which kind of line the converter expects next.
#[derive(Clone, Copy)]
enum State {
    /// Initial state: the sink's sentinel has not been emitted yet.
    /// `Converter::feed` emits it and moves on to `GroupCode`.
    Sentinel,
    /// The next complete line is parsed as a group code.
    GroupCode,
    /// The next complete line is the value belonging to the stored group code.
    Value,
}

/// Streaming text-DXF to binary-DXF converter.
///
/// Feed byte slices via [`feed`](Self::feed); the converter parses
/// complete newline-delimited group/value pairs and dispatches typed
/// calls to the provided [`Sink`].
pub struct Converter {
    /// What kind of line is expected next.
    state: State,
    /// Most recently parsed group code; it selects how the following value
    /// line is interpreted.
    code: u16,
}

impl Default for Converter {
    fn default() -> Self {
        Self::new()
    }
}

impl Converter {
    /// Creates a new converter in its initial state.
    pub fn new() -> Self {
        Self {
            state: State::Sentinel,
            code: 0,
        }
    }

    /// Returns `true` if the converter is not expecting a value line.
    pub fn is_complete(&self) -> bool {
        !matches!(self.state, State::Value)
    }

    /// Processes as many complete lines in `input` as possible, dispatching
    /// typed values to `sink`. Returns the number of bytes consumed.
    pub fn feed<S: Sink>(&mut self, input: &[u8], sink: &mut S) -> Result<usize, Error<S::Error>> {
        let mut consumed = 0;

        if matches!(self.state, State::Sentinel) {
            sink.sentinel().map_err(Error::Sink)?;
            self.state = State::GroupCode;
        }

        loop {
            let remaining = &input[consumed..];
            let Some(nl) = remaining.iter().position(|&b| b == b'\n') else {
                break;
            };

            let line = strip_cr(&remaining[..nl]);
            self.process_line(line, sink)?;
            consumed += nl + 1;
        }

        Ok(consumed)
    }

    fn process_line<S: Sink>(&mut self, line: &[u8], sink: &mut S) -> Result<(), Error<S::Error>> {
        match self.state {
            State::Sentinel => unreachable!(),
            State::GroupCode => {
                let s = core::str::from_utf8(line).map_err(|_| Error::InvalidUtf8)?;
                self.code = s.trim().parse().map_err(|_| Error::InvalidGroupCode)?;
                sink.group_code(self.code).map_err(Error::Sink)?;
                self.state = State::Value;
            }
            State::Value => {
                match value_type_of(self.code) {
                    ValueType::String => {
                        sink.string(line).map_err(Error::Sink)?;
                    }
                    ValueType::Boolean => {
                        let s = ascii_trimmed(line).map_err(|()| Error::InvalidUtf8)?;
                        let v: i16 = s.parse().map_err(|_| Error::InvalidValue)?;
                        sink.boolean(v != 0).map_err(Error::Sink)?;
                    }
                    ValueType::Int16 => {
                        let s = ascii_trimmed(line).map_err(|()| Error::InvalidUtf8)?;
                        let v: i16 = s.parse().map_err(|_| Error::InvalidValue)?;
                        sink.int16(v).map_err(Error::Sink)?;
                    }
                    ValueType::Int32 => {
                        let s = ascii_trimmed(line).map_err(|()| Error::InvalidUtf8)?;
                        let v: i32 = s.parse().map_err(|_| Error::InvalidValue)?;
                        sink.int32(v).map_err(Error::Sink)?;
                    }
                    ValueType::Int64 => {
                        let s = ascii_trimmed(line).map_err(|()| Error::InvalidUtf8)?;
                        let v: i64 = s.parse().map_err(|_| Error::InvalidValue)?;
                        sink.int64(v).map_err(Error::Sink)?;
                    }
                    ValueType::Double => {
                        let s = ascii_trimmed(line).map_err(|()| Error::InvalidUtf8)?;
                        let v: f64 = s.parse().map_err(|_| Error::InvalidValue)?;
                        sink.double(v).map_err(Error::Sink)?;
                    }
                    ValueType::BinaryChunk => {
                        let s = ascii_trimmed(line).map_err(|()| Error::InvalidUtf8)?;
                        let mut buf = [0u8; 255];
                        let len =
                            decode_hex(s.as_bytes(), &mut buf).map_err(|()| Error::InvalidHex)?;
                        sink.binary_chunk(&buf[..len]).map_err(Error::Sink)?;
                    }
                }
                self.state = State::GroupCode;
            }
        }
        Ok(())
    }
}

/// Converts an entire text DXF byte slice to binary via the given sink.
///
/// Handles inputs with or without a trailing newline.
pub fn convert_all<S: Sink>(input: &[u8], sink: &mut S) -> Result<(), Error<S::Error>> {
    let mut conv = Converter::new();
    let consumed = conv.feed(input, sink)?;

    // Handle a possible final line without a trailing newline.
    if consumed < input.len() {
        let remaining = &input[consumed..];
        let line = strip_cr(remaining);
        if !line.is_empty() {
            conv.process_line(line, sink)?;
        }
    }

    if !conv.is_complete() {
        return Err(Error::InvalidValue);
    }
    Ok(())
}

/// Drops a single trailing `\r` from `line`, if there is one.
fn strip_cr(line: &[u8]) -> &[u8] {
    match line {
        [head @ .., b'\r'] => head,
        _ => line,
    }
}

/// Views `line` as UTF-8 text with surrounding whitespace removed.
///
/// Intended for numeric and group-code lines, which are expected to be
/// ASCII. Fails with `Err(())` when the bytes are not valid UTF-8.
fn ascii_trimmed(line: &[u8]) -> Result<&str, ()> {
    match core::str::from_utf8(line) {
        Ok(text) => Ok(text.trim()),
        Err(_) => Err(()),
    }
}

/// Decodes ASCII hex digit pairs from `hex` into the front of `out`.
///
/// Returns the number of bytes written. Both upper- and lower-case digits
/// are accepted. Fails with `Err(())` when `hex` has an odd length, would
/// decode to more bytes than `out` holds, or contains a non-hex byte; in
/// the last case the bytes decoded before the bad digit have already been
/// written to `out`.
fn decode_hex(hex: &[u8], out: &mut [u8; 255]) -> Result<usize, ()> {
    // Value (0..=15) of a single hex digit.
    fn nibble(digit: u8) -> Result<u8, ()> {
        match digit {
            b'0'..=b'9' => Ok(digit - b'0'),
            b'A'..=b'F' => Ok(digit - b'A' + 10),
            b'a'..=b'f' => Ok(digit - b'a' + 10),
            _ => Err(()),
        }
    }

    let pairs = hex.chunks_exact(2);
    // A leftover byte means the input had an odd number of digits.
    if !pairs.remainder().is_empty() {
        return Err(());
    }
    let len = pairs.len();
    if len > out.len() {
        return Err(());
    }
    for (slot, pair) in out.iter_mut().zip(pairs) {
        *slot = (nibble(pair[0])? << 4) | nibble(pair[1])?;
    }
    Ok(len)
}

// Unit tests: each one converts a small text DXF snippet into an in-memory
// buffer through `BinarySink` and checks the exact bytes produced. The
// first 22 bytes of every output are the binary DXF sentinel, so payload
// assertions start at offset 22.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::binary_sink::BinarySink;
    use alloc::vec::Vec;

    // String-valued groups: sentinel, then each group as a 2-byte
    // little-endian code followed by a NUL-terminated string.
    #[test]
    fn minimal_dxf() {
        let input = b"  0\nSECTION\n  2\nHEADER\n  0\nENDSEC\n  0\nEOF\n";
        let mut out = Vec::new();
        let mut sink = BinarySink::new(&mut out);
        convert_all(input, &mut sink).unwrap();

        // Sentinel (22 bytes)
        assert_eq!(&out[..22], b"AutoCAD Binary DXF\r\n\x1a\0");
        let mut p = 22;
        // Group 0 (2 bytes) + "SECTION\0"
        assert_eq!(&out[p..p + 2], &[0x00, 0x00]);
        assert_eq!(&out[p + 2..p + 10], b"SECTION\0");
        p += 10;
        // Group 2 (2 bytes) + "HEADER\0"
        assert_eq!(&out[p..p + 2], &[0x02, 0x00]);
        assert_eq!(&out[p + 2..p + 9], b"HEADER\0");
        p += 9;
        // Group 0 (2 bytes) + "ENDSEC\0"
        assert_eq!(&out[p..p + 2], &[0x00, 0x00]);
        assert_eq!(&out[p + 2..p + 9], b"ENDSEC\0");
        p += 9;
        // Group 0 (2 bytes) + "EOF\0"
        assert_eq!(&out[p..p + 2], &[0x00, 0x00]);
        assert_eq!(&out[p + 2..p + 6], b"EOF\0");
        p += 6;
        // Nothing may follow the last group.
        assert_eq!(out.len(), p);
    }

    // One group per numeric value type, each checked against the
    // little-endian encoding of the expected value.
    #[test]
    fn numeric_types() {
        let input = b" 10\n1.5\n 70\n42\n 90\n100000\n160\n9876543210\n290\n1\n";
        let mut out = Vec::new();
        let mut sink = BinarySink::new(&mut out);
        convert_all(input, &mut sink).unwrap();

        let mut p = 22;
        // group 10 (double 1.5): 2-byte code + 8-byte value
        assert_eq!(&out[p..p + 2], &10u16.to_le_bytes());
        assert_eq!(&out[p + 2..p + 10], &1.5f64.to_le_bytes());
        p += 10;
        // group 70 (i16 42)
        assert_eq!(&out[p..p + 2], &70u16.to_le_bytes());
        assert_eq!(&out[p + 2..p + 4], &42i16.to_le_bytes());
        p += 4;
        // group 90 (i32 100000)
        assert_eq!(&out[p..p + 2], &90u16.to_le_bytes());
        assert_eq!(&out[p + 2..p + 6], &100000i32.to_le_bytes());
        p += 6;
        // group 160 (i64 9876543210)
        assert_eq!(&out[p..p + 2], &160u16.to_le_bytes());
        assert_eq!(&out[p + 2..p + 10], &9876543210i64.to_le_bytes());
        p += 10;
        // group 290 (bool true)
        assert_eq!(&out[p..p + 2], &290u16.to_le_bytes());
        assert_eq!(out[p + 2], 0x01);
    }

    // Hex text under group 310 becomes a length byte followed by the
    // decoded bytes.
    #[test]
    fn binary_chunk() {
        let input = b"310\n48656C6C6F\n";
        let mut out = Vec::new();
        let mut sink = BinarySink::new(&mut out);
        convert_all(input, &mut sink).unwrap();

        let p = 22;
        // group 310 = 0x0136 LE
        assert_eq!(&out[p..p + 2], &310u16.to_le_bytes());
        // binary chunk: length 5, then "Hello"
        assert_eq!(out[p + 2], 5);
        assert_eq!(&out[p + 3..p + 8], b"Hello");
    }

    // Group 1004 is treated as a binary chunk as well.
    #[test]
    fn xdata_binary_chunk() {
        let input = b"1004\nFF00\n";
        let mut out = Vec::new();
        let mut sink = BinarySink::new(&mut out);
        convert_all(input, &mut sink).unwrap();

        let p = 22;
        // group 1004 = 0x03EC LE
        assert_eq!(&out[p..p + 2], &1004u16.to_le_bytes());
        // binary chunk: length 2, then 0xFF, 0x00
        assert_eq!(&out[p + 2..p + 5], &[2, 0xFF, 0x00]);
    }

    // Feeding the input one line per `feed` call must yield the same bytes
    // as converting it in one go.
    #[test]
    fn partial_feeding() {
        let full_input = b"  0\nSECTION\n  2\nHEADER\n  0\nENDSEC\n  0\nEOF\n";

        // Feed one line at a time
        let mut out_partial = Vec::new();
        let mut sink = BinarySink::new(&mut out_partial);
        let mut conv = Converter::new();
        for line in full_input.split(|&b| b == b'\n') {
            // Splitting on '\n' leaves an empty piece after the final newline.
            if line.is_empty() {
                continue;
            }
            // Re-attach the newline that `split` removed so the line is complete.
            let mut chunk = alloc::vec::Vec::from(line);
            chunk.push(b'\n');
            conv.feed(&chunk, &mut sink).unwrap();
        }
        assert!(conv.is_complete());

        // Feed all at once
        let mut out_full = Vec::new();
        let mut sink2 = BinarySink::new(&mut out_full);
        convert_all(full_input, &mut sink2).unwrap();

        assert_eq!(out_partial, out_full);
    }

    // CRLF line endings: the '\r' must not leak into codes or strings.
    #[test]
    fn crlf_handling() {
        let input = b"  0\r\nSECTION\r\n  0\r\nEOF\r\n";
        let mut out = Vec::new();
        let mut sink = BinarySink::new(&mut out);
        convert_all(input, &mut sink).unwrap();

        assert_eq!(
            &out[22..],
            &[
                0x00, 0x00, b'S', b'E', b'C', b'T', b'I', b'O', b'N', 0, 0x00, 0x00, b'E', b'O',
                b'F', 0
            ]
        );
    }

    // A final value line without a trailing newline is still converted.
    #[test]
    fn no_trailing_newline() {
        let input = b"  0\nEOF";
        let mut out = Vec::new();
        let mut sink = BinarySink::new(&mut out);
        convert_all(input, &mut sink).unwrap();

        assert_eq!(&out[22..], &[0x00, 0x00, b'E', b'O', b'F', 0]);
    }
}