b3-rs 0.1.0

A Rust implementation of B3 (Better Binary Buffers)
//! Item header encoding and decoding

use crate::alloc_prelude::*;
use crate::datatypes::*;
use crate::ItemKey;
use nano_leb128::ULEB128;

// Item header format:
// (largely copied from b3/item_header.py)
//
// ---------------------------- item_header -----------------------------  --- codecs ---
// [header BYTE] [15+ type# UVARINT] [key (see below)] [data len UVARINT]  [ data BYTES ]
//
// Item control byte:
//
// +------------+------------+------------+------------+------------+------------+------------+------------+
// | is null    | has data   | key type   | key type   | data type  | data type  | data type  | data type  |
// +------------+------------+------------+------------+------------+------------+------------+------------+
//
// The first two bits:
//
// +------------+------------+
// | is null    | has data   |
// +------------+------------+
//     1   x  (2)    Value is NULL - no data len or data bytes follow. `encode` ignores
//                   `data_len` for null items; `decode` rejects 1 1 as an invalid header.
//     0   0  (0)    Codec zero-value for given data type (0, "", 0.0 etc)
//     0   1  (1)    Data len present, followed by encoded data bytes

/// A representation of an item's header.
///
/// The header records an item's data type, its key, whether the item is
/// null, and the length of the data that follows the header.
///
/// # Encoding and decoding
///
/// ```
/// # use b3_rs::ItemHeader;
/// use b3_rs::ItemKey;
/// use b3_rs::datatypes::{DataType, KnownType};
///
/// # fn main() -> Result<(), b3_rs::Error> {
/// let header = ItemHeader::new(
///     DataType::from(KnownType::Bytes), // Create a byte array,
///     ItemKey::NoKey,                   // with no key,
///     true,                             // that is a null entry, and
///     0,                                // has zero length.
/// );
///
/// // Encode the header to a `Vec<u8>`
/// let bytes = header.encode()?;
/// assert_eq!(bytes.len(), 1);
///
/// // And decode that header back into an object
/// let (decoded, _len) = ItemHeader::decode(&bytes)?;
/// assert_eq!(decoded, header);
/// # Ok(())
/// # }
/// ```
#[derive(Debug, Clone, PartialEq)]
pub struct ItemHeader {
    /// The item's data type. Its low four bits are stored in the control
    /// byte, followed by any extra type bytes the type's encoding produces.
    pub data_type: DataType,
    /// The item's key, or `ItemKey::NoKey` when the item has none.
    pub key: ItemKey,
    /// Whether the item is null. When set, `encode` ignores `data_len`.
    pub is_null: bool,
    /// Length of the data following the header. For a non-null item a
    /// length of zero is encoded without a length field.
    pub data_len: u64,
}

impl ItemHeader {
    /// Creates a new ItemHeader.
    pub fn new(data_type: DataType, key: ItemKey, is_null: bool, data_len: u64) -> Self {
        Self {
            data_type,
            key,
            is_null,
            data_len,
        }
    }

    /// Encode the header into it's byte representation.
    pub fn encode(&self) -> Result<Vec<u8>, crate::Error> {
        let mut control_byte = 0u8;
        let mut len_bytes: Vec<u8> = Vec::with_capacity(10);

        // Handle control bits for "is null" and "has data", and encode the data
        // length if we have data
        if self.is_null {
            // set the "is null" bit
            control_byte |= 0b10000000;
        } else if self.data_len > 0 {
            // set the "has data" bit
            control_byte |= 0b01000000;

            // data length encoding - we have to use a temporary array and then
            // put the right number of bytes from the encode into `len_bytes`
            let mut tmp = [0u8; 10];
            let count = ULEB128::from(self.data_len).write_into(&mut tmp)?;
            len_bytes.extend(&tmp[0..count]);
        }

        // Handle control bits for key type, and encode the key (which will return
        // an empty Vec<u8> if there's no key, and the bytes to put straight into
        // the output if there is a key)
        control_byte |= self.key.type_bits() & 0b00110000;
        let key_bytes = self.key.encode()?;

        // Encode data type
        let (type_base, type_ext_bytes) = self.data_type.encode()?;
        control_byte |= type_base & 0b00001111;

        // Build header
        let mut output: Vec<u8> = Vec::new();
        output.push(control_byte);
        output.extend(&type_ext_bytes);
        output.extend(&key_bytes);
        output.extend(&len_bytes);

        Ok(output)
    }

    /// Takes a byte representation of an item and decodes it's header,
    /// returning the decoded header and the number of bytes consumed by the
    /// decode operation.
    pub fn decode(input: &[u8]) -> Result<(ItemHeader, usize), crate::Error> {
        let mut count = 0;
        if input.len() < 1 {
            return Err(crate::Error::UnexpectedEof);
        }

        // Get the control byte
        let control_byte = input[count];
        count += 1;

        // Check for "is null" and "has data" bits
        let is_null = (control_byte & 0b10000000) != 0;
        let has_data = (control_byte & 0b01000000) != 0;

        // "is null" and "has data" are mutually exclusive
        if is_null && has_data {
            return Err(crate::Error::InvalidHeader);
        }

        // Check for extended type, and retrieve it
        let type_base = control_byte & 0b00001111;
        let mut type_ext: u64 = 0;
        if type_base == 0b00001111 {
            let (val, len) = ULEB128::read_from(&input[count..])?;
            type_ext = u64::from(val);
            count += len;
        }

        // Decode data type
        let data_type = match type_ext {
            0 => DataType::from(type_base as u64),
            _ => DataType::from(type_ext),
        };

        // Check for key, and decode it if there is one
        let mut key = ItemKey::NoKey;
        let key_base = control_byte & 0b00110000;
        if key_base != 0 {
            let (d_key, len) = ItemKey::decode(key_base, &input[count..])?;
            key = d_key;
            count += len;
        }

        // Retrieve item length if we have data
        let mut data_len = 0;
        if has_data {
            let (val, len) = ULEB128::read_from(&input[count..])?;
            data_len = u64::from(val);
            count += len;
        }

        let header = ItemHeader {
            data_type,
            key,
            is_null,
            data_len,
        };

        Ok((header, count))
    }
}

/// The default header is a null item of data type `0` with no key and no
/// data.
impl Default for ItemHeader {
    fn default() -> Self {
        Self::new(DataType::from(0), ItemKey::NoKey, true, 0)
    }
}

/// Unit tests for `ItemHeader::encode` and `ItemHeader::decode`.
#[cfg(test)]
mod tests {
    use super::*;

    /// A null item with no key is a single control byte.
    #[test]
    fn header_encode_null() {
        let header = ItemHeader {
            data_type: DataType::from(0),
            key: ItemKey::NoKey,
            is_null: true,
            data_len: 0,
        };

        assert_eq!(header.encode().unwrap(), vec![0b10000000]);
    }

    /// `data_len` is not written for a null item.
    #[test]
    fn header_encode_null_ignores_data() {
        let header = ItemHeader {
            data_type: DataType::from(0),
            key: ItemKey::NoKey,
            is_null: true,
            data_len: 5,
        };

        assert_eq!(header.encode().unwrap(), vec![0b10000000]);
    }

    /// A null item still carries its key.
    #[test]
    fn header_encode_null_with_key() {
        let header = ItemHeader {
            data_type: DataType::from(0),
            key: ItemKey::IntegerKey(1),
            is_null: true,
            data_len: 0,
        };

        assert_eq!(header.encode().unwrap(), vec![0b10010000, 0x01]);
    }

    #[test]
    fn header_encode_null_with_key_ignores_data() {
        let header = ItemHeader {
            data_type: DataType::from(0),
            key: ItemKey::IntegerKey(1),
            is_null: true,
            data_len: 5,
        };

        assert_eq!(header.encode().unwrap(), vec![0b10010000, 0x01]);
    }

    /// A non-null item with zero length sets neither the "is null" nor the
    /// "has data" bit, and writes no length field.
    #[test]
    fn header_encode_zero_value() {
        let header = ItemHeader {
            data_type: DataType::from(0),
            key: ItemKey::NoKey,
            is_null: false,
            data_len: 0,
        };

        assert_eq!(header.encode().unwrap(), vec![0b00000000]);
    }

    #[test]
    fn header_encode_has_data() {
        let header = ItemHeader {
            data_type: DataType::from(0),
            key: ItemKey::NoKey,
            is_null: false,
            data_len: 5,
        };

        assert_eq!(header.encode().unwrap(), vec![0b01000000, 0x05]);
    }

    #[test]
    fn header_encode_type_with_data() {
        let header = ItemHeader {
            data_type: DataType::from(KnownType::CompositeDict),
            key: ItemKey::NoKey,
            is_null: false,
            data_len: 1,
        };

        assert_eq!(header.encode().unwrap(), vec![0b01000001, 0x01]);

        let header = ItemHeader {
            data_type: DataType::from(KnownType::CompositeDict),
            key: ItemKey::NoKey,
            is_null: false,
            data_len: 10,
        };

        assert_eq!(header.encode().unwrap(), vec![0b01000001, 0x0A]);
    }

    /// The key bytes are written before the data length.
    #[test]
    fn header_encode_type_with_data_and_key() {
        let header = ItemHeader {
            data_type: DataType::from(KnownType::CompositeDict),
            key: ItemKey::IntegerKey(1),
            is_null: false,
            data_len: 1,
        };

        assert_eq!(header.encode().unwrap(), vec![0b01010001, 0x01, 0x01]);

        let header = ItemHeader {
            data_type: DataType::from(KnownType::CompositeDict),
            key: ItemKey::StringKey(String::from("AAA")),
            is_null: false,
            data_len: 1,
        };

        assert_eq!(
            header.encode().unwrap(),
            vec![0b01100001, 0x03, 0x41, 0x41, 0x41, 0x01]
        );
    }

    /// Decoding an empty buffer fails before any byte is read.
    #[test]
    fn header_decode_empty_input_errors() {
        let err = ItemHeader::decode(&[]).err().unwrap();
        assert_eq!(err, crate::Error::UnexpectedEof);
    }

    #[test]
    fn header_decode_null() {
        let (header, count) = ItemHeader::decode(&[0b10000000]).unwrap();
        assert!(header.is_null);
        assert_eq!(count, 1);
    }

    /// Both "is null" and "has data" set is rejected.
    #[test]
    fn header_decode_null_with_data_errors() {
        let err = ItemHeader::decode(&[0b11000000, 0x01]).err().unwrap();
        assert_eq!(err, crate::Error::InvalidHeader);
    }

    #[test]
    fn header_decode_null_with_key() {
        let (header, count) = ItemHeader::decode(&[0b10010000, 0x01]).unwrap();
        assert!(header.is_null);
        assert_eq!(header.key, ItemKey::IntegerKey(1));
        assert_eq!(count, 2);
    }

    /// A control byte with neither flag set decodes to a non-null item with
    /// no key and zero length.
    #[test]
    fn header_decode_zero_value() {
        let (header, count) = ItemHeader::decode(&[0b00000000]).unwrap();
        assert!(!header.is_null);
        assert_eq!(header.key, ItemKey::NoKey);
        assert_eq!(header.data_len, 0);
        assert_eq!(count, 1);
    }

    #[test]
    fn header_decode_has_data() {
        let (header, count) = ItemHeader::decode(&[0b01000000, 0x05]).unwrap();
        assert!(!header.is_null);
        assert_eq!(header.data_len, 0x05);
        assert_eq!(count, 2);

        let (header, count) = ItemHeader::decode(&[0b01000000, 0x10]).unwrap();
        assert!(!header.is_null);
        assert_eq!(header.data_len, 0x10);
        assert_eq!(count, 2);
    }

    #[test]
    fn header_decode_type_with_data() {
        let (header, count) = ItemHeader::decode(&[0b01000001, 0x05]).unwrap();
        assert!(!header.is_null);
        assert_eq!(header.data_type, DataType::from(KnownType::CompositeDict));
        assert_eq!(header.data_len, 0x05);
        assert_eq!(count, 2);

        // All four type bits set: the next byte holds the extended type number.
        let (header, count) = ItemHeader::decode(&[0b01001111, 0x10, 0x05]).unwrap();
        assert!(!header.is_null);
        assert_eq!(header.data_type, DataType::from(KnownExtendedType::Complex));
        assert_eq!(header.data_len, 0x05);
        assert_eq!(count, 3);
    }

    #[test]
    fn header_decode_type_with_data_and_key() {
        let (header, count) = ItemHeader::decode(&[0b01010001, 0x01, 0x05]).unwrap();
        assert!(!header.is_null);
        assert_eq!(header.data_type, DataType::from(KnownType::CompositeDict));
        assert_eq!(header.key, ItemKey::IntegerKey(1));
        assert_eq!(header.data_len, 0x05);
        assert_eq!(count, 3);

        let (header, count) =
            ItemHeader::decode(&[0b01100001, 0x03, 0x41, 0x41, 0x41, 0x05]).unwrap();
        assert!(!header.is_null);
        assert_eq!(header.data_type, DataType::from(KnownType::CompositeDict));
        assert_eq!(header.key, ItemKey::StringKey(String::from("AAA")));
        assert_eq!(header.data_len, 0x05);
        assert_eq!(count, 6);
    }

    /// Round trip: despite the name, this encodes a header first and then
    /// decodes the result, checking that every encoded byte is consumed and
    /// that the decoded header equals the original.
    #[test]
    fn header_decode_then_encode_matches() {
        let original = ItemHeader {
            data_type: DataType::from(KnownType::CompositeDict),
            key: ItemKey::StringKey(String::from("AAA")),
            is_null: false,
            data_len: 1,
        };

        let encoded = original.encode().unwrap();
        let (decoded, count) = ItemHeader::decode(&encoded).unwrap();
        assert_eq!(count, encoded.len());
        assert_eq!(original, decoded);
    }
}