neodyn_xc 0.4.0

Neodyn Exchange is the serialization format for the Neodyn database engine.
Documentation
//! Deserializing from the compact binary representation.

pub mod slice;
pub mod stream;

use std::fmt::Display;
use std::convert::{ TryFrom, TryInto };
use byteorder::{ ByteOrder, LittleEndian };
use serde::de::{ Deserializer, Visitor, Expected, Unexpected, Error as _ };
use ordered_float::NotNan;
use crate::error::{ Error, ErrorExt };
use crate::format::*;

pub use self::{
    slice::{ from_bytes, BinarySliceDeserializer },
    stream::{ from_reader, from_reader_buffered, BinaryStreamDeserializer },
};

/// Creates a type error based on an actual and expected value.
fn type_error<'a, T, U>(actual: U, expected: &dyn Expected) -> Result<T, Error>
    where
        U: Into<Unexpected<'a>>,
{
    Err(Error::invalid_type(actual.into(), expected))
}

/// Return an error signaling that the binary serialized value is corrupted.
fn corrupted<T, S>(message: S) -> Result<T, Error>
    where
        S: Display,
{
    Err(Error::custom(format_args!("serialized binary corrupted: {}", message)))
}

/// Returns the payload of a packed small integer, i.e. the bits of `b`
/// selected by `SMALL_PAYLOAD_MASK`, as an unsigned value.
const fn decode_small_uint(b: u8) -> u8 {
    SMALL_PAYLOAD_MASK & b
}

/// Sign extend the payload from something less than 8 bits to a full byte.
#[allow(clippy::cast_possible_wrap)]
const fn decode_small_int(b: u8) -> i8 {
    ((b & SMALL_PAYLOAD_MASK) as i8 ^ SMALL_INT_SIGN_BIT) - SMALL_INT_SIGN_BIT
}

/// Reads a little-endian unsigned integer of `length` bytes from the front
/// of `buf` and converts it to the desired type `T`.
///
/// Upon success, it returns the converted number
/// and the remainder of the buffer (everything after the first `length`
/// bytes).
///
/// # Errors
///
/// * If the buffer is shorter than `length`, a "corrupted" error is returned.
/// * If `length` is not between 1 and 8 (inclusive), a "corrupted" error is
///   returned instead of panicking inside `byteorder`.
/// * If the decoded 64-bit value does not fit into `T`, the conversion
///   error is reported with a hint about `usize` range.
fn uint_from_parts<T>(length: usize, buf: &[u8]) -> Result<(T, &[u8]), Error>
    where
        T: TryFrom<u64>,
        T::Error: Into<Error> + ErrorExt + 'static,
{
    if length > buf.len() {
        return corrupted(format_args!(
            "expected buffer of length {}, got {}", length, buf.len()
        ));
    }

    // `ByteOrder::read_uint()` asserts `1 <= nbytes <= 8` and panics
    // otherwise. The length may come from untrusted input, so report
    // a regular error rather than aborting the deserialization.
    if !(1..=8).contains(&length) {
        return corrupted(format_args!("invalid integer length: {}", length));
    }

    LittleEndian::read_uint(buf, length)
        .try_into()
        .map(|x| (x, &buf[length..]))
        .map_err(|e| Error::custom(format_args!(
            "{}; maybe a size or an index exceeds range of `usize`?", e
        )))
}

/// Start reading the next value. Here `b` is the first byte of the next value.
///
/// `read_buf()` is only called if we find a "big" (multi-byte-encoded) value.
/// Its `usize` argument is guaranteed to be between 0 and 8.
fn read_value_header<'a, F>(b: u8, read_buf: F) -> Result<ValueHeader, Error>
    where
        F: FnOnce(usize) -> Result<&'a [u8], Error>,
{
    // `null`, `opt`, `false`, `true`
    if b.is_major_minor(MAJOR_TYPE_SIMPLE, MINOR_TYPE_VALUE) {
        return match b & VALUE_TAG_MASK {
            VALUE_TAG_NULL  => Ok(ValueHeader::Null),
            VALUE_TAG_OPT   => Ok(ValueHeader::Opt),
            VALUE_TAG_FALSE => Ok(ValueHeader::Bool(false)),
            VALUE_TAG_TRUE  => Ok(ValueHeader::Bool(true)),
            tag @ _ => corrupted(format_args!(
                "invalid simple value tag: {:08b}", tag
            )),
        }
    }

    // Empty string and blob, inline
    if b.is_major_minor(MAJOR_TYPE_SIMPLE, MINOR_TYPE_EMPTY) {
        return match b & VALUE_TAG_MASK {
            VALUE_TAG_EMPTY_STRING => Ok(ValueHeader::EmptyString),
            VALUE_TAG_EMPTY_BLOB   => Ok(ValueHeader::EmptyBlob),
            tag @ _ => corrupted(format_args!(
                "invalid empty value tag: {:08b}", tag
            ))
        }
    }

    // Small and Big Values
    match b & MAJOR_TYPE_MASK {
        MAJOR_TYPE_SMALL_INT  => Ok(ValueHeader::I8(decode_small_int(b))),
        MAJOR_TYPE_SMALL_UINT => Ok(ValueHeader::U8(decode_small_uint(b))),
        MAJOR_TYPE_SMALL_STRING => {
            let index = decode_small_uint(b).into();
            Ok(ValueHeader::String(index))
        }
        MAJOR_TYPE_SMALL_BLOB => {
            let index = decode_small_uint(b).into();
            Ok(ValueHeader::Blob(index))
        }
        MAJOR_TYPE_SMALL_ARRAY => {
            let count = decode_small_uint(b).into();
            Ok(ValueHeader::Array(count))
        }
        MAJOR_TYPE_SMALL_MAP => {
            let count = decode_small_uint(b).into();
            Ok(ValueHeader::Map(count))
        }
        MAJOR_TYPE_BIG_VALUE => {
            let len = b.decode_log_length();
            let buf = read_buf(len)?;
            read_big_value_header(b, buf)
        }
        major @ _ => corrupted(format_args!("bad major type: {:08b}", major)),
    }
}

/// Helper for `read_value_header()`.
#[allow(clippy::cast_possible_wrap, clippy::map_err_ignore)]
fn read_big_value_header(tag: u8, buf: &[u8]) -> Result<ValueHeader, Error> {
    if tag & MAJOR_TYPE_MASK != MAJOR_TYPE_BIG_VALUE {
        return Err(Error::new("bug: invalid major type for big value"));
    }
    if tag.decode_log_length() != buf.len() {
        return Err(Error::new("bug: inconsistent log length for big value"));
    }

    let len = buf.len();

    match tag & MINOR_TYPE_MASK {
        MINOR_TYPE_INT => match len {
            1 => Ok(ValueHeader::I8(buf[0] as i8)),
            2 => Ok(ValueHeader::I16(LittleEndian::read_u16(buf) as i16)),
            4 => Ok(ValueHeader::I32(LittleEndian::read_u32(buf) as i32)),
            8 => Ok(ValueHeader::I64(LittleEndian::read_u64(buf) as i64)),
            _ => corrupted(format_args!("invalid integer length: {}", len)),
        },
        MINOR_TYPE_UINT => match len {
            1 => Ok(ValueHeader::U8(buf[0])),
            2 => Ok(ValueHeader::U16(LittleEndian::read_u16(buf))),
            4 => Ok(ValueHeader::U32(LittleEndian::read_u32(buf))),
            8 => Ok(ValueHeader::U64(LittleEndian::read_u64(buf))),
            _ => corrupted(format_args!("invalid integer length: {}", len)),
        },
        MINOR_TYPE_STRING => {
            let index = LittleEndian::read_uint(buf, len).try_into()?;
            Ok(ValueHeader::String(index))
        },
        MINOR_TYPE_BLOB => {
            let index = LittleEndian::read_uint(buf, len).try_into()?;
            Ok(ValueHeader::Blob(index))
        },
        MINOR_TYPE_ARRAY => {
            let count = LittleEndian::read_uint(buf, len).try_into()?;
            Ok(ValueHeader::Array(count))
        },
        MINOR_TYPE_MAP => {
            let count = LittleEndian::read_uint(buf, len).try_into()?;
            Ok(ValueHeader::Map(count))
        },
        MINOR_TYPE_FLOAT => match len {
            4 => {
                let bits = LittleEndian::read_u32(buf);
                let x = f32::from_bits(bits);

                NotNan::try_from(x).map(ValueHeader::F32).map_err(|_| {
                    Error::custom("serialized binary corrupted: \
                                  NaN should have been encoded as `null`")
                })
            }
            8 => {
                let bits = LittleEndian::read_u64(buf);
                let x = f64::from_bits(bits);

                NotNan::try_from(x).map(ValueHeader::F64).map_err(|_| {
                    Error::custom("serialized binary corrupted: \
                                  NaN should have been encoded as `null`")
                })
            }
            _ => corrupted(format_args!("invalid float size: {}", len))
        },
        minor @ _ => corrupted(format_args!(
            "invalid minor value type: {:08b}", minor
        ))
    }
}

/// Visit any numeric value.
fn visit_number<'de, V>(value: ValueHeader, visitor: V) -> Result<V::Value, Error>
    where
        V: Visitor<'de>,
{
    use ValueHeader::*;

    match value {
        I8(x)  => visitor.visit_i8(x),
        I16(x) => visitor.visit_i16(x),
        I32(x) => visitor.visit_i32(x),
        I64(x) => visitor.visit_i64(x),
        U8(x)  => visitor.visit_u8(x),
        U16(x) => visitor.visit_u16(x),
        U32(x) => visitor.visit_u32(x),
        U64(x) => visitor.visit_u64(x),
        F32(x) => visitor.visit_f32(x.into()),
        F64(x) => visitor.visit_f64(x.into()),
        value @ _ => type_error(value, &visitor),
    }
}

/// Drives the visitor for an optional value.
///
/// A `Null` header is visited as `None`; an `Opt` header is visited as
/// `Some`, with `deserializer` supplying the wrapped value. Every other
/// header results in an "invalid type" error.
fn visit_option<'de, D, V>(
    value: ValueHeader,
    deserializer: D,
    visitor: V,
) -> Result<V::Value, Error>
    where
        D: Deserializer<'de, Error=Error>,
        V: Visitor<'de>,
{
    match value {
        ValueHeader::Opt  => visitor.visit_some(deserializer),
        ValueHeader::Null => visitor.visit_none(),
        unexpected => type_error(unexpected, &visitor),
    }
}

/// Drives the visitor for a unit-like value (including `()` and unit
/// structs): only a `Null` header is accepted, anything else results in
/// an "invalid type" error.
fn visit_unit<'de, V: Visitor<'de>>(value: ValueHeader, visitor: V) -> Result<V::Value, Error> {
    if let ValueHeader::Null = value {
        visitor.visit_unit()
    } else {
        type_error(value, &visitor)
    }
}

/// Various flags concerning symbol table entries.
///
/// The flags are decoded from a symbol metadata tag byte by the
/// `TryFrom<u8>` implementation of this type.
#[derive(Debug, Clone, Copy)]
struct SymbolFlags {
    /// This is a "big" symbol, i.e. its length is multi-byte encoded.
    /// When `false`, the symbol is a "small" one.
    is_big: bool,
    /// This symbol is valid UTF-8 and may be used as a string.
    /// When `false`, the symbol is a blob.
    is_string: bool,
    /// This symbol has multiple uses in the value body.
    /// When `false`, the symbol is used only once.
    is_multi: bool,
}

/// Decodes the flags of a symbol table entry from its metadata tag byte.
///
/// Small symbols are classified by their major type alone; big symbols
/// are further distinguished by their minor type. Unknown major or minor
/// types are reported as a "corrupted" error.
impl TryFrom<u8> for SymbolFlags {
    type Error = Error;

    fn try_from(tag: u8) -> Result<Self, Error> {
        let flags = |is_big, is_string, is_multi| {
            SymbolFlags { is_big, is_string, is_multi }
        };

        match tag & MAJOR_TYPE_MASK {
            MAJOR_TYPE_SMALL_BLOB_ONCE    => Ok(flags(false, false, false)),
            MAJOR_TYPE_SMALL_BLOB_MULTI   => Ok(flags(false, false, true)),
            MAJOR_TYPE_SMALL_STRING_ONCE  => Ok(flags(false, true,  false)),
            MAJOR_TYPE_SMALL_STRING_MULTI => Ok(flags(false, true,  true)),
            MAJOR_TYPE_BIG_SYMBOL => match tag & MINOR_TYPE_MASK {
                MINOR_TYPE_BLOB_ONCE      => Ok(flags(true, false, false)),
                MINOR_TYPE_BLOB_MULTI     => Ok(flags(true, false, true)),
                MINOR_TYPE_STRING_ONCE    => Ok(flags(true, true,  false)),
                MINOR_TYPE_STRING_MULTI   => Ok(flags(true, true,  true)),
                unknown => corrupted(format_args!(
                    "invalid minor type for symbol: {:08b}", unknown
                )),
            },
            unknown => corrupted(format_args!(
                "invalid major type for symbol: {:08b}", unknown
            )),
        }
    }
}

/// An atomic value or the "header" of a structured value.
///
/// This is what `read_value_header()` produces from the first byte of a
/// value (plus the following payload bytes for "big" values). Atomic
/// variants carry the decoded value itself, whereas the string, blob,
/// array and map variants only carry a symbol table index or a count.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum ValueHeader {
    /// `null`
    Null,
    /// An optional that is present follows.
    Opt,
    /// Boolean.
    Bool(bool),
    /// Signed integer, fits into 8 bits.
    I8(i8),
    /// Signed integer, fits into 16 bits.
    I16(i16),
    /// Signed integer, fits into 32 bits.
    I32(i32),
    /// Signed integer, fits into 64 bits.
    I64(i64),
    /// Unsigned integer, fits into 8 bits.
    U8(u8),
    /// Unsigned integer, fits into 16 bits.
    U16(u16),
    /// Unsigned integer, fits into 32 bits.
    U32(u32),
    /// Unsigned integer, fits into 64 bits.
    U64(u64),
    /// 32-bit IEEE-754 floating-point number. Never NaN.
    F32(NotNan<f32>),
    /// 64-bit IEEE-754 floating-point number. Never NaN.
    F64(NotNan<f64>),
    /// Empty string.
    EmptyString,
    /// Non-empty interned string and the associated symbol table index.
    String(usize),
    /// Empty blob.
    EmptyBlob,
    /// Non-empty interned blob and the associated symbol table index.
    Blob(usize),
    /// Array and its item count.
    Array(usize),
    /// Map and its entry count.
    Map(usize),
}

impl<'a> From<ValueHeader> for Unexpected<'a> {
    fn from(value: ValueHeader) -> Self {
        use ValueHeader::*;

        match value {
            Null => Unexpected::Unit,
            Opt  => Unexpected::Option,
            Bool(b) => Unexpected::Bool(b),
            I8(x)  => Unexpected::Signed(x.into()),
            I16(x) => Unexpected::Signed(x.into()),
            I32(x) => Unexpected::Signed(x.into()),
            I64(x) => Unexpected::Signed(x),
            U8(x)  => Unexpected::Unsigned(x.into()),
            U16(x) => Unexpected::Unsigned(x.into()),
            U32(x) => Unexpected::Unsigned(x.into()),
            U64(x) => Unexpected::Unsigned(x),
            F32(x) => Unexpected::Float(x.into_inner().into()),
            F64(x) => Unexpected::Float(x.into_inner()),
            EmptyString => Unexpected::Str(""),
            EmptyBlob   => Unexpected::Bytes(&[]),
            String(_) => Unexpected::Other("string"),
            Blob(_)   => Unexpected::Other("bytes"),
            Array(_) => Unexpected::Seq,
            Map(_)   => Unexpected::Map,
        }
    }
}

/// Convenience methods on bytes.
///
/// Implemented for `u8` so that tag bytes of the binary format can be
/// inspected with method-call syntax.
trait ByteExt {
    /// Check whether this byte describes a type of the given major type tag.
    ///
    /// The `u8` implementation compares the bits selected by
    /// `MAJOR_TYPE_MASK` against `major`.
    #[allow(clippy::wrong_self_convention)]
    fn is_major(self, major: u8) -> bool;

    /// Check whether this byte describes a type of the given major and minor
    /// type tags.
    ///
    /// The `u8` implementation requires both the bits selected by
    /// `MAJOR_TYPE_MASK` to equal `major` and the bits selected by
    /// `MINOR_TYPE_MASK` to equal `minor`.
    #[allow(clippy::wrong_self_convention)]
    fn is_major_minor(self, major: u8, minor: u8) -> bool;

    /// Returns the buffer length encoded in the "log-length" format.
    ///
    /// The `u8` implementation computes 2 raised to the power of the bits
    /// selected by `LOG_LENGTH_MASK`.
    fn decode_log_length(self) -> usize;
}

/// Tag byte inspection helpers, based on the masks of the binary format.
impl ByteExt for u8 {
    fn is_major(self, major: u8) -> bool {
        let actual_major = self & MAJOR_TYPE_MASK;
        actual_major == major
    }

    fn is_major_minor(self, major: u8, minor: u8) -> bool {
        // Same short-circuiting conjunction as before: major first, then minor.
        let actual_minor = self & MINOR_TYPE_MASK;
        self.is_major(major) && actual_minor == minor
    }

    fn decode_log_length(self) -> usize {
        // The masked bits hold the base-2 logarithm of the length.
        let log_length = self & LOG_LENGTH_MASK;
        1 << log_length
    }
}