neodyn_xc 0.4.0

Neodyn Exchange is the serialization format for the Neodyn database engine
Documentation
//! Human-readable textual representation of the Neodyn Exchange format.

use std::io::{ Write, BufWriter };
use std::convert::TryInto;
use std::fmt::Display;
use serde::ser::{
    Serialize, Serializer, Error as SerError,
    SerializeSeq, SerializeTuple, SerializeTupleStruct,
    SerializeMap, SerializeStruct,
    SerializeTupleVariant, SerializeStructVariant,
};
use crate::error::{ Error, ResultExt };

/// Writes the Neodyn Exchange text representation of `value` into `writer`.
///
/// If `indent` converts to `Some(s)`, the output is pretty-printed and `s`
/// is emitted once per nesting level. If it converts to `None`, the compact
/// representation is produced instead.
///
/// No buffering layer is added around `writer`. After the value has been
/// serialized successfully, the serializer is `finalize()`d and the result
/// of that call is returned.
///
/// # Errors
///
/// Returns the error produced while serializing `value`, or the one
/// reported by `TextSerializer::finalize()`.
pub fn to_writer<'a, W, T, I>(
    writer: W,
    value: &T,
    indent: I,
) -> Result<(), Error>
    where
        W: Write,
        T: ?Sized + Serialize,
        I: Into<Option<&'a str>>,
{
    let mut text_ser = TextSerializer::new(writer, indent);
    let serialized = value.serialize(&mut text_ser);

    // Only finalize when serialization itself succeeded.
    serialized.and_then(|()| text_ser.finalize())
}

/// Buffered counterpart of `to_writer()`: the given `writer` is wrapped in
/// a `BufWriter` before the value is serialized into it.
///
/// `indent` has the same meaning as for `to_writer()`.
///
/// # Errors
///
/// Returns whatever error `to_writer()` reports.
pub fn to_writer_buffered<'a, W, T, I>(
    writer: W,
    value: &T,
    indent: I,
) -> Result<(), Error>
    where
        W: Write,
        T: ?Sized + Serialize,
        I: Into<Option<&'a str>>,
{
    let buffered = BufWriter::new(writer);
    to_writer(buffered, value, indent)
}

/// Produces the Neodyn Exchange text representation of `value` as a freshly
/// allocated byte vector.
///
/// If `indent` converts to `Some(s)`, the output is pretty-printed using `s`
/// as the indentation unit; with `None`, the compact form is emitted.
///
/// # Errors
///
/// Returns whatever error `to_writer()` reports.
pub fn to_bytes<'a, T, I>(value: &T, indent: I) -> Result<Vec<u8>, Error>
    where
        T: ?Sized + Serialize,
        I: Into<Option<&'a str>>,
{
    // Why 4 kB: that is the page size on most platforms of interest, so many
    // allocations reserve at least that much anyway. It is not excessively
    // large, and the buffer is shrunk to its real length once writing is
    // done. Most serialized data is expected to fit, which should avoid
    // the majority of reallocations and the copying they imply.
    const INITIAL_CAPACITY: usize = 4096;

    let mut bytes: Vec<u8> = Vec::with_capacity(INITIAL_CAPACITY);

    to_writer(&mut bytes, value, indent).map(|()| {
        bytes.shrink_to_fit();
        bytes
    })
}

/// Produces the Neodyn Exchange text representation of `value` as a `String`.
///
/// If `indent` converts to `Some(s)`, the output is pretty-printed using `s`
/// as the indentation unit; with `None`, the compact form is emitted.
///
/// The value is first serialized with `to_bytes()`, and the resulting bytes
/// are then turned into a string via `String::from_utf8`.
///
/// # Errors
///
/// Returns the error reported by `to_bytes()`.
/// NOTE(review): a UTF-8 conversion failure is presumably converted into
/// this crate's `Error` by `ResultExt::then_conv_err` — confirm there.
pub fn to_string<'a, T, I>(value: &T, indent: I) -> Result<String, Error>
    where
        T: ?Sized + Serialize,
        I: Into<Option<&'a str>>,
{
    let bytes = to_bytes(value, indent);
    bytes.then_conv_err(String::from_utf8)
}

/// Private abstraction over the primitive floating-point types.
///
/// It exists solely so that `TextSerializer::write_float()` can be written
/// once, generically, for both `f32` and `f64`. The bounds on `Self` supply
/// everything else that routine needs: copying, comparison against the
/// default value, and `Display` formatting.
trait Float
    where
        Self: Sized + Copy + PartialEq + Default + Display,
{
    /// Tells whether the value is a `NaN`.
    fn is_nan(self) -> bool;

    /// Reports the sign bit of the number, so the answer is `true`
    /// for `-0.0` and `-inf` as well as for `NaN`s carrying a negative sign.
    fn is_sign_negative(self) -> bool;

    /// Yields the fractional part of the number, i.e. what
    /// remains once the integer part has been removed.
    fn fract(self) -> Self;
}

/// Private macro for implementing the above private `Float` trait.
macro_rules! impl_float {
    ($ty:ty) => {
        impl Float for $ty {
            fn is_sign_negative(self) -> bool {
                <$ty>::is_sign_negative(self)
            }

            fn is_nan(self) -> bool {
                <$ty>::is_nan(self)
            }

            fn fract(self) -> Self {
                <$ty>::fract(self)
            }
        }
    }
}

impl_float!{ f32 }
impl_float!{ f64 }

/// Serde `Serializer` for the human-readable (text) representation of the
/// Neodyn Exchange format.
///
/// The serializer writes directly into the wrapped writer and keeps track
/// of the current nesting depth so that it can pretty-print when an indent
/// string has been supplied.
///
/// **The serializer must always be `finalize()`d explicitly
/// after the value is serialized!** Finalization verifies that the nesting
/// depth is back at zero and flushes the writer.
#[derive(Debug)]
pub struct TextSerializer<'a, W> {
    /// The text representation will be written here.
    writer: W,
    /// Current indent level, i.e. how many sequences/maps are currently
    /// open. Incremented when one is started, decremented when it ends.
    indent_level: usize,
    /// String to indent with, emitted once per indent level.
    /// `Some` enables pretty-printing (newlines, indentation and extra
    /// spaces); `None` selects the compact representation.
    indent_string: Option<&'a str>,
}

impl<'a, W: Write> TextSerializer<'a, W> {
    /// Initialize a serializer.
    ///
    /// **The serializer must always be `finalize()`d explicitly
    /// after the value is serialized!**
    #[must_use]
    pub fn new<I>(writer: W, indent: I) -> Self
        where
            I: Into<Option<&'a str>>,
    {
        TextSerializer {
            writer: writer,
            indent_level: 0,
            indent_string: indent.into(),
        }
    }

    /// Check invariants that need to hold at the end of the serialization.
    pub fn finalize(mut self) -> Result<(), Error> {
        if self.indent_level == 0 {
            self.writer.flush().conv_err()
        } else {
            Err(Error::custom(format_args!(
                "non-zero indent level `{}` after serializing value",
                self.indent_level
            )))
        }
    }

    /// Write a literal string.
    fn write_str(&mut self, s: &str) -> Result<(), Error> {
        self.writer.write_all(s.as_bytes()).conv_err()
    }

    /// Writes the appropriate amount of indentation if needed.
    fn write_indent(&mut self) -> Result<(), Error> {
        if let Some(indent_string) = self.indent_string {
            for _ in 0..self.indent_level {
                self.write_str(indent_string)?;
            }
        }

        Ok(())
    }

    /// Writes a newline if necessary (i.e. when we are pretty-printing).
    fn write_newline(&mut self) -> Result<(), Error> {
        if self.indent_string.is_some() {
            self.write_str("\n")?;
        }

        Ok(())
    }

    /// Starts serializing a sequence.
    fn start_seq(&mut self) -> Result<&mut Self, Error> {
        self.write_str("[")?;
        self.write_newline()?;
        self.incr_indent()?;
        Ok(self)
    }

    /// Serializes a single element of a sequence.
    fn write_seq_element<T: ?Sized + Serialize>(&mut self, value: &T) -> Result<(), Error> {
        self.write_indent()?;
        value.serialize(&mut *self)?;
        self.write_str(",")?;
        self.write_newline()
    }

    /// Finishes serialization of the sequence.
    fn end_seq(&mut self) -> Result<(), Error> {
        self.decr_indent()?;
        self.write_indent()?;
        self.write_str("]")
    }

    /// Starts serializing a map.
    fn start_map(&mut self) -> Result<&mut Self, Error> {
        self.write_str("{")?;
        self.write_newline()?;
        self.incr_indent()?;
        Ok(self)
    }

    /// Serializes a single key of the map.
    /// It does **not** check that keys and values come in the right order,
    /// nor that there are no keys without values or values without keys.
    fn write_map_key<T: ?Sized + Serialize>(&mut self, key: &T) -> Result<(), Error> {
        self.write_indent()?;
        key.serialize(&mut *self)?;
        self.write_str(":")?;

        if self.indent_string.is_some() {
            self.write_str(" ")?;
        }

        Ok(())
    }

    /// Serializes a single value of the map.
    /// It does **not** check that keys and values come in the right order,
    /// nor that there are no keys without values or values without keys.
    fn write_map_value<T: ?Sized + Serialize>(&mut self, value: &T) -> Result<(), Error> {
        value.serialize(&mut *self)?;
        self.write_str(",")?;
        self.write_newline()
    }

    /// Finishes serialization of the map.
    fn end_map(&mut self) -> Result<(), Error> {
        self.decr_indent()?;
        self.write_indent()?;
        self.write_str("}")
    }

    /// Formatting floating-point numbers slightly better than `std` does.
    /// `{:+}` does not print a fractional part if the number represents
    /// an integer. Therefore, we check for a zero fractional part, and
    /// append the string `".0"` if it is indeed zero.
    ///
    /// NB: this correctly handles `±inf`, because `inf.fract()` is `NaN`,
    /// so it is not equal to zero.
    ///
    /// For `f32`, I have verified exhaustively that `format!("{:+}")`
    /// prints a decimal point _if and only if_ `x.fract() > 0.0`, using
    /// the following piece of code:
    ///
    /// ```
    /// # use std::fmt::Write;
    /// #
    /// let mut buf = String::new();
    ///
    /// for i in 0..=u32::MAX {
    ///     let x = f32::from_bits(i);
    ///
    ///     if !x.is_finite() {
    ///         continue;
    ///     }
    ///
    ///     buf.clear();
    ///     write!(buf, "{:+}", x).unwrap();
    ///
    ///     let is_frac_num = x.fract() != 0.0;
    ///     let is_frac_str = buf.contains('.');
    ///
    ///     if is_frac_num != is_frac_str {
    ///         panic!("Exception: {} ({})", x, i);
    ///     }
    /// #
    /// #   if i >= 10_000_000 {
    /// #      break; // so that doc-test completes in reasonable time
    /// #   }
    /// }
    /// ```
    ///
    /// For `f64`, such verification is not feasible, but I still checked
    /// this property for 2^32 random `f64`s, both integral and fractional.
    ///
    /// The `std::fmt` routine also discards the sign of zero, so `-0.0`
    /// would be formatted as `+0.0`. Therefore, we need to special-case
    /// serializing zero as well.
    fn write_float<T: Float>(&mut self, value: T) -> Result<(), Error> {
        if value.is_nan() {
            return self.serialize_none();
        }

        let zero = T::default();

        if value == zero {
            return self.write_str(if value.is_sign_negative() {
                "-0.0"
            } else {
                "+0.0"
            });
        }

        write!(self.writer, "{:+}", value)?;

        // Integer and finite?
        if value.fract() == zero {
            self.write_str(".0")?;
        }

        Ok(())
    }

    /// Increases the indentation by one level.
    fn incr_indent(&mut self) -> Result<(), Error> {
        self.indent_level = self.indent_level.checked_add(1)
            .ok_or_else(|| Error::custom("indent level overflow"))?;

        Ok(())
    }

    /// Decreases the indentation by one level.
    fn decr_indent(&mut self) -> Result<(), Error> {
        self.indent_level = self.indent_level.checked_sub(1)
            .ok_or_else(|| Error::custom("indent level underflow"))?;

        Ok(())
    }
}

/// The `Serializer` implementation proper. All compound serializers are
/// the very same `&mut TextSerializer`, so nesting state lives in one place.
impl<'a, W: Write> Serializer for &mut TextSerializer<'a, W> {
    type Ok = ();
    type Error = Error;

    type SerializeSeq = Self;
    type SerializeTuple = Self;
    type SerializeTupleStruct = Self;
    type SerializeTupleVariant = Self;
    type SerializeMap = Self;
    type SerializeStruct = Self;
    type SerializeStructVariant = Self;

    /// This is the textual format, so it always reports `true`.
    fn is_human_readable(&self) -> bool {
        true
    }

    /// Booleans are written using their `Display` form.
    fn serialize_bool(self, v: bool) -> Result<Self::Ok, Self::Error> {
        let written = write!(self.writer, "{}", v);
        written.conv_err()
    }

    /// Widened to `i64` and written as such.
    fn serialize_i8(self, v: i8) -> Result<Self::Ok, Self::Error> {
        self.serialize_i64(i64::from(v))
    }

    /// Widened to `i64` and written as such.
    fn serialize_i16(self, v: i16) -> Result<Self::Ok, Self::Error> {
        self.serialize_i64(i64::from(v))
    }

    /// Widened to `i64` and written as such.
    fn serialize_i32(self, v: i32) -> Result<Self::Ok, Self::Error> {
        self.serialize_i64(i64::from(v))
    }

    /// Signed integers are always written with an explicit sign (`{:+}`).
    fn serialize_i64(self, v: i64) -> Result<Self::Ok, Self::Error> {
        let written = write!(self.writer, "{:+}", v);
        written.conv_err()
    }

    /// Narrowed to `i64`; fails if the value does not fit.
    fn serialize_i128(self, v: i128) -> Result<Self::Ok, Self::Error> {
        let narrowed: i64 = v.try_into()?;
        self.serialize_i64(narrowed)
    }

    /// Widened to `u64` and written as such.
    fn serialize_u8(self, v: u8) -> Result<Self::Ok, Self::Error> {
        self.serialize_u64(u64::from(v))
    }

    /// Widened to `u64` and written as such.
    fn serialize_u16(self, v: u16) -> Result<Self::Ok, Self::Error> {
        self.serialize_u64(u64::from(v))
    }

    /// Widened to `u64` and written as such.
    fn serialize_u32(self, v: u32) -> Result<Self::Ok, Self::Error> {
        self.serialize_u64(u64::from(v))
    }

    /// Unsigned integers are written without any sign.
    fn serialize_u64(self, v: u64) -> Result<Self::Ok, Self::Error> {
        let written = write!(self.writer, "{}", v);
        written.conv_err()
    }

    /// Narrowed to `u64`; fails if the value does not fit.
    fn serialize_u128(self, v: u128) -> Result<Self::Ok, Self::Error> {
        let narrowed: u64 = v.try_into()?;
        self.serialize_u64(narrowed)
    }

    /// See `TextSerializer::write_float()` for the exact formatting rules.
    fn serialize_f32(self, v: f32) -> Result<Self::Ok, Self::Error> {
        self.write_float(v)
    }

    /// See `TextSerializer::write_float()` for the exact formatting rules.
    fn serialize_f64(self, v: f64) -> Result<Self::Ok, Self::Error> {
        self.write_float(v)
    }

    /// A character is written exactly like a one-character string.
    fn serialize_char(self, v: char) -> Result<Self::Ok, Self::Error> {
        let mut utf8 = [0; 4];
        let as_str = v.encode_utf8(&mut utf8);
        self.serialize_str(as_str)
    }

    /// Strings are wrapped in double quotes, with their contents
    /// escaped by means of `str::escape_default()`.
    fn serialize_str(self, v: &str) -> Result<Self::Ok, Self::Error> {
        let escaped = v.escape_default();
        write!(self.writer, "\"{}\"", escaped).conv_err()
    }

    /// Blobs are written as two-digit lowercase hex bytes between `#` marks.
    ///
    /// In compact mode the hex digits are written back-to-back. When
    /// pretty-printing, bytes are separated by a single space, and groups of
    /// 4 bytes are separated by three spaces; the very first byte gets no
    /// padding at all.
    fn serialize_bytes(self, v: &[u8]) -> Result<Self::Ok, Self::Error> {
        self.write_str("#")?;

        if self.indent_string.is_none() {
            for byte in v {
                write!(self.writer, "{:02x}", byte)?;
            }
        } else {
            // Padding that precedes each byte: one pattern for the first
            // group of 4, and another, endlessly repeated, for the rest.
            let first_group = ["", " ", " ", " "];
            let later_groups = ["   ", " ", " ", " "];
            let paddings = first_group.iter().chain(later_groups.iter().cycle());

            // The paddings come first in the `zip`, and they never run out,
            // so no byte can be consumed from `v` and then dropped.
            for (padding, byte) in paddings.zip(v) {
                write!(self.writer, "{}{:02x}", padding, byte)?;
            }
        }

        self.write_str("#")
    }

    /// The absent optional is written as `null`.
    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
        self.write_str("null")
    }

    /// A present optional is the wrapped value prefixed with `?`.
    fn serialize_some<T: ?Sized + Serialize>(self, value: &T) -> Result<Self::Ok, Self::Error> {
        self.write_str("?")?;
        value.serialize(self)
    }

    /// The unit value is written as `null`, just like `None`.
    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
        self.write_str("null")
    }

    /// Unit structs are written as `null`, just like the unit value.
    fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
        self.write_str("null")
    }

    /// A unit variant is represented by its name, written as a string.
    fn serialize_unit_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        variant: &'static str,
    ) -> Result<Self::Ok, Self::Error> {
        self.serialize_str(variant)
    }

    /// Newtype structs are transparent: only the inner value is written.
    fn serialize_newtype_struct<T: ?Sized + Serialize>(
        self,
        _name: &'static str,
        value: &T,
    ) -> Result<Self::Ok, Self::Error> {
        value.serialize(self)
    }

    /// A newtype variant is a single-entry map from
    /// the variant name to the wrapped value.
    fn serialize_newtype_variant<T: ?Sized + Serialize>(
        self,
        _name: &'static str,
        _variant_index: u32,
        variant: &'static str,
        value: &T,
    ) -> Result<Self::Ok, Self::Error> {
        let mut single_entry = self.serialize_map(Some(1))?;
        SerializeMap::serialize_entry(&mut single_entry, variant, value)?;
        SerializeMap::end(single_entry)
    }

    /// Opens a sequence; the length hint is not used.
    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
        self.start_seq()
    }

    /// Tuples are written as sequences; the length is not used.
    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
        self.start_seq()
    }

    /// Tuple structs are written as sequences; name and length are not used.
    fn serialize_tuple_struct(
        self,
        _name: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeTupleStruct, Self::Error> {
        self.start_seq()
    }

    /// A tuple variant is a single-entry map from the variant name to a
    /// sequence of the fields. This opens the map, writes the key, and
    /// opens the sequence; `SerializeTupleVariant::end()` closes both.
    fn serialize_tuple_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        variant: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeTupleVariant, Self::Error> {
        let outer = self.start_map()?;
        outer.write_map_key(variant)?;
        outer.start_seq()
    }

    /// Opens a map; the length hint is not used.
    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
        self.start_map()
    }

    /// Structs are written as maps; name and length are not used.
    fn serialize_struct(
        self,
        _name: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeStruct, Self::Error> {
        self.start_map()
    }

    /// A struct variant is a single-entry map from the variant name to a
    /// map of the fields. This opens the outer map, writes the key, and
    /// opens the inner map; `SerializeStructVariant::end()` closes both.
    fn serialize_struct_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        variant: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeStructVariant, Self::Error> {
        let outer = self.start_map()?;
        outer.write_map_key(variant)?;
        outer.start_map()
    }
}

impl<'a, W: Write> SerializeSeq for &mut TextSerializer<'a, W> {
    type Ok = <Self as Serializer>::Ok;
    type Error = <Self as Serializer>::Error;

    fn serialize_element<T: ?Sized + Serialize>(&mut self, value: &T) -> Result<(), Self::Error> {
        self.write_seq_element(value)
    }

    fn end(self) -> Result<Self::Ok, Self::Error> {
        self.end_seq()
    }
}

/// Tuples are serialized exactly like sequences,
/// so everything is forwarded to the `SerializeSeq` impl.
impl<'a, W: Write> SerializeTuple for &mut TextSerializer<'a, W> {
    type Ok = <Self as SerializeSeq>::Ok;
    type Error = <Self as SerializeSeq>::Error;

    /// Writes `value` as the next element of the tuple.
    fn serialize_element<T: ?Sized + Serialize>(&mut self, value: &T) -> Result<(), Self::Error> {
        <Self as SerializeSeq>::serialize_element(self, value)
    }

    /// Closes the tuple.
    fn end(self) -> Result<Self::Ok, Self::Error> {
        <Self as SerializeSeq>::end(self)
    }
}

/// Tuple structs are serialized exactly like sequences,
/// so everything is forwarded to the `SerializeSeq` impl.
impl<'a, W: Write> SerializeTupleStruct for &mut TextSerializer<'a, W> {
    type Ok = <Self as SerializeSeq>::Ok;
    type Error = <Self as SerializeSeq>::Error;

    /// Writes `value` as the next field of the tuple struct.
    fn serialize_field<T: ?Sized + Serialize>(&mut self, value: &T) -> Result<(), Self::Error> {
        <Self as SerializeSeq>::serialize_element(self, value)
    }

    /// Closes the tuple struct.
    fn end(self) -> Result<Self::Ok, Self::Error> {
        <Self as SerializeSeq>::end(self)
    }
}

impl<'a, W: Write> SerializeMap for &mut TextSerializer<'a, W> {
    type Ok = <Self as Serializer>::Ok;
    type Error = <Self as Serializer>::Error;

    fn serialize_key<T: ?Sized + Serialize>(&mut self, key: &T) -> Result<(), Self::Error> {
        self.write_map_key(key)
    }

    fn serialize_value<T: ?Sized + Serialize>(&mut self, value: &T) -> Result<(), Self::Error> {
        self.write_map_value(value)
    }

    fn end(self) -> Result<Self::Ok, Self::Error> {
        self.end_map()
    }
}

/// Structs are serialized as maps keyed by field name,
/// so everything is forwarded to the `SerializeMap` impl.
impl<'a, W: Write> SerializeStruct for &mut TextSerializer<'a, W> {
    type Ok = <Self as SerializeMap>::Ok;
    type Error = <Self as SerializeMap>::Error;

    /// Writes the field as a map entry from its name `key` to `value`.
    fn serialize_field<T: ?Sized + Serialize>(
        &mut self,
        key: &'static str,
        value: &T,
    ) -> Result<(), Self::Error> {
        SerializeMap::serialize_entry(self, key, value)
    }

    /// Closes the struct.
    fn end(self) -> Result<Self::Ok, Self::Error> {
        <Self as SerializeMap>::end(self)
    }
}

/// Tuple variants are a sequence of fields nested inside a single-entry
/// map; both were opened by `Serializer::serialize_tuple_variant()`.
impl<'a, W: Write> SerializeTupleVariant for &mut TextSerializer<'a, W> {
    type Ok = <Self as Serializer>::Ok;
    type Error = <Self as Serializer>::Error;

    /// Writes `value` as the next element of the inner sequence.
    fn serialize_field<T: ?Sized + Serialize>(&mut self, value: &T) -> Result<(), Self::Error> {
        (**self).write_seq_element(value)
    }

    /// Closes the inner sequence first, and then the enclosing map.
    fn end(self) -> Result<Self::Ok, Self::Error> {
        let ser = self;

        ser.end_seq()?;
        // The enclosing map holds exactly one entry, so the comma separator
        // is omitted; only the optional newline is written.
        ser.write_newline()?;
        ser.end_map()
    }
}

impl<'a, W: Write> SerializeStructVariant for &mut TextSerializer<'a, W> {
    type Ok = <Self as Serializer>::Ok;
    type Error = <Self as Serializer>::Error;

    fn serialize_field<T: ?Sized + Serialize>(
        &mut self,
        key: &'static str,
        value: &T,
    ) -> Result<(), Self::Error> {
        self.serialize_entry(key, value)
    }

    fn end(self) -> Result<Self::Ok, Self::Error> {
        self.end_map()?;
        // no need to write comma separator as the map only contains one item
        self.write_newline()?;
        self.end_map()
    }
}