tantivy-common 0.4.0

common traits and utility functions used by multiple tantivy subcrates
Documentation
use std::io::{Read, Write};
use std::{fmt, io};

use byteorder::{ReadBytesExt, WriteBytesExt};

use crate::{Endianness, VInt};

/// Trait for a simple binary serialization.
pub trait BinarySerializable: fmt::Debug + Sized {
    /// Serialize
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()>;
    /// Deserialize
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self>;
}

pub trait DeserializeFrom<T: BinarySerializable> {
    fn deserialize(&mut self) -> io::Result<T>;
}

/// Implement deserialize from &[u8] for all types which implement BinarySerializable.
///
/// TryFrom would actually be preferable, but not possible because of the orphan
/// rules (not completely sure if this could be resolved)
impl<T: BinarySerializable> DeserializeFrom<T> for &[u8] {
    fn deserialize(&mut self) -> io::Result<T> {
        T::deserialize(self)
    }
}

/// `FixedSize` marks a `BinarySerializable` as
/// always serializing to the same size.
pub trait FixedSize: BinarySerializable {
    const SIZE_IN_BYTES: usize;
}

impl BinarySerializable for () {
    fn serialize<W: Write>(&self, _: &mut W) -> io::Result<()> {
        Ok(())
    }
    fn deserialize<R: Read>(_: &mut R) -> io::Result<Self> {
        Ok(())
    }
}

impl FixedSize for () {
    const SIZE_IN_BYTES: usize = 0;
}

impl<T: BinarySerializable> BinarySerializable for Vec<T> {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        VInt(self.len() as u64).serialize(writer)?;
        for it in self {
            it.serialize(writer)?;
        }
        Ok(())
    }
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Vec<T>> {
        let num_items = VInt::deserialize(reader)?.val();
        let mut items: Vec<T> = Vec::with_capacity(num_items as usize);
        for _ in 0..num_items {
            let item = T::deserialize(reader)?;
            items.push(item);
        }
        Ok(items)
    }
}

impl<Left: BinarySerializable, Right: BinarySerializable> BinarySerializable for (Left, Right) {
    fn serialize<W: Write>(&self, write: &mut W) -> io::Result<()> {
        self.0.serialize(write)?;
        self.1.serialize(write)
    }
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
        Ok((Left::deserialize(reader)?, Right::deserialize(reader)?))
    }
}
impl<Left: BinarySerializable + FixedSize, Right: BinarySerializable + FixedSize> FixedSize
    for (Left, Right)
{
    const SIZE_IN_BYTES: usize = Left::SIZE_IN_BYTES + Right::SIZE_IN_BYTES;
}

impl BinarySerializable for u32 {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_u32::<Endianness>(*self)
    }

    fn deserialize<R: Read>(reader: &mut R) -> io::Result<u32> {
        reader.read_u32::<Endianness>()
    }
}

impl FixedSize for u32 {
    const SIZE_IN_BYTES: usize = 4;
}

impl BinarySerializable for u16 {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_u16::<Endianness>(*self)
    }

    fn deserialize<R: Read>(reader: &mut R) -> io::Result<u16> {
        reader.read_u16::<Endianness>()
    }
}

impl FixedSize for u16 {
    const SIZE_IN_BYTES: usize = 2;
}

impl BinarySerializable for u64 {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_u64::<Endianness>(*self)
    }
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
        reader.read_u64::<Endianness>()
    }
}

impl FixedSize for u64 {
    const SIZE_IN_BYTES: usize = 8;
}

impl BinarySerializable for u128 {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_u128::<Endianness>(*self)
    }
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
        reader.read_u128::<Endianness>()
    }
}

impl FixedSize for u128 {
    const SIZE_IN_BYTES: usize = 16;
}

impl BinarySerializable for f32 {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_f32::<Endianness>(*self)
    }
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
        reader.read_f32::<Endianness>()
    }
}

impl FixedSize for f32 {
    const SIZE_IN_BYTES: usize = 4;
}

impl BinarySerializable for i64 {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_i64::<Endianness>(*self)
    }
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
        reader.read_i64::<Endianness>()
    }
}

impl FixedSize for i64 {
    const SIZE_IN_BYTES: usize = 8;
}

impl BinarySerializable for f64 {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_f64::<Endianness>(*self)
    }
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
        reader.read_f64::<Endianness>()
    }
}

impl FixedSize for f64 {
    const SIZE_IN_BYTES: usize = 8;
}

impl BinarySerializable for u8 {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_u8(*self)
    }
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<u8> {
        reader.read_u8()
    }
}

impl FixedSize for u8 {
    const SIZE_IN_BYTES: usize = 1;
}

impl BinarySerializable for bool {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_u8(u8::from(*self))
    }
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<bool> {
        let val = reader.read_u8()?;
        match val {
            0 => Ok(false),
            1 => Ok(true),
            _ => Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "invalid bool value on deserialization, data corrupted",
            )),
        }
    }
}

impl FixedSize for bool {
    const SIZE_IN_BYTES: usize = 1;
}

impl BinarySerializable for String {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        let data: &[u8] = self.as_bytes();
        VInt(data.len() as u64).serialize(writer)?;
        writer.write_all(data)
    }

    fn deserialize<R: Read>(reader: &mut R) -> io::Result<String> {
        let string_length = VInt::deserialize(reader)?.val() as usize;
        let mut result = String::with_capacity(string_length);
        reader
            .take(string_length as u64)
            .read_to_string(&mut result)?;
        Ok(result)
    }
}

#[cfg(test)]
pub mod test {

    use super::{VInt, *};
    use crate::serialize::BinarySerializable;
    pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
        let mut buffer = Vec::new();
        O::default().serialize(&mut buffer).unwrap();
        assert_eq!(buffer.len(), O::SIZE_IN_BYTES);
    }

    fn serialize_test<T: BinarySerializable + Eq>(v: T) -> usize {
        let mut buffer: Vec<u8> = Vec::new();
        v.serialize(&mut buffer).unwrap();
        let num_bytes = buffer.len();
        let mut cursor = &buffer[..];
        let deser = T::deserialize(&mut cursor).unwrap();
        assert_eq!(deser, v);
        num_bytes
    }

    #[test]
    fn test_serialize_u8() {
        fixed_size_test::<u8>();
    }

    #[test]
    fn test_serialize_u32() {
        fixed_size_test::<u32>();
        assert_eq!(4, serialize_test(3u32));
        assert_eq!(4, serialize_test(5u32));
        assert_eq!(4, serialize_test(u32::MAX));
    }

    #[test]
    fn test_serialize_i64() {
        fixed_size_test::<i64>();
    }

    #[test]
    fn test_serialize_f64() {
        fixed_size_test::<f64>();
    }

    #[test]
    fn test_serialize_u64() {
        fixed_size_test::<u64>();
    }

    #[test]
    fn test_serialize_bool() {
        fixed_size_test::<bool>();
    }

    #[test]
    fn test_serialize_string() {
        assert_eq!(serialize_test(String::from("")), 1);
        assert_eq!(serialize_test(String::from("ぽよぽよ")), 1 + 3 * 4);
        assert_eq!(serialize_test(String::from("富士さん見える。")), 1 + 3 * 8);
    }

    #[test]
    fn test_serialize_vec() {
        assert_eq!(serialize_test(Vec::<u8>::new()), 1);
        assert_eq!(serialize_test(vec![1u32, 3u32]), 1 + 4 * 2);
    }

    #[test]
    fn test_serialize_vint() {
        for i in 0..10_000 {
            serialize_test(VInt(i as u64));
        }
        assert_eq!(serialize_test(VInt(7u64)), 1);
        assert_eq!(serialize_test(VInt(127u64)), 1);
        assert_eq!(serialize_test(VInt(128u64)), 2);
        assert_eq!(serialize_test(VInt(129u64)), 2);
        assert_eq!(serialize_test(VInt(1234u64)), 2);
        assert_eq!(serialize_test(VInt(16_383u64)), 2);
        assert_eq!(serialize_test(VInt(16_384u64)), 3);
        assert_eq!(serialize_test(VInt(u64::MAX)), 10);
    }
}