bitcode 0.2.0

bitcode is a bitwise binary serializer
Documentation
use crate::Buffer;
use bincode::Options;
use flate2::read::DeflateDecoder;
use flate2::write::DeflateEncoder;
use flate2::Compression;
use lz4_flex::{compress_prepend_size, decompress_size_prepended};
use paste::paste;
use rand::distributions::Alphanumeric;
use rand::prelude::*;
use rand_chacha::ChaCha20Rng;
use serde::de::DeserializeOwned;
use serde::{Deserialize, Serialize};
use std::ops::RangeInclusive;
use test::{black_box, Bencher};

// String type stored inside the benchmark payloads (`Data::s`, `DataEnum::Baz`).
// The samplers in this file build it through `TryFrom<&str>`, so the fixed-capacity
// alternative below can presumably be swapped in without touching them (generated
// lengths come from `gen_len`, which scales a unit sample by 16 — TODO confirm it fits).
// type StringImpl = arrayvec::ArrayString<16>;
type StringImpl = String;

/// One record of the randomly generated data set that every serializer in this file
/// is benchmarked and size-compared on.
///
/// Serialization goes through the derived serde impls, which visit the fields in
/// declaration order, so reordering fields changes the encoded bytes.
#[derive(Debug, Default, PartialEq, Serialize, Deserialize)]
struct Data {
    /// Optional float; the `Standard` sampler fills it with probability 0.15.
    x: Option<f32>,
    /// Optional signed byte; the `Standard` sampler fills it with probability 0.3.
    y: Option<i8>,
    /// Always-present 16-bit value, sampled uniformly.
    z: u16,
    /// Short alphanumeric string whose length comes from `gen_len`.
    s: StringImpl,
    /// Nested enum payload (see `DataEnum`).
    e: DataEnum,
}

/// Draws a string length that is heavily skewed toward short values.
///
/// A single `f32` sample is raised to the fourth power, scaled by 16 and truncated
/// toward zero, so small lengths are far more likely than large ones.
fn gen_len(r: &mut (impl Rng + ?Sized)) -> usize {
    let unit: f32 = r.gen();
    let skewed = unit.powi(4);
    (skewed * 16.0) as usize
}

impl Distribution<Data> for rand::distributions::Standard {
    /// Samples one random [`Data`] record.
    ///
    /// The draws happen in a fixed order (string length, `x`, `y`, `z`, string
    /// characters, `e`); that order defines the pseudo-random stream and therefore the
    /// exact data set produced from a given seed.
    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Data {
        let len = gen_len(rng);

        // Each optional field first flips a biased coin, then draws a value only on success.
        let x = if rng.gen_bool(0.15) {
            Some(rng.gen())
        } else {
            None
        };
        let y = if rng.gen_bool(0.3) {
            Some(rng.gen())
        } else {
            None
        };
        let z = rng.gen();

        // Build the text as a `String` first, then convert so that any `TryFrom<&str>`
        // string type can stand in for `StringImpl`.
        let text: String = rng
            .sample_iter(Alphanumeric)
            .take(len)
            .map(char::from)
            .collect();
        let s = StringImpl::try_from(text.as_str()).unwrap();

        let e = rng.gen();

        Data { x, y, z, s, e }
    }
}

/// Enum payload embedded in every [`Data`] record.
///
/// Serialized through the derived serde impls, so variant order determines the
/// variant indices that index-based formats write.
#[derive(Debug, Default, PartialEq, Serialize, Deserialize)]
enum DataEnum {
    /// Unit variant; the default, and the one the sampler picks 90% of the time.
    #[default]
    Bar,
    /// Carries a short alphanumeric string.
    Baz(StringImpl),
    /// Carries an optional byte.
    Foo(Option<u8>),
}

impl Distribution<DataEnum> for rand::distributions::Standard {
    /// Samples one random [`DataEnum`].
    ///
    /// `Bar` is chosen with probability 0.9; the remainder is split evenly between
    /// `Baz` (random short string) and `Foo` (byte present half of the time).
    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> DataEnum {
        if rng.gen_bool(0.9) {
            return DataEnum::Bar;
        }

        if !rng.gen_bool(0.5) {
            let payload = if rng.gen_bool(0.5) {
                Some(rng.gen())
            } else {
                None
            };
            return DataEnum::Foo(payload);
        }

        let len = gen_len(rng);
        let text: String = rng
            .sample_iter(Alphanumeric)
            .take(len)
            .map(char::from)
            .collect();
        DataEnum::Baz(StringImpl::try_from(text.as_str()).unwrap())
    }
}

/// Generates `n` records from a ChaCha20 generator seeded with all zeroes, so every
/// call with the same `n` yields the same data.
fn random_data(n: usize) -> Vec<Data> {
    let mut rng = ChaCha20Rng::from_seed([0u8; 32]);
    let mut records = Vec::with_capacity(n);
    for _ in 0..n {
        records.push(rng.gen());
    }
    records
}

/// Encodes `v` with this crate's serializer, panicking on failure.
fn bitcode_serialize(v: &(impl Serialize + ?Sized)) -> Vec<u8> {
    let encoded = crate::serialize(v);
    encoded.unwrap()
}

/// Decodes a `T` from bytes produced by this crate's serializer, panicking on failure.
fn bitcode_deserialize<T: DeserializeOwned>(v: &[u8]) -> T {
    let decoded = crate::deserialize(v);
    decoded.unwrap()
}

/// Encodes `v` through bincode's top-level `serialize` function (the "fixint"
/// configuration in this file's naming), panicking on failure.
fn bincode_fixint_serialize(v: &(impl Serialize + ?Sized)) -> Vec<u8> {
    let encoded = bincode::serialize(v);
    encoded.unwrap()
}

/// Decodes a `T` through bincode's top-level `deserialize` function, panicking on failure.
fn bincode_fixint_deserialize<T: DeserializeOwned>(v: &[u8]) -> T {
    let decoded = bincode::deserialize(v);
    decoded.unwrap()
}

/// Encodes `v` with bincode's `DefaultOptions` (the "varint" configuration in this
/// file's naming), panicking on failure.
fn bincode_varint_serialize(v: &(impl Serialize + ?Sized)) -> Vec<u8> {
    let options = bincode::DefaultOptions::new();
    options.serialize(v).unwrap()
}

/// Decodes a `T` with bincode's `DefaultOptions`, panicking on failure.
fn bincode_varint_deserialize<T: DeserializeOwned>(v: &[u8]) -> T {
    let options = bincode::DefaultOptions::new();
    options.deserialize(v).unwrap()
}

/// Encodes `v` with bincode's `DefaultOptions`, then LZ4-compresses the result with
/// the uncompressed size prepended.
fn bincode_lz4_serialize(v: &(impl Serialize + ?Sized)) -> Vec<u8> {
    let encoded = bincode::DefaultOptions::new().serialize(v).unwrap();
    compress_prepend_size(&encoded)
}

/// Reverses `bincode_lz4_serialize`: LZ4-decompresses the size-prefixed input, then
/// decodes a `T` with bincode's `DefaultOptions`. Panics if either step fails.
fn bincode_lz4_deserialize<T: DeserializeOwned>(v: &[u8]) -> T {
    let decompressed = decompress_size_prepended(v).unwrap();
    let options = bincode::DefaultOptions::new();
    options.deserialize(&decompressed).unwrap()
}

/// Streams the bincode (`DefaultOptions`) encoding of `v` through a Deflate encoder
/// configured for the fastest compression level and returns the compressed bytes.
fn bincode_flate2_fast_serialize(v: &(impl Serialize + ?Sized)) -> Vec<u8> {
    let mut encoder = DeflateEncoder::new(Vec::new(), Compression::fast());
    let options = bincode::DefaultOptions::new();
    options.serialize_into(&mut encoder, v).unwrap();
    // `finish` flushes the remaining compressed data and hands back the inner `Vec`.
    encoder.finish().unwrap()
}

/// Decodes a `T` with bincode's `DefaultOptions`, reading from a Deflate decoder
/// wrapped around `v`. Panics on failure.
fn bincode_flate2_fast_deserialize<T: DeserializeOwned>(v: &[u8]) -> T {
    let decoder = DeflateDecoder::new(v);
    let options = bincode::DefaultOptions::new();
    options.deserialize_from(decoder).unwrap()
}

/// Streams the bincode (`DefaultOptions`) encoding of `v` through a Deflate encoder
/// configured for the best compression level and returns the compressed bytes.
fn bincode_flate2_best_serialize(v: &(impl Serialize + ?Sized)) -> Vec<u8> {
    let mut encoder = DeflateEncoder::new(Vec::new(), Compression::best());
    let options = bincode::DefaultOptions::new();
    options.serialize_into(&mut encoder, v).unwrap();
    // `finish` flushes the remaining compressed data and hands back the inner `Vec`.
    encoder.finish().unwrap()
}

fn bincode_flate2_best_deserialize<T: DeserializeOwned>(v: &[u8]) -> T {
    bincode_flate2_fast_deserialize(v)
}

/// Encodes `v` with postcard into a freshly allocated `Vec`, panicking on failure.
fn postcard_serialize(v: &(impl Serialize + ?Sized)) -> Vec<u8> {
    let encoded = postcard::to_allocvec(v);
    encoded.unwrap()
}

/// Decodes a `T` from postcard-encoded bytes, panicking on failure.
fn postcard_deserialize<T: DeserializeOwned>(buf: &[u8]) -> T {
    let decoded = postcard::from_bytes(buf);
    decoded.unwrap()
}

/// Builds the input shared by all benchmarks: 1000 deterministic pseudo-random records.
fn bench_data() -> Vec<Data> {
    const RECORDS: usize = 1000;
    random_data(RECORDS)
}

/// Benchmarks `ser` on the shared data set.
///
/// Input and output both pass through `black_box` so the optimizer cannot elide or
/// pre-compute the serialization.
fn bench_serialize(b: &mut Bencher, ser: fn(&[Data]) -> Vec<u8>) {
    let data = bench_data();
    let input: &[Data] = &data;
    b.iter(|| {
        let encoded = ser(black_box(input));
        black_box(encoded);
    })
}

/// Benchmarks `de` on the output of `ser` applied to the shared data set.
///
/// Before timing anything, a round trip is asserted to reproduce the original data so
/// that a broken codec fails loudly instead of producing a meaningless timing.
fn bench_deserialize(b: &mut Bencher, ser: fn(&[Data]) -> Vec<u8>, de: fn(&[u8]) -> Vec<Data>) {
    let data = bench_data();
    let encoded = ser(&data);
    let bytes: &[u8] = &encoded;
    assert_eq!(de(bytes), data);
    b.iter(|| {
        let decoded = de(black_box(bytes));
        black_box(decoded);
    })
}

/// Benchmarks serialization through a reused [`Buffer`].
///
/// One serialization runs before timing starts; the buffer's capacity is recorded
/// afterwards and asserted unchanged once the benchmark loop has finished.
#[bench]
fn bench_bitcode_buffer_serialize(b: &mut Bencher) {
    let data = bench_data();
    let mut buf = Buffer::new();

    // Warm-up pass so the capacity snapshot reflects a buffer that has already been used.
    buf.serialize(&data).unwrap();
    let warm_capacity = buf.capacity();

    b.iter(|| {
        let bytes = buf.serialize(black_box(&data)).unwrap();
        black_box(bytes);
    });

    assert_eq!(buf.capacity(), warm_capacity);
}

/// Benchmarks deserialization through a reused [`Buffer`].
///
/// A round trip is verified first; the buffer's capacity after that first use is
/// asserted unchanged once the benchmark loop has finished.
#[bench]
fn bench_bitcode_buffer_deserialize(b: &mut Bencher) {
    let data = bench_data();
    let encoded = crate::serialize(&data).unwrap();
    let bytes = &encoded;
    let mut buf = Buffer::new();

    assert_eq!(buf.deserialize::<Vec<Data>>(bytes).unwrap(), data);
    let warm_capacity = buf.capacity();

    b.iter(|| {
        let decoded = buf.deserialize::<Vec<Data>>(black_box(bytes)).unwrap();
        black_box(decoded);
    });

    assert_eq!(buf.capacity(), warm_capacity);
}

/// Benchmarks serializing a single 10,000-character ASCII string through a reused
/// [`Buffer`], after one untimed warm-up serialization.
#[bench]
fn bench_bitcode_long_string_serialize(b: &mut Bencher) {
    let data = "abcde12345".repeat(1000);
    let mut buf = Buffer::new();

    // Untimed first pass.
    buf.serialize(&data).unwrap();

    b.iter(|| {
        let bytes = buf.serialize(black_box(&data)).unwrap();
        black_box(bytes);
    });
}

/// Benchmarks deserializing a single 10,000-character ASCII string through a reused
/// [`Buffer`], after verifying the round trip once.
#[bench]
fn bench_bitcode_long_string_deserialize(b: &mut Bencher) {
    let data = "abcde12345".repeat(1000);
    let mut buf = Buffer::new();

    // Copy the encoded bytes out of the buffer so it can be borrowed again for decoding.
    let encoded = buf.serialize(&data).unwrap();
    let bytes = encoded.to_vec();

    assert_eq!(buf.deserialize::<String>(&bytes).unwrap(), data);

    b.iter(|| {
        let decoded = buf.deserialize::<String>(black_box(&bytes)).unwrap();
        black_box(decoded);
    });
}

// Generates a pair of `#[bench]` functions for every codec name passed in.
//
// For a name `foo`, this expects functions `foo_serialize` and `foo_deserialize` to be
// in scope and emits `bench_foo_serialize` / `bench_foo_deserialize`, which forward to
// `bench_serialize` / `bench_deserialize`. Identifier concatenation is done by `paste!`.
macro_rules! bench {
    ($($name:ident),*) => {
        paste! {
            $(
                // Times `<name>_serialize` on the shared data set.
                #[bench]
                fn [<bench_ $name _serialize>] (b: &mut Bencher) {
                    bench_serialize(b, [<$name _serialize>])
                }

                // Times `<name>_deserialize` on the output of `<name>_serialize`.
                #[bench]
                fn [<bench_ $name _deserialize>] (b: &mut Bencher) {
                    bench_deserialize(b, [<$name _serialize>], [<$name _deserialize>])
                }
            )*
        }
    }
}

// Instantiate serialize/deserialize benchmarks for every codec wrapper defined in this
// file. Each name must have matching `<name>_serialize` and `<name>_deserialize` functions.
bench!(
    bitcode,
    bincode_fixint,
    bincode_varint,
    bincode_lz4,
    bincode_flate2_fast,
    bincode_flate2_best,
    postcard
);

/// Prints a Markdown table comparing, for each format, the average encoded size per
/// record and the percentage of output bytes that are zero.
///
/// The data set is 10,000 deterministic pseudo-random records. The final row shows the
/// zero-byte percentage of maximally random output for reference.
#[test]
fn comparison1() {
    let data = &random_data(10000);

    let print_results = |name: &'static str, b: Vec<u8>| {
        let zero_count = b.iter().filter(|&&byte| byte == 0).count();
        let zeros = 100.0 * zero_count as f32 / b.len() as f32;
        // Two decimals for percentages up to 1%, one decimal above that. The float-to-
        // `usize` cast saturates, so negative or non-finite logarithms count as 0.
        let precision = 2 - (zeros.log10().ceil() as usize).min(1);

        // `.1$` takes the precision from positional argument 1 (`precision`).
        println!(
            "| {name:<22} | {:<12.1} | {zeros:>4.1$}%      |",
            b.len() as f32 / data.len() as f32,
            precision,
        );
    };

    println!("| Format                 | Size (bytes) | Zero Bytes |");
    println!("|------------------------|--------------|------------|");
    print_results("Bitcode", bitcode_serialize(data));
    print_results("Bincode", bincode_fixint_serialize(data));
    print_results("Bincode (Varint)", bincode_varint_serialize(data));

    // These use varint since it makes the result smaller and actually speeds up compression.
    print_results("Bincode (LZ4)", bincode_lz4_serialize(data));
    print_results(
        "Bincode (Deflate Fast)",
        bincode_flate2_fast_serialize(data),
    );
    print_results(
        "Bincode (Deflate Best)",
        bincode_flate2_best_serialize(data),
    );

    // TODO compressed postcard.
    print_results("Postcard", postcard_serialize(data));

    // A byte has 256 possible values, so in uniformly random output the expected share
    // of zero bytes is 1/256 (not 1/255).
    println!(
        "| ideal (max entropy)    |              | {:.2}%      |",
        100.0 / 256.0
    );
}

/// Prints two Markdown tables of encoded sizes in bits for Bitcode, Bincode,
/// Bincode (Varint) and Postcard: one over primitive types (as a `lo-hi` span across a
/// value range) and one over a handful of specific values.
#[test]
fn comparison2() {
    /// Encodes eight copies of `t` as a fixed-size array with each format and returns
    /// the average size per copy, in bits, ordered as
    /// `[bitcode, bincode, bincode varint, postcard]`.
    fn measure<T: Serialize + Clone>(t: T) -> [usize; 4] {
        const COUNT: usize = 8;
        let many: [T; COUNT] = std::array::from_fn(|_| t.clone());
        let byte_lens = [
            bitcode_serialize(&many).len(),
            bincode_fixint_serialize(&many).len(),
            bincode_varint_serialize(&many).len(),
            postcard_serialize(&many).len(),
        ];
        byte_lens.map(|bytes| bytes * 8 / COUNT)
    }

    /// Prints one table row: sizes at the range's start and end, collapsed to a single
    /// number when both ends encode to the same size.
    fn compare<T: Serialize + Clone>(name: &str, r: RangeInclusive<T>) {
        let (start, end) = r.into_inner();
        let lo = measure(start);
        let hi = measure(end);

        let cell = |i: usize| {
            if lo[i] == hi[i] {
                lo[i].to_string()
            } else {
                format!("{}-{}", lo[i], hi[i])
            }
        };

        println!(
            "| {name:<15} | {:<7} | {:<7} | {:<16} | {:<8} |",
            cell(0),
            cell(1),
            cell(2),
            cell(3)
        );
    }

    /// Prints one table row for a single value.
    fn compare_one<T: Serialize + Clone>(name: &str, t: T) {
        compare(name, RangeInclusive::new(t.clone(), t));
    }

    println!("| Type            | Bitcode | Bincode | Bincode (Varint) | Postcard |");
    println!("|-----------------|---------|---------|------------------|----------|");
    compare("bool", false..=true);
    compare("u8", 0u8..=u8::MAX);
    compare("i8", 0i8..=i8::MAX);
    compare("u16", 0u16..=u16::MAX);
    compare("i16", 0i16..=i16::MAX);
    compare("u32", 0u32..=u32::MAX);
    compare("i32", 0i32..=i32::MAX);
    compare("u64", 0u64..=u64::MAX);
    compare("i64", 0i64..=i64::MAX);
    compare_one("f32", 0f32);
    compare_one("f64", 0f64);
    compare("char", '\0'..=char::MAX);
    compare("Option<()>", None..=Some(()));
    compare("Result<(), ()>", Ok(())..=Err(()));

    println!();
    println!("| Value           | Bitcode | Bincode | Bincode (Varint) | Postcard |");
    println!("|-----------------|---------|---------|------------------|----------|");
    compare_one("[true; 4]", [true; 4]);
    compare_one("vec![(); 0]", vec![(); 0]);
    compare_one("vec![(); 1]", vec![(); 1]);
    compare_one("vec![(); 256]", vec![(); 256]);
    compare_one("vec![(); 65536]", vec![(); 65536]);
    compare_one(r#""""#, "");
    compare_one(r#""abcd""#, "abcd");
    compare_one(r#""abcd1234""#, "abcd1234");
}