#![feature(maybe_uninit_uninit_array, maybe_uninit_slice)]
use std::mem::MaybeUninit;
use bitvec::{
mem::{
elts,
BitMemory,
},
prelude::*,
};
use criterion::{
criterion_group,
criterion_main,
BenchmarkId,
Criterion,
SamplingMode,
Throughput,
};
use tap::tap::Tap;
const KIBIBYTE: usize = 1024;
const FACTOR: usize = 1 * KIBIBYTE;
const SCALARS: &[usize] = &[1, 2, 4, 8, 16, 24, 32, 40, 52, 64];
const MAX_BITS: usize = 64 * FACTOR * 8;
fn make_slots<T, const LEN: usize>()
-> ([MaybeUninit<T>; LEN], [MaybeUninit<T>; LEN])
where T: BitStore {
(
MaybeUninit::<T>::uninit_array::<LEN>(),
MaybeUninit::<T>::uninit_array::<LEN>(),
)
}
fn view_slots<'a, 'b, T>(
src: &'a [MaybeUninit<T>],
dst: &'b mut [MaybeUninit<T>],
) -> (&'a [T], &'b mut [T])
where
T: BitStore,
{
unsafe {
(
MaybeUninit::slice_assume_init_ref(src),
MaybeUninit::slice_assume_init_mut(dst),
)
}
}
pub fn benchmarks(crit: &mut Criterion) {
fn steps()
-> impl Iterator<Item = (impl Fn(&'static str) -> BenchmarkId, usize, Throughput)>
{
SCALARS.iter().map(|&n| {
(
move |name| BenchmarkId::new(name, n),
n * FACTOR * <u8 as BitMemory>::BITS as usize,
Throughput::Bytes((n * FACTOR) as u64),
)
})
}
let (src_words, mut dst_words) =
make_slots::<usize, { elts::<usize>(MAX_BITS) }>();
let (src_bytes, mut dst_bytes) =
make_slots::<u8, { elts::<u8>(MAX_BITS) }>();
let (src_words, dst_words) = view_slots(&src_words, &mut dst_words);
let (src_bytes, dst_bytes) = view_slots(&src_bytes, &mut dst_bytes);
macro_rules! mkgrp {
($crit:ident, $name:literal) => {
(&mut *$crit).benchmark_group($name).tap_mut(|grp| {
grp.sampling_mode(SamplingMode::Flat).sample_size(2000);
})
};
}
let mut group = mkgrp!(crit, "Element-wise");
for (id, bits, thrpt) in steps() {
group.throughput(thrpt);
let words = bits / <usize as BitMemory>::BITS as usize;
let bytes = bits / <u8 as BitMemory>::BITS as usize;
let (src_words, dst_words) =
(&src_words[.. words], &mut dst_words[.. words]);
let (src_bytes, dst_bytes) =
(&src_bytes[.. bytes], &mut dst_bytes[.. bytes]);
group.bench_function(id("words_plain"), |b| {
let (src, dst) = (src_words, &mut *dst_words);
b.iter(|| dst.copy_from_slice(src))
});
group.bench_function(id("bytes_plain"), |b| {
let (src, dst) = (src_bytes, &mut *dst_bytes);
b.iter(|| dst.copy_from_slice(src))
});
group.bench_function(id("words_manual"), |b| {
let (src, dst) = (src_words, &mut *dst_words);
b.iter(|| {
for (from, to) in src.iter().zip(dst.iter_mut()) {
*to = *from;
}
})
});
group.bench_function(id("bytes_manual"), |b| {
let (src, dst) = (src_bytes, &mut *dst_bytes);
b.iter(|| {
for (from, to) in src.iter().zip(dst.iter_mut()) {
*to = *from;
}
})
});
}
group.finish();
let mut group = mkgrp!(crit, "Bit-wise accelerated");
for (id, bits, thrpt) in steps() {
group.throughput(thrpt);
let words = bits / <usize as BitMemory>::BITS as usize;
let bytes = bits / <u8 as BitMemory>::BITS as usize;
let (src_words, dst_words) =
(&src_words[.. words], &mut dst_words[.. words]);
let (src_bytes, dst_bytes) =
(&src_bytes[.. bytes], &mut dst_bytes[.. bytes]);
group.bench_function(id("bits_words_plain"), |b| {
let (src, dst) = (
src_words.view_bits::<Lsb0>(),
dst_words.view_bits_mut::<Lsb0>(),
);
b.iter(|| dst.copy_from_bitslice(src));
});
group.bench_function(id("bits_bytes_plain"), |b| {
let (src, dst) = (
src_bytes.view_bits::<Lsb0>(),
dst_bytes.view_bits_mut::<Lsb0>(),
);
b.iter(|| dst.copy_from_bitslice(src));
});
group.bench_function(id("bits_words_edges"), |b| {
let src = src_words.view_bits::<Lsb0>();
let len = src.len();
let (src, dst) = (
&src[10 .. len - 10],
&mut dst_words.view_bits_mut::<Lsb0>()[10 .. len - 10],
);
b.iter(|| dst.copy_from_bitslice(src));
});
group.bench_function(id("bits_bytes_edges"), |b| {
let src = src_bytes.view_bits::<Lsb0>();
let len = src.len();
let (src, dst) = (
&src[10 .. len - 10],
&mut dst_bytes.view_bits_mut::<Lsb0>()[10 .. len - 10],
);
b.iter(|| dst.copy_from_bitslice(src));
});
group.bench_function(id("bits_words_misalign"), |b| {
let src = &src_words.view_bits::<Lsb0>()[10 ..];
let dst = &mut dst_words.view_bits_mut::<Lsb0>()[.. src.len()];
b.iter(|| dst.copy_from_bitslice(src));
});
group.bench_function(id("bits_bytes_misalign"), |b| {
let src = &src_bytes.view_bits::<Lsb0>()[10 ..];
let dst = &mut dst_bytes.view_bits_mut::<Lsb0>()[.. src.len()];
b.iter(|| dst.copy_from_bitslice(src));
});
}
group.finish();
let mut group = mkgrp!(crit, "Bit-wise crawl");
for (id, bits, thrpt) in steps() {
group.throughput(thrpt);
let words = bits / <usize as BitMemory>::BITS as usize;
let bytes = bits / <u8 as BitMemory>::BITS as usize;
let (src_words, dst_words) =
(&src_words[.. words], &mut dst_words[.. words]);
let (src_bytes, dst_bytes) =
(&src_bytes[.. bytes], &mut dst_bytes[.. bytes]);
group.bench_function(id("bits_words_mismatched"), |b| {
let (src, dst) = (
src_words.view_bits::<Msb0>(),
dst_words.view_bits_mut::<Lsb0>(),
);
b.iter(|| dst.clone_from_bitslice(src));
});
group.bench_function(id("bits_bytes_mismatched"), |b| {
let (src, dst) = (
src_bytes.view_bits::<Msb0>(),
dst_bytes.view_bits_mut::<Lsb0>(),
);
b.iter(|| dst.clone_from_bitslice(src));
});
group.bench_function(id("bitwise_words"), |b| {
let (src, dst) = (
src_words.view_bits::<Lsb0>(),
dst_words.view_bits_mut::<Lsb0>(),
);
b.iter(|| unsafe {
for (from, to) in
src.as_bitptr_range().zip(dst.as_mut_bitptr_range())
{
to.write(from.read());
}
})
});
group.bench_function(id("bitwise_bytes"), |b| {
let (src, dst) = (
src_bytes.view_bits::<Lsb0>(),
dst_bytes.view_bits_mut::<Lsb0>(),
);
b.iter(|| unsafe {
for (from, to) in
src.as_bitptr_range().zip(dst.as_mut_bitptr_range())
{
to.write(from.read());
}
})
});
group.bench_function(id("bitwise_words_mismatch"), |b| {
let (src, dst) = (
src_words.view_bits::<Lsb0>(),
dst_words.view_bits_mut::<Msb0>(),
);
b.iter(|| unsafe {
for (from, to) in
src.as_bitptr_range().zip(dst.as_mut_bitptr_range())
{
to.write(from.read());
}
})
});
group.bench_function(id("bitwise_bytes_mismatch"), |b| {
let (src, dst) = (
src_bytes.view_bits::<Msb0>(),
dst_bytes.view_bits_mut::<Lsb0>(),
);
b.iter(|| unsafe {
for (from, to) in
src.as_bitptr_range().zip(dst.as_mut_bitptr_range())
{
to.write(from.read());
}
})
});
}
}
criterion_group!(benches, benchmarks);
criterion_main!(benches);