#![doc = include_str!("../README.md")]
mod elias_fano;
mod bitm;
#[cfg(target_pointer_width = "64")] mod sucds;
#[cfg(target_pointer_width = "64")] mod succinct;
#[cfg(target_pointer_width = "64")] mod sux;
#[cfg(feature = "vers-vecs")] mod vers;
use std::{fs::{File, OpenOptions}, hint::black_box, num::{NonZeroU32, NonZeroU64}, ops::Range, time::Instant};
use std::io::Write;
use butils::{UnitPrefix, XorShift64};
use clap::{Parser, Subcommand, ValueEnum};
#[derive(Subcommand)]
pub enum Structure {
EliasFano,
BitmBV,
#[cfg(target_pointer_width = "64")] SucdsBV,
#[cfg(target_pointer_width = "64")]
#[clap(visible_alias = "succ-jacobson")]
SuccinctJacobson,
#[cfg(target_pointer_width = "64")]
#[clap(visible_aliases = ["succ-rank9", "succ-r9"])]
SuccinctRank9,
#[cfg(target_pointer_width = "64")]
#[clap(visible_alias = "sux-adapt")]
SuxSelectAdapt,
#[cfg(target_pointer_width = "64")]
#[clap(visible_alias = "sux-adapt-const")]
SuxSelectAdaptConst,
#[cfg(feature = "vers-vecs")] Vers,
BV
}
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
pub enum Distribution {
#[clap(alias = "u")]
Uniform,
#[clap(alias = "a")]
Adversarial,
#[clap(aliases = ["l", "ld"])]
LinearlyDensified,
}
impl std::fmt::Display for Distribution {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", match *self {
Distribution::Uniform => "uniform",
Distribution::Adversarial => "adversarial",
Distribution::LinearlyDensified => "linearly densified",
})
}
}
#[derive(Parser)]
#[command(author, version, about, long_about = None, infer_subcommands=true, infer_long_args=true)]
pub struct Conf {
#[command(subcommand)]
pub structure: Structure,
#[arg(short = 'n', long, default_value_t = 500_000_000)]
pub num: usize,
#[arg(short = 'u', long, default_value_t = 1_000_000_000)]
pub universe: usize,
#[arg(short='d', long, value_enum, default_value_t = Distribution::Uniform)]
pub distribution: Distribution,
#[arg(short='t', long, default_value_t = 5)]
pub time: u16,
#[arg(short='c', long, default_value_t = 0)]
pub cooling_time: u16,
#[arg(long, default_value_t = false)]
pub verify: bool,
#[arg(short='s', long, default_value_t = NonZeroU64::new(1234).unwrap())]
pub seed: NonZeroU64,
#[arg(short='q', long, default_value_t = NonZeroU32::new(1_000_000).unwrap())]
pub queries: NonZeroU32,
#[arg(short='f', long, default_value_t = false)]
pub save_details: bool,
}
const INPUT_HEADER: &'static str = "universe,num,distribution";
const RANK_HEADER: &'static str = "method,size,time_per_query";
const SELECT_HEADER: &'static str = "method,extra_size,time_per_query";
struct Tester<'c> {
conf: &'c Conf,
number_of_ones: usize,
rank_includes_current: bool
}
fn check<R: Into<Option<usize>>>(structure_name: &str, operation_name: &str, argument: usize, expected: usize, got: R) {
if let Some(got) = got.into() {
if got != expected {
eprintln!("{structure_name}: {operation_name}({argument}) returned {got}, but should {expected}");
}
} else {
eprintln!("{structure_name}: {operation_name}({argument}) returned None, but should {expected}")
}
}
impl<'c> Tester<'c> {
#[inline(always)] pub fn raport_rank<R: Into<Option<usize>>, F>(&self, method_name: &str, size_bytes: usize, rank: F)
where F: Fn(usize) -> R
{
print!(" rank: space overhead {:.2}%", self.conf.space_overhead(size_bytes));
let time = self.conf.queries_measure(&self.conf.rand_queries(self.conf.universe), &rank).as_nanos();
println!(" time/query {:.2}ns", time);
self.conf.save_rank(method_name, size_bytes, time);
self.verify_rank(method_name, rank);
}
fn verify_rank<R: Into<Option<usize>>, F>(&self, method_name: &str, rank: F) where F: Fn(usize) -> R {
if self.conf.verify {
self.conf.data_foreach(|index, mut expected_rank, value| {
if self.rank_includes_current && value { expected_rank += 1 }
check(method_name, "rank", index, expected_rank, rank(index))
});
}
}
#[inline(always)] pub fn raport_select1<R: Into<Option<usize>>, F>(&self, method_name: &str, extra_size_bytes: usize, select: F)
where F: Fn(usize) -> R
{
if self.number_of_ones == 0 {
return;
}
print!(" select1:");
if extra_size_bytes != 0 { print!(" space overhead {:.2}%", self.conf.extra_space_overhead(extra_size_bytes)); }
let time = self.conf.queries_measure(&self.conf.rand_queries(self.number_of_ones), &select).as_nanos();
println!(" time/query {:.2}ns", time);
self.conf.save_select1(method_name, extra_size_bytes, time);
self.verify_select1(method_name, select);
}
fn verify_select1<R: Into<Option<usize>>, F>(&self, method_name: &str, select: F) where F: Fn(usize) -> R {
if self.conf.verify {
self.conf.data_foreach(|index, rank, value| if value {
check(method_name, "select", rank, index, select(rank))
});
}
}
#[inline(always)] pub fn raport_select0<R: Into<Option<usize>>, F>(&self, method_name: &str, extra_size_bytes: usize, select0: F)
where F: Fn(usize) -> R
{
if self.conf.universe == self.number_of_ones {
return;
}
print!(" select0:");
if extra_size_bytes != 0 { print!(" space overhead {:.2}%", self.conf.extra_space_overhead(extra_size_bytes)); }
let time = self.conf.queries_measure(
&self.conf.rand_queries(self.conf.universe-self.number_of_ones),
&select0).as_nanos();
println!(" time/query {:.2}ns", time);
self.conf.save_select0(method_name, extra_size_bytes, time);
self.verify_select0(method_name, select0);
}
fn verify_select0<R: Into<Option<usize>>, F>(&self, method_name: &str, select0: F) where F: Fn(usize) -> R {
if self.conf.verify {
self.conf.data_foreach(|index, rank1, value| if !value {
let rank0 = index - rank1;
check(method_name, "select0", rank0, index, select0(rank0))
});
}
}
}
impl Conf {
#[inline(always)] fn uniform_foreach<F: FnMut(usize, usize, bool)>(&self, mut f: F, gen: &mut XorShift64, total_ones: &mut usize, mut num: usize, universe: Range<usize>) {
let mut remain_universe = universe.len();
for i in universe {
let included = gen.get() as usize % remain_universe < num;
f(i, *total_ones, included);
if included {
*total_ones += 1;
num -= 1;
}
remain_universe -= 1;
}
}
fn num(&self) -> usize {
match self.distribution {
Distribution::Uniform|Distribution::Adversarial => self.num,
Distribution::LinearlyDensified => { self.data_foreach(|_, _, _| {}) }
}
}
fn extra_space_overhead(&self, extra_size_bytes: usize) -> f64 {
let raw_space = (self.universe + 7) / 8;
(100 * extra_size_bytes) as f64 / raw_space as f64
}
fn space_overhead(&self, size_bytes: usize) -> f64 {
let raw_space = (self.universe + 7) / 8;
(100 * (size_bytes as isize - raw_space as isize)) as f64 / raw_space as f64
}
#[inline(always)] fn data_foreach<F: FnMut(usize, usize, bool)>(&self, mut f: F) -> usize {
let mut gen = self.rand_gen();
let mut number_of_ones = 0;
match self.distribution {
Distribution::Uniform => self.uniform_foreach(f, &mut gen, &mut number_of_ones, self.num, 0..self.universe),
Distribution::Adversarial => {
let sparse_threshold = self.universe - self.num;
self.uniform_foreach(&mut f, &mut gen, &mut number_of_ones, (self.num+50)/100, 0..sparse_threshold);
let num = self.num - number_of_ones;
self.uniform_foreach(f, &mut gen, &mut number_of_ones, num, sparse_threshold..self.universe);
}
Distribution::LinearlyDensified => { let (reverse, num_dbl) = if self.num * 2 > self.universe {
(true, (self.universe - self.num)*2)
} else {
(false, self.num*2)
};
for i in 0..self.universe {
let j = if reverse { self.universe - i } else { i };
let included = (gen.get() as usize % self.universe * (self.universe-1) < num_dbl * j) ^ reverse;
f(i, number_of_ones, included);
number_of_ones += included as usize;
}
}
}
number_of_ones
}
fn fill_data<F: FnMut(usize, bool)>(&self, mut add: F) -> Tester {
let number_of_ones = self.data_foreach(|index, _, v| add(index, v));
println!(" input: number of bit ones is {} / {} ({:.2}%), {} distribution",
number_of_ones, self.universe, percent_of(number_of_ones, self.universe), self.distribution);
Tester { conf: self, number_of_ones, rank_includes_current: false }
}
#[inline] fn add_data<F: FnMut(usize)>(&self, mut add: F) -> Tester {
self.fill_data(|i, v| if v { add(i) })
}
fn file(&self, file_name: &str, extra_header: &str) -> Option<File> {
if !self.save_details { return None; }
let file_name = format!("{}.csv", file_name);
let file_already_existed = std::path::Path::new(&file_name).exists();
let mut file = OpenOptions::new().append(true).create(true).open(&file_name).unwrap();
if !file_already_existed { writeln!(file, "{},{}", INPUT_HEADER, extra_header).unwrap(); }
Some(file)
}
fn save_rank_or_select(&self, header: &str, file_name: &str, method_name: &str, space: usize, time: f64) {
if let Some(mut file) = self.file(file_name, header) {
writeln!(file, "{},{},{},{},{},{}", self.universe, self.num, self.distribution, method_name, space, time).unwrap();
}
}
pub fn save_rank(&self, method_name: &str, space_bytes: usize, time: f64) {
self.save_rank_or_select(RANK_HEADER, "rank", method_name, space_bytes, time)
}
pub fn save_select1(&self, method_name: &str, extra_size_bytes: usize, time: f64) {
self.save_rank_or_select(SELECT_HEADER, "select1", method_name, extra_size_bytes, time)
}
pub fn save_select0(&self, method_name: &str, extra_size_bytes: usize, time: f64) {
self.save_rank_or_select(SELECT_HEADER, "select0", method_name, extra_size_bytes, time)
}
fn rand_gen(&self) -> XorShift64 { XorShift64(self.seed.get()) }
fn rand_queries(&self, query_universe: usize) -> Box<[usize]> {
self.rand_gen().take(self.queries.get() as usize).map(|v| v as usize % query_universe).collect()
}
#[inline(always)] fn measure<F>(&self, f: F) -> f64
where F: Fn()
{
if self.cooling_time > 0 {
std::thread::sleep(std::time::Duration::from_secs(self.cooling_time as u64));
}
let mut iters = 1;
if self.time > 0 {
let time = Instant::now();
loop {
f();
if time.elapsed().as_secs() > self.time as u64 { break; }
iters += 1;
}
}
let start_moment = Instant::now();
for _ in 0..iters { f(); }
return start_moment.elapsed().as_secs_f64() / iters as f64
}
#[inline(always)] fn queries_measure<R, F>(&self, queries: &[usize], f: F) -> f64
where F: Fn(usize) -> R
{
self.measure(|| for i in queries { black_box(f(*i)); }) / queries.len() as f64
}
}
fn percent_of(overhead: usize, whole: usize) -> f64 { (overhead*100) as f64 / whole as f64 }
fn main() {
let conf: Conf = Conf::parse();
match conf.structure {
Structure::EliasFano => elias_fano::benchmark(&conf),
Structure::BitmBV => bitm::benchmark_rank_select(&conf),
#[cfg(target_pointer_width = "64")] Structure::SucdsBV => sucds::benchmark_rank9_select(&conf),
#[cfg(target_pointer_width = "64")] Structure::SuccinctJacobson => succinct::benchmark_jacobson(&conf),
#[cfg(target_pointer_width = "64")] Structure::SuccinctRank9 => succinct::benchmark_rank9(&conf),
#[cfg(target_pointer_width = "64")] Structure::SuxSelectAdapt => sux::benchmark_select_adapt_const(&conf),
#[cfg(target_pointer_width = "64")] Structure::SuxSelectAdaptConst => sux::benchmark_select_adapt(&conf),
#[cfg(feature = "vers-vecs")] Structure::Vers => vers::benchmark_rank_select(&conf),
Structure::BV => {
bitm::benchmark_rank_select(&conf);
#[cfg(target_pointer_width = "64")] {
sucds::benchmark_rank9_select(&conf);
succinct::benchmark_rank9(&conf);
succinct::benchmark_jacobson(&conf);
}
#[cfg(feature = "vers-vecs")] vers::benchmark_rank_select(&conf);
#[cfg(target_pointer_width = "64")] {
sux::benchmark_select_adapt(&conf);
sux::benchmark_select_adapt_const(&conf);
}
},
}
}