#![doc = include_str!("../README.md")]
use clap::{Args, Parser, Subcommand, ValueEnum};
use csf;
use csf::coding::BuildMinimumRedundancy;
use csf::{fp, GetSize};
use distribution::{Input, kv_dominated_lo_entropy};
use function::{CSFBuilder, PrintParams, CLS_HEADER, CFP_HEADER, FPGO_HEADER, FP_HEADER};
use ph::fmph::Bits;
use std::fs;
use std::fs::File;
use std::io::prelude::*;
use std::ops::RangeInclusive;
use crate::distribution::kv_dominated_lo;
mod distribution;
mod function;
// CLI options shared by the fingerprinting-based subcommands (CFPGO_all, CFP, FP)
// that only need a level-sizer configuration.
// NOTE: plain `//` comments are used on fields on purpose -- `///` doc comments
// on clap-derive fields become --help text and would change CLI output.
#[allow(non_camel_case_types)]
#[derive(Args)]
pub struct FPConf {
// Level size; 0 selects the default sizer. When `level_size_proportional` is
// set this is a percentage (passed to `fp::ProportionalLevelSize::with_percent`);
// otherwise it is passed to `fp::ResizedLevel::new` -- presumably a size
// multiplier, confirm against the csf crate docs.
#[arg(short = 'l', long, default_value_t = 0)]
pub level_size: u16,
// Interpret `level_size` proportionally (as a percent) rather than as a fixed resize.
#[arg(short = 'p', long, default_value_t = false)]
pub level_size_proportional: bool,
}
// CLI options for the group-optimized fingerprinting subcommand (CFPGO).
// Plain `//` comments are used so clap help output is unchanged (see FPConf).
#[allow(non_camel_case_types)]
#[derive(Args)]
pub struct FPGOConf {
// Bits per group seed (1..=15). When only one of this pair is given, the
// other is deduced in `main` from a fixed table of supported combinations.
#[arg(short='s', long, value_parser = clap::value_parser!(u8).range(1..16))]
pub bits_per_group_seed: Option<u8>,
// Group size in bits (1..=62); deduced from `bits_per_group_seed` when absent.
#[arg(short='b', long, value_parser = clap::value_parser!(u8).range(1..63))]
pub group_size: Option<u8>,
// Level size; 0 selects the default sizer (see FPConf::level_size for semantics).
#[arg(short = 'l', long, default_value_t = 0)]
pub level_size: u16,
// Interpret `level_size` proportionally (as a percent) rather than as a fixed resize.
#[arg(short = 'p', long, default_value_t = false)]
pub level_size_proportional: bool,
}
// The compressed-static-function variant to benchmark (clap subcommand).
// Plain `//` comments keep clap help output unchanged.
#[allow(non_camel_case_types)]
#[derive(Subcommand)]
pub enum Function {
// Grid search: compressed fingerprinting with group optimization over all
// (bits-per-seed, group-size) combinations (see `fpgo_all`).
CFPGO_all(FPConf),
// Compressed fingerprinting with group optimization for chosen/deduced
// seed/group sizes (see `fpgo`).
CFPGO(FPGOConf),
// Compressed fingerprinting map (`fp::CMapConf`).
CFP(FPConf),
// Plain fingerprinting map (`fp::MapConf`).
FP(FPConf),
// Compressed "LS" map (`function::BuildLSCMap`) -- see the function module
// for what LS denotes.
CLS,
// Plain "LS" map (`function::BuildLSMap`).
LS
}
// Value distribution of the generated key-value input (clap value enum).
// Both benchmarks are driven by `kv_dominated_lo`; `Equal` sweeps inputs where
// values are (nearly) equally frequent, `Dominated` inputs where one value
// dominates -- see `benchmark_all_functions`.
#[allow(non_camel_case_types)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
pub enum Distribution {
Equal,
Dominated,
}
impl Distribution {
fn name(&self) -> &'static str {
match self {
Distribution::Equal => "equal",
Distribution::Dominated => "dominated",
}
}
}
// Top-level CLI configuration. Plain `//` field comments keep clap help
// output unchanged (doc comments would become help text).
#[derive(Parser)]
#[command(author="Piotr Beling", version, about, long_about = None)]
pub struct Conf {
// Which CSF variant to benchmark.
#[command(subcommand)]
pub function: Function,
// Distribution of the generated input values.
#[arg(short='d', long, value_enum, default_value_t = Distribution::Equal)]
pub distribution: Distribution,
// Number of generated key-value pairs (default 2^20).
#[arg(short = 'n', long, default_value_t = 1024*1024)]
pub keys_num: u32,
// Append detailed per-benchmark rows to a CSV under csf_benchmark_results/.
#[arg(short = 's', long, default_value_t = false)]
pub save_details: bool,
// Fixed bits-per-fragment for the coding; when absent, 1..=8 are all swept.
#[arg(short = 'f', long)]
pub bits_per_fragment: Option<u8>,
// Minimum entropy gap between two consecutively benchmarked inputs.
#[arg(short = 'r', long, default_value_t = 0.02)]
pub resolution: f64,
// Only benchmark inputs with entropy in [from, to); defaults cover everything.
#[arg(long, default_value_t = f64::NEG_INFINITY)]
pub from: f64,
#[arg(long, default_value_t = f64::INFINITY)]
pub to: f64,
}
impl Conf {
    /// Range of bits-per-fragment values to benchmark: the single value given
    /// on the command line, or the full 1..=8 sweep when none was given.
    #[inline]
    pub fn bits_per_fragments(&self) -> RangeInclusive<u8> {
        match self.bits_per_fragment {
            Some(b) => b..=b,
            None => 1..=8,
        }
    }
}
/// Space usage in bits per key-value entry of a structure occupying `bytes`
/// bytes over `entries` entries. Yields NaN/inf for `entries == 0`, which the
/// benchmark never passes (inputs always contain keys).
fn bits_per_entry(bytes: usize, entries: usize) -> f64 {
    (bytes * 8) as f64 / entries as f64
}
// Column headers for the measurement columns appended to each CSV row.
// (`&str` -- consts always have 'static lifetime, the explicit annotation was redundant.)
const BENCHMARK_HEADER: &str = "bits/entry levels/query";
/// Builds a map from `input` with the builder `csf`, verifies that every key
/// resolves to its expected value (reporting mismatches on stderr), and prints
/// space usage (bits/entry) plus the average number of levels visited per
/// query. When `file` is `Some`, the two measurements are appended to it.
fn benchmark<CSF: CSFBuilder + PrintParams>(input: Input, csf: CSF, file: &mut Option<File>) {
    input.print_params_to(file);
    csf.print_params(file);
    let map = csf.new(
        input.keys.as_ref(),
        input.values.as_ref(),
        &input.frequencies,
    );
    // Integrity check: every key must map back to its original value.
    // `levels_searched` accumulates the levels visited across all queries.
    let mut levels_searched = 0usize;
    for (k, expected_v) in input.keys.iter().copied().zip(input.values.iter().copied()) {
        match CSF::value(&map, k, &mut levels_searched) {
            Some(v) if v == expected_v => {}
            Some(v) => eprintln!("error while checking integrity for key {k}, its value is {v}, but should be {expected_v}"),
            None => eprintln!("error while checking integrity for key {k}, its value is None, but should be {expected_v}"),
        }
    }
    let key_count = input.keys.len();
    let bits_per_entry = bits_per_entry(map.size_bytes(), key_count);
    let levels_per_query = levels_searched as f64 / key_count as f64;
    let overhead = bits_per_entry - input.entropy;
    println!("{:.2} (entropy) + {:.2} ({:.0}%) = {:.2} bits/kv {:.2} levels/query", input.entropy, overhead, 100.0*overhead/input.entropy, bits_per_entry, levels_per_query);
    if let Some(ref mut f) = file {
        writeln!(f, " {} {}", bits_per_entry, levels_per_query).unwrap();
    }
}
#[inline] fn rounded_div(a: u32, b: u32) -> u32 { (a+b/2)/b }
// Runs `benchmark` for every builder produced by `functions()` over a sweep of
// generated inputs whose entropy grows from `conf.from` towards `conf.to`.
// Inputs closer together than `conf.resolution` (in entropy) are skipped,
// except the very last point of each sweep, which is always benchmarked.
// `functions` is a factory (not a single iterator) because the whole set of
// builders is re-created for every benchmarked input.
fn benchmark_all_functions<CSF, CSFIter, GetFunctions>(conf: &Conf, file: &mut Option<File>, functions: GetFunctions)
where GetFunctions: Fn() -> CSFIter, CSFIter: IntoIterator<Item = CSF>, CSF: CSFBuilder+PrintParams
{
// More than one builder => print each result on its own indented line.
let has_multiple_functions = functions().into_iter().nth(1).is_some();
match conf.distribution {
Distribution::Equal => {
let mut prev_entropy = -1.0f64;
// Sweep the number of distinct values (2..=256 fits u8 values) and the
// count of the last value; all other values occur `each_value_len` times.
for different_values in 2..=256 {
let each_value_len = rounded_div(conf.keys_num, different_values);
for last_count in 1..=each_value_len {
let total_len = (different_values - 1) * each_value_len + last_count;
// NOTE(review): entropy of the "equal" input is computed with the
// dominated-lo helper using lo = each_value_len -- presumably the two
// distributions coincide for this parameterization; confirm in the
// distribution module.
let entropy = kv_dominated_lo_entropy(total_len, different_values, each_value_len);
if entropy < conf.from { continue; }
// Entropy grows monotonically along the sweep, so past `to` we can stop.
if entropy >= conf.to { return; }
// Benchmark when the entropy moved enough, or at the final sweep point.
if (different_values == 256 && last_count == each_value_len) || entropy - prev_entropy >= conf.resolution {
print!(
"{}*{}+{}={} key-values: ",
different_values-1, each_value_len, last_count, total_len
);
if has_multiple_functions { println!(); }
prev_entropy = entropy;
for csf in functions() {
if has_multiple_functions { print!("\t"); }
// Fresh input per builder: `benchmark` consumes it by value.
let (k, v) = kv_dominated_lo(total_len, different_values, each_value_len);
benchmark((k, v, entropy).into(), csf, file)
}
}
}
}
},
Distribution::Dominated => {
// Fixed number of distinct values; sweep how many keys map to the
// low-frequency values (`lo_count`), which drives the entropy up.
let different_values = 256;
let mut prev_entropy = -1.0f64;
for lo_count in 1..=conf.keys_num / different_values {
let entropy = kv_dominated_lo_entropy(conf.keys_num, different_values, lo_count);
if entropy < conf.from { continue; }
if entropy >= conf.to { return; }
// Benchmark when the entropy moved enough, or at the final sweep point.
if lo_count == conf.keys_num / different_values || entropy - prev_entropy >= conf.resolution {
print!("{} {}: ", lo_count, entropy);
if has_multiple_functions { println!(); }
prev_entropy = entropy;
for csf in functions() {
if has_multiple_functions { print!("\t"); }
let (k, v) = kv_dominated_lo(conf.keys_num, different_values, lo_count);
benchmark((k, v, entropy).into(), csf, file)
}
}
}
},
}
}
/// Opens (creating if necessary) the CSV results file for `function_name`
/// under `csf_benchmark_results/`, in append mode. Returns `None` when detail
/// saving is disabled. The header row is written only when the file did not
/// exist yet; `function_header` may be empty for functions without extra
/// parameter columns.
fn file(conf: &Conf, function_name: &str, function_header: &str) -> Option<File> {
    conf.save_details.then(|| {
        // create_dir_all succeeds when the directory already exists, so (unlike
        // create_dir) this no longer prints a spurious "AlreadyExists" error on
        // every run after the first; real failures are still reported.
        if let Err(e) = fs::create_dir_all("csf_benchmark_results") {
            println!("create_dir csf_benchmark_results: {}", e);
        }
        let file_name = format!("csf_benchmark_results/{}_{}.csv", function_name, conf.distribution.name());
        let file_already_existed = std::path::Path::new(&file_name).exists();
        let mut file = fs::OpenOptions::new().append(true).create(true).open(&file_name).unwrap();
        if !file_already_existed {
            if function_header.is_empty() {
                writeln!(file, "{} {}", Input::HEADER, BENCHMARK_HEADER).unwrap();
            } else {
                writeln!(file, "{} {} {}", Input::HEADER, function_header, BENCHMARK_HEADER).unwrap();
            }
        }
        file
    })
}
/// Benchmarks the group-optimized compressed fingerprinting map for the given
/// per-group seed/size bit widths, sweeping the configured bits-per-fragment
/// range. The level sizer is selected from `fpconf` (0 = default sizer;
/// `level_size_proportional` switches to percent-based sizing).
fn fpgo(file: &mut Option<File>, conf: &Conf, bits_per_seed: u8, bits_per_group: u8, fpconf: &FPGOConf) {
    let fragment_bits = conf.bits_per_fragments();
    let group_conf = fp::GOConf::bps_bpg(Bits(bits_per_seed), Bits(bits_per_group));
    if fpconf.level_size_proportional {
        if fpconf.level_size == 0 {
            benchmark_all_functions(&conf, file, || {
                fragment_bits.clone().map(|b| fp::GOCMapConf::groups_lsize_coding(group_conf.clone(), fp::ProportionalLevelSize::default(), BuildMinimumRedundancy{ bits_per_fragment: b }))
            })
        } else {
            benchmark_all_functions(&conf, file, || {
                fragment_bits.clone().map(|b| fp::GOCMapConf::groups_lsize_coding(group_conf.clone(), fp::ProportionalLevelSize::with_percent(fpconf.level_size), BuildMinimumRedundancy{ bits_per_fragment: b }))
            })
        }
    } else if fpconf.level_size == 0 {
        benchmark_all_functions(&conf, file, || {
            fragment_bits.clone().map(|b| fp::GOCMapConf::groups_coding(group_conf.clone(), BuildMinimumRedundancy{ bits_per_fragment: b }))
        })
    } else {
        benchmark_all_functions(&conf, file, || {
            fragment_bits.clone().map(|b| fp::GOCMapConf::groups_lsize_coding(group_conf.clone(), fp::ResizedLevel::new(fpconf.level_size, fp::OptimalLevelSize::default()), BuildMinimumRedundancy{ bits_per_fragment: b }))
        })
    }
}
/// Benchmarks the group-optimized compressed fingerprinting map over the full
/// grid of (bits per fragment, bits per group seed, group size) combinations,
/// all with the given level sizer. Results go to "fpgo_all" CSV files when
/// detail saving is enabled.
fn fpgo_all<L: fp::LevelSizer+Copy>(conf: &Conf, level_size: L)
where fp::GOCMapConf<BuildMinimumRedundancy, L, Bits, Bits>: PrintParams
{
    let mut file = file(&conf, "fpgo_all", FPGO_HEADER);
    let b_range = conf.bits_per_fragments();
    benchmark_all_functions(&conf, &mut file, || {
        b_range.clone().flat_map(|b| {
            (1u8..=8u8).flat_map(move |bits_per_seed| {
                (2u8..=62u8).map(move |bits_per_group| {
                    // `goconf` is consumed exactly once and `level_size` is
                    // `Copy`, so the previous `.clone()` calls were redundant.
                    let goconf = fp::GOConf::bps_bpg(Bits(bits_per_seed), Bits(bits_per_group));
                    fp::GOCMapConf::groups_lsize_coding(goconf, level_size, BuildMinimumRedundancy{ bits_per_fragment: b })
                })
            })
        })
    });
}
// Entry point: parses the CLI and dispatches to the benchmark matching the
// chosen function variant. Match-arm order in the CFPGO deduction table below
// is significant and must not be reordered.
fn main() {
let conf: Conf = Conf::parse();
match conf.function {
Function::CFPGO_all(ref fpconf) => {
// Choose the level sizer from (level_size, proportional); 0 = default sizer.
match (fpconf.level_size, fpconf.level_size_proportional) {
(0, true) => fpgo_all(&conf, fp::ProportionalLevelSize::default()),
(level_size, true) => fpgo_all(&conf, fp::ProportionalLevelSize::with_percent(level_size)),
(0, false) => fpgo_all(&conf, fp::OptimalLevelSize::default()),
(level_size, false) => fpgo_all(&conf, fp::ResizedLevel::new(level_size, fp::OptimalLevelSize::default())),
}
}
Function::CFPGO(ref fpconf) => {
let mut file = file(&conf, "cfpgo", FPGO_HEADER);
// Deduce (bits per group seed, group size) pairs from whichever of the two
// options were given. The fully-specified arm must stay before the
// partially-specified ones, since `(Some(1), None)` etc. would also match
// some fully-specified inputs otherwise.
match (fpconf.bits_per_group_seed, fpconf.group_size) {
(None, None) => {
// Neither given: benchmark a default selection of supported pairs.
for (bits_per_group_seed, bits_per_group) in [(1, 8), (2, 16), (4, 16), (8, 32)] {
fpgo(&mut file, &conf, bits_per_group_seed, bits_per_group, fpconf);
}
},
(Some(bits_per_group_seed), Some(bits_per_group)) => fpgo(&mut file, &conf, bits_per_group_seed, bits_per_group, fpconf),
// One of the pair given: fill in the conventional partner value(s).
(Some(1), None) | (None, Some(8)) => fpgo(&mut file, &conf, 1, 8, fpconf),
(Some(2), None) => fpgo(&mut file, &conf, 2, 16, fpconf),
(Some(4), None) => fpgo(&mut file, &conf, 4, 16, fpconf),
(None, Some(16)) => {
// Group size 16 is supported with either 2- or 4-bit seeds: run both.
fpgo(&mut file, &conf, 2, 16, fpconf);
fpgo(&mut file, &conf, 4, 16, fpconf);
}
(Some(8), None) | (None, Some(32)) => fpgo(&mut file, &conf, 8, 32, fpconf),
_ => eprintln!("Cannot deduce for which pairs of (bits per group seed, group size) calculate.")
}
},
Function::CFP(ref fpconf) => {
let mut file = file(&conf, "cfp", CFP_HEADER);
let b_range = conf.bits_per_fragments();
// Same level-sizer selection as CFPGO_all, for the non-grouped compressed map.
match (fpconf.level_size, fpconf.level_size_proportional) {
(0, true) => benchmark_all_functions(&conf, &mut file, || {
b_range.clone().map(|b| fp::CMapConf::lsize_coding(fp::ProportionalLevelSize::default(), BuildMinimumRedundancy{ bits_per_fragment: b }))
}),
(level_size, true) => benchmark_all_functions(&conf, &mut file, || {
b_range.clone().map(|b| fp::CMapConf::lsize_coding(fp::ProportionalLevelSize::with_percent(level_size), BuildMinimumRedundancy{ bits_per_fragment: b }))
}),
(0, false) => benchmark_all_functions(&conf, &mut file, || {
b_range.clone().map(|b| fp::CMapConf::coding(BuildMinimumRedundancy{ bits_per_fragment: b }))
}),
(level_size, false) => benchmark_all_functions(&conf, &mut file, || {
b_range.clone().map(|b| fp::CMapConf::lsize_coding(fp::ResizedLevel::new(level_size, fp::OptimalLevelSize::default()), BuildMinimumRedundancy{ bits_per_fragment: b }))
}),
}
},
Function::FP(ref fpconf) => {
let mut file = file(&conf, "fp", FP_HEADER);
// Plain (uncompressed) fingerprinting map: no bits-per-fragment sweep,
// a single configuration per level sizer.
match (fpconf.level_size, fpconf.level_size_proportional) {
(0, true) => benchmark_all_functions(&conf, &mut file, || {
[fp::MapConf::lsize(fp::ProportionalLevelSize::default())]
}),
(level_size, true) => benchmark_all_functions(&conf, &mut file, || {
[fp::MapConf::lsize(fp::ProportionalLevelSize::with_percent(level_size))]
}),
(0, false) => benchmark_all_functions(&conf, &mut file, || {
[fp::MapConf::default()]
}),
(level_size, false) => benchmark_all_functions(&conf, &mut file, || {
[fp::MapConf::lsize(fp::ResizedLevel::new(level_size, fp::OptimalLevelSize::default()))]
}),
}
},
Function::CLS => {
let mut file = file(&conf, "cls", CLS_HEADER);
let b_range = conf.bits_per_fragments();
benchmark_all_functions(&conf, &mut file, || {
b_range.clone().map(|b| function::BuildLSCMap(b))
});
},
Function::LS => {
// Empty function header: the LS builder has no extra parameter columns.
let mut file = file(&conf, "ls", "");
benchmark_all_functions(&conf, &mut file, || { [function::BuildLSMap] });
},
}
}