1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
#![doc = include_str!("../README.md")]
//! # Space overhead (benchmarks)
//!
//! ## Introduction and description of the distributions used
//!
//! The plots show the space overhead (over entropy of the distribution of function values) of the static functions
//! included in the `csf`, for two families of functions that differ in the distribution of values:
//! - Functions with ***equal*** distribution map *2<sup>20</sup> = ed+i* keys to *d+1* different values,
//! where *d=1,...,255* and *i=1,...,e*.
//! One value is assigned to *i* keys, and each of the other *d* values is assigned to *e* keys.
//! The entropy of the distribution of function values is in the range *(log<sub>2</sub>(d), log<sub>2</sub>(d+1)]*
//! and increases with both *d* and *i*.
//! - Functions with ***dominated*** (by a single value) distribution map *2<sup>20</sup>* keys to *256* different values.
//! One value is assigned to *2<sup>20</sup>-255k* keys and each of the remaining *255* values is assigned to *k* keys,
//! where *k=1,...,2<sup>20</sup>/256*. The entropy of the distribution of function values increases with *k*.
//!
//! The dashed black line shown in each plot is the size of the uncompressed vector of values,
//! assuming that each value is stored using the smallest possible (for the number of different values),
//! constant whole number of bits.
//!
//! The data for the plots were generated using the [csf_benchmark](https://crates.io/crates/csf_benchmark) program.
//!
//! ## (Uncompressed) static functions
#![doc=include_str!("../plots/equal_abs.svg")]
#![doc=include_str!("../plots/equal_rel.svg")]
#![doc=include_str!("../plots/dominated_abs.svg")]
#![doc=include_str!("../plots/dominated_rel.svg")]
//!
//! ## Compressed static functions
//!
//! Notes: The functions use Huffman coding to compress the values.
//! The codewords consist of fragments of a length that minimizes the size of the function
//! (which is always 1 bit in the case of [`ls::CMap`]).
//! Level sizes of [`fp::CMap`] and [`fp::GOCMap`] are determined by [`fp::OptimalLevelSize`].
//!
#![doc=include_str!("../plots/equal_comp_abs.svg")]
#![doc=include_str!("../plots/equal_comp_rel.svg")]
#![doc=include_str!("../plots/dominated_comp_abs.svg")]
#![doc=include_str!("../plots/dominated_comp_rel.svg")]
pub mod coding;
pub mod fp;
pub mod ls;
pub use dyn_size_of::GetSize;
pub use bitm::bits_to_store;
/// Calculates the minimal number of bits needed to store any of the given `values`.
///
/// # Example
///
/// ```
/// use csf::bits_to_store_any_of;
///
/// assert_eq!(bits_to_store_any_of([2u8, 7, 5, 7]), 3);
/// assert_eq!(bits_to_store_any_of([0u8]), 0);
/// assert_eq!(bits_to_store_any_of::<u32>([]), 0);
/// ```
pub fn bits_to_store_any_of<V: Into<u64>>(values: impl IntoIterator<Item = V>) -> u8 {
values.into_iter().map(|v|Into::<u64>::into(v)).max().map_or(0, bits_to_store)
}
/// Calculates the minimal number of bits needed to store any of the given `values`.
#[inline] pub fn bits_to_store_any_of_ref<'a, V: Clone + Into<u64> + 'a>(values: impl IntoIterator<Item = &'a V>) -> u8 {
bits_to_store_any_of(values.into_iter().cloned())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_bits_to_store_any_of() {
assert_eq!(bits_to_store_any_of::<u32>([]), 0);
assert_eq!(bits_to_store_any_of([0u8]), 0);
assert_eq!(bits_to_store_any_of([0u8, 1]), 1);
assert_eq!(bits_to_store_any_of([2u8, 7, 3]), 3);
assert_eq!(bits_to_store_any_of([u64::MAX, 2, 67]), 64);
assert_eq!(bits_to_store_any_of_ref::<u32>([].iter()), 0);
assert_eq!(bits_to_store_any_of_ref([u64::MAX, 2, 67].iter()), 64);
}
}