1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#![doc = include_str!("../README.md")]
//! # Space overhead (benchmarks)
//! 
//! ## Introduction and description of the distributions used
//! 
//! The plots show the space overhead (over entropy of the distribution of function values) of the static functions
//! included in the `csf`, for two families of functions that differ in the distribution of values:
//! - Functions with ***equal*** distribution map *2<sup>20</sup> = ed+i* keys to *d+1* different values,
//!   where *d=1,...,255* and *i=1,...,e*.
//!   One value is assigned to *i* keys, and each of the other *d* values is assigned to *e* keys.
//!   The entropy of the distribution of function values is in the range *(log<sub>2</sub>(d), log<sub>2</sub>(d+1)]*
//!   and increases with both *d* and *i*.
//! - Functions with ***dominated*** (by a single value) distribution map *2<sup>20</sup>* keys to *256* different values.
//!   One value is assigned to *2<sup>20</sup>-255k* keys and each of the remaining *255* values is assigned to *k* keys,
//!   where *k=1,...,2<sup>20</sup>/256*. The entropy of the distribution of function values increases with *k*.
//!
//! The dashed black line shown in each plot is the size of the uncompressed vector of values,
//! assuming that each value is stored using the smallest possible (for the number of different values),
//! constant whole number of bits.
//! 
//! The data for the plots were generated using the [csf_benchmark](https://crates.io/crates/csf_benchmark) program.
//! 
//! ## (Uncompressed) static functions
#![doc=include_str!("../plots/equal_abs.svg")]
#![doc=include_str!("../plots/equal_rel.svg")]
#![doc=include_str!("../plots/dominated_abs.svg")]
#![doc=include_str!("../plots/dominated_rel.svg")]
//!
//! ## Compressed static functions
//!
//! Notes: The functions use Huffman coding to compress the values.
//! The codewords consist of fragments of a length that minimizes the size of the function
//! (which is always 1 bit in the case of [`ls::CMap`]).
//! Level sizes of [`fp::CMap`] and [`fp::GOCMap`] are determined by [`fp::OptimalLevelSize`].
//! 
#![doc=include_str!("../plots/equal_comp_abs.svg")]
#![doc=include_str!("../plots/equal_comp_rel.svg")]
#![doc=include_str!("../plots/dominated_comp_abs.svg")]
#![doc=include_str!("../plots/dominated_comp_rel.svg")]

pub mod coding;

pub mod fp;
pub mod ls;

pub use dyn_size_of::GetSize;
pub use bitm::bits_to_store;

/// Calculates the minimal number of bits needed to store any of the given `values`.
/// 
/// # Example
///
/// ```
/// use csf::bits_to_store_any_of;
///
/// assert_eq!(bits_to_store_any_of([2u8, 7, 5, 7]), 3);
/// assert_eq!(bits_to_store_any_of([0u8]), 0);
/// assert_eq!(bits_to_store_any_of::<u32>([]), 0);
/// ```
pub fn bits_to_store_any_of<V: Into<u64>>(values: impl IntoIterator<Item = V>) -> u8 {
    values.into_iter().map(|v|Into::<u64>::into(v)).max().map_or(0, bits_to_store)
}

/// Calculates the minimal number of bits needed to store any of the given `values`.
#[inline] pub fn bits_to_store_any_of_ref<'a, V: Clone + Into<u64> + 'a>(values: impl IntoIterator<Item = &'a V>) -> u8 {
    bits_to_store_any_of(values.into_iter().cloned())
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_bits_to_store_any_of() {
        assert_eq!(bits_to_store_any_of::<u32>([]), 0);
        assert_eq!(bits_to_store_any_of([0u8]), 0);
        assert_eq!(bits_to_store_any_of([0u8, 1]), 1);
        assert_eq!(bits_to_store_any_of([2u8, 7, 3]), 3);
        assert_eq!(bits_to_store_any_of([u64::MAX, 2, 67]), 64);

        assert_eq!(bits_to_store_any_of_ref::<u32>([].iter()), 0);
        assert_eq!(bits_to_store_any_of_ref([u64::MAX, 2, 67].iter()), 64);
    }
}