summavy_bitpacker/
lib.rs

1mod bitpacker;
2mod blocked_bitpacker;
3
4use std::cmp::Ordering;
5
6pub use crate::bitpacker::{BitPacker, BitUnpacker};
7pub use crate::blocked_bitpacker::BlockedBitpacker;
8
9/// Computes the number of bits that will be used for bitpacking.
10///
11/// In general the target is the minimum number of bits
12/// required to express the amplitude given in argument.
13///
14/// e.g. If the amplitude is 10, we can store all ints on simply 4bits.
15///
16/// The logic is slightly more convoluted here as for optimization
17/// reasons, we want to ensure that a value spawns over at most 8 bytes
18/// of aligned bytes.
19///
20/// Spanning over 9 bytes is possible for instance, if we do
21/// bitpacking with an amplitude of 63 bits.
22/// In this case, the second int will start on bit
23/// 63 (which belongs to byte 7) and ends at byte 15;
24/// Hence 9 bytes (from byte 7 to byte 15 included).
25///
26/// To avoid this, we force the number of bits to 64bits
27/// when the result is greater than `64-8 = 56 bits`.
28///
29/// Note that this only affects rare use cases spawning over
30/// a very large range of values. Even in this case, it results
31/// in an extra cost of at most 12% compared to the optimal
32/// number of bits.
33pub fn compute_num_bits(n: u64) -> u8 {
34    let amplitude = (64u32 - n.leading_zeros()) as u8;
35    if amplitude <= 64 - 8 {
36        amplitude
37    } else {
38        64
39    }
40}
41
42/// Computes the (min, max) of an iterator of `PartialOrd` values.
43///
44/// For values implementing `Ord` (in a way consistent to their `PartialOrd` impl),
45/// this function behaves as expected.
46///
47/// For values with partial ordering, the behavior is non-trivial and may
48/// depends on the order of the values.
49/// For floats however, it simply returns the same results as if NaN were
50/// skipped.
51pub fn minmax<I, T>(mut vals: I) -> Option<(T, T)>
52where
53    I: Iterator<Item = T>,
54    T: Copy + PartialOrd,
55{
56    let first_el = vals.find(|val| {
57        // We use this to make sure we skip all NaN values when
58        // working with a float type.
59        val.partial_cmp(val) == Some(Ordering::Equal)
60    })?;
61    let mut min_so_far: T = first_el;
62    let mut max_so_far: T = first_el;
63    for val in vals {
64        if val.partial_cmp(&min_so_far) == Some(Ordering::Less) {
65            min_so_far = val;
66        }
67        if val.partial_cmp(&max_so_far) == Some(Ordering::Greater) {
68            max_so_far = val;
69        }
70    }
71    Some((min_so_far, max_so_far))
72}
73
74#[cfg(test)]
75mod tests {
76    use super::*;
77
78    #[test]
79    fn test_compute_num_bits() {
80        assert_eq!(compute_num_bits(1), 1u8);
81        assert_eq!(compute_num_bits(0), 0u8);
82        assert_eq!(compute_num_bits(2), 2u8);
83        assert_eq!(compute_num_bits(3), 2u8);
84        assert_eq!(compute_num_bits(4), 3u8);
85        assert_eq!(compute_num_bits(255), 8u8);
86        assert_eq!(compute_num_bits(256), 9u8);
87        assert_eq!(compute_num_bits(5_000_000_000), 33u8);
88    }
89
90    #[test]
91    fn test_minmax_empty() {
92        let vals: Vec<u32> = vec![];
93        assert_eq!(minmax(vals.into_iter()), None);
94    }
95
96    #[test]
97    fn test_minmax_one() {
98        assert_eq!(minmax(vec![1].into_iter()), Some((1, 1)));
99    }
100
101    #[test]
102    fn test_minmax_two() {
103        assert_eq!(minmax(vec![1, 2].into_iter()), Some((1, 2)));
104        assert_eq!(minmax(vec![2, 1].into_iter()), Some((1, 2)));
105    }
106
107    #[test]
108    fn test_minmax_nan() {
109        assert_eq!(
110            minmax(vec![f64::NAN, 1f64, 2f64].into_iter()),
111            Some((1f64, 2f64))
112        );
113        assert_eq!(
114            minmax(vec![2f64, f64::NAN, 1f64].into_iter()),
115            Some((1f64, 2f64))
116        );
117        assert_eq!(
118            minmax(vec![2f64, 1f64, f64::NAN].into_iter()),
119            Some((1f64, 2f64))
120        );
121    }
122
123    #[test]
124    fn test_minmax_inf() {
125        assert_eq!(
126            minmax(vec![f64::INFINITY, 1f64, 2f64].into_iter()),
127            Some((1f64, f64::INFINITY))
128        );
129        assert_eq!(
130            minmax(vec![-f64::INFINITY, 1f64, 2f64].into_iter()),
131            Some((-f64::INFINITY, 2f64))
132        );
133        assert_eq!(
134            minmax(vec![2f64, f64::INFINITY, 1f64].into_iter()),
135            Some((1f64, f64::INFINITY))
136        );
137        assert_eq!(
138            minmax(vec![2f64, 1f64, -f64::INFINITY].into_iter()),
139            Some((-f64::INFINITY, 2f64))
140        );
141    }
142}