bit_long_vec/
lib.rs

1//! Vector with fixed bit sized values stored in long.
2//!
3//! Effective to reduce the amount of memory needed for storage values
4//! whose size is not a power of 2. As drawback to set and  get values
5//! uses additional CPU cycles for bit operations.
6//!
7//! # Example
8//!
9//! In this particular scenario, we want to store 10 bit values. It takes
10//! 200 bytes to store 100 values using short. To store 100 values using
11//! a bit long vector, 15 lengths are required, which is 120 bytes (-40%).
12//!
13//! ```
14//! use bit_long_vec::BitLongVec;
15//!
16//! let mut vec = BitLongVec::with_fixed_capacity(100, 10);
17//!
18//! for index in 0..100 {
19//!     vec.set(index, 1023);
20//!     assert_eq!(vec.get(index), 1023);
21//! }
22//! ```
23pub struct BitLongVec {
24    /// Capacity of array.
25    pub capacity: usize,
26    /// Bits per value in internal storage.
27    pub bits_per_value: u8,
28    /// Maximum possible stored value.
29    pub max_possible_value: u64,
30    /// Internal storage for values.
31    pub data: Vec<u64>,
32}
33
34impl BitLongVec {
35    /// Create a fixed capacity vector. All values are will be initialized to 0.
36    ///
37    /// # Panics
38    ///
39    /// Panics if `bits_per_value` is greater or equals 64.
40    pub fn with_fixed_capacity(capacity: usize, bits_per_value: u8) -> Self {
41        assert!(64 > bits_per_value, "Bit per value must be less than 64");
42
43        let longs_required = ((capacity * bits_per_value as usize) as f64 / 64.0).ceil() as usize;
44        let max_possible_value = (1 << bits_per_value as u64) - 1;
45        let data = vec![0u64; longs_required]; // <- Fastest way to initialize a vector.
46
47        BitLongVec {
48            capacity,
49            bits_per_value,
50            max_possible_value,
51            data,
52        }
53    }
54
55    /// Create vector from long array.
56    ///
57    /// # Panics
58    ///
59    /// Panics if `bits_per_value` >= 64 or data length not match capacity.
60    pub fn from_data(data: Vec<u64>, capacity: usize, bits_per_value: u8) -> Self {
61        assert!(64 > bits_per_value, "Bit per value must be less than 64");
62        let longs_required = ((capacity * bits_per_value as usize) as f64 / 64.0).ceil() as usize;
63        assert_eq!(longs_required, data.len(), "Data length not match capacity");
64
65        let max_possible_value = (1 << bits_per_value as u64) - 1;
66
67        BitLongVec {
68            capacity,
69            bits_per_value,
70            max_possible_value,
71            data,
72        }
73    }
74
75    /// Sets the `value` in the` index` position.
76    ///
77    /// # Panics
78    ///
79    /// Panics if `index` out of bounds or `value` exceeds maximum.
80    pub fn set(&mut self, index: usize, value: u64) {
81        assert!(self.capacity > index, "Index out of bounds");
82        assert!(self.max_possible_value >= value, "Value exceeds maximum");
83
84        let bit_index = index * self.bits_per_value as usize;
85        let long_index = bit_index / 64;
86        let long_bit_start_index = bit_index % 64;
87
88        self.data[long_index] &= !(self.max_possible_value << long_bit_start_index as u64);
89        self.data[long_index] |= value << long_bit_start_index as u64;
90
91        // Value overlaps in the next long.
92        if long_bit_start_index + self.bits_per_value as usize > 64 {
93            let bits_written = 64 - long_bit_start_index;
94            let bits_remaining = self.bits_per_value as usize - bits_written;
95
96            let remainder_max_possible_value = (1 << bits_remaining as u64) - 1;
97
98            self.data[long_index + 1] &= !(remainder_max_possible_value);
99            self.data[long_index + 1] |= value >> bits_written as u64;
100        }
101    }
102
103    /// Returns the `value` in the` index` position.
104    ///
105    /// # Panics
106    ///
107    /// Panics if `index` out of bounds.
108    pub fn get(&self, index: usize) -> u64 {
109        assert!(self.capacity > index, "Index out of bounds");
110
111        let bit_index = index * self.bits_per_value as usize;
112        let long_index = bit_index / 64;
113        let long_bit_start_index = bit_index % 64;
114
115        let mut value = self.data[long_index] >> long_bit_start_index as u64;
116
117        // Value overlaps in the next long.
118        if long_bit_start_index + self.bits_per_value as usize > 64 {
119            value |= self.data[long_index + 1] << 64 - long_bit_start_index as u64;
120        }
121
122        value & self.max_possible_value
123    }
124
125    /// Return new vector resized to new `bits_per_block`.
126    ///
127    /// # Panics
128    ///
129    /// Panics if `bits_per_value` >= 64 or `value` after resize exceeds maximum.
130    pub fn resize(&self, bits_per_block: u8) -> BitLongVec {
131        let mut new_vec = BitLongVec::with_fixed_capacity(self.capacity, bits_per_block);
132
133        for index in 0..self.capacity {
134            new_vec.set(index, self.get(index));
135        }
136
137        new_vec
138    }
139}
140
141#[test]
142fn test_longs_required() {
143    let data = vec![
144        (2048, 4, 128),
145        (4096, 4, 256),
146        (2048, 8, 256),
147        (4096, 8, 512),
148        (4096, 14, 896),
149    ];
150
151    for (capacity, bits_per_value, expected_length) in data {
152        let vec = BitLongVec::with_fixed_capacity(capacity, bits_per_value);
153
154        assert_eq!(vec.data.len(), expected_length);
155        assert_eq!(vec.data.capacity(), expected_length);
156    }
157}
158
159#[test]
160fn test_max_possible_value() {
161    let data = vec![(4, 15), (5, 31), (6, 63), (7, 127), (8, 255), (14, 16_383)];
162
163    for (bits_per_value, expected_max_possible_value) in data {
164        let vec = BitLongVec::with_fixed_capacity(1, bits_per_value);
165        assert_eq!(vec.max_possible_value, expected_max_possible_value);
166    }
167}
168
169#[test]
170fn test_set() {
171    let mut vec = BitLongVec::with_fixed_capacity(48, 4);
172
173    // long 1: [1, 2, 3, 4, 0, 0, 0, 0]
174    // long 2: [5, 6, 7, 8, 0, 0, 0, 0]
175    // long 3: [9, 10, 11, 12, 0, 0, 0, 0]
176    for long_index in 0..3 {
177        for long_byte_index in 0..4 {
178            let index = long_index * 16 + long_byte_index;
179            let value = (long_index * 4 + long_byte_index + 1) as u64;
180
181            vec.set(index, value);
182        }
183    }
184
185    assert_eq!(vec.data, vec![17185, 34661, 52137]);
186}
187
188#[test]
189fn test_set_overlap() {
190    let mut vec = BitLongVec::with_fixed_capacity(9, 14);
191
192    for index in 0..9 {
193        vec.set(index, (15_000 + index) as u64);
194    }
195
196    assert_eq!(vec.data, vec![11306972589037353624, 4224634284506261370]);
197}
198
199#[test]
200fn test_set_clean_bits() {
201    let mut vec = BitLongVec::from_data(vec![2762], 3, 4);
202    vec.set(1, 0);
203
204    assert_eq!(vec.data[0], 2570)
205}
206
207#[test]
208fn test_set_overlap_clean_bits() {
209    let data = vec![11306972589037353624, 4224634284506261370];
210    let mut vec = BitLongVec::from_data(data, 9, 14);
211    vec.set(4, 0);
212
213    assert_eq!(vec.data[0], 65987919120595608);
214    assert_eq!(vec.data[1], 4224634284506261312);
215}
216
217#[test]
218fn test_set_change_bits() {
219    let mut vec = BitLongVec::from_data(vec![2762], 3, 4);
220    vec.set(1, 8);
221
222    assert_eq!(vec.data[0], 2698);
223}
224
225#[test]
226fn test_set_overlap_change_bits() {
227    let data = vec![11306972589037353624, 4224634284506261370];
228    let mut vec = BitLongVec::from_data(data, 9, 14);
229    vec.set(4, 8);
230
231    assert_eq!(vec.data[0], 642448671424019096);
232    assert_eq!(vec.data[1], 4224634284506261312);
233}
234
235#[test]
236fn test_get() {
237    let data = vec![17185, 34661, 52137];
238    let vec = BitLongVec::from_data(data, 48, 4);
239
240    // long 1: [1, 2, 3, 4, 0, 0, 0, 0]
241    // long 2: [5, 6, 7, 8, 0, 0, 0, 0]
242    // long 3: [9, 10, 11, 12, 0, 0, 0, 0]
243    for long_index in 0..3 {
244        for long_byte_index in 0..4 {
245            let index = long_index * 16 + long_byte_index;
246            let value = (long_index * 4 + long_byte_index + 1) as u64;
247
248            assert_eq!(vec.get(index), value)
249        }
250    }
251}
252
253#[test]
254fn test_get_overlap() {
255    let data = vec![11306972589037353624, 4224634284506261370];
256    let vec = BitLongVec::from_data(data, 9, 14);
257
258    for index in 0..9 {
259        assert_eq!(vec.get(index), 15_000 + index as u64);
260    }
261}
262
263#[test]
264#[should_panic(expected = "Bit per value must be less than 64")]
265fn test_with_fixed_capacity_bits_above_64() {
266    BitLongVec::with_fixed_capacity(1, 128);
267}
268
269#[test]
270#[should_panic(expected = "Bit per value must be less than 64")]
271fn test_from_data_bits_above_64() {
272    BitLongVec::from_data(vec![], 1, 128);
273}
274
275#[test]
276#[should_panic(expected = "Data length not match capacity")]
277fn test_from_data_length_not_match_capacity() {
278    BitLongVec::from_data(vec![1], 3, 32);
279}
280
281#[test]
282#[should_panic(expected = "Index out of bounds")]
283fn test_set_index_out_of_bounds() {
284    let mut vec = BitLongVec::with_fixed_capacity(1, 4);
285    vec.set(100, 1);
286}
287
288#[test]
289#[should_panic(expected = "Value exceeds maximum")]
290fn test_set_value_exceeds_maximum() {
291    let mut vec = BitLongVec::with_fixed_capacity(1, 4);
292    vec.set(0, 16);
293}
294
295#[test]
296#[should_panic(expected = "Index out of bounds")]
297fn test_get_index_out_of_bounds() {
298    let vec = BitLongVec::with_fixed_capacity(1, 4);
299    vec.get(100);
300}
301
302#[test]
303fn test_resize() {
304    let mut vec = BitLongVec::with_fixed_capacity(15, 8);
305
306    for index in 0..15 {
307        vec.set(index, index as u64 + 1);
308    }
309
310    let new_vec = vec.resize(4);
311
312    for index in 0..15 {
313        assert_eq!(vec.get(index), index as u64 + 1);
314    }
315}