bitpack_vec/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use std::fmt::Debug;
4
5use deepsize::DeepSizeOf;
6use serde::{Deserialize, Serialize};
7
8/// A densely-packed vector of integers with fixed bit-length
9#[derive(DeepSizeOf, Serialize, Deserialize)]
10pub struct BitpackVec {
11    data: Vec<u64>,
12    len: usize,
13    width: u8,
14}
15
16impl BitpackVec {
17    /// Constructs a new `BitpackVec` with the given bit-width.
18    ///
19    /// ```rust
20    /// use bitpack_vec::BitpackVec;
21    /// let bv = BitpackVec::new(5);
22    /// assert_eq!(bv.width(), 5);
23    /// ```
24    pub fn new(width: usize) -> BitpackVec {
25        assert!(width > 0, "bitpack width must be greater than 0");
26        assert!(
27            width <= 64,
28            "bitpack width must be less than or equal to 64"
29        );
30
31        BitpackVec {
32            data: Vec::with_capacity(1),
33            len: 0,
34            width: width as u8,
35        }
36    }
37
38    /// Construct a `BitpackVec` from a vector of `u64`s, interpreting the
39    /// vector with the given bitwidth.
40    ///
41    /// ```rust
42    /// use bitpack_vec::BitpackVec;
43    ///
44    /// let v = vec![6];
45    /// let bv = BitpackVec::from_raw_vec(v, 5, 12);
46    /// assert_eq!(bv.at(0), 6);
47    /// assert_eq!(bv.at(1), 0);
48    /// ```
49    pub fn from_raw_vec(data: Vec<u64>, width: usize, len: usize) -> BitpackVec {
50        assert!(
51            data.len() * 64 > width * len,
52            "data is not long enough to be valid"
53        );
54        BitpackVec {
55            data,
56            width: width as u8,
57            len,
58        }
59    }
60
61    /// Returns the internal vector representing the bitpacked data
62    ///
63    /// ```rust
64    /// use bitpack_vec::BitpackVec;
65    ///
66    /// let bv = BitpackVec::from_slice(&[10, 15]);
67    /// let v = bv.into_raw_vec();
68    /// assert_eq!(v.len(), 1);
69    /// assert_eq!(v[0], (15 << 4) | 10);
70    /// ```
71    pub fn into_raw_vec(self) -> Vec<u64> {
72        self.data
73    }
74
75    /// Reference to the internal vector, see [into_raw_vec](Self::into_raw_vec)
76    pub fn as_raw(&self) -> &[u64] {
77        &self.data
78    }
79
80    /// Construct a `BitpackVec` from a slice of `u64`s. The smallest
81    /// correct bitwidth will be computed.
82    ///
83    /// ```rust
84    /// use bitpack_vec::BitpackVec;
85    /// let bv = BitpackVec::from_slice(&[5, 12, 13]);
86    /// assert_eq!(bv.width(), 4);
87    /// assert_eq!(bv.at(2), 13);
88    /// ```
89    pub fn from_slice(x: &[u64]) -> BitpackVec {
90        assert!(
91            !x.is_empty(),
92            "Cannot make bitpacked vector from empty slice"
93        );
94
95        // scan the data to figure out the fewest bits needed
96        let max = x.iter().max().unwrap();
97        let bits = 64 - max.leading_zeros();
98
99        let mut bv = BitpackVec::new(bits as usize);
100
101        for i in x {
102            bv.push(*i);
103        }
104
105        bv
106    }
107
108    /// Returns the number of items in the bitpacked vector.
109    /// ```rust
110    /// use bitpack_vec::BitpackVec;
111    /// let bv = BitpackVec::from_slice(&[5, 12, 13]);
112    /// assert_eq!(bv.len(), 3);
113    /// ```
114    pub fn len(&self) -> usize {
115        self.len
116    }
117
118    /// Returns the packing width of the vector (the size in bits of each
119    /// element).
120    ///
121    /// ```rust
122    /// use bitpack_vec::BitpackVec;
123    /// let bv = BitpackVec::from_slice(&[5, 12, 13]);
124    /// assert_eq!(bv.width(), 4);
125    /// ```
126    pub fn width(&self) -> usize {
127        self.width as usize
128    }
129
130    /// Returns the number of items that can be inserted into the
131    /// `BitpackVec` before the vector will grow.
132    pub fn capacity(&self) -> usize {
133        (self.data.capacity() * 64) / self.width()
134    }
135
136    /// Determines if `x` can fit inside this vector.
137    ///
138    /// ```rust
139    /// use bitpack_vec::BitpackVec;
140    ///
141    /// let bv = BitpackVec::new(5);
142    ///
143    /// assert!(bv.fits(31));
144    /// assert!(!bv.fits(32));
145    /// ```
146    pub fn fits(&self, x: u64) -> bool {
147        x < 2_u64.pow(self.width as u32)
148    }
149
150    /// Appends an item to the back of the `BitpackVec`. Panics if the item
151    /// is too large for the vector's bitwidth.
152    ///
153    /// ```rust
154    /// use bitpack_vec::BitpackVec;
155    /// let mut bv = BitpackVec::new(5);
156    ///
157    /// bv.push(4);
158    /// bv.push(22);
159    ///
160    /// assert_eq!(bv.at(0), 4);
161    /// assert_eq!(bv.at(1), 22);
162    /// ```
163    ///
164    /// Adding items that are too large will cause a panic:
165    /// ```should_panic
166    /// use bitpack_vec::BitpackVec;
167    /// let mut bv = BitpackVec::new(5);
168    ///
169    /// bv.push(90);  // panics
170    /// ```
171    pub fn push(&mut self, x: u64) {
172        assert!(
173            self.fits(x),
174            "value {} too large to bitpack to width {}",
175            x,
176            self.width
177        );
178
179        // calculate position info
180        let start_bit = self.len * self.width();
181        let stop_bit = start_bit + self.width() - 1;
182
183        let start_u64 = start_bit / 64;
184        let stop_u64 = stop_bit / 64;
185
186        // ensure we have enough u64s to store the number
187        while self.data.len() <= stop_u64 {
188            self.data.push(0);
189        }
190
191        let local_start_bit = start_bit % 64;
192        if start_u64 == stop_u64 {
193            // pack the whole value into the same u64
194            self.data[start_u64] |= x << local_start_bit;
195        } else {
196            // we have to pack part of the number into one u64, and the
197            // rest into the next
198            let bits_in_first_cell = 64 - local_start_bit;
199            self.data[start_u64] |= x << local_start_bit;
200            self.data[stop_u64] |= x >> bits_in_first_cell;
201        }
202
203        self.len += 1;
204    }
205
206    /// Sets an existing element to a particular value. Panics if `idx` is
207    /// out of range or if `x` does not fit within the bitwidth.
208    ///
209    /// ```rust
210    /// use bitpack_vec::BitpackVec;
211    /// let mut bv = BitpackVec::new(5);
212    ///
213    /// bv.push(5);
214    ///
215    /// assert_eq!(bv.at(0), 5);
216    /// bv.set(0, 9);
217    /// assert_eq!(bv.at(0), 9);
218    /// ```
219    pub fn set(&mut self, idx: usize, x: u64) {
220        assert!(
221            idx < self.len,
222            "index {} out of bounds for len {}",
223            idx,
224            self.len
225        );
226        assert!(
227            self.fits(x),
228            "value {} too large to bitpack to width {}",
229            x,
230            self.width
231        );
232
233        let start_bit = idx * self.width();
234        let stop_bit = start_bit + self.width() - 1;
235
236        let start_u64 = start_bit / 64;
237        let stop_u64 = stop_bit / 64;
238
239        if start_u64 == stop_u64 {
240            // all in the same u64
241            let local_start_bit = start_bit % 64;
242            let local_stop_bit = local_start_bit + self.width();
243            let v = self.data[start_u64];
244
245            // zero out all the data at index `idx`
246            let mut mask = ((!0_u64) >> local_start_bit) << local_start_bit;
247            mask = (mask << (64 - local_stop_bit)) >> (64 - local_stop_bit);
248            mask = !mask;
249
250            let v = v & mask;
251
252            // now or the value into it
253            let x = x << local_start_bit;
254            let v = v | x;
255
256            self.data[start_u64] = v;
257        } else {
258            // bits are split between two cells
259            let local_start_bit = start_bit % 64;
260
261            // clear the bits currently in the cell
262            let bit_count_in_first = 64 - local_start_bit;
263            let v = self.data[start_u64];
264            let v = (v << bit_count_in_first) >> bit_count_in_first;
265            let prefix_bits = x << (64 - bit_count_in_first);
266            let v = v | prefix_bits;
267            self.data[start_u64] = v;
268
269            // clear the bits at the front of the next cell
270            let remaining_bit_count = self.width() - bit_count_in_first;
271            let v = self.data[stop_u64];
272            let v = (v >> remaining_bit_count) << remaining_bit_count;
273            let x = x >> bit_count_in_first;
274            let v = v | x;
275
276            self.data[stop_u64] = v;
277        }
278    }
279
280    /// Returns the value at the specified index. Panics if `idx` is out of
281    /// range.
282    ///
283    /// ```rust
284    /// use bitpack_vec::BitpackVec;
285    /// let mut bv = BitpackVec::new(5);
286    ///
287    /// bv.push(5);
288    ///
289    /// assert_eq!(bv.at(0), 5);
290    /// ```
291    pub fn at(&self, idx: usize) -> u64 {
292        assert!(
293            idx < self.len,
294            "index {} out of bounds for len {}",
295            idx,
296            self.len
297        );
298        let start_bit = idx * self.width();
299        let stop_bit = start_bit + self.width() - 1;
300
301        let start_u64 = start_bit / 64;
302        let stop_u64 = stop_bit / 64;
303
304        if start_u64 == stop_u64 {
305            // all in the same u64
306            let local_start_bit = start_bit % 64;
307            let local_stop_bit = local_start_bit + self.width();
308            let v = self.data[start_u64];
309
310            let v = v << (64 - local_stop_bit);
311            v >> (64 - self.width)
312        } else {
313            // bits are split between two cells
314            let mut x = 0;
315            let local_start_bit = start_bit % 64;
316            x |= self.data[start_u64] >> local_start_bit;
317            x |= self.data[stop_u64] << (64 - local_start_bit);
318            x = (x << (64 - self.width)) >> (64 - self.width);
319            x
320        }
321    }
322
323    /// Determines if the vector's length is 0 (empty)
324    ///
325    /// ```rust
326    /// use bitpack_vec::BitpackVec;
327    /// let mut bv = BitpackVec::new(5);
328    ///
329    /// assert!(bv.is_empty());
330    /// bv.push(5);
331    /// assert!(!bv.is_empty());
332    /// ```
333    pub fn is_empty(&self) -> bool {
334        self.len == 0
335    }
336
337    /// Removes and returns the last element in the vector. Returns `None`
338    /// is the vector is empty.
339    /// ```rust
340    /// use bitpack_vec::BitpackVec;
341    /// let mut bv = BitpackVec::new(5);
342    ///
343    /// bv.push(5);
344    /// bv.push(6);
345    ///
346    /// assert_eq!(bv.pop(), Some(6));
347    /// assert_eq!(bv.pop(), Some(5));
348    /// assert_eq!(bv.pop(), None);
349    /// ```
350    pub fn pop(&mut self) -> Option<u64> {
351        if self.is_empty() {
352            return None;
353        }
354
355        let idx = self.len() - 1;
356        let last = self.at(idx);
357
358        // zero out the value so our xor works later
359        let start_bit = idx * self.width();
360        let stop_bit = start_bit + self.width() - 1;
361
362        let start_u64 = start_bit / 64;
363        let stop_u64 = stop_bit / 64;
364
365        let local_start_bit = start_bit % 64;
366
367        if local_start_bit == 0 {
368            // this is the only value in the cell
369            self.data.pop();
370        } else {
371            // clear the data in the cell containing the start of the number
372            self.data[start_u64] =
373                (self.data[start_u64] << (64 - local_start_bit)) >> (64 - local_start_bit);
374
375            if start_u64 != stop_u64 {
376                // the last cell is now clear
377                self.data.pop();
378            }
379        }
380
381        self.len -= 1;
382
383        Some(last)
384    }
385
386    /// Truncates the vector to the given length.
387    /// ```rust
388    /// use bitpack_vec::BitpackVec;
389    /// let mut bv = BitpackVec::new(5);
390    ///
391    /// bv.push(5);
392    /// bv.push(6);
393    /// bv.push(7);
394    ///
395    /// assert_eq!(bv.len(), 3);
396    /// bv.truncate(1);
397    /// assert_eq!(bv.len(), 1);
398    /// assert_eq!(bv.at(0), 5);
399    /// ```
400    pub fn truncate(&mut self, len: usize) {
401        // TODO this should compute which entire cells can be dropped, and
402        // do that first
403        while self.len() > len {
404            self.pop();
405        }
406    }
407
408    /// Split the vector into two parts, so that `self` will contain all
409    /// elements from 0 to `idx` (exclusive), and the returned value will
410    /// contain all elements from `idx` to the end of the vector.
411    /// ```rust
412    /// use bitpack_vec::BitpackVec;
413    /// let mut bv = BitpackVec::new(5);
414    ///
415    /// bv.push(5);
416    /// bv.push(6);
417    /// bv.push(7);
418    /// bv.push(8);
419    ///
420    /// let bv_rest = bv.split_off(2);
421    ///
422    /// assert_eq!(bv.to_vec(), &[5, 6]);
423    /// assert_eq!(bv_rest.to_vec(), &[7, 8]);
424    /// ```
425    pub fn split_off(&mut self, idx: usize) -> BitpackVec {
426        assert!(
427            idx <= self.len(),
428            "split index ({}) should be <= len ({})",
429            idx,
430            self.len()
431        );
432        let mut rest = BitpackVec::new(self.width());
433
434        for i in idx..self.len() {
435            rest.push(self.at(i));
436        }
437
438        self.truncate(idx);
439        rest
440    }
441
442    /// Copies the bitpacked vector into a standard, non-packed vector of
443    /// `u64`s.
444    /// ```rust
445    /// use bitpack_vec::BitpackVec;
446    /// let mut bv = BitpackVec::new(5);
447    ///
448    /// bv.push(5);
449    /// bv.push(6);
450    /// bv.push(7);
451    /// bv.push(8);
452    ///
453    /// let v = bv.to_vec();
454    ///
455    /// assert_eq!(v, vec![5, 6, 7, 8]);
456    /// ```
457    pub fn to_vec(&self) -> Vec<u64> {
458        let mut r = Vec::with_capacity(self.len());
459
460        for i in 0..self.len() {
461            r.push(self.at(i))
462        }
463
464        r
465    }
466
467    /// Changes the width of the vector via copying (`O(n)`). Panics if any
468    /// element in the current vector will not fit in `new_width` bits.
469    ///
470    /// ```rust
471    /// use bitpack_vec::BitpackVec;
472    /// let mut bv = BitpackVec::new(5);
473    ///
474    ///
475    /// assert!(!bv.fits(34)); // doesn't fit
476    /// bv.change_width(6);
477    /// assert!(bv.fits(34)); // fits now
478    /// ```
479    pub fn change_width(&mut self, new_width: usize) {
480        if new_width == self.width() {
481            return;
482        }
483
484        let mut nv = BitpackVec::new(new_width);
485        for item in self.iter() {
486            nv.push(item);
487        }
488        *self = nv;
489    }
490
491    /// Allows iteration over the values in the bit vector.
492    /// ```rust
493    /// use bitpack_vec::BitpackVec;
494    /// let mut bv = BitpackVec::new(5);
495    ///
496    /// bv.push(5);
497    /// bv.push(6);
498    /// bv.push(7);
499    /// bv.push(8);
500    ///
501    /// let v: Vec<u64> = bv.iter().filter(|x| x % 2 == 0).collect();
502    /// assert_eq!(v, vec![6, 8]);
503    /// ```
504    pub fn iter(&self) -> BitpackIter {
505        BitpackIter { bv: self, curr: 0 }
506    }
507}
508
509/// Iterator over a `BitpackVec`
510pub struct BitpackIter<'a> {
511    bv: &'a BitpackVec,
512    curr: usize,
513}
514
515impl Iterator for BitpackIter<'_> {
516    type Item = u64;
517
518    fn next(&mut self) -> Option<Self::Item> {
519        if self.curr >= self.bv.len() {
520            return None;
521        }
522
523        let v = self.bv.at(self.curr);
524        self.curr += 1;
525
526        Some(v)
527    }
528}
529
530impl PartialEq for BitpackVec {
531    fn eq(&self, other: &Self) -> bool {
532        if self.width != other.width {
533            return false;
534        }
535
536        if self.len != other.len {
537            return false;
538        }
539
540        // TODO could use a faster, bitwise check here, but we can't just
541        // compare the underlying vecs because an element may have been
542        // removed from one, but the data left and unzero'd
543        for i in 0..self.len() {
544            if self.at(i) != other.at(i) {
545                return false;
546            }
547        }
548
549        true
550    }
551}
552
553impl Debug for BitpackVec {
554    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
555        f.write_str(&format!("BitpackVec (width: {}) [ ", self.width()))?;
556
557        for i in self.iter() {
558            f.write_str(&format!("{} ", i))?;
559        }
560
561        f.write_str("]\n")
562    }
563}
564
565impl Extend<u64> for BitpackVec {
566    fn extend<T: IntoIterator<Item = u64>>(&mut self, iter: T) {
567        for i in iter {
568            self.push(i);
569        }
570    }
571}
572
573#[cfg(test)]
574mod tests {
575    use super::*;
576
577    #[test]
578    pub fn test_simple_6bit() {
579        let mut v = BitpackVec::new(6);
580
581        for i in 0..64 {
582            v.push(i)
583        }
584
585        for i in 0..64 {
586            assert_eq!(v.at(i as usize), i);
587        }
588
589        assert_eq!(v.len(), 64);
590    }
591
592    #[test]
593    pub fn test_simple_5bit() {
594        let mut v = BitpackVec::new(5);
595
596        for i in 0..32 {
597            v.push(i)
598        }
599
600        for i in 0..32 {
601            assert_eq!(v.at(i as usize), i);
602        }
603
604        assert_eq!(v.len(), 32);
605    }
606
607    #[test]
608    pub fn test_perfect_pack_8bit() {
609        let mut v = BitpackVec::new(8);
610
611        v.push(10);
612        v.push(11);
613        v.push(12);
614        v.push(13);
615        v.push(14);
616        v.push(15);
617        v.push(16);
618        v.push(17);
619
620        assert_eq!(v.at(0), 10);
621        assert_eq!(v.at(1), 11);
622        assert_eq!(v.at(2), 12);
623        assert_eq!(v.at(3), 13);
624        assert_eq!(v.at(4), 14);
625        assert_eq!(v.at(5), 15);
626        assert_eq!(v.at(6), 16);
627        assert_eq!(v.at(7), 17);
628
629        assert_eq!(v.capacity(), 8);
630    }
631
632    #[test]
633    pub fn test_immut_n_bit() {
634        for bits in 1..13 {
635            let mut v = BitpackVec::new(bits);
636
637            for x in 0..2_u64.pow(bits as u32) {
638                v.push(x);
639            }
640
641            for i in 0..2_u64.pow(bits as u32) {
642                assert_eq!(v.at(i as usize), i);
643            }
644
645            assert_eq!(v.len(), 2_u64.pow(bits as u32) as usize);
646        }
647    }
648
649    #[test]
650    pub fn test_set() {
651        let mut v = BitpackVec::new(5);
652
653        for i in 0..16 {
654            v.push(i);
655        }
656
657        assert_eq!(v.at(5), 5);
658        v.set(5, 20);
659        assert_eq!(v.at(5), 20);
660        v.set(5, 5);
661        assert_eq!(v.at(5), 5);
662
663        v.set(12, 20);
664        assert_eq!(v.at(12), 20);
665
666        for i in 0..16 {
667            v.set(i as usize, i + 5);
668        }
669
670        for i in 0..16 {
671            assert_eq!(v.at(i as usize), i + 5);
672        }
673    }
674
675    #[test]
676    pub fn test_pop() {
677        let mut v = BitpackVec::new(11);
678        let mut rv = Vec::new();
679
680        for i in 0..2048 {
681            v.push(i);
682            rv.push(i);
683        }
684
685        while let Some(i) = rv.pop() {
686            assert_eq!(i, v.pop().unwrap());
687        }
688
689        assert!(v.pop().is_none());
690    }
691
692    #[test]
693    pub fn test_push_pop() {
694        let mut v = BitpackVec::new(11);
695        let mut rv = Vec::new();
696
697        for i in 0..2048 {
698            v.push(i);
699            rv.push(i);
700        }
701
702        for _ in 0..100 {
703            assert_eq!(v.pop(), rv.pop());
704        }
705
706        for i in 0..10_000 {
707            v.push(i % 2048);
708            rv.push(i % 2048);
709        }
710
711        while let Some(i) = rv.pop() {
712            assert_eq!(i, v.pop().unwrap());
713        }
714
715        assert!(v.pop().is_none());
716    }
717
718    #[test]
719    pub fn test_split_off() {
720        let mut v = Vec::new();
721        let mut bv = BitpackVec::new(7);
722
723        for i in 0..10_000 {
724            v.push(i % 128);
725            bv.push(i % 128);
726        }
727
728        assert_eq!(v, bv.to_vec());
729
730        let rv = v.split_off(500);
731        let rbv = bv.split_off(500);
732
733        assert_eq!(v, bv.to_vec());
734        assert_eq!(rv, rbv.to_vec());
735    }
736
737    #[test]
738    pub fn test_raw() {
739        let mut v = BitpackVec::new(7);
740
741        for i in 0..10_000 {
742            v.push(i % 128);
743        }
744
745        let data = v.into_raw_vec();
746
747        let nv = BitpackVec::from_raw_vec(data, 7, 10_000);
748
749        for i in 0..10_000 {
750            assert_eq!(nv.at(i), i as u64 % 128);
751        }
752    }
753
754    #[test]
755    pub fn test_iter() {
756        let mut v = BitpackVec::new(7);
757
758        for i in 0..10_000 {
759            v.push(i % 128);
760        }
761
762        let from_iter: Vec<u64> = v.iter().collect();
763        assert_eq!(v.to_vec(), from_iter);
764    }
765
766    #[test]
767    pub fn test_eq() {
768        let mut v = BitpackVec::new(7);
769        let mut v2 = BitpackVec::new(7);
770
771        for i in 0..10_000 {
772            v.push(i % 128);
773            v2.push(i % 128);
774        }
775
776        assert_eq!(v, v2);
777
778        v.push(7);
779        assert_ne!(v, v2);
780
781        v.pop();
782        assert_eq!(v, v2);
783
784        v.push(8);
785        v2.push(8);
786        assert_eq!(v, v2);
787    }
788
789    #[test]
790    pub fn test_from_slice() {
791        let v = vec![4, 19, 184, 18314, 62];
792        let bv = BitpackVec::from_slice(&v);
793
794        assert_eq!(bv.width(), 15);
795        assert_eq!(bv.to_vec(), v);
796    }
797
798    #[test]
799    pub fn test_change_width() {
800        let mut v = vec![1, 2, 3, 4];
801        let mut bv = BitpackVec::from_slice(&v);
802
803        v.push(66);
804
805        assert!(!bv.fits(66));
806        bv.change_width(7);
807        assert!(bv.fits(66));
808        bv.push(66);
809
810        assert_eq!(bv.to_vec(), v);
811    }
812
813    #[test]
814    #[should_panic]
815    fn test_does_not_fit_push() {
816        let mut bv = BitpackVec::new(5);
817        bv.push(32);
818    }
819
820    #[test]
821    #[should_panic]
822    fn test_does_not_fit_set() {
823        let mut bv = BitpackVec::new(5);
824        bv.push(18);
825        bv.set(0, 32);
826    }
827}