1#![doc = include_str!("../README.md")]
2
3use std::fmt::Debug;
4
5use deepsize::DeepSizeOf;
6use serde::{Deserialize, Serialize};
7
8#[derive(DeepSizeOf, Serialize, Deserialize)]
10pub struct BitpackVec {
11 data: Vec<u64>,
12 len: usize,
13 width: u8,
14}
15
16impl BitpackVec {
17 pub fn new(width: usize) -> BitpackVec {
25 assert!(width > 0, "bitpack width must be greater than 0");
26 assert!(
27 width <= 64,
28 "bitpack width must be less than or equal to 64"
29 );
30
31 BitpackVec {
32 data: Vec::with_capacity(1),
33 len: 0,
34 width: width as u8,
35 }
36 }
37
38 pub fn from_raw_vec(data: Vec<u64>, width: usize, len: usize) -> BitpackVec {
50 assert!(
51 data.len() * 64 > width * len,
52 "data is not long enough to be valid"
53 );
54 BitpackVec {
55 data,
56 width: width as u8,
57 len,
58 }
59 }
60
61 pub fn into_raw_vec(self) -> Vec<u64> {
72 self.data
73 }
74
75 pub fn as_raw(&self) -> &[u64] {
77 &self.data
78 }
79
80 pub fn from_slice(x: &[u64]) -> BitpackVec {
90 assert!(
91 !x.is_empty(),
92 "Cannot make bitpacked vector from empty slice"
93 );
94
95 let max = x.iter().max().unwrap();
97 let bits = 64 - max.leading_zeros();
98
99 let mut bv = BitpackVec::new(bits as usize);
100
101 for i in x {
102 bv.push(*i);
103 }
104
105 bv
106 }
107
108 pub fn len(&self) -> usize {
115 self.len
116 }
117
118 pub fn width(&self) -> usize {
127 self.width as usize
128 }
129
130 pub fn capacity(&self) -> usize {
133 (self.data.capacity() * 64) / self.width()
134 }
135
136 pub fn fits(&self, x: u64) -> bool {
147 x < 2_u64.pow(self.width as u32)
148 }
149
150 pub fn push(&mut self, x: u64) {
172 assert!(
173 self.fits(x),
174 "value {} too large to bitpack to width {}",
175 x,
176 self.width
177 );
178
179 let start_bit = self.len * self.width();
181 let stop_bit = start_bit + self.width() - 1;
182
183 let start_u64 = start_bit / 64;
184 let stop_u64 = stop_bit / 64;
185
186 while self.data.len() <= stop_u64 {
188 self.data.push(0);
189 }
190
191 let local_start_bit = start_bit % 64;
192 if start_u64 == stop_u64 {
193 self.data[start_u64] |= x << local_start_bit;
195 } else {
196 let bits_in_first_cell = 64 - local_start_bit;
199 self.data[start_u64] |= x << local_start_bit;
200 self.data[stop_u64] |= x >> bits_in_first_cell;
201 }
202
203 self.len += 1;
204 }
205
206 pub fn set(&mut self, idx: usize, x: u64) {
220 assert!(
221 idx < self.len,
222 "index {} out of bounds for len {}",
223 idx,
224 self.len
225 );
226 assert!(
227 self.fits(x),
228 "value {} too large to bitpack to width {}",
229 x,
230 self.width
231 );
232
233 let start_bit = idx * self.width();
234 let stop_bit = start_bit + self.width() - 1;
235
236 let start_u64 = start_bit / 64;
237 let stop_u64 = stop_bit / 64;
238
239 if start_u64 == stop_u64 {
240 let local_start_bit = start_bit % 64;
242 let local_stop_bit = local_start_bit + self.width();
243 let v = self.data[start_u64];
244
245 let mut mask = ((!0_u64) >> local_start_bit) << local_start_bit;
247 mask = (mask << (64 - local_stop_bit)) >> (64 - local_stop_bit);
248 mask = !mask;
249
250 let v = v & mask;
251
252 let x = x << local_start_bit;
254 let v = v | x;
255
256 self.data[start_u64] = v;
257 } else {
258 let local_start_bit = start_bit % 64;
260
261 let bit_count_in_first = 64 - local_start_bit;
263 let v = self.data[start_u64];
264 let v = (v << bit_count_in_first) >> bit_count_in_first;
265 let prefix_bits = x << (64 - bit_count_in_first);
266 let v = v | prefix_bits;
267 self.data[start_u64] = v;
268
269 let remaining_bit_count = self.width() - bit_count_in_first;
271 let v = self.data[stop_u64];
272 let v = (v >> remaining_bit_count) << remaining_bit_count;
273 let x = x >> bit_count_in_first;
274 let v = v | x;
275
276 self.data[stop_u64] = v;
277 }
278 }
279
280 pub fn at(&self, idx: usize) -> u64 {
292 assert!(
293 idx < self.len,
294 "index {} out of bounds for len {}",
295 idx,
296 self.len
297 );
298 let start_bit = idx * self.width();
299 let stop_bit = start_bit + self.width() - 1;
300
301 let start_u64 = start_bit / 64;
302 let stop_u64 = stop_bit / 64;
303
304 if start_u64 == stop_u64 {
305 let local_start_bit = start_bit % 64;
307 let local_stop_bit = local_start_bit + self.width();
308 let v = self.data[start_u64];
309
310 let v = v << (64 - local_stop_bit);
311 v >> (64 - self.width)
312 } else {
313 let mut x = 0;
315 let local_start_bit = start_bit % 64;
316 x |= self.data[start_u64] >> local_start_bit;
317 x |= self.data[stop_u64] << (64 - local_start_bit);
318 x = (x << (64 - self.width)) >> (64 - self.width);
319 x
320 }
321 }
322
323 pub fn is_empty(&self) -> bool {
334 self.len == 0
335 }
336
337 pub fn pop(&mut self) -> Option<u64> {
351 if self.is_empty() {
352 return None;
353 }
354
355 let idx = self.len() - 1;
356 let last = self.at(idx);
357
358 let start_bit = idx * self.width();
360 let stop_bit = start_bit + self.width() - 1;
361
362 let start_u64 = start_bit / 64;
363 let stop_u64 = stop_bit / 64;
364
365 let local_start_bit = start_bit % 64;
366
367 if local_start_bit == 0 {
368 self.data.pop();
370 } else {
371 self.data[start_u64] =
373 (self.data[start_u64] << (64 - local_start_bit)) >> (64 - local_start_bit);
374
375 if start_u64 != stop_u64 {
376 self.data.pop();
378 }
379 }
380
381 self.len -= 1;
382
383 Some(last)
384 }
385
386 pub fn truncate(&mut self, len: usize) {
401 while self.len() > len {
404 self.pop();
405 }
406 }
407
408 pub fn split_off(&mut self, idx: usize) -> BitpackVec {
426 assert!(
427 idx <= self.len(),
428 "split index ({}) should be <= len ({})",
429 idx,
430 self.len()
431 );
432 let mut rest = BitpackVec::new(self.width());
433
434 for i in idx..self.len() {
435 rest.push(self.at(i));
436 }
437
438 self.truncate(idx);
439 rest
440 }
441
442 pub fn to_vec(&self) -> Vec<u64> {
458 let mut r = Vec::with_capacity(self.len());
459
460 for i in 0..self.len() {
461 r.push(self.at(i))
462 }
463
464 r
465 }
466
467 pub fn change_width(&mut self, new_width: usize) {
480 if new_width == self.width() {
481 return;
482 }
483
484 let mut nv = BitpackVec::new(new_width);
485 for item in self.iter() {
486 nv.push(item);
487 }
488 *self = nv;
489 }
490
491 pub fn iter(&self) -> BitpackIter {
505 BitpackIter { bv: self, curr: 0 }
506 }
507}
508
509pub struct BitpackIter<'a> {
511 bv: &'a BitpackVec,
512 curr: usize,
513}
514
515impl Iterator for BitpackIter<'_> {
516 type Item = u64;
517
518 fn next(&mut self) -> Option<Self::Item> {
519 if self.curr >= self.bv.len() {
520 return None;
521 }
522
523 let v = self.bv.at(self.curr);
524 self.curr += 1;
525
526 Some(v)
527 }
528}
529
530impl PartialEq for BitpackVec {
531 fn eq(&self, other: &Self) -> bool {
532 if self.width != other.width {
533 return false;
534 }
535
536 if self.len != other.len {
537 return false;
538 }
539
540 for i in 0..self.len() {
544 if self.at(i) != other.at(i) {
545 return false;
546 }
547 }
548
549 true
550 }
551}
552
553impl Debug for BitpackVec {
554 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
555 f.write_str(&format!("BitpackVec (width: {}) [ ", self.width()))?;
556
557 for i in self.iter() {
558 f.write_str(&format!("{} ", i))?;
559 }
560
561 f.write_str("]\n")
562 }
563}
564
565impl Extend<u64> for BitpackVec {
566 fn extend<T: IntoIterator<Item = u64>>(&mut self, iter: T) {
567 for i in iter {
568 self.push(i);
569 }
570 }
571}
572
573#[cfg(test)]
574mod tests {
575 use super::*;
576
577 #[test]
578 pub fn test_simple_6bit() {
579 let mut v = BitpackVec::new(6);
580
581 for i in 0..64 {
582 v.push(i)
583 }
584
585 for i in 0..64 {
586 assert_eq!(v.at(i as usize), i);
587 }
588
589 assert_eq!(v.len(), 64);
590 }
591
592 #[test]
593 pub fn test_simple_5bit() {
594 let mut v = BitpackVec::new(5);
595
596 for i in 0..32 {
597 v.push(i)
598 }
599
600 for i in 0..32 {
601 assert_eq!(v.at(i as usize), i);
602 }
603
604 assert_eq!(v.len(), 32);
605 }
606
607 #[test]
608 pub fn test_perfect_pack_8bit() {
609 let mut v = BitpackVec::new(8);
610
611 v.push(10);
612 v.push(11);
613 v.push(12);
614 v.push(13);
615 v.push(14);
616 v.push(15);
617 v.push(16);
618 v.push(17);
619
620 assert_eq!(v.at(0), 10);
621 assert_eq!(v.at(1), 11);
622 assert_eq!(v.at(2), 12);
623 assert_eq!(v.at(3), 13);
624 assert_eq!(v.at(4), 14);
625 assert_eq!(v.at(5), 15);
626 assert_eq!(v.at(6), 16);
627 assert_eq!(v.at(7), 17);
628
629 assert_eq!(v.capacity(), 8);
630 }
631
632 #[test]
633 pub fn test_immut_n_bit() {
634 for bits in 1..13 {
635 let mut v = BitpackVec::new(bits);
636
637 for x in 0..2_u64.pow(bits as u32) {
638 v.push(x);
639 }
640
641 for i in 0..2_u64.pow(bits as u32) {
642 assert_eq!(v.at(i as usize), i);
643 }
644
645 assert_eq!(v.len(), 2_u64.pow(bits as u32) as usize);
646 }
647 }
648
649 #[test]
650 pub fn test_set() {
651 let mut v = BitpackVec::new(5);
652
653 for i in 0..16 {
654 v.push(i);
655 }
656
657 assert_eq!(v.at(5), 5);
658 v.set(5, 20);
659 assert_eq!(v.at(5), 20);
660 v.set(5, 5);
661 assert_eq!(v.at(5), 5);
662
663 v.set(12, 20);
664 assert_eq!(v.at(12), 20);
665
666 for i in 0..16 {
667 v.set(i as usize, i + 5);
668 }
669
670 for i in 0..16 {
671 assert_eq!(v.at(i as usize), i + 5);
672 }
673 }
674
675 #[test]
676 pub fn test_pop() {
677 let mut v = BitpackVec::new(11);
678 let mut rv = Vec::new();
679
680 for i in 0..2048 {
681 v.push(i);
682 rv.push(i);
683 }
684
685 while let Some(i) = rv.pop() {
686 assert_eq!(i, v.pop().unwrap());
687 }
688
689 assert!(v.pop().is_none());
690 }
691
692 #[test]
693 pub fn test_push_pop() {
694 let mut v = BitpackVec::new(11);
695 let mut rv = Vec::new();
696
697 for i in 0..2048 {
698 v.push(i);
699 rv.push(i);
700 }
701
702 for _ in 0..100 {
703 assert_eq!(v.pop(), rv.pop());
704 }
705
706 for i in 0..10_000 {
707 v.push(i % 2048);
708 rv.push(i % 2048);
709 }
710
711 while let Some(i) = rv.pop() {
712 assert_eq!(i, v.pop().unwrap());
713 }
714
715 assert!(v.pop().is_none());
716 }
717
718 #[test]
719 pub fn test_split_off() {
720 let mut v = Vec::new();
721 let mut bv = BitpackVec::new(7);
722
723 for i in 0..10_000 {
724 v.push(i % 128);
725 bv.push(i % 128);
726 }
727
728 assert_eq!(v, bv.to_vec());
729
730 let rv = v.split_off(500);
731 let rbv = bv.split_off(500);
732
733 assert_eq!(v, bv.to_vec());
734 assert_eq!(rv, rbv.to_vec());
735 }
736
737 #[test]
738 pub fn test_raw() {
739 let mut v = BitpackVec::new(7);
740
741 for i in 0..10_000 {
742 v.push(i % 128);
743 }
744
745 let data = v.into_raw_vec();
746
747 let nv = BitpackVec::from_raw_vec(data, 7, 10_000);
748
749 for i in 0..10_000 {
750 assert_eq!(nv.at(i), i as u64 % 128);
751 }
752 }
753
754 #[test]
755 pub fn test_iter() {
756 let mut v = BitpackVec::new(7);
757
758 for i in 0..10_000 {
759 v.push(i % 128);
760 }
761
762 let from_iter: Vec<u64> = v.iter().collect();
763 assert_eq!(v.to_vec(), from_iter);
764 }
765
766 #[test]
767 pub fn test_eq() {
768 let mut v = BitpackVec::new(7);
769 let mut v2 = BitpackVec::new(7);
770
771 for i in 0..10_000 {
772 v.push(i % 128);
773 v2.push(i % 128);
774 }
775
776 assert_eq!(v, v2);
777
778 v.push(7);
779 assert_ne!(v, v2);
780
781 v.pop();
782 assert_eq!(v, v2);
783
784 v.push(8);
785 v2.push(8);
786 assert_eq!(v, v2);
787 }
788
789 #[test]
790 pub fn test_from_slice() {
791 let v = vec![4, 19, 184, 18314, 62];
792 let bv = BitpackVec::from_slice(&v);
793
794 assert_eq!(bv.width(), 15);
795 assert_eq!(bv.to_vec(), v);
796 }
797
798 #[test]
799 pub fn test_change_width() {
800 let mut v = vec![1, 2, 3, 4];
801 let mut bv = BitpackVec::from_slice(&v);
802
803 v.push(66);
804
805 assert!(!bv.fits(66));
806 bv.change_width(7);
807 assert!(bv.fits(66));
808 bv.push(66);
809
810 assert_eq!(bv.to_vec(), v);
811 }
812
813 #[test]
814 #[should_panic]
815 fn test_does_not_fit_push() {
816 let mut bv = BitpackVec::new(5);
817 bv.push(32);
818 }
819
820 #[test]
821 #[should_panic]
822 fn test_does_not_fit_set() {
823 let mut bv = BitpackVec::new(5);
824 bv.push(18);
825 bv.set(0, 32);
826 }
827}