summavy_bitpacker/
blocked_bitpacker.rs1use super::bitpacker::BitPacker;
2use super::compute_num_bits;
3use crate::{minmax, BitUnpacker};
4
5const BLOCK_SIZE: usize = 128;
6
7#[derive(Debug, Clone)]
10pub struct BlockedBitpacker {
11 compressed_blocks: Vec<u8>,
13 buffer: Vec<u64>,
15 offset_and_bits: Vec<BlockedBitpackerEntryMetaData>,
16}
17impl Default for BlockedBitpacker {
18 fn default() -> Self {
19 BlockedBitpacker::new()
20 }
21}
22
23#[derive(Debug, Clone, Default)]
30struct BlockedBitpackerEntryMetaData {
31 encoded: u64,
32 base_value: u64,
33}
34
35impl BlockedBitpackerEntryMetaData {
36 fn new(offset: u64, num_bits: u8, base_value: u64) -> Self {
37 let encoded = offset | (num_bits as u64) << (64 - 8);
38 Self {
39 encoded,
40 base_value,
41 }
42 }
43 fn offset(&self) -> u64 {
44 (self.encoded << 8) >> 8
45 }
46 fn num_bits(&self) -> u8 {
47 (self.encoded >> 56) as u8
48 }
49 fn base_value(&self) -> u64 {
50 self.base_value
51 }
52}
53
54#[test]
55fn metadata_test() {
56 let meta = BlockedBitpackerEntryMetaData::new(50000, 6, 40000);
57 assert_eq!(meta.offset(), 50000);
58 assert_eq!(meta.num_bits(), 6);
59}
60
61fn mem_usage<T>(items: &Vec<T>) -> usize {
62 items.capacity() * std::mem::size_of::<T>()
63}
64
65impl BlockedBitpacker {
66 pub fn new() -> Self {
67 let mut compressed_blocks = vec![];
68 compressed_blocks.resize(8, 0);
69 Self {
70 compressed_blocks,
71 buffer: vec![],
72 offset_and_bits: vec![],
73 }
74 }
75
76 pub fn mem_usage(&self) -> usize {
78 std::mem::size_of::<BlockedBitpacker>()
79 + self.compressed_blocks.capacity()
80 + mem_usage(&self.offset_and_bits)
81 + mem_usage(&self.buffer)
82 }
83
84 #[inline]
85 pub fn add(&mut self, val: u64) {
86 self.buffer.push(val);
87 if self.buffer.len() == BLOCK_SIZE {
88 self.flush();
89 }
90 }
91
92 pub fn flush(&mut self) {
93 if let Some((min_value, max_value)) = minmax(self.buffer.iter()) {
94 let mut bit_packer = BitPacker::new();
95 let num_bits_block = compute_num_bits(*max_value - min_value);
96 self.compressed_blocks
99 .resize(self.compressed_blocks.len() - 8, 0); let offset = self.compressed_blocks.len() as u64;
101 for val in self.buffer.iter() {
106 bit_packer
107 .write(
108 *val - min_value,
109 num_bits_block,
110 &mut self.compressed_blocks,
111 )
112 .expect("cannot write bitpacking to output"); }
114 bit_packer.flush(&mut self.compressed_blocks).unwrap();
115 self.offset_and_bits
116 .push(BlockedBitpackerEntryMetaData::new(
117 offset,
118 num_bits_block,
119 *min_value,
120 ));
121
122 self.buffer.clear();
123 self.compressed_blocks
124 .resize(self.compressed_blocks.len() + 8, 0); }
126 }
127 #[inline]
128 pub fn get(&self, idx: usize) -> u64 {
129 let metadata_pos = idx / BLOCK_SIZE;
130 let pos_in_block = idx % BLOCK_SIZE;
131 if let Some(metadata) = self.offset_and_bits.get(metadata_pos) {
132 let unpacked = BitUnpacker::new(metadata.num_bits()).get(
133 pos_in_block as u32,
134 &self.compressed_blocks[metadata.offset() as usize..],
135 );
136 unpacked + metadata.base_value()
137 } else {
138 self.buffer[pos_in_block]
139 }
140 }
141
142 pub fn iter(&self) -> impl Iterator<Item = u64> + '_ {
143 let bitpacked_elems = self.offset_and_bits.len() * BLOCK_SIZE;
145 let iter = (0..bitpacked_elems)
146 .map(move |idx| self.get(idx))
147 .chain(self.buffer.iter().cloned());
148 iter
149 }
150}
151
152#[cfg(test)]
153mod tests {
154 use super::*;
155 #[test]
156 fn blocked_bitpacker_empty() {
157 let blocked_bitpacker = BlockedBitpacker::new();
158 assert_eq!(blocked_bitpacker.iter().collect::<Vec<u64>>(), vec![]);
159 }
160 #[test]
161 fn blocked_bitpacker_one() {
162 let mut blocked_bitpacker = BlockedBitpacker::new();
163 blocked_bitpacker.add(50000);
164 assert_eq!(blocked_bitpacker.get(0), 50000);
165 assert_eq!(blocked_bitpacker.iter().collect::<Vec<u64>>(), vec![50000]);
166 }
167 #[test]
168 fn blocked_bitpacker_test() {
169 let mut blocked_bitpacker = BlockedBitpacker::new();
170 for val in 0..21500 {
171 blocked_bitpacker.add(val);
172 }
173 for val in 0..21500 {
174 assert_eq!(blocked_bitpacker.get(val as usize), val);
175 }
176 assert_eq!(blocked_bitpacker.iter().count(), 21500);
177 assert_eq!(blocked_bitpacker.iter().last().unwrap(), 21499);
178 }
179}