tantivy_bitpacker/
blocked_bitpacker.rs1use super::bitpacker::BitPacker;
2use super::compute_num_bits;
3use crate::{minmax, BitUnpacker};
4
5const BLOCK_SIZE: usize = 128;
6
7#[derive(Debug, Clone)]
10pub struct BlockedBitpacker {
11 compressed_blocks: Vec<u8>,
13 buffer: Vec<u64>,
15 offset_and_bits: Vec<BlockedBitpackerEntryMetaData>,
16}
17impl Default for BlockedBitpacker {
18 fn default() -> Self {
19 BlockedBitpacker::new()
20 }
21}
22
23#[derive(Debug, Clone, Default)]
30struct BlockedBitpackerEntryMetaData {
31 encoded: u64,
32 base_value: u64,
33}
34
35impl BlockedBitpackerEntryMetaData {
36 fn new(offset: u64, num_bits: u8, base_value: u64) -> Self {
37 let encoded = offset | (num_bits as u64) << (64 - 8);
38 Self {
39 encoded,
40 base_value,
41 }
42 }
43 fn offset(&self) -> u64 {
44 (self.encoded << 8) >> 8
45 }
46 fn num_bits(&self) -> u8 {
47 (self.encoded >> 56) as u8
48 }
49 fn base_value(&self) -> u64 {
50 self.base_value
51 }
52}
53
54#[test]
55fn metadata_test() {
56 let meta = BlockedBitpackerEntryMetaData::new(50000, 6, 40000);
57 assert_eq!(meta.offset(), 50000);
58 assert_eq!(meta.num_bits(), 6);
59}
60
61fn mem_usage<T>(items: &Vec<T>) -> usize {
62 items.capacity() * std::mem::size_of::<T>()
63}
64
65impl BlockedBitpacker {
66 pub fn new() -> Self {
67 Self {
68 compressed_blocks: vec![0; 8],
69 buffer: vec![],
70 offset_and_bits: vec![],
71 }
72 }
73
74 pub fn mem_usage(&self) -> usize {
76 std::mem::size_of::<BlockedBitpacker>()
77 + self.compressed_blocks.capacity()
78 + mem_usage(&self.offset_and_bits)
79 + mem_usage(&self.buffer)
80 }
81
82 #[inline]
83 pub fn add(&mut self, val: u64) {
84 self.buffer.push(val);
85 if self.buffer.len() == BLOCK_SIZE {
86 self.flush();
87 }
88 }
89
90 pub fn flush(&mut self) {
91 if let Some((min_value, max_value)) = minmax(self.buffer.iter()) {
92 let mut bit_packer = BitPacker::new();
93 let num_bits_block = compute_num_bits(*max_value - min_value);
94 self.compressed_blocks
97 .resize(self.compressed_blocks.len() - 8, 0); let offset = self.compressed_blocks.len() as u64;
99 for val in self.buffer.iter() {
104 bit_packer
105 .write(
106 *val - min_value,
107 num_bits_block,
108 &mut self.compressed_blocks,
109 )
110 .expect("cannot write bitpacking to output"); }
112 bit_packer.flush(&mut self.compressed_blocks).unwrap();
113 self.offset_and_bits
114 .push(BlockedBitpackerEntryMetaData::new(
115 offset,
116 num_bits_block,
117 *min_value,
118 ));
119
120 self.buffer.clear();
121 self.compressed_blocks
122 .resize(self.compressed_blocks.len() + 8, 0); }
124 }
125 #[inline]
126 pub fn get(&self, idx: usize) -> u64 {
127 let metadata_pos = idx / BLOCK_SIZE;
128 let pos_in_block = idx % BLOCK_SIZE;
129 if let Some(metadata) = self.offset_and_bits.get(metadata_pos) {
130 let unpacked = BitUnpacker::new(metadata.num_bits()).get(
131 pos_in_block as u32,
132 &self.compressed_blocks[metadata.offset() as usize..],
133 );
134 unpacked + metadata.base_value()
135 } else {
136 self.buffer[pos_in_block]
137 }
138 }
139
140 pub fn iter(&self) -> impl Iterator<Item = u64> + '_ {
141 let bitpacked_elems = self.offset_and_bits.len() * BLOCK_SIZE;
143 let iter = (0..bitpacked_elems)
144 .map(move |idx| self.get(idx))
145 .chain(self.buffer.iter().cloned());
146 iter
147 }
148}
149
150#[cfg(test)]
151mod tests {
152 use super::*;
153 #[test]
154 fn blocked_bitpacker_empty() {
155 let blocked_bitpacker = BlockedBitpacker::new();
156 assert_eq!(blocked_bitpacker.iter().collect::<Vec<u64>>(), vec![]);
157 }
158 #[test]
159 fn blocked_bitpacker_one() {
160 let mut blocked_bitpacker = BlockedBitpacker::new();
161 blocked_bitpacker.add(50000);
162 assert_eq!(blocked_bitpacker.get(0), 50000);
163 assert_eq!(blocked_bitpacker.iter().collect::<Vec<u64>>(), vec![50000]);
164 }
165 #[test]
166 fn blocked_bitpacker_test() {
167 let mut blocked_bitpacker = BlockedBitpacker::new();
168 for val in 0..21500 {
169 blocked_bitpacker.add(val);
170 }
171 for val in 0..21500 {
172 assert_eq!(blocked_bitpacker.get(val as usize), val);
173 }
174 assert_eq!(blocked_bitpacker.iter().count(), 21500);
175 assert_eq!(blocked_bitpacker.iter().last().unwrap(), 21499);
176 }
177}