1use std::io;
21
22#[derive(Debug, Clone)]
27pub struct BitPackedInts {
28 data: Vec<u64>,
30 bits_per_value: u8,
32 count: usize,
34}
35
36impl BitPackedInts {
37 #[must_use]
39 pub fn pack(values: &[u64]) -> Self {
40 if values.is_empty() {
41 return Self {
42 data: Vec::new(),
43 bits_per_value: 0,
44 count: 0,
45 };
46 }
47
48 let max_value = values.iter().copied().max().unwrap_or(0);
49 let bits = Self::bits_needed(max_value);
50 Self::pack_with_bits(values, bits)
51 }
52
53 #[must_use]
59 pub fn pack_with_bits(values: &[u64], bits_per_value: u8) -> Self {
60 if values.is_empty() {
61 return Self {
62 data: Vec::new(),
63 bits_per_value,
64 count: 0,
65 };
66 }
67
68 if bits_per_value == 0 {
69 debug_assert!(values.iter().all(|&v| v == 0));
71 return Self {
72 data: Vec::new(),
73 bits_per_value: 0,
74 count: values.len(),
75 };
76 }
77
78 let bits = bits_per_value as usize;
79 let values_per_word = 64 / bits;
80 let num_words = (values.len() + values_per_word - 1) / values_per_word;
81
82 let mut data = vec![0u64; num_words];
83 let mask = if bits >= 64 {
84 u64::MAX
85 } else {
86 (1u64 << bits) - 1
87 };
88
89 for (i, &value) in values.iter().enumerate() {
90 debug_assert!(
91 value <= mask,
92 "Value {} doesn't fit in {} bits",
93 value,
94 bits_per_value
95 );
96
97 let word_idx = i / values_per_word;
98 let bit_offset = (i % values_per_word) * bits;
99 data[word_idx] |= (value & mask) << bit_offset;
100 }
101
102 Self {
103 data,
104 bits_per_value,
105 count: values.len(),
106 }
107 }
108
109 #[must_use]
111 pub fn unpack(&self) -> Vec<u64> {
112 if self.count == 0 {
113 return Vec::new();
114 }
115
116 if self.bits_per_value == 0 {
117 return vec![0u64; self.count];
118 }
119
120 let bits = self.bits_per_value as usize;
121 let values_per_word = 64 / bits;
122 let mask = if bits >= 64 {
123 u64::MAX
124 } else {
125 (1u64 << bits) - 1
126 };
127
128 let mut result = Vec::with_capacity(self.count);
129
130 for i in 0..self.count {
131 let word_idx = i / values_per_word;
132 let bit_offset = (i % values_per_word) * bits;
133 let value = (self.data[word_idx] >> bit_offset) & mask;
134 result.push(value);
135 }
136
137 result
138 }
139
140 #[must_use]
142 pub fn get(&self, index: usize) -> Option<u64> {
143 if index >= self.count {
144 return None;
145 }
146
147 if self.bits_per_value == 0 {
148 return Some(0);
149 }
150
151 let bits = self.bits_per_value as usize;
152 let values_per_word = 64 / bits;
153 let word_idx = index / values_per_word;
154 let bit_offset = (index % values_per_word) * bits;
155 let mask = if bits >= 64 {
156 u64::MAX
157 } else {
158 (1u64 << bits) - 1
159 };
160
161 Some((self.data[word_idx] >> bit_offset) & mask)
162 }
163
164 #[must_use]
166 pub fn len(&self) -> usize {
167 self.count
168 }
169
170 #[must_use]
172 pub fn is_empty(&self) -> bool {
173 self.count == 0
174 }
175
176 #[must_use]
178 pub fn bits_per_value(&self) -> u8 {
179 self.bits_per_value
180 }
181
182 #[must_use]
184 pub fn data(&self) -> &[u64] {
185 &self.data
186 }
187
188 #[must_use]
190 pub fn compression_ratio(&self) -> f64 {
191 if self.count == 0 {
192 return 1.0;
193 }
194
195 let original_size = self.count * 8; let packed_size = self.data.len() * 8;
197
198 if packed_size == 0 {
199 return f64::INFINITY; }
201
202 original_size as f64 / packed_size as f64
203 }
204
205 #[must_use]
207 pub fn bits_needed(value: u64) -> u8 {
208 if value == 0 {
209 1 } else {
211 64 - value.leading_zeros() as u8
212 }
213 }
214
215 pub fn to_bytes(&self) -> Vec<u8> {
217 let mut buf = Vec::with_capacity(1 + 4 + self.data.len() * 8);
218 buf.push(self.bits_per_value);
219 buf.extend_from_slice(&(self.count as u32).to_le_bytes());
220 for &word in &self.data {
221 buf.extend_from_slice(&word.to_le_bytes());
222 }
223 buf
224 }
225
226 pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
228 if bytes.len() < 5 {
229 return Err(io::Error::new(
230 io::ErrorKind::InvalidData,
231 "BitPackedInts too short",
232 ));
233 }
234
235 let bits_per_value = bytes[0];
236 let count = u32::from_le_bytes(bytes[1..5].try_into().unwrap()) as usize;
237
238 let num_words = if bits_per_value == 0 || count == 0 {
239 0
240 } else {
241 let values_per_word = 64 / bits_per_value as usize;
242 (count + values_per_word - 1) / values_per_word
243 };
244
245 if bytes.len() < 5 + num_words * 8 {
246 return Err(io::Error::new(
247 io::ErrorKind::InvalidData,
248 "BitPackedInts truncated",
249 ));
250 }
251
252 let mut data = Vec::with_capacity(num_words);
253 for i in 0..num_words {
254 let offset = 5 + i * 8;
255 let word = u64::from_le_bytes(bytes[offset..offset + 8].try_into().unwrap());
256 data.push(word);
257 }
258
259 Ok(Self {
260 data,
261 bits_per_value,
262 count,
263 })
264 }
265}
266
267#[derive(Debug, Clone)]
272pub struct DeltaBitPacked {
273 base: u64,
275 deltas: BitPackedInts,
277}
278
279impl DeltaBitPacked {
280 #[must_use]
282 pub fn encode(values: &[u64]) -> Self {
283 if values.is_empty() {
284 return Self {
285 base: 0,
286 deltas: BitPackedInts::pack(&[]),
287 };
288 }
289
290 let base = values[0];
291 let delta_values: Vec<u64> = values
292 .windows(2)
293 .map(|w| w[1].saturating_sub(w[0]))
294 .collect();
295
296 let deltas = BitPackedInts::pack(&delta_values);
297
298 Self { base, deltas }
299 }
300
301 #[must_use]
303 pub fn decode(&self) -> Vec<u64> {
304 if self.deltas.is_empty() && self.base == 0 {
305 return Vec::new();
306 }
307
308 let delta_values = self.deltas.unpack();
309 let mut result = Vec::with_capacity(delta_values.len() + 1);
310 let mut current = self.base;
311 result.push(current);
312
313 for delta in delta_values {
314 current = current.wrapping_add(delta);
315 result.push(current);
316 }
317
318 result
319 }
320
321 #[must_use]
323 pub fn len(&self) -> usize {
324 if self.deltas.is_empty() && self.base == 0 {
325 0
326 } else {
327 self.deltas.len() + 1
328 }
329 }
330
331 #[must_use]
333 pub fn is_empty(&self) -> bool {
334 self.deltas.is_empty() && self.base == 0
335 }
336
337 #[must_use]
339 pub fn base(&self) -> u64 {
340 self.base
341 }
342
343 #[must_use]
345 pub fn bits_per_delta(&self) -> u8 {
346 self.deltas.bits_per_value()
347 }
348
349 #[must_use]
351 pub fn compression_ratio(&self) -> f64 {
352 let count = self.len();
353 if count == 0 {
354 return 1.0;
355 }
356
357 let original_size = count * 8;
358 let packed_size = 8 + self.deltas.data().len() * 8; original_size as f64 / packed_size as f64
361 }
362
363 pub fn to_bytes(&self) -> Vec<u8> {
365 let delta_bytes = self.deltas.to_bytes();
366 let mut buf = Vec::with_capacity(8 + delta_bytes.len());
367 buf.extend_from_slice(&self.base.to_le_bytes());
368 buf.extend_from_slice(&delta_bytes);
369 buf
370 }
371
372 pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
374 if bytes.len() < 8 {
375 return Err(io::Error::new(
376 io::ErrorKind::InvalidData,
377 "DeltaBitPacked too short",
378 ));
379 }
380
381 let base = u64::from_le_bytes(bytes[0..8].try_into().unwrap());
382 let deltas = BitPackedInts::from_bytes(&bytes[8..])?;
383
384 Ok(Self { base, deltas })
385 }
386}
387
388#[cfg(test)]
389mod tests {
390 use super::*;
391
392 #[test]
393 fn test_bitpack_basic() {
394 let values = vec![5u64, 2, 3, 5, 5, 8, 2];
395 let packed = BitPackedInts::pack(&values);
396 let unpacked = packed.unpack();
397 assert_eq!(values, unpacked);
398 }
399
400 #[test]
401 fn test_bitpack_empty() {
402 let values: Vec<u64> = vec![];
403 let packed = BitPackedInts::pack(&values);
404 assert!(packed.is_empty());
405 assert_eq!(packed.unpack(), values);
406 }
407
408 #[test]
409 fn test_bitpack_single() {
410 let values = vec![42u64];
411 let packed = BitPackedInts::pack(&values);
412 assert_eq!(packed.len(), 1);
413 assert_eq!(packed.unpack(), values);
414 }
415
416 #[test]
417 fn test_bitpack_all_zeros() {
418 let values = vec![0u64; 100];
419 let packed = BitPackedInts::pack(&values);
420 assert_eq!(packed.bits_per_value(), 1);
421 assert_eq!(packed.unpack(), values);
422 }
423
424 #[test]
425 fn test_bitpack_powers_of_two() {
426 for bits in 1..=64u8 {
427 let max_val = if bits == 64 {
428 u64::MAX
429 } else {
430 (1u64 << bits) - 1
431 };
432 let values = vec![0, max_val / 2, max_val];
433 let packed = BitPackedInts::pack(&values);
434 assert_eq!(packed.bits_per_value(), bits);
435 assert_eq!(packed.unpack(), values);
436 }
437 }
438
439 #[test]
440 fn test_bitpack_get() {
441 let values = vec![1u64, 2, 3, 4, 5];
442 let packed = BitPackedInts::pack(&values);
443
444 for (i, &expected) in values.iter().enumerate() {
445 assert_eq!(packed.get(i), Some(expected));
446 }
447 assert_eq!(packed.get(100), None);
448 }
449
450 #[test]
451 fn test_bitpack_compression() {
452 let values: Vec<u64> = (0..100).map(|i| i % 16).collect();
454 let packed = BitPackedInts::pack(&values);
455 assert_eq!(packed.bits_per_value(), 4);
456 let ratio = packed.compression_ratio();
458 assert!(ratio > 10.0, "Expected ratio > 10, got {}", ratio);
459 }
460
461 #[test]
462 fn test_bitpack_serialization() {
463 let values = vec![1u64, 3, 7, 15, 31];
464 let packed = BitPackedInts::pack(&values);
465 let bytes = packed.to_bytes();
466 let restored = BitPackedInts::from_bytes(&bytes).unwrap();
467 assert_eq!(packed.unpack(), restored.unpack());
468 }
469
470 #[test]
471 fn test_delta_bitpacked_basic() {
472 let values = vec![100u64, 105, 107, 110, 115, 120, 128, 130];
473 let encoded = DeltaBitPacked::encode(&values);
474 let decoded = encoded.decode();
475 assert_eq!(values, decoded);
476 }
477
478 #[test]
479 fn test_delta_bitpacked_sequential() {
480 let values: Vec<u64> = (1000..1100).collect();
482 let encoded = DeltaBitPacked::encode(&values);
483 assert_eq!(encoded.bits_per_delta(), 1);
484 assert_eq!(encoded.decode(), values);
485
486 let ratio = encoded.compression_ratio();
488 assert!(ratio > 5.0, "Expected ratio > 5, got {}", ratio);
489 }
490
491 #[test]
492 fn test_delta_bitpacked_empty() {
493 let values: Vec<u64> = vec![];
494 let encoded = DeltaBitPacked::encode(&values);
495 assert!(encoded.is_empty());
496 assert_eq!(encoded.decode(), values);
497 }
498
499 #[test]
500 fn test_delta_bitpacked_single() {
501 let values = vec![42u64];
502 let encoded = DeltaBitPacked::encode(&values);
503 assert_eq!(encoded.len(), 1);
504 assert_eq!(encoded.decode(), values);
505 }
506
507 #[test]
508 fn test_delta_bitpacked_serialization() {
509 let values = vec![100u64, 105, 107, 110, 115];
510 let encoded = DeltaBitPacked::encode(&values);
511 let bytes = encoded.to_bytes();
512 let restored = DeltaBitPacked::from_bytes(&bytes).unwrap();
513 assert_eq!(encoded.decode(), restored.decode());
514 }
515
516 #[test]
517 fn test_bits_needed() {
518 assert_eq!(BitPackedInts::bits_needed(0), 1);
519 assert_eq!(BitPackedInts::bits_needed(1), 1);
520 assert_eq!(BitPackedInts::bits_needed(2), 2);
521 assert_eq!(BitPackedInts::bits_needed(3), 2);
522 assert_eq!(BitPackedInts::bits_needed(4), 3);
523 assert_eq!(BitPackedInts::bits_needed(7), 3);
524 assert_eq!(BitPackedInts::bits_needed(8), 4);
525 assert_eq!(BitPackedInts::bits_needed(255), 8);
526 assert_eq!(BitPackedInts::bits_needed(256), 9);
527 assert_eq!(BitPackedInts::bits_needed(u64::MAX), 64);
528 }
529}