1use std::io;
24
25#[derive(Debug, Clone)]
30pub struct BitPackedInts {
31 data: Vec<u64>,
33 bits_per_value: u8,
35 count: usize,
37}
38
39impl BitPackedInts {
40 #[must_use]
42 pub fn pack(values: &[u64]) -> Self {
43 if values.is_empty() {
44 return Self {
45 data: Vec::new(),
46 bits_per_value: 0,
47 count: 0,
48 };
49 }
50
51 let max_value = values.iter().copied().max().unwrap_or(0);
52 let bits = Self::bits_needed(max_value);
53 Self::pack_with_bits(values, bits)
54 }
55
56 #[must_use]
62 pub fn pack_with_bits(values: &[u64], bits_per_value: u8) -> Self {
63 if values.is_empty() {
64 return Self {
65 data: Vec::new(),
66 bits_per_value,
67 count: 0,
68 };
69 }
70
71 if bits_per_value == 0 {
72 debug_assert!(values.iter().all(|&v| v == 0));
74 return Self {
75 data: Vec::new(),
76 bits_per_value: 0,
77 count: values.len(),
78 };
79 }
80
81 let bits = bits_per_value as usize;
82 let values_per_word = 64 / bits;
83 let num_words = (values.len() + values_per_word - 1) / values_per_word;
84
85 let mut data = vec![0u64; num_words];
86 let mask = if bits >= 64 {
87 u64::MAX
88 } else {
89 (1u64 << bits) - 1
90 };
91
92 for (i, &value) in values.iter().enumerate() {
93 debug_assert!(
94 value <= mask,
95 "Value {} doesn't fit in {} bits",
96 value,
97 bits_per_value
98 );
99
100 let word_idx = i / values_per_word;
101 let bit_offset = (i % values_per_word) * bits;
102 data[word_idx] |= (value & mask) << bit_offset;
103 }
104
105 Self {
106 data,
107 bits_per_value,
108 count: values.len(),
109 }
110 }
111
112 #[must_use]
114 pub fn unpack(&self) -> Vec<u64> {
115 if self.count == 0 {
116 return Vec::new();
117 }
118
119 if self.bits_per_value == 0 {
120 return vec![0u64; self.count];
121 }
122
123 let bits = self.bits_per_value as usize;
124 let values_per_word = 64 / bits;
125 let mask = if bits >= 64 {
126 u64::MAX
127 } else {
128 (1u64 << bits) - 1
129 };
130
131 let mut result = Vec::with_capacity(self.count);
132
133 for i in 0..self.count {
134 let word_idx = i / values_per_word;
135 let bit_offset = (i % values_per_word) * bits;
136 let value = (self.data[word_idx] >> bit_offset) & mask;
137 result.push(value);
138 }
139
140 result
141 }
142
143 #[must_use]
145 pub fn get(&self, index: usize) -> Option<u64> {
146 if index >= self.count {
147 return None;
148 }
149
150 if self.bits_per_value == 0 {
151 return Some(0);
152 }
153
154 let bits = self.bits_per_value as usize;
155 let values_per_word = 64 / bits;
156 let word_idx = index / values_per_word;
157 let bit_offset = (index % values_per_word) * bits;
158 let mask = if bits >= 64 {
159 u64::MAX
160 } else {
161 (1u64 << bits) - 1
162 };
163
164 Some((self.data[word_idx] >> bit_offset) & mask)
165 }
166
167 #[must_use]
169 pub fn len(&self) -> usize {
170 self.count
171 }
172
173 #[must_use]
175 pub fn is_empty(&self) -> bool {
176 self.count == 0
177 }
178
179 #[must_use]
181 pub fn bits_per_value(&self) -> u8 {
182 self.bits_per_value
183 }
184
185 #[must_use]
187 pub fn data(&self) -> &[u64] {
188 &self.data
189 }
190
191 #[must_use]
193 pub fn compression_ratio(&self) -> f64 {
194 if self.count == 0 {
195 return 1.0;
196 }
197
198 let original_size = self.count * 8; let packed_size = self.data.len() * 8;
200
201 if packed_size == 0 {
202 return f64::INFINITY; }
204
205 original_size as f64 / packed_size as f64
206 }
207
208 #[must_use]
210 pub fn bits_needed(value: u64) -> u8 {
211 if value == 0 {
212 1 } else {
214 64 - value.leading_zeros() as u8
215 }
216 }
217
218 pub fn to_bytes(&self) -> Vec<u8> {
220 let mut buf = Vec::with_capacity(1 + 4 + self.data.len() * 8);
221 buf.push(self.bits_per_value);
222 buf.extend_from_slice(&(self.count as u32).to_le_bytes());
223 for &word in &self.data {
224 buf.extend_from_slice(&word.to_le_bytes());
225 }
226 buf
227 }
228
229 pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
235 if bytes.len() < 5 {
236 return Err(io::Error::new(
237 io::ErrorKind::InvalidData,
238 "BitPackedInts too short",
239 ));
240 }
241
242 let bits_per_value = bytes[0];
243 if bits_per_value > 64 {
244 return Err(io::Error::new(
245 io::ErrorKind::InvalidData,
246 format!("BitPackedInts bits_per_value {bits_per_value} exceeds 64"),
247 ));
248 }
249 let count = u32::from_le_bytes(
250 bytes[1..5]
251 .try_into()
252 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?,
253 ) as usize;
254
255 let num_words = if bits_per_value == 0 || count == 0 {
256 0
257 } else {
258 let values_per_word = 64 / bits_per_value as usize;
259 (count + values_per_word - 1) / values_per_word
260 };
261
262 if bytes.len() < 5 + num_words * 8 {
263 return Err(io::Error::new(
264 io::ErrorKind::InvalidData,
265 "BitPackedInts truncated",
266 ));
267 }
268
269 let mut data = Vec::with_capacity(num_words);
270 for i in 0..num_words {
271 let offset = 5 + i * 8;
272 let word = u64::from_le_bytes(
273 bytes[offset..offset + 8]
274 .try_into()
275 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?,
276 );
277 data.push(word);
278 }
279
280 Ok(Self {
281 data,
282 bits_per_value,
283 count,
284 })
285 }
286}
287
288#[derive(Debug, Clone)]
294pub struct DeltaBitPacked {
295 base: u64,
297 deltas: BitPackedInts,
299}
300
301impl DeltaBitPacked {
302 #[must_use]
304 pub fn encode(values: &[u64]) -> Self {
305 if values.is_empty() {
306 return Self {
307 base: 0,
308 deltas: BitPackedInts::pack(&[]),
309 };
310 }
311
312 let base = values[0];
313 let delta_values: Vec<u64> = values
314 .windows(2)
315 .map(|w| w[1].saturating_sub(w[0]))
316 .collect();
317
318 let deltas = BitPackedInts::pack(&delta_values);
319
320 Self { base, deltas }
321 }
322
323 #[must_use]
325 pub fn decode(&self) -> Vec<u64> {
326 if self.deltas.is_empty() && self.base == 0 {
327 return Vec::new();
328 }
329
330 let delta_values = self.deltas.unpack();
331 let mut result = Vec::with_capacity(delta_values.len() + 1);
332 let mut current = self.base;
333 result.push(current);
334
335 for delta in delta_values {
336 current = current.wrapping_add(delta);
337 result.push(current);
338 }
339
340 result
341 }
342
343 #[must_use]
345 pub fn len(&self) -> usize {
346 if self.deltas.is_empty() && self.base == 0 {
347 0
348 } else {
349 self.deltas.len() + 1
350 }
351 }
352
353 #[must_use]
355 pub fn is_empty(&self) -> bool {
356 self.deltas.is_empty() && self.base == 0
357 }
358
359 #[must_use]
361 pub fn base(&self) -> u64 {
362 self.base
363 }
364
365 #[must_use]
367 pub fn bits_per_delta(&self) -> u8 {
368 self.deltas.bits_per_value()
369 }
370
371 #[must_use]
373 pub fn compression_ratio(&self) -> f64 {
374 let count = self.len();
375 if count == 0 {
376 return 1.0;
377 }
378
379 let original_size = count * 8;
380 let packed_size = 8 + self.deltas.data().len() * 8; original_size as f64 / packed_size as f64
383 }
384
385 pub fn to_bytes(&self) -> Vec<u8> {
387 let delta_bytes = self.deltas.to_bytes();
388 let mut buf = Vec::with_capacity(8 + delta_bytes.len());
389 buf.extend_from_slice(&self.base.to_le_bytes());
390 buf.extend_from_slice(&delta_bytes);
391 buf
392 }
393
394 pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
400 if bytes.len() < 8 {
401 return Err(io::Error::new(
402 io::ErrorKind::InvalidData,
403 "DeltaBitPacked too short",
404 ));
405 }
406
407 let base = u64::from_le_bytes(
408 bytes[0..8]
409 .try_into()
410 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?,
411 );
412 let deltas = BitPackedInts::from_bytes(&bytes[8..])?;
413
414 Ok(Self { base, deltas })
415 }
416}
417
418#[cfg(test)]
419mod tests {
420 use super::*;
421
422 #[test]
423 fn test_bitpack_basic() {
424 let values = vec![5u64, 2, 3, 5, 5, 8, 2];
425 let packed = BitPackedInts::pack(&values);
426 let unpacked = packed.unpack();
427 assert_eq!(values, unpacked);
428 }
429
430 #[test]
431 fn test_bitpack_empty() {
432 let values: Vec<u64> = vec![];
433 let packed = BitPackedInts::pack(&values);
434 assert!(packed.is_empty());
435 assert_eq!(packed.unpack(), values);
436 }
437
438 #[test]
439 fn test_bitpack_single() {
440 let values = vec![42u64];
441 let packed = BitPackedInts::pack(&values);
442 assert_eq!(packed.len(), 1);
443 assert_eq!(packed.unpack(), values);
444 }
445
446 #[test]
447 fn test_bitpack_all_zeros() {
448 let values = vec![0u64; 100];
449 let packed = BitPackedInts::pack(&values);
450 assert_eq!(packed.bits_per_value(), 1);
451 assert_eq!(packed.unpack(), values);
452 }
453
454 #[test]
455 fn test_bitpack_powers_of_two() {
456 for bits in 1..=64u8 {
457 let max_val = if bits == 64 {
458 u64::MAX
459 } else {
460 (1u64 << bits) - 1
461 };
462 let values = vec![0, max_val / 2, max_val];
463 let packed = BitPackedInts::pack(&values);
464 assert_eq!(packed.bits_per_value(), bits);
465 assert_eq!(packed.unpack(), values);
466 }
467 }
468
469 #[test]
470 fn test_bitpack_get() {
471 let values = vec![1u64, 2, 3, 4, 5];
472 let packed = BitPackedInts::pack(&values);
473
474 for (i, &expected) in values.iter().enumerate() {
475 assert_eq!(packed.get(i), Some(expected));
476 }
477 assert_eq!(packed.get(100), None);
478 }
479
480 #[test]
481 fn test_bitpack_compression() {
482 let values: Vec<u64> = (0..100).map(|i| i % 16).collect();
484 let packed = BitPackedInts::pack(&values);
485 assert_eq!(packed.bits_per_value(), 4);
486 let ratio = packed.compression_ratio();
488 assert!(ratio > 10.0, "Expected ratio > 10, got {}", ratio);
489 }
490
491 #[test]
492 fn test_bitpack_serialization() {
493 let values = vec![1u64, 3, 7, 15, 31];
494 let packed = BitPackedInts::pack(&values);
495 let bytes = packed.to_bytes();
496 let restored = BitPackedInts::from_bytes(&bytes).unwrap();
497 assert_eq!(packed.unpack(), restored.unpack());
498 }
499
500 #[test]
501 fn test_delta_bitpacked_basic() {
502 let values = vec![100u64, 105, 107, 110, 115, 120, 128, 130];
503 let encoded = DeltaBitPacked::encode(&values);
504 let decoded = encoded.decode();
505 assert_eq!(values, decoded);
506 }
507
508 #[test]
509 fn test_delta_bitpacked_sequential() {
510 let values: Vec<u64> = (1000..1100).collect();
512 let encoded = DeltaBitPacked::encode(&values);
513 assert_eq!(encoded.bits_per_delta(), 1);
514 assert_eq!(encoded.decode(), values);
515
516 let ratio = encoded.compression_ratio();
518 assert!(ratio > 5.0, "Expected ratio > 5, got {}", ratio);
519 }
520
521 #[test]
522 fn test_delta_bitpacked_empty() {
523 let values: Vec<u64> = vec![];
524 let encoded = DeltaBitPacked::encode(&values);
525 assert!(encoded.is_empty());
526 assert_eq!(encoded.decode(), values);
527 }
528
529 #[test]
530 fn test_delta_bitpacked_single() {
531 let values = vec![42u64];
532 let encoded = DeltaBitPacked::encode(&values);
533 assert_eq!(encoded.len(), 1);
534 assert_eq!(encoded.decode(), values);
535 }
536
537 #[test]
538 fn test_delta_bitpacked_serialization() {
539 let values = vec![100u64, 105, 107, 110, 115];
540 let encoded = DeltaBitPacked::encode(&values);
541 let bytes = encoded.to_bytes();
542 let restored = DeltaBitPacked::from_bytes(&bytes).unwrap();
543 assert_eq!(encoded.decode(), restored.decode());
544 }
545
546 #[test]
547 fn test_bits_needed() {
548 assert_eq!(BitPackedInts::bits_needed(0), 1);
549 assert_eq!(BitPackedInts::bits_needed(1), 1);
550 assert_eq!(BitPackedInts::bits_needed(2), 2);
551 assert_eq!(BitPackedInts::bits_needed(3), 2);
552 assert_eq!(BitPackedInts::bits_needed(4), 3);
553 assert_eq!(BitPackedInts::bits_needed(7), 3);
554 assert_eq!(BitPackedInts::bits_needed(8), 4);
555 assert_eq!(BitPackedInts::bits_needed(255), 8);
556 assert_eq!(BitPackedInts::bits_needed(256), 9);
557 assert_eq!(BitPackedInts::bits_needed(u64::MAX), 64);
558 }
559}