1use std::io;
24
25#[derive(Debug, Clone)]
30pub struct BitPackedInts {
31 data: Vec<u64>,
33 bits_per_value: u8,
35 count: usize,
37}
38
39impl BitPackedInts {
40 #[must_use]
45 pub fn from_raw_parts(data: Vec<u64>, bits_per_value: u8, count: usize) -> Self {
46 Self {
47 data,
48 bits_per_value,
49 count,
50 }
51 }
52
53 #[must_use]
55 pub fn pack(values: &[u64]) -> Self {
56 if values.is_empty() {
57 return Self {
58 data: Vec::new(),
59 bits_per_value: 0,
60 count: 0,
61 };
62 }
63
64 let max_value = values.iter().copied().max().unwrap_or(0);
65 let bits = Self::bits_needed(max_value);
66 Self::pack_with_bits(values, bits)
67 }
68
69 #[must_use]
75 pub fn pack_with_bits(values: &[u64], bits_per_value: u8) -> Self {
76 if values.is_empty() {
77 return Self {
78 data: Vec::new(),
79 bits_per_value,
80 count: 0,
81 };
82 }
83
84 if bits_per_value == 0 {
85 debug_assert!(values.iter().all(|&v| v == 0));
87 return Self {
88 data: Vec::new(),
89 bits_per_value: 0,
90 count: values.len(),
91 };
92 }
93
94 let bits = bits_per_value as usize;
95 let values_per_word = 64 / bits;
96 let num_words = (values.len() + values_per_word - 1) / values_per_word;
97
98 let mut data = vec![0u64; num_words];
99 let mask = if bits >= 64 {
100 u64::MAX
101 } else {
102 (1u64 << bits) - 1
103 };
104
105 for (i, &value) in values.iter().enumerate() {
106 debug_assert!(
107 value <= mask,
108 "Value {} doesn't fit in {} bits",
109 value,
110 bits_per_value
111 );
112
113 let word_idx = i / values_per_word;
114 let bit_offset = (i % values_per_word) * bits;
115 data[word_idx] |= (value & mask) << bit_offset;
116 }
117
118 Self {
119 data,
120 bits_per_value,
121 count: values.len(),
122 }
123 }
124
125 #[must_use]
127 pub fn unpack(&self) -> Vec<u64> {
128 if self.count == 0 {
129 return Vec::new();
130 }
131
132 if self.bits_per_value == 0 {
133 return vec![0u64; self.count];
134 }
135
136 let bits = self.bits_per_value as usize;
137 let values_per_word = 64 / bits;
138 let mask = if bits >= 64 {
139 u64::MAX
140 } else {
141 (1u64 << bits) - 1
142 };
143
144 let mut result = Vec::with_capacity(self.count);
145
146 for i in 0..self.count {
147 let word_idx = i / values_per_word;
148 let bit_offset = (i % values_per_word) * bits;
149 let value = (self.data[word_idx] >> bit_offset) & mask;
150 result.push(value);
151 }
152
153 result
154 }
155
156 #[must_use]
158 pub fn get(&self, index: usize) -> Option<u64> {
159 if index >= self.count {
160 return None;
161 }
162
163 if self.bits_per_value == 0 {
164 return Some(0);
165 }
166
167 let bits = self.bits_per_value as usize;
168 let values_per_word = 64 / bits;
169 let word_idx = index / values_per_word;
170 let bit_offset = (index % values_per_word) * bits;
171 let mask = if bits >= 64 {
172 u64::MAX
173 } else {
174 (1u64 << bits) - 1
175 };
176
177 Some((self.data[word_idx] >> bit_offset) & mask)
178 }
179
180 #[must_use]
182 pub fn len(&self) -> usize {
183 self.count
184 }
185
186 #[must_use]
188 pub fn is_empty(&self) -> bool {
189 self.count == 0
190 }
191
192 #[must_use]
194 pub fn bits_per_value(&self) -> u8 {
195 self.bits_per_value
196 }
197
198 #[must_use]
200 pub fn data(&self) -> &[u64] {
201 &self.data
202 }
203
204 #[must_use]
206 pub fn compression_ratio(&self) -> f64 {
207 if self.count == 0 {
208 return 1.0;
209 }
210
211 let original_size = self.count * 8; let packed_size = self.data.len() * 8;
213
214 if packed_size == 0 {
215 return f64::INFINITY; }
217
218 original_size as f64 / packed_size as f64
219 }
220
221 #[must_use]
230 pub fn bits_needed(value: u64) -> u8 {
231 if value == 0 {
232 1 } else {
234 u8::try_from(64u32 - value.leading_zeros()).expect("bits_needed result is in 1..=64")
237 }
238 }
239
240 pub fn to_bytes(&self) -> io::Result<Vec<u8>> {
246 let count_u32 = u32::try_from(self.count).map_err(|_| {
247 io::Error::new(
248 io::ErrorKind::InvalidInput,
249 format!(
250 "BitPackedInts count {} exceeds u32::MAX, cannot serialize",
251 self.count
252 ),
253 )
254 })?;
255 let mut buf = Vec::with_capacity(1 + 4 + self.data.len() * 8);
256 buf.push(self.bits_per_value);
257 buf.extend_from_slice(&count_u32.to_le_bytes());
258 for &word in &self.data {
259 buf.extend_from_slice(&word.to_le_bytes());
260 }
261 Ok(buf)
262 }
263
264 pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
270 if bytes.len() < 5 {
271 return Err(io::Error::new(
272 io::ErrorKind::InvalidData,
273 "BitPackedInts too short",
274 ));
275 }
276
277 let bits_per_value = bytes[0];
278 if bits_per_value > 64 {
279 return Err(io::Error::new(
280 io::ErrorKind::InvalidData,
281 format!("BitPackedInts bits_per_value {bits_per_value} exceeds 64"),
282 ));
283 }
284 let count = u32::from_le_bytes(
285 bytes[1..5]
286 .try_into()
287 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?,
288 ) as usize;
289
290 let num_words = if bits_per_value == 0 || count == 0 {
291 0
292 } else {
293 let values_per_word = 64 / bits_per_value as usize;
294 (count + values_per_word - 1) / values_per_word
295 };
296
297 if bytes.len() < 5 + num_words * 8 {
298 return Err(io::Error::new(
299 io::ErrorKind::InvalidData,
300 "BitPackedInts truncated",
301 ));
302 }
303
304 let mut data = Vec::with_capacity(num_words);
305 for i in 0..num_words {
306 let offset = 5 + i * 8;
307 let word = u64::from_le_bytes(
308 bytes[offset..offset + 8]
309 .try_into()
310 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?,
311 );
312 data.push(word);
313 }
314
315 Ok(Self {
316 data,
317 bits_per_value,
318 count,
319 })
320 }
321}
322
323#[derive(Debug, Clone)]
329pub struct DeltaBitPacked {
330 base: u64,
332 deltas: BitPackedInts,
334}
335
336impl DeltaBitPacked {
337 #[must_use]
339 pub fn encode(values: &[u64]) -> Self {
340 if values.is_empty() {
341 return Self {
342 base: 0,
343 deltas: BitPackedInts::pack(&[]),
344 };
345 }
346
347 let base = values[0];
348 let delta_values: Vec<u64> = values
349 .windows(2)
350 .map(|w| w[1].saturating_sub(w[0]))
351 .collect();
352
353 let deltas = BitPackedInts::pack(&delta_values);
354
355 Self { base, deltas }
356 }
357
358 #[must_use]
360 pub fn decode(&self) -> Vec<u64> {
361 if self.deltas.is_empty() && self.base == 0 {
362 return Vec::new();
363 }
364
365 let delta_values = self.deltas.unpack();
366 let mut result = Vec::with_capacity(delta_values.len() + 1);
367 let mut current = self.base;
368 result.push(current);
369
370 for delta in delta_values {
371 current = current.wrapping_add(delta);
372 result.push(current);
373 }
374
375 result
376 }
377
378 #[must_use]
380 pub fn len(&self) -> usize {
381 if self.deltas.is_empty() && self.base == 0 {
382 0
383 } else {
384 self.deltas.len() + 1
385 }
386 }
387
388 #[must_use]
390 pub fn is_empty(&self) -> bool {
391 self.deltas.is_empty() && self.base == 0
392 }
393
394 #[must_use]
396 pub fn base(&self) -> u64 {
397 self.base
398 }
399
400 #[must_use]
402 pub fn bits_per_delta(&self) -> u8 {
403 self.deltas.bits_per_value()
404 }
405
406 #[must_use]
408 pub fn compression_ratio(&self) -> f64 {
409 let count = self.len();
410 if count == 0 {
411 return 1.0;
412 }
413
414 let original_size = count * 8;
415 let packed_size = 8 + self.deltas.data().len() * 8; original_size as f64 / packed_size as f64
418 }
419
420 pub fn to_bytes(&self) -> io::Result<Vec<u8>> {
426 let delta_bytes = self.deltas.to_bytes()?;
427 let mut buf = Vec::with_capacity(8 + delta_bytes.len());
428 buf.extend_from_slice(&self.base.to_le_bytes());
429 buf.extend_from_slice(&delta_bytes);
430 Ok(buf)
431 }
432
433 pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
439 if bytes.len() < 8 {
440 return Err(io::Error::new(
441 io::ErrorKind::InvalidData,
442 "DeltaBitPacked too short",
443 ));
444 }
445
446 let base = u64::from_le_bytes(
447 bytes[0..8]
448 .try_into()
449 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?,
450 );
451 let deltas = BitPackedInts::from_bytes(&bytes[8..])?;
452
453 Ok(Self { base, deltas })
454 }
455}
456
457#[cfg(test)]
458mod tests {
459 use super::*;
460
461 #[test]
462 fn test_bitpack_basic() {
463 let values = vec![5u64, 2, 3, 5, 5, 8, 2];
464 let packed = BitPackedInts::pack(&values);
465 let unpacked = packed.unpack();
466 assert_eq!(values, unpacked);
467 }
468
469 #[test]
470 fn test_bitpack_empty() {
471 let values: Vec<u64> = vec![];
472 let packed = BitPackedInts::pack(&values);
473 assert!(packed.is_empty());
474 assert_eq!(packed.unpack(), values);
475 }
476
477 #[test]
478 fn test_bitpack_single() {
479 let values = vec![42u64];
480 let packed = BitPackedInts::pack(&values);
481 assert_eq!(packed.len(), 1);
482 assert_eq!(packed.unpack(), values);
483 }
484
485 #[test]
486 fn test_bitpack_all_zeros() {
487 let values = vec![0u64; 100];
488 let packed = BitPackedInts::pack(&values);
489 assert_eq!(packed.bits_per_value(), 1);
490 assert_eq!(packed.unpack(), values);
491 }
492
493 #[test]
494 fn test_bitpack_powers_of_two() {
495 for bits in 1..=64u8 {
496 let max_val = if bits == 64 {
497 u64::MAX
498 } else {
499 (1u64 << bits) - 1
500 };
501 let values = vec![0, max_val / 2, max_val];
502 let packed = BitPackedInts::pack(&values);
503 assert_eq!(packed.bits_per_value(), bits);
504 assert_eq!(packed.unpack(), values);
505 }
506 }
507
508 #[test]
509 fn test_bitpack_get() {
510 let values = vec![1u64, 2, 3, 4, 5];
511 let packed = BitPackedInts::pack(&values);
512
513 for (i, &expected) in values.iter().enumerate() {
514 assert_eq!(packed.get(i), Some(expected));
515 }
516 assert_eq!(packed.get(100), None);
517 }
518
519 #[test]
520 fn test_bitpack_compression() {
521 let values: Vec<u64> = (0..100).map(|i| i % 16).collect();
523 let packed = BitPackedInts::pack(&values);
524 assert_eq!(packed.bits_per_value(), 4);
525 let ratio = packed.compression_ratio();
527 assert!(ratio > 10.0, "Expected ratio > 10, got {}", ratio);
528 }
529
530 #[test]
531 fn test_bitpack_serialization() {
532 let values = vec![1u64, 3, 7, 15, 31];
533 let packed = BitPackedInts::pack(&values);
534 let bytes = packed.to_bytes().unwrap();
535 let restored = BitPackedInts::from_bytes(&bytes).unwrap();
536 assert_eq!(packed.unpack(), restored.unpack());
537 }
538
539 #[test]
540 fn test_delta_bitpacked_basic() {
541 let values = vec![100u64, 105, 107, 110, 115, 120, 128, 130];
542 let encoded = DeltaBitPacked::encode(&values);
543 let decoded = encoded.decode();
544 assert_eq!(values, decoded);
545 }
546
547 #[test]
548 fn test_delta_bitpacked_sequential() {
549 let values: Vec<u64> = (1000..1100).collect();
551 let encoded = DeltaBitPacked::encode(&values);
552 assert_eq!(encoded.bits_per_delta(), 1);
553 assert_eq!(encoded.decode(), values);
554
555 let ratio = encoded.compression_ratio();
557 assert!(ratio > 5.0, "Expected ratio > 5, got {}", ratio);
558 }
559
560 #[test]
561 fn test_delta_bitpacked_empty() {
562 let values: Vec<u64> = vec![];
563 let encoded = DeltaBitPacked::encode(&values);
564 assert!(encoded.is_empty());
565 assert_eq!(encoded.decode(), values);
566 }
567
568 #[test]
569 fn test_delta_bitpacked_single() {
570 let values = vec![42u64];
571 let encoded = DeltaBitPacked::encode(&values);
572 assert_eq!(encoded.len(), 1);
573 assert_eq!(encoded.decode(), values);
574 }
575
576 #[test]
577 fn test_delta_bitpacked_serialization() {
578 let values = vec![100u64, 105, 107, 110, 115];
579 let encoded = DeltaBitPacked::encode(&values);
580 let bytes = encoded.to_bytes().unwrap();
581 let restored = DeltaBitPacked::from_bytes(&bytes).unwrap();
582 assert_eq!(encoded.decode(), restored.decode());
583 }
584
585 #[test]
586 fn test_bits_needed() {
587 assert_eq!(BitPackedInts::bits_needed(0), 1);
588 assert_eq!(BitPackedInts::bits_needed(1), 1);
589 assert_eq!(BitPackedInts::bits_needed(2), 2);
590 assert_eq!(BitPackedInts::bits_needed(3), 2);
591 assert_eq!(BitPackedInts::bits_needed(4), 3);
592 assert_eq!(BitPackedInts::bits_needed(7), 3);
593 assert_eq!(BitPackedInts::bits_needed(8), 4);
594 assert_eq!(BitPackedInts::bits_needed(255), 8);
595 assert_eq!(BitPackedInts::bits_needed(256), 9);
596 assert_eq!(BitPackedInts::bits_needed(u64::MAX), 64);
597 }
598}