1use std::io;
23
24#[derive(Debug, Clone)]
29pub struct BitPackedInts {
30 data: Vec<u64>,
32 bits_per_value: u8,
34 count: usize,
36}
37
38impl BitPackedInts {
39 #[must_use]
41 pub fn pack(values: &[u64]) -> Self {
42 if values.is_empty() {
43 return Self {
44 data: Vec::new(),
45 bits_per_value: 0,
46 count: 0,
47 };
48 }
49
50 let max_value = values.iter().copied().max().unwrap_or(0);
51 let bits = Self::bits_needed(max_value);
52 Self::pack_with_bits(values, bits)
53 }
54
55 #[must_use]
61 pub fn pack_with_bits(values: &[u64], bits_per_value: u8) -> Self {
62 if values.is_empty() {
63 return Self {
64 data: Vec::new(),
65 bits_per_value,
66 count: 0,
67 };
68 }
69
70 if bits_per_value == 0 {
71 debug_assert!(values.iter().all(|&v| v == 0));
73 return Self {
74 data: Vec::new(),
75 bits_per_value: 0,
76 count: values.len(),
77 };
78 }
79
80 let bits = bits_per_value as usize;
81 let values_per_word = 64 / bits;
82 let num_words = (values.len() + values_per_word - 1) / values_per_word;
83
84 let mut data = vec![0u64; num_words];
85 let mask = if bits >= 64 {
86 u64::MAX
87 } else {
88 (1u64 << bits) - 1
89 };
90
91 for (i, &value) in values.iter().enumerate() {
92 debug_assert!(
93 value <= mask,
94 "Value {} doesn't fit in {} bits",
95 value,
96 bits_per_value
97 );
98
99 let word_idx = i / values_per_word;
100 let bit_offset = (i % values_per_word) * bits;
101 data[word_idx] |= (value & mask) << bit_offset;
102 }
103
104 Self {
105 data,
106 bits_per_value,
107 count: values.len(),
108 }
109 }
110
111 #[must_use]
113 pub fn unpack(&self) -> Vec<u64> {
114 if self.count == 0 {
115 return Vec::new();
116 }
117
118 if self.bits_per_value == 0 {
119 return vec![0u64; self.count];
120 }
121
122 let bits = self.bits_per_value as usize;
123 let values_per_word = 64 / bits;
124 let mask = if bits >= 64 {
125 u64::MAX
126 } else {
127 (1u64 << bits) - 1
128 };
129
130 let mut result = Vec::with_capacity(self.count);
131
132 for i in 0..self.count {
133 let word_idx = i / values_per_word;
134 let bit_offset = (i % values_per_word) * bits;
135 let value = (self.data[word_idx] >> bit_offset) & mask;
136 result.push(value);
137 }
138
139 result
140 }
141
142 #[must_use]
144 pub fn get(&self, index: usize) -> Option<u64> {
145 if index >= self.count {
146 return None;
147 }
148
149 if self.bits_per_value == 0 {
150 return Some(0);
151 }
152
153 let bits = self.bits_per_value as usize;
154 let values_per_word = 64 / bits;
155 let word_idx = index / values_per_word;
156 let bit_offset = (index % values_per_word) * bits;
157 let mask = if bits >= 64 {
158 u64::MAX
159 } else {
160 (1u64 << bits) - 1
161 };
162
163 Some((self.data[word_idx] >> bit_offset) & mask)
164 }
165
166 #[must_use]
168 pub fn len(&self) -> usize {
169 self.count
170 }
171
172 #[must_use]
174 pub fn is_empty(&self) -> bool {
175 self.count == 0
176 }
177
178 #[must_use]
180 pub fn bits_per_value(&self) -> u8 {
181 self.bits_per_value
182 }
183
184 #[must_use]
186 pub fn data(&self) -> &[u64] {
187 &self.data
188 }
189
190 #[must_use]
192 pub fn compression_ratio(&self) -> f64 {
193 if self.count == 0 {
194 return 1.0;
195 }
196
197 let original_size = self.count * 8; let packed_size = self.data.len() * 8;
199
200 if packed_size == 0 {
201 return f64::INFINITY; }
203
204 original_size as f64 / packed_size as f64
205 }
206
207 #[must_use]
209 pub fn bits_needed(value: u64) -> u8 {
210 if value == 0 {
211 1 } else {
213 64 - value.leading_zeros() as u8
214 }
215 }
216
217 pub fn to_bytes(&self) -> Vec<u8> {
219 let mut buf = Vec::with_capacity(1 + 4 + self.data.len() * 8);
220 buf.push(self.bits_per_value);
221 buf.extend_from_slice(&(self.count as u32).to_le_bytes());
222 for &word in &self.data {
223 buf.extend_from_slice(&word.to_le_bytes());
224 }
225 buf
226 }
227
228 pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
230 if bytes.len() < 5 {
231 return Err(io::Error::new(
232 io::ErrorKind::InvalidData,
233 "BitPackedInts too short",
234 ));
235 }
236
237 let bits_per_value = bytes[0];
238 let count = u32::from_le_bytes(bytes[1..5].try_into().unwrap()) as usize;
239
240 let num_words = if bits_per_value == 0 || count == 0 {
241 0
242 } else {
243 let values_per_word = 64 / bits_per_value as usize;
244 (count + values_per_word - 1) / values_per_word
245 };
246
247 if bytes.len() < 5 + num_words * 8 {
248 return Err(io::Error::new(
249 io::ErrorKind::InvalidData,
250 "BitPackedInts truncated",
251 ));
252 }
253
254 let mut data = Vec::with_capacity(num_words);
255 for i in 0..num_words {
256 let offset = 5 + i * 8;
257 let word = u64::from_le_bytes(bytes[offset..offset + 8].try_into().unwrap());
258 data.push(word);
259 }
260
261 Ok(Self {
262 data,
263 bits_per_value,
264 count,
265 })
266 }
267}
268
269#[derive(Debug, Clone)]
275pub struct DeltaBitPacked {
276 base: u64,
278 deltas: BitPackedInts,
280}
281
282impl DeltaBitPacked {
283 #[must_use]
285 pub fn encode(values: &[u64]) -> Self {
286 if values.is_empty() {
287 return Self {
288 base: 0,
289 deltas: BitPackedInts::pack(&[]),
290 };
291 }
292
293 let base = values[0];
294 let delta_values: Vec<u64> = values
295 .windows(2)
296 .map(|w| w[1].saturating_sub(w[0]))
297 .collect();
298
299 let deltas = BitPackedInts::pack(&delta_values);
300
301 Self { base, deltas }
302 }
303
304 #[must_use]
306 pub fn decode(&self) -> Vec<u64> {
307 if self.deltas.is_empty() && self.base == 0 {
308 return Vec::new();
309 }
310
311 let delta_values = self.deltas.unpack();
312 let mut result = Vec::with_capacity(delta_values.len() + 1);
313 let mut current = self.base;
314 result.push(current);
315
316 for delta in delta_values {
317 current = current.wrapping_add(delta);
318 result.push(current);
319 }
320
321 result
322 }
323
324 #[must_use]
326 pub fn len(&self) -> usize {
327 if self.deltas.is_empty() && self.base == 0 {
328 0
329 } else {
330 self.deltas.len() + 1
331 }
332 }
333
334 #[must_use]
336 pub fn is_empty(&self) -> bool {
337 self.deltas.is_empty() && self.base == 0
338 }
339
340 #[must_use]
342 pub fn base(&self) -> u64 {
343 self.base
344 }
345
346 #[must_use]
348 pub fn bits_per_delta(&self) -> u8 {
349 self.deltas.bits_per_value()
350 }
351
352 #[must_use]
354 pub fn compression_ratio(&self) -> f64 {
355 let count = self.len();
356 if count == 0 {
357 return 1.0;
358 }
359
360 let original_size = count * 8;
361 let packed_size = 8 + self.deltas.data().len() * 8; original_size as f64 / packed_size as f64
364 }
365
366 pub fn to_bytes(&self) -> Vec<u8> {
368 let delta_bytes = self.deltas.to_bytes();
369 let mut buf = Vec::with_capacity(8 + delta_bytes.len());
370 buf.extend_from_slice(&self.base.to_le_bytes());
371 buf.extend_from_slice(&delta_bytes);
372 buf
373 }
374
375 pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
377 if bytes.len() < 8 {
378 return Err(io::Error::new(
379 io::ErrorKind::InvalidData,
380 "DeltaBitPacked too short",
381 ));
382 }
383
384 let base = u64::from_le_bytes(bytes[0..8].try_into().unwrap());
385 let deltas = BitPackedInts::from_bytes(&bytes[8..])?;
386
387 Ok(Self { base, deltas })
388 }
389}
390
391#[cfg(test)]
392mod tests {
393 use super::*;
394
395 #[test]
396 fn test_bitpack_basic() {
397 let values = vec![5u64, 2, 3, 5, 5, 8, 2];
398 let packed = BitPackedInts::pack(&values);
399 let unpacked = packed.unpack();
400 assert_eq!(values, unpacked);
401 }
402
403 #[test]
404 fn test_bitpack_empty() {
405 let values: Vec<u64> = vec![];
406 let packed = BitPackedInts::pack(&values);
407 assert!(packed.is_empty());
408 assert_eq!(packed.unpack(), values);
409 }
410
411 #[test]
412 fn test_bitpack_single() {
413 let values = vec![42u64];
414 let packed = BitPackedInts::pack(&values);
415 assert_eq!(packed.len(), 1);
416 assert_eq!(packed.unpack(), values);
417 }
418
419 #[test]
420 fn test_bitpack_all_zeros() {
421 let values = vec![0u64; 100];
422 let packed = BitPackedInts::pack(&values);
423 assert_eq!(packed.bits_per_value(), 1);
424 assert_eq!(packed.unpack(), values);
425 }
426
427 #[test]
428 fn test_bitpack_powers_of_two() {
429 for bits in 1..=64u8 {
430 let max_val = if bits == 64 {
431 u64::MAX
432 } else {
433 (1u64 << bits) - 1
434 };
435 let values = vec![0, max_val / 2, max_val];
436 let packed = BitPackedInts::pack(&values);
437 assert_eq!(packed.bits_per_value(), bits);
438 assert_eq!(packed.unpack(), values);
439 }
440 }
441
442 #[test]
443 fn test_bitpack_get() {
444 let values = vec![1u64, 2, 3, 4, 5];
445 let packed = BitPackedInts::pack(&values);
446
447 for (i, &expected) in values.iter().enumerate() {
448 assert_eq!(packed.get(i), Some(expected));
449 }
450 assert_eq!(packed.get(100), None);
451 }
452
453 #[test]
454 fn test_bitpack_compression() {
455 let values: Vec<u64> = (0..100).map(|i| i % 16).collect();
457 let packed = BitPackedInts::pack(&values);
458 assert_eq!(packed.bits_per_value(), 4);
459 let ratio = packed.compression_ratio();
461 assert!(ratio > 10.0, "Expected ratio > 10, got {}", ratio);
462 }
463
464 #[test]
465 fn test_bitpack_serialization() {
466 let values = vec![1u64, 3, 7, 15, 31];
467 let packed = BitPackedInts::pack(&values);
468 let bytes = packed.to_bytes();
469 let restored = BitPackedInts::from_bytes(&bytes).unwrap();
470 assert_eq!(packed.unpack(), restored.unpack());
471 }
472
473 #[test]
474 fn test_delta_bitpacked_basic() {
475 let values = vec![100u64, 105, 107, 110, 115, 120, 128, 130];
476 let encoded = DeltaBitPacked::encode(&values);
477 let decoded = encoded.decode();
478 assert_eq!(values, decoded);
479 }
480
481 #[test]
482 fn test_delta_bitpacked_sequential() {
483 let values: Vec<u64> = (1000..1100).collect();
485 let encoded = DeltaBitPacked::encode(&values);
486 assert_eq!(encoded.bits_per_delta(), 1);
487 assert_eq!(encoded.decode(), values);
488
489 let ratio = encoded.compression_ratio();
491 assert!(ratio > 5.0, "Expected ratio > 5, got {}", ratio);
492 }
493
494 #[test]
495 fn test_delta_bitpacked_empty() {
496 let values: Vec<u64> = vec![];
497 let encoded = DeltaBitPacked::encode(&values);
498 assert!(encoded.is_empty());
499 assert_eq!(encoded.decode(), values);
500 }
501
502 #[test]
503 fn test_delta_bitpacked_single() {
504 let values = vec![42u64];
505 let encoded = DeltaBitPacked::encode(&values);
506 assert_eq!(encoded.len(), 1);
507 assert_eq!(encoded.decode(), values);
508 }
509
510 #[test]
511 fn test_delta_bitpacked_serialization() {
512 let values = vec![100u64, 105, 107, 110, 115];
513 let encoded = DeltaBitPacked::encode(&values);
514 let bytes = encoded.to_bytes();
515 let restored = DeltaBitPacked::from_bytes(&bytes).unwrap();
516 assert_eq!(encoded.decode(), restored.decode());
517 }
518
519 #[test]
520 fn test_bits_needed() {
521 assert_eq!(BitPackedInts::bits_needed(0), 1);
522 assert_eq!(BitPackedInts::bits_needed(1), 1);
523 assert_eq!(BitPackedInts::bits_needed(2), 2);
524 assert_eq!(BitPackedInts::bits_needed(3), 2);
525 assert_eq!(BitPackedInts::bits_needed(4), 3);
526 assert_eq!(BitPackedInts::bits_needed(7), 3);
527 assert_eq!(BitPackedInts::bits_needed(8), 4);
528 assert_eq!(BitPackedInts::bits_needed(255), 8);
529 assert_eq!(BitPackedInts::bits_needed(256), 9);
530 assert_eq!(BitPackedInts::bits_needed(u64::MAX), 64);
531 }
532}