1pub mod dictionary;
5mod stats;
6
7use std::fmt::Debug;
8use std::hash::Hash;
9
10pub use stats::IntegerStats;
11use vortex_array::arrays::{ConstantArray, MaskedArray, PrimitiveArray, PrimitiveVTable};
12use vortex_array::vtable::ValidityHelper;
13use vortex_array::{ArrayRef, IntoArray, ToCanonical};
14use vortex_dict::DictArray;
15use vortex_error::{VortexResult, VortexUnwrap, vortex_bail, vortex_err};
16use vortex_fastlanes::{FoRArray, bit_width_histogram, bitpack_encode, find_best_bit_width};
17use vortex_runend::RunEndArray;
18use vortex_runend::compress::runend_encode;
19use vortex_scalar::Scalar;
20use vortex_sequence::sequence_encode;
21use vortex_sparse::{SparseArray, SparseVTable};
22use vortex_zigzag::{ZigZagArray, zigzag_encode};
23
24use crate::integer::dictionary::dictionary_encode;
25use crate::patches::compress_patches;
26use crate::rle::RLEScheme;
27use crate::{
28 Compressor, CompressorStats, GenerateStatsOptions, Scheme,
29 estimate_compression_ratio_with_sampling,
30};
31
32pub struct IntCompressor;
34
35impl Compressor for IntCompressor {
36 type ArrayVTable = PrimitiveVTable;
37 type SchemeType = dyn IntegerScheme;
38 type StatsType = IntegerStats;
39
40 fn schemes() -> &'static [&'static dyn IntegerScheme] {
41 &[
42 &ConstantScheme,
43 &FORScheme,
44 &ZigZagScheme,
45 &BitPackingScheme,
46 &SparseScheme,
47 &DictScheme,
48 &RunEndScheme,
49 &SequenceScheme,
50 &RLE_INTEGER_SCHEME,
51 ]
52 }
53
54 fn default_scheme() -> &'static Self::SchemeType {
55 &UncompressedScheme
56 }
57
58 fn dict_scheme_code() -> IntCode {
59 DICT_SCHEME
60 }
61}
62
63impl IntCompressor {
64 pub(crate) fn compress_no_dict(
65 array: &PrimitiveArray,
66 is_sample: bool,
67 allowed_cascading: usize,
68 excludes: &[IntCode],
69 ) -> VortexResult<ArrayRef> {
70 let stats = IntegerStats::generate_opts(
71 array,
72 GenerateStatsOptions {
73 count_distinct_values: false,
74 },
75 );
76
77 let scheme = Self::choose_scheme(&stats, is_sample, allowed_cascading, excludes)?;
78 let output = scheme.compress(&stats, is_sample, allowed_cascading, excludes)?;
79
80 if output.nbytes() < array.nbytes() {
81 Ok(output)
82 } else {
83 log::debug!("resulting tree too large: {}", output.display_tree());
84 Ok(array.to_array())
85 }
86 }
87}
88
89pub trait IntegerScheme: Scheme<StatsType = IntegerStats, CodeType = IntCode> {}
90
91impl<T> IntegerScheme for T where T: Scheme<StatsType = IntegerStats, CodeType = IntCode> {}
93
94#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
95pub struct IntCode(u8);
96
97const UNCOMPRESSED_SCHEME: IntCode = IntCode(0);
98const CONSTANT_SCHEME: IntCode = IntCode(1);
99const FOR_SCHEME: IntCode = IntCode(2);
100const ZIGZAG_SCHEME: IntCode = IntCode(3);
101const BITPACKING_SCHEME: IntCode = IntCode(4);
102const SPARSE_SCHEME: IntCode = IntCode(5);
103const DICT_SCHEME: IntCode = IntCode(6);
104const RUN_END_SCHEME: IntCode = IntCode(7);
105const SEQUENCE_SCHEME: IntCode = IntCode(8);
106const RUN_LENGTH_SCHEME: IntCode = IntCode(9);
107
108#[derive(Debug, Copy, Clone)]
109pub struct UncompressedScheme;
110
111#[derive(Debug, Copy, Clone)]
112pub struct ConstantScheme;
113
114#[derive(Debug, Copy, Clone)]
115pub struct FORScheme;
116
117#[derive(Debug, Copy, Clone)]
118pub struct ZigZagScheme;
119
120#[derive(Debug, Copy, Clone)]
121pub struct BitPackingScheme;
122
123#[derive(Debug, Copy, Clone)]
124pub struct SparseScheme;
125
126#[derive(Debug, Copy, Clone)]
127pub struct DictScheme;
128
129#[derive(Debug, Copy, Clone)]
130pub struct RunEndScheme;
131
132#[derive(Debug, Copy, Clone)]
133pub struct SequenceScheme;
134
135const RUN_END_THRESHOLD: u32 = 4;
137
138pub const RLE_INTEGER_SCHEME: RLEScheme<IntegerStats, IntCode> = RLEScheme::new(
139 RUN_LENGTH_SCHEME,
140 |values, is_sample, allowed_cascading, excludes| {
141 IntCompressor::compress_no_dict(values, is_sample, allowed_cascading, excludes)
142 },
143);
144
145impl Scheme for UncompressedScheme {
146 type StatsType = IntegerStats;
147 type CodeType = IntCode;
148
149 fn code(&self) -> IntCode {
150 UNCOMPRESSED_SCHEME
151 }
152
153 fn expected_compression_ratio(
154 &self,
155 _stats: &IntegerStats,
156 _is_sample: bool,
157 _allowed_cascading: usize,
158 _excludes: &[IntCode],
159 ) -> VortexResult<f64> {
160 Ok(1.0)
162 }
163
164 fn compress(
165 &self,
166 stats: &IntegerStats,
167 _is_sample: bool,
168 _allowed_cascading: usize,
169 _excludes: &[IntCode],
170 ) -> VortexResult<ArrayRef> {
171 Ok(stats.source().to_array())
172 }
173}
174
175impl Scheme for ConstantScheme {
176 type StatsType = IntegerStats;
177 type CodeType = IntCode;
178
179 fn code(&self) -> IntCode {
180 CONSTANT_SCHEME
181 }
182
183 fn is_constant(&self) -> bool {
184 true
185 }
186
187 fn expected_compression_ratio(
188 &self,
189 stats: &IntegerStats,
190 is_sample: bool,
191 _allowed_cascading: usize,
192 _excludes: &[IntCode],
193 ) -> VortexResult<f64> {
194 if is_sample {
196 return Ok(0.0);
197 }
198
199 if stats.distinct_values_count > 1 {
201 return Ok(0.0);
202 }
203
204 Ok(stats.value_count as f64)
205 }
206
207 fn compress(
208 &self,
209 stats: &IntegerStats,
210 _is_sample: bool,
211 _allowed_cascading: usize,
212 _excludes: &[IntCode],
213 ) -> VortexResult<ArrayRef> {
214 let scalar_idx = (0..stats.source().len()).position(|idx| stats.source().is_valid(idx));
215
216 match scalar_idx {
217 Some(idx) => {
218 let scalar = stats.source().scalar_at(idx);
219 let const_arr = ConstantArray::new(scalar, stats.src.len()).into_array();
220 if !stats.source().all_valid() {
221 Ok(MaskedArray::try_new(const_arr, stats.src.validity().clone())?.into_array())
222 } else {
223 Ok(const_arr)
224 }
225 }
226 None => Ok(ConstantArray::new(
227 Scalar::null(stats.src.dtype().clone()),
228 stats.src.len(),
229 )
230 .into_array()),
231 }
232 }
233}
234
235impl Scheme for FORScheme {
236 type StatsType = IntegerStats;
237 type CodeType = IntCode;
238
239 fn code(&self) -> IntCode {
240 FOR_SCHEME
241 }
242
243 fn expected_compression_ratio(
244 &self,
245 stats: &IntegerStats,
246 _is_sample: bool,
247 allowed_cascading: usize,
248 _excludes: &[IntCode],
249 ) -> VortexResult<f64> {
250 if allowed_cascading == 0 {
252 return Ok(0.0);
253 }
254
255 if stats.value_count == 0 {
257 return Ok(0.0);
258 }
259
260 if stats.typed.min_is_zero() {
262 return Ok(0.0);
263 }
264
265 let full_width: u32 = stats.src.ptype().bit_width().try_into().vortex_unwrap();
267 let bw = match stats.typed.max_minus_min().checked_ilog2() {
268 Some(l) => l + 1,
269 None => return Ok(0.0),
272 };
273
274 if full_width - bw < 8 {
276 return Ok(0.0);
277 }
278
279 Ok(full_width as f64 / bw as f64)
280 }
281
282 fn compress(
283 &self,
284 stats: &IntegerStats,
285 is_sample: bool,
286 _allowed_cascading: usize,
287 excludes: &[IntCode],
288 ) -> VortexResult<ArrayRef> {
289 let for_array = FoRArray::encode(stats.src.clone())?;
290 let biased = for_array.encoded().to_primitive();
291 let biased_stats = IntegerStats::generate_opts(
292 &biased,
293 GenerateStatsOptions {
294 count_distinct_values: false,
295 },
296 );
297
298 let compressed = BitPackingScheme.compress(&biased_stats, is_sample, 0, excludes)?;
303
304 Ok(FoRArray::try_new(compressed, for_array.reference_scalar().clone())?.into_array())
305 }
306}
307
308impl Scheme for ZigZagScheme {
309 type StatsType = IntegerStats;
310 type CodeType = IntCode;
311
312 fn code(&self) -> IntCode {
313 ZIGZAG_SCHEME
314 }
315
316 fn expected_compression_ratio(
317 &self,
318 stats: &IntegerStats,
319 is_sample: bool,
320 allowed_cascading: usize,
321 excludes: &[IntCode],
322 ) -> VortexResult<f64> {
323 if allowed_cascading == 0 {
325 return Ok(0.0);
326 }
327
328 if stats.value_count == 0 {
330 return Ok(0.0);
331 }
332
333 if !stats.typed.min_is_negative() {
335 return Ok(0.0);
336 }
337
338 estimate_compression_ratio_with_sampling(
340 self,
341 stats,
342 is_sample,
343 allowed_cascading,
344 excludes,
345 )
346 }
347
348 fn compress(
349 &self,
350 stats: &IntegerStats,
351 is_sample: bool,
352 allowed_cascading: usize,
353 excludes: &[IntCode],
354 ) -> VortexResult<ArrayRef> {
355 let zag = zigzag_encode(stats.src.clone())?;
357 let encoded = zag.encoded().to_primitive();
358
359 let mut new_excludes = vec![
362 ZigZagScheme.code(),
363 DictScheme.code(),
364 RunEndScheme.code(),
365 SparseScheme.code(),
366 ];
367 new_excludes.extend_from_slice(excludes);
368
369 let compressed =
370 IntCompressor::compress(&encoded, is_sample, allowed_cascading - 1, &new_excludes)?;
371
372 log::debug!("zigzag output: {}", compressed.display_tree());
373
374 Ok(ZigZagArray::try_new(compressed)?.into_array())
375 }
376}
377
378impl Scheme for BitPackingScheme {
379 type StatsType = IntegerStats;
380 type CodeType = IntCode;
381
382 fn code(&self) -> IntCode {
383 BITPACKING_SCHEME
384 }
385
386 #[allow(clippy::cast_possible_truncation)]
387 fn expected_compression_ratio(
388 &self,
389 stats: &IntegerStats,
390 is_sample: bool,
391 allowed_cascading: usize,
392 excludes: &[IntCode],
393 ) -> VortexResult<f64> {
394 if stats.typed.min_is_negative() {
396 return Ok(0.0);
397 }
398
399 if stats.value_count == 0 {
401 return Ok(0.0);
402 }
403
404 estimate_compression_ratio_with_sampling(
405 self,
406 stats,
407 is_sample,
408 allowed_cascading,
409 excludes,
410 )
411 }
412
413 #[allow(clippy::cast_possible_truncation)]
414 fn compress(
415 &self,
416 stats: &IntegerStats,
417 _is_sample: bool,
418 _allowed_cascading: usize,
419 _excludes: &[IntCode],
420 ) -> VortexResult<ArrayRef> {
421 let histogram = bit_width_histogram(stats.source())?;
422 let bw = find_best_bit_width(stats.source().ptype(), &histogram)?;
423 if bw as usize == stats.source().ptype().bit_width() {
425 return Ok(stats.source().clone().into_array());
426 }
427 let mut packed = bitpack_encode(stats.source(), bw, Some(&histogram))?;
428
429 let patches = packed.patches().map(compress_patches).transpose()?;
430 packed.replace_patches(patches);
431
432 Ok(packed.into_array())
433 }
434}
435
436impl Scheme for SparseScheme {
437 type StatsType = IntegerStats;
438 type CodeType = IntCode;
439
440 fn code(&self) -> IntCode {
441 SPARSE_SCHEME
442 }
443
444 fn expected_compression_ratio(
446 &self,
447 stats: &IntegerStats,
448 _is_sample: bool,
449 allowed_cascading: usize,
450 _excludes: &[IntCode],
451 ) -> VortexResult<f64> {
452 if allowed_cascading == 0 {
454 return Ok(0.0);
455 }
456
457 if stats.value_count == 0 {
458 return Ok(0.0);
460 }
461
462 if stats.null_count as f64 / stats.src.len() as f64 > 0.9 {
464 return Ok(stats.src.len() as f64 / stats.value_count as f64);
465 }
466
467 let (_, top_count) = stats.typed.top_value_and_count();
469
470 if top_count == stats.value_count {
471 return Ok(0.0);
473 }
474
475 let freq = top_count as f64 / stats.value_count as f64;
476 if freq >= 0.9 {
477 return Ok(stats.value_count as f64 / (stats.value_count - top_count) as f64);
479 }
480
481 Ok(0.0)
482 }
483
484 fn compress(
485 &self,
486 stats: &IntegerStats,
487 is_sample: bool,
488 allowed_cascading: usize,
489 excludes: &[IntCode],
490 ) -> VortexResult<ArrayRef> {
491 assert!(allowed_cascading > 0);
492 let (top_pvalue, top_count) = stats.typed.top_value_and_count();
493 if top_count as usize == stats.src.len() {
494 return Ok(ConstantArray::new(
496 Scalar::primitive_value(
497 top_pvalue,
498 top_pvalue.ptype(),
499 stats.src.dtype().nullability(),
500 ),
501 stats.src.len(),
502 )
503 .into_array());
504 }
505
506 let sparse_encoded = SparseArray::encode(
507 stats.src.as_ref(),
508 Some(Scalar::primitive_value(
509 top_pvalue,
510 top_pvalue.ptype(),
511 stats.src.dtype().nullability(),
512 )),
513 )?;
514
515 if let Some(sparse) = sparse_encoded.as_opt::<SparseVTable>() {
516 let mut new_excludes = vec![SparseScheme.code()];
518 new_excludes.extend_from_slice(excludes);
519
520 let compressed_values = IntCompressor::compress_no_dict(
521 &sparse.patches().values().to_primitive(),
522 is_sample,
523 allowed_cascading - 1,
524 &new_excludes,
525 )?;
526
527 let indices = sparse.patches().indices().to_primitive().narrow()?;
528
529 let compressed_indices = IntCompressor::compress_no_dict(
530 &indices,
531 is_sample,
532 allowed_cascading - 1,
533 &new_excludes,
534 )?;
535
536 SparseArray::try_new(
537 compressed_indices,
538 compressed_values,
539 sparse.len(),
540 sparse.fill_scalar().clone(),
541 )
542 .map(|a| a.into_array())
543 } else {
544 Ok(sparse_encoded)
545 }
546 }
547}
548
549impl Scheme for DictScheme {
550 type StatsType = IntegerStats;
551 type CodeType = IntCode;
552
553 fn code(&self) -> IntCode {
554 DICT_SCHEME
555 }
556
557 fn expected_compression_ratio(
558 &self,
559 stats: &IntegerStats,
560 _is_sample: bool,
561 allowed_cascading: usize,
562 _excludes: &[IntCode],
563 ) -> VortexResult<f64> {
564 if allowed_cascading == 0 {
566 return Ok(0.0);
567 }
568
569 if stats.value_count == 0 {
570 return Ok(0.0);
571 }
572
573 if stats.distinct_values_count > stats.value_count / 2 {
575 return Ok(0.0);
576 }
577
578 let values_size = stats.source().ptype().bit_width() * stats.distinct_values_count as usize;
580
581 let codes_bw = usize::BITS - stats.distinct_values_count.leading_zeros();
583
584 let n_runs = stats.value_count / stats.average_run_length;
585
586 let codes_size_bp = (codes_bw * stats.value_count) as usize;
588 let codes_size_rle_bp = (codes_bw + 32) * n_runs;
589
590 let codes_size = usize::min(codes_size_bp, codes_size_rle_bp as usize);
591
592 let before = stats.value_count as usize * stats.source().ptype().bit_width();
593
594 Ok(before as f64 / (values_size + codes_size) as f64)
595 }
596
597 fn compress(
598 &self,
599 stats: &IntegerStats,
600 is_sample: bool,
601 allowed_cascading: usize,
602 excludes: &[IntCode],
603 ) -> VortexResult<ArrayRef> {
604 assert!(allowed_cascading > 0);
605
606 let dict = dictionary_encode(stats);
610
611 let mut new_excludes = vec![DICT_SCHEME, SEQUENCE_SCHEME];
614 new_excludes.extend_from_slice(excludes);
615
616 let compressed_codes = IntCompressor::compress_no_dict(
617 &dict.codes().to_primitive().narrow()?,
618 is_sample,
619 allowed_cascading - 1,
620 &new_excludes,
621 )?;
622
623 unsafe {
625 Ok(DictArray::new_unchecked(compressed_codes, dict.values().clone()).into_array())
626 }
627 }
628}
629
630impl Scheme for RunEndScheme {
631 type StatsType = IntegerStats;
632 type CodeType = IntCode;
633
634 fn code(&self) -> IntCode {
635 RUN_END_SCHEME
636 }
637
638 fn expected_compression_ratio(
639 &self,
640 stats: &IntegerStats,
641 is_sample: bool,
642 allowed_cascading: usize,
643 excludes: &[IntCode],
644 ) -> VortexResult<f64> {
645 if stats.average_run_length < RUN_END_THRESHOLD {
647 return Ok(0.0);
648 }
649
650 if allowed_cascading == 0 {
651 return Ok(0.0);
652 }
653
654 estimate_compression_ratio_with_sampling(
656 self,
657 stats,
658 is_sample,
659 allowed_cascading,
660 excludes,
661 )
662 }
663
664 fn compress(
665 &self,
666 stats: &IntegerStats,
667 is_sample: bool,
668 allowed_cascading: usize,
669 excludes: &[IntCode],
670 ) -> VortexResult<ArrayRef> {
671 assert!(allowed_cascading > 0);
672
673 let (ends, values) = runend_encode(&stats.src);
675
676 let mut new_excludes = vec![RunEndScheme.code(), DictScheme.code()];
677 new_excludes.extend_from_slice(excludes);
678
679 let ends_stats = IntegerStats::generate_opts(
680 &ends.to_primitive(),
681 GenerateStatsOptions {
682 count_distinct_values: false,
683 },
684 );
685 let ends_scheme = IntCompressor::choose_scheme(
686 &ends_stats,
687 is_sample,
688 allowed_cascading - 1,
689 &new_excludes,
690 )?;
691 let compressed_ends =
692 ends_scheme.compress(&ends_stats, is_sample, allowed_cascading - 1, &new_excludes)?;
693
694 let compressed_values = IntCompressor::compress_no_dict(
695 &values.to_primitive(),
696 is_sample,
697 allowed_cascading - 1,
698 &new_excludes,
699 )?;
700
701 unsafe {
703 Ok(
704 RunEndArray::new_unchecked(compressed_ends, compressed_values, 0, stats.src.len())
705 .into_array(),
706 )
707 }
708 }
709}
710
711impl Scheme for SequenceScheme {
712 type StatsType = IntegerStats;
713 type CodeType = IntCode;
714
715 fn code(&self) -> Self::CodeType {
716 SEQUENCE_SCHEME
717 }
718
719 fn expected_compression_ratio(
720 &self,
721 stats: &Self::StatsType,
722 _is_sample: bool,
723 _allowed_cascading: usize,
724 _excludes: &[Self::CodeType],
725 ) -> VortexResult<f64> {
726 if stats.null_count > 0 {
727 return Ok(0.0);
728 }
729 Ok(sequence_encode(&stats.src)?
732 .map(|_| stats.src.len() as f64 / 2.0)
733 .unwrap_or(0.0))
734 }
735
736 fn compress(
737 &self,
738 stats: &Self::StatsType,
739 _is_sample: bool,
740 _allowed_cascading: usize,
741 _excludes: &[Self::CodeType],
742 ) -> VortexResult<ArrayRef> {
743 if stats.null_count > 0 {
744 vortex_bail!("sequence encoding does not support nulls");
745 }
746 sequence_encode(&stats.src)?.ok_or_else(|| vortex_err!("cannot sequence encode array"))
747 }
748}
749
750#[cfg(test)]
751mod tests {
752 use std::iter;
753
754 use itertools::Itertools;
755 use log::LevelFilter;
756 use rand::rngs::StdRng;
757 use rand::{RngCore, SeedableRng};
758 use vortex_array::arrays::PrimitiveArray;
759 use vortex_array::validity::Validity;
760 use vortex_array::vtable::ValidityHelper;
761 use vortex_array::{Array, IntoArray, ToCanonical};
762 use vortex_buffer::{Buffer, BufferMut, buffer, buffer_mut};
763 use vortex_dict::DictEncoding;
764 use vortex_sequence::SequenceEncoding;
765 use vortex_sparse::SparseEncoding;
766 use vortex_utils::aliases::hash_set::HashSet;
767
768 use crate::integer::{
769 IntCompressor, IntegerStats, RLE_INTEGER_SCHEME, SequenceScheme, SparseScheme,
770 };
771 use crate::{Compressor, CompressorStats, Scheme};
772
773 #[test]
774 fn test_empty() {
775 let result = IntCompressor::compress(
777 &PrimitiveArray::new(Buffer::<i32>::empty(), Validity::NonNullable),
778 false,
779 3,
780 &[],
781 )
782 .unwrap();
783
784 assert!(result.is_empty());
785 }
786
787 #[test]
788 fn test_dict_encodable() {
789 let mut codes = BufferMut::<i32>::with_capacity(65_535);
790 let numbers = [0, 10, 50, 100, 1000, 3000]
794 .into_iter()
795 .map(|i| 1234 * i)
796 .collect_vec();
797
798 let mut rng = StdRng::seed_from_u64(1u64);
799 while codes.len() < 64000 {
800 let run_length = rng.next_u32() % 5;
801 let value = numbers[rng.next_u32() as usize % numbers.len()];
802 for _ in 0..run_length {
803 codes.push(value);
804 }
805 }
806
807 let primitive = codes.freeze().into_array().to_primitive();
808 let compressed = IntCompressor::compress(&primitive, false, 3, &[]).unwrap();
809 assert_eq!(compressed.encoding_id(), DictEncoding.id());
810 }
811
812 #[test]
813 fn test_window_name() {
814 env_logger::builder()
815 .filter(None, LevelFilter::Debug)
816 .try_init()
817 .ok();
818
819 let mut values = buffer_mut![-1i32; 1_000_000];
821 let mut visited = HashSet::new();
822 let mut rng = StdRng::seed_from_u64(1u64);
823 while visited.len() < 223 {
824 let random = (rng.next_u32() as usize) % 1_000_000;
825 if visited.contains(&random) {
826 continue;
827 }
828 visited.insert(random);
829 values[random] = 5 * (rng.next_u64() % 100) as i32;
831 }
832
833 let array = values.freeze().into_array().to_primitive();
834 let compressed = IntCompressor::compress(&array, false, 3, &[]).unwrap();
835 log::info!("WindowName compressed: {}", compressed.display_tree());
836 }
837
838 #[test]
839 fn sparse_with_nulls() {
840 let array = PrimitiveArray::new(
841 buffer![189u8, 189, 189, 0, 46],
842 Validity::from_iter(vec![true, true, true, true, false]),
843 );
844 let compressed = SparseScheme
845 .compress(&IntegerStats::generate(&array), false, 3, &[])
846 .unwrap();
847 assert_eq!(compressed.encoding_id(), SparseEncoding.id());
848 let decoded = compressed.to_primitive();
849 let expected = [189u8, 189, 189, 0, 0];
850 assert_eq!(decoded.as_slice::<u8>(), &expected);
851 assert_eq!(decoded.validity(), array.validity());
852 }
853
854 #[test]
855 fn sparse_mostly_nulls() {
856 let array = PrimitiveArray::new(
857 buffer![189u8, 189, 189, 189, 189, 189, 189, 189, 189, 0, 46],
858 Validity::from_iter(vec![
859 false, false, false, false, false, false, false, false, false, false, true,
860 ]),
861 );
862 let compressed = SparseScheme
863 .compress(&IntegerStats::generate(&array), false, 3, &[])
864 .unwrap();
865 assert_eq!(compressed.encoding_id(), SparseEncoding.id());
866 let decoded = compressed.to_primitive();
867 let expected = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 46];
868 assert_eq!(decoded.as_slice::<u8>(), &expected);
869 assert_eq!(decoded.validity(), array.validity());
870 }
871
872 #[test]
873 fn nullable_sequence() {
874 let values = (0i32..20).step_by(7).collect_vec();
875 let array = PrimitiveArray::from_option_iter(values.clone().into_iter().map(Some));
876 let compressed = SequenceScheme
877 .compress(&IntegerStats::generate(&array), false, 3, &[])
878 .unwrap();
879 assert_eq!(compressed.encoding_id(), SequenceEncoding.id());
880 let decoded = compressed.to_primitive();
881 assert_eq!(decoded.as_slice::<i32>(), values.as_slice());
882 }
883
884 #[test]
885 fn test_rle_compression() {
886 let mut values = Vec::new();
887 values.extend(iter::repeat_n(42i32, 100));
888 values.extend(iter::repeat_n(123i32, 200));
889 values.extend(iter::repeat_n(987i32, 150));
890
891 let array = PrimitiveArray::new(Buffer::copy_from(&values), Validity::NonNullable);
892 let compressed = RLE_INTEGER_SCHEME
893 .compress(&IntegerStats::generate(&array), false, 3, &[])
894 .unwrap();
895
896 let decoded = compressed.to_primitive();
897 assert_eq!(decoded.as_slice::<i32>(), values.as_slice());
898 }
899}