vortex_btrblocks/schemes/integer/
sequence.rs1use vortex_array::ArrayRef;
7use vortex_array::Canonical;
8use vortex_array::ExecutionCtx;
9use vortex_compressor::builtins::BinaryDictScheme;
10use vortex_compressor::builtins::FloatDictScheme;
11use vortex_compressor::builtins::IntDictScheme;
12use vortex_compressor::builtins::StringDictScheme;
13use vortex_compressor::estimate::CompressionEstimate;
14use vortex_compressor::estimate::DeferredEstimate;
15use vortex_compressor::estimate::EstimateScore;
16use vortex_compressor::estimate::EstimateVerdict;
17use vortex_compressor::scheme::AncestorExclusion;
18use vortex_compressor::scheme::ChildSelection;
19use vortex_error::VortexResult;
20use vortex_error::vortex_bail;
21use vortex_error::vortex_err;
22use vortex_sequence::sequence_encode;
23
24use crate::ArrayAndStats;
25use crate::CascadingCompressor;
26use crate::CompressorContext;
27use crate::Scheme;
28use crate::SchemeExt;
29
30#[derive(Debug, Copy, Clone, PartialEq, Eq)]
32pub struct SequenceScheme;
33
34impl Scheme for SequenceScheme {
35 fn scheme_name(&self) -> &'static str {
36 "vortex.int.sequence"
37 }
38
39 fn matches(&self, canonical: &Canonical) -> bool {
40 canonical.dtype().is_int()
41 }
42
43 fn ancestor_exclusions(&self) -> Vec<AncestorExclusion> {
47 vec![
48 AncestorExclusion {
49 ancestor: IntDictScheme.id(),
50 children: ChildSelection::One(1),
51 },
52 AncestorExclusion {
53 ancestor: FloatDictScheme.id(),
54 children: ChildSelection::One(1),
55 },
56 AncestorExclusion {
57 ancestor: StringDictScheme.id(),
58 children: ChildSelection::One(1),
59 },
60 AncestorExclusion {
61 ancestor: BinaryDictScheme.id(),
62 children: ChildSelection::One(1),
63 },
64 ]
65 }
66
67 fn expected_compression_ratio(
68 &self,
69 data: &ArrayAndStats,
70 compress_ctx: CompressorContext,
71 exec_ctx: &mut ExecutionCtx,
72 ) -> CompressionEstimate {
73 if compress_ctx.is_sample() {
76 return CompressionEstimate::Verdict(EstimateVerdict::Skip);
77 }
78 let stats = data.integer_stats(exec_ctx);
79
80 if stats.null_count() > 0 {
82 return CompressionEstimate::Verdict(EstimateVerdict::Skip);
83 }
84
85 if stats
88 .distinct_count()
89 .is_some_and(|count| count as usize != data.array_len())
90 {
91 return CompressionEstimate::Verdict(EstimateVerdict::Skip);
92 }
93
94 CompressionEstimate::Deferred(DeferredEstimate::Callback(Box::new(
97 |_compressor, data, best_so_far, _ctx, exec_ctx| {
98 let compressed_size = 2usize;
101 let max_ratio = data.array_len() as f64 / compressed_size as f64;
102
103 let threshold = best_so_far.and_then(EstimateScore::finite_ratio);
106 if threshold.is_some_and(|t| max_ratio <= t) {
107 return Ok(EstimateVerdict::Skip);
108 }
109
110 if sequence_encode(data.array_as_primitive(), exec_ctx)?.is_none() {
113 return Ok(EstimateVerdict::Skip);
114 }
115 Ok(EstimateVerdict::Ratio(max_ratio))
117 },
118 )))
119 }
120
121 fn compress(
122 &self,
123 _compressor: &CascadingCompressor,
124 data: &ArrayAndStats,
125 _compress_ctx: CompressorContext,
126 exec_ctx: &mut ExecutionCtx,
127 ) -> VortexResult<ArrayRef> {
128 let stats = data.integer_stats(exec_ctx);
129
130 if stats.null_count() > 0 {
131 vortex_bail!("sequence encoding does not support nulls");
132 }
133 sequence_encode(data.array_as_primitive(), exec_ctx)?
134 .ok_or_else(|| vortex_err!("cannot sequence encode array"))
135 }
136}