vortex_btrblocks/schemes/string/
sparse.rs1use vortex_array::ArrayRef;
7use vortex_array::Canonical;
8use vortex_array::ExecutionCtx;
9use vortex_array::IntoArray;
10use vortex_array::arrays::PrimitiveArray;
11use vortex_array::arrays::primitive::PrimitiveArrayExt;
12use vortex_compressor::estimate::CompressionEstimate;
13use vortex_compressor::estimate::EstimateVerdict;
14use vortex_compressor::scheme::ChildSelection;
15use vortex_compressor::scheme::DescendantExclusion;
16use vortex_error::VortexResult;
17use vortex_sparse::Sparse;
18use vortex_sparse::SparseExt as _;
19
20use crate::ArrayAndStats;
21use crate::CascadingCompressor;
22use crate::CompressorContext;
23use crate::Scheme;
24use crate::SchemeExt;
25use crate::schemes::integer::IntDictScheme;
26use crate::schemes::integer::SparseScheme as IntSparseScheme;
27
28#[derive(Debug, Copy, Clone, PartialEq, Eq)]
32pub struct NullDominatedSparseScheme;
33
34impl Scheme for NullDominatedSparseScheme {
35 fn scheme_name(&self) -> &'static str {
36 "vortex.string.sparse"
37 }
38
39 fn matches(&self, canonical: &Canonical) -> bool {
40 canonical.dtype().is_utf8()
41 }
42
43 fn num_children(&self) -> usize {
45 1
46 }
47
48 fn descendant_exclusions(&self) -> Vec<DescendantExclusion> {
50 vec![
51 DescendantExclusion {
52 excluded: IntSparseScheme.id(),
53 children: ChildSelection::All,
54 },
55 DescendantExclusion {
56 excluded: IntDictScheme.id(),
57 children: ChildSelection::All,
58 },
59 ]
60 }
61
62 fn expected_compression_ratio(
63 &self,
64 data: &ArrayAndStats,
65 _compress_ctx: CompressorContext,
66 exec_ctx: &mut ExecutionCtx,
67 ) -> CompressionEstimate {
68 let len = data.array_len() as f64;
69 let stats = data.varbinview_stats(exec_ctx);
70 let value_count = stats.value_count();
71
72 if value_count == 0 {
74 return CompressionEstimate::Verdict(EstimateVerdict::Skip);
75 }
76
77 if stats.null_count() as f64 / len > 0.9 {
79 return CompressionEstimate::Verdict(EstimateVerdict::Ratio(len / value_count as f64));
80 }
81
82 CompressionEstimate::Verdict(EstimateVerdict::Skip)
84 }
85
86 fn compress(
87 &self,
88 compressor: &CascadingCompressor,
89 data: &ArrayAndStats,
90 compress_ctx: CompressorContext,
91 exec_ctx: &mut ExecutionCtx,
92 ) -> VortexResult<ArrayRef> {
93 let sparse_encoded = Sparse::encode(data.array(), None, exec_ctx)?;
95
96 if let Some(sparse) = sparse_encoded.as_opt::<Sparse>() {
97 let indices = sparse
99 .patches()
100 .indices()
101 .clone()
102 .execute::<PrimitiveArray>(exec_ctx)?
103 .narrow(exec_ctx)?;
104 let compressed_indices = compressor.compress_child(
105 &indices.into_array(),
106 &compress_ctx,
107 self.id(),
108 0,
109 exec_ctx,
110 )?;
111
112 Sparse::try_new(
113 compressed_indices,
114 sparse.patches().values().clone(),
115 sparse.len(),
116 sparse.fill_scalar().clone(),
117 )
118 .map(|a| a.into_array())
119 } else {
120 Ok(sparse_encoded)
121 }
122 }
123}