Skip to main content

vortex_btrblocks/schemes/float/
sparse.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Sparse encoding for null-dominated float arrays.
5
6use vortex_array::ArrayRef;
7use vortex_array::Canonical;
8use vortex_array::ExecutionCtx;
9use vortex_array::IntoArray;
10use vortex_array::arrays::PrimitiveArray;
11use vortex_array::arrays::primitive::PrimitiveArrayExt;
12use vortex_compressor::estimate::CompressionEstimate;
13use vortex_compressor::estimate::EstimateVerdict;
14use vortex_compressor::scheme::ChildSelection;
15use vortex_compressor::scheme::DescendantExclusion;
16use vortex_error::VortexResult;
17use vortex_sparse::Sparse;
18use vortex_sparse::SparseExt as _;
19
20use crate::ArrayAndStats;
21use crate::CascadingCompressor;
22use crate::CompressorContext;
23use crate::Scheme;
24use crate::SchemeExt;
25use crate::schemes::integer::SparseScheme as IntSparseScheme;
26
/// Compression scheme that sparse-encodes float arrays consisting mostly of nulls.
///
/// Mirrors the integer `SparseScheme`, except that it is only selected for null-dominated
/// arrays.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct NullDominatedSparseScheme;
32
33impl Scheme for NullDominatedSparseScheme {
34    fn scheme_name(&self) -> &'static str {
35        "vortex.float.sparse"
36    }
37
38    fn matches(&self, canonical: &Canonical) -> bool {
39        canonical.dtype().is_float()
40    }
41
42    /// Children: indices=0.
43    fn num_children(&self) -> usize {
44        1
45    }
46
47    /// The indices of a null-dominated sparse array should not be sparse-encoded again.
48    fn descendant_exclusions(&self) -> Vec<DescendantExclusion> {
49        vec![DescendantExclusion {
50            excluded: IntSparseScheme.id(),
51            children: ChildSelection::All,
52        }]
53    }
54
55    fn expected_compression_ratio(
56        &self,
57        data: &ArrayAndStats,
58        _compress_ctx: CompressorContext,
59        exec_ctx: &mut ExecutionCtx,
60    ) -> CompressionEstimate {
61        let len = data.array_len() as f64;
62        let stats = data.float_stats(exec_ctx);
63        let value_count = stats.value_count();
64
65        // All-null arrays should be compressed as constant instead anyways.
66        if value_count == 0 {
67            return CompressionEstimate::Verdict(EstimateVerdict::Skip);
68        }
69
70        // If the majority (90%) of values is null, this will compress well.
71        if stats.null_count() as f64 / len > 0.9 {
72            return CompressionEstimate::Verdict(EstimateVerdict::Ratio(len / value_count as f64));
73        }
74
75        // Otherwise we don't go this route.
76        CompressionEstimate::Verdict(EstimateVerdict::Skip)
77    }
78
79    fn compress(
80        &self,
81        compressor: &CascadingCompressor,
82        data: &ArrayAndStats,
83        compress_ctx: CompressorContext,
84        exec_ctx: &mut ExecutionCtx,
85    ) -> VortexResult<ArrayRef> {
86        // We pass None as we only run this pathway for NULL-dominated float arrays.
87        let sparse_encoded = Sparse::encode(data.array(), None, exec_ctx)?;
88
89        if let Some(sparse) = sparse_encoded.as_opt::<Sparse>() {
90            let indices = sparse
91                .patches()
92                .indices()
93                .clone()
94                .execute::<PrimitiveArray>(exec_ctx)?
95                .narrow(exec_ctx)?;
96            let compressed_indices = compressor.compress_child(
97                &indices.into_array(),
98                &compress_ctx,
99                self.id(),
100                0,
101                exec_ctx,
102            )?;
103
104            Sparse::try_new(
105                compressed_indices,
106                sparse.patches().values().clone(),
107                sparse.len(),
108                sparse.fill_scalar().clone(),
109            )
110            .map(|a| a.into_array())
111        } else {
112            Ok(sparse_encoded)
113        }
114    }
115}