vortex_compressor/builtins/constant/
string.rs1use vortex_array::ArrayRef;
7use vortex_array::Canonical;
8use vortex_array::aggregate_fn::fns::is_constant::is_constant;
9use vortex_error::VortexResult;
10
11use super::is_utf8_string;
12use crate::CascadingCompressor;
13use crate::builtins::StringConstantScheme;
14use crate::builtins::constant::compress_constant_array_with_validity;
15use crate::ctx::CompressorContext;
16use crate::estimate::CompressionEstimate;
17use crate::estimate::DeferredEstimate;
18use crate::estimate::EstimateVerdict;
19use crate::scheme::Scheme;
20use crate::stats::ArrayAndStats;
21
22impl Scheme for StringConstantScheme {
23 fn scheme_name(&self) -> &'static str {
24 "vortex.string.constant"
25 }
26
27 fn matches(&self, canonical: &Canonical) -> bool {
28 is_utf8_string(canonical)
29 }
30
31 fn expected_compression_ratio(
32 &self,
33 data: &mut ArrayAndStats,
34 ctx: CompressorContext,
35 ) -> CompressionEstimate {
36 if ctx.is_sample() {
39 return CompressionEstimate::Verdict(EstimateVerdict::Skip);
40 }
41
42 let array_len = data.array().len();
43 let stats = data.string_stats();
44
45 if stats.value_count() == 0 {
47 debug_assert_eq!(stats.null_count() as usize, array_len);
48 return CompressionEstimate::Verdict(EstimateVerdict::AlwaysUse);
49 }
50
51 if stats.estimated_distinct_count().is_some_and(|c| c > 1) {
54 return CompressionEstimate::Verdict(EstimateVerdict::Skip);
55 }
56
57 CompressionEstimate::Deferred(DeferredEstimate::Callback(Box::new(
61 |compressor, data, _ctx| {
62 if is_constant(data.array(), &mut compressor.execution_ctx())? {
63 Ok(EstimateVerdict::AlwaysUse)
64 } else {
65 Ok(EstimateVerdict::Skip)
66 }
67 },
68 )))
69 }
70
71 fn compress(
72 &self,
73 _compressor: &CascadingCompressor,
74 data: &mut ArrayAndStats,
75 _ctx: CompressorContext,
76 ) -> VortexResult<ArrayRef> {
77 compress_constant_array_with_validity(data.array())
78 }
79}