vortex_compressor/builtins/constant/
string.rs1use vortex_array::ArrayRef;
7use vortex_array::Canonical;
8use vortex_array::aggregate_fn::fns::is_constant::is_constant;
9use vortex_error::VortexResult;
10
11use super::is_utf8_string;
12use crate::CascadingCompressor;
13use crate::builtins::StringConstantScheme;
14use crate::builtins::constant::compress_constant_array_with_validity;
15use crate::ctx::CompressorContext;
16use crate::estimate::CompressionEstimate;
17use crate::scheme::Scheme;
18use crate::stats::ArrayAndStats;
19
20impl Scheme for StringConstantScheme {
21 fn scheme_name(&self) -> &'static str {
22 "vortex.string.constant"
23 }
24
25 fn matches(&self, canonical: &Canonical) -> bool {
26 is_utf8_string(canonical)
27 }
28
29 fn expected_compression_ratio(
30 &self,
31 data: &mut ArrayAndStats,
32 ctx: CompressorContext,
33 ) -> CompressionEstimate {
34 if ctx.is_sample() {
37 return CompressionEstimate::Skip;
38 }
39
40 let array_len = data.array().len();
41 let stats = data.string_stats();
42
43 if stats.value_count() == 0 {
45 debug_assert_eq!(stats.null_count() as usize, array_len);
46 return CompressionEstimate::AlwaysUse;
47 }
48
49 if stats.estimated_distinct_count().is_some_and(|c| c > 1) {
52 return CompressionEstimate::Skip;
53 }
54
55 CompressionEstimate::Estimate(Box::new(|compressor, data, _ctx| {
59 if is_constant(data.array(), &mut compressor.execution_ctx())? {
60 Ok(CompressionEstimate::AlwaysUse)
61 } else {
62 Ok(CompressionEstimate::Skip)
63 }
64 }))
65 }
66
67 fn compress(
68 &self,
69 _compressor: &CascadingCompressor,
70 data: &mut ArrayAndStats,
71 _ctx: CompressorContext,
72 ) -> VortexResult<ArrayRef> {
73 compress_constant_array_with_validity(data.array())
74 }
75}