vortex_array/compute/
is_constant.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::sync::LazyLock;
6
7use arcref::ArcRef;
8use vortex_dtype::{DType, Nullability};
9use vortex_error::{VortexError, VortexResult, vortex_bail, vortex_err};
10use vortex_scalar::Scalar;
11
12use crate::Array;
13use crate::arrays::{ConstantVTable, NullVTable};
14use crate::compute::{ComputeFn, ComputeFnVTable, InvocationArgs, Kernel, Options, Output};
15use crate::stats::{Precision, Stat, StatsProvider, StatsProviderExt};
16use crate::vtable::VTable;
17
18static IS_CONSTANT_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
19    let compute = ComputeFn::new("is_constant".into(), ArcRef::new_ref(&IsConstant));
20    for kernel in inventory::iter::<IsConstantKernelRef> {
21        compute.register_kernel(kernel.0.clone());
22    }
23    compute
24});
25
26pub(crate) fn warm_up_vtable() -> usize {
27    IS_CONSTANT_FN.kernels().len()
28}
29
30/// Computes whether an array has constant values. If the array's encoding doesn't implement the
31/// relevant VTable, it'll try and canonicalize in order to make a determination.
32///
33/// An array is constant IFF at least one of the following conditions apply:
34/// 1. It has at least one element (**Note** - an empty array isn't constant).
35/// 1. It's encoded as a [`crate::arrays::ConstantArray`] or [`crate::arrays::NullArray`]
36/// 1. Has an exact statistic attached to it, saying its constant.
37/// 1. Is all invalid.
38/// 1. Is all valid AND has minimum and maximum statistics that are equal.
39///
40/// If the array has some null values but is not all null, it'll never be constant.
41///
42/// Returns `Ok(None)` if we could not determine whether the array is constant, e.g. if
43/// canonicalization is disabled and the no kernel exists for the array's encoding.
44pub fn is_constant(array: &dyn Array) -> VortexResult<Option<bool>> {
45    let opts = IsConstantOpts::default();
46    is_constant_opts(array, &opts)
47}
48
49/// Computes whether an array has constant values. Configurable by [`IsConstantOpts`].
50///
51/// Please see [`is_constant`] for a more detailed explanation of its behavior.
52pub fn is_constant_opts(array: &dyn Array, options: &IsConstantOpts) -> VortexResult<Option<bool>> {
53    Ok(IS_CONSTANT_FN
54        .invoke(&InvocationArgs {
55            inputs: &[array.into()],
56            options,
57        })?
58        .unwrap_scalar()?
59        .as_bool()
60        .value())
61}
62
63struct IsConstant;
64
65impl ComputeFnVTable for IsConstant {
66    fn invoke(
67        &self,
68        args: &InvocationArgs,
69        kernels: &[ArcRef<dyn Kernel>],
70    ) -> VortexResult<Output> {
71        let IsConstantArgs { array, options } = IsConstantArgs::try_from(args)?;
72
73        // We try and rely on some easy-to-get stats
74        if let Some(Precision::Exact(value)) = array.statistics().get_as::<bool>(Stat::IsConstant) {
75            return Ok(Scalar::from(Some(value)).into());
76        }
77
78        let value = is_constant_impl(array, options, kernels)?;
79
80        if options.cost == Cost::Canonicalize {
81            // When we run linear canonicalize, there we must always return an exact answer.
82            assert!(
83                value.is_some(),
84                "is constant in array {array} canonicalize returned None"
85            );
86        }
87
88        // Only if we made a determination do we update the stats.
89        if let Some(value) = value {
90            array
91                .statistics()
92                .set(Stat::IsConstant, Precision::Exact(value.into()));
93        }
94
95        Ok(Scalar::from(value).into())
96    }
97
98    fn return_dtype(&self, _args: &InvocationArgs) -> VortexResult<DType> {
99        // We always return a nullable boolean where `null` indicates we couldn't determine
100        // whether the array is constant.
101        Ok(DType::Bool(Nullability::Nullable))
102    }
103
104    fn return_len(&self, _args: &InvocationArgs) -> VortexResult<usize> {
105        Ok(1)
106    }
107
108    fn is_elementwise(&self) -> bool {
109        false
110    }
111}
112
113fn is_constant_impl(
114    array: &dyn Array,
115    options: &IsConstantOpts,
116    kernels: &[ArcRef<dyn Kernel>],
117) -> VortexResult<Option<bool>> {
118    match array.len() {
119        // Our current semantics are that we can always get a value out of a constant array. We might want to change that in the future.
120        0 => return Ok(Some(false)),
121        // Array of length 1 is always constant.
122        1 => return Ok(Some(true)),
123        _ => {}
124    }
125
126    // Constant and null arrays are always constant
127    if array.is::<ConstantVTable>() || array.is::<NullVTable>() {
128        return Ok(Some(true));
129    }
130
131    let all_invalid = array.all_invalid();
132    if all_invalid {
133        return Ok(Some(true));
134    }
135
136    let all_valid = array.all_valid();
137
138    // If we have some nulls, array can't be constant
139    if !all_valid && !all_invalid {
140        return Ok(Some(false));
141    }
142
143    // We already know here that the array is all valid, so we check for min/max stats.
144    let min = array.statistics().get(Stat::Min);
145    let max = array.statistics().get(Stat::Max);
146
147    if let Some((min, max)) = min.zip(max) {
148        // min/max are equal and exact and there are no NaNs
149        if min.is_exact()
150            && min == max
151            && (Stat::NaNCount.dtype(array.dtype()).is_none()
152                || array.statistics().get_as::<u64>(Stat::NaNCount) == Some(Precision::exact(0u64)))
153        {
154            return Ok(Some(true));
155        }
156    }
157
158    assert!(
159        all_valid,
160        "All values must be valid as an invariant of the VTable."
161    );
162    let args = InvocationArgs {
163        inputs: &[array.into()],
164        options,
165    };
166    for kernel in kernels {
167        if let Some(output) = kernel.invoke(&args)? {
168            return Ok(output.unwrap_scalar()?.as_bool().value());
169        }
170    }
171    if let Some(output) = array.invoke(&IS_CONSTANT_FN, &args)? {
172        return Ok(output.unwrap_scalar()?.as_bool().value());
173    }
174
175    log::debug!(
176        "No is_constant implementation found for {}",
177        array.encoding_id()
178    );
179
180    if options.cost == Cost::Canonicalize && !array.is_canonical() {
181        let array = array.to_canonical();
182        let is_constant = is_constant_opts(array.as_ref(), options)?;
183        return Ok(is_constant);
184    }
185
186    // Otherwise, we cannot determine if the array is constant.
187    Ok(None)
188}
189
190pub struct IsConstantKernelRef(ArcRef<dyn Kernel>);
191inventory::collect!(IsConstantKernelRef);
192
193pub trait IsConstantKernel: VTable {
194    /// # Preconditions
195    ///
196    /// * All values are valid
197    /// * array.len() > 1
198    ///
199    /// Returns `Ok(None)` to signal we couldn't make an exact determination.
200    fn is_constant(&self, array: &Self::Array, opts: &IsConstantOpts)
201    -> VortexResult<Option<bool>>;
202}
203
204#[derive(Debug)]
205pub struct IsConstantKernelAdapter<V: VTable>(pub V);
206
207impl<V: VTable + IsConstantKernel> IsConstantKernelAdapter<V> {
208    pub const fn lift(&'static self) -> IsConstantKernelRef {
209        IsConstantKernelRef(ArcRef::new_ref(self))
210    }
211}
212
213impl<V: VTable + IsConstantKernel> Kernel for IsConstantKernelAdapter<V> {
214    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
215        let args = IsConstantArgs::try_from(args)?;
216        let Some(array) = args.array.as_opt::<V>() else {
217            return Ok(None);
218        };
219        let is_constant = V::is_constant(&self.0, array, args.options)?;
220        Ok(Some(Scalar::from(is_constant).into()))
221    }
222}
223
224struct IsConstantArgs<'a> {
225    array: &'a dyn Array,
226    options: &'a IsConstantOpts,
227}
228
229impl<'a> TryFrom<&InvocationArgs<'a>> for IsConstantArgs<'a> {
230    type Error = VortexError;
231
232    fn try_from(value: &InvocationArgs<'a>) -> Result<Self, Self::Error> {
233        if value.inputs.len() != 1 {
234            vortex_bail!("Expected 1 input, found {}", value.inputs.len());
235        }
236        let array = value.inputs[0]
237            .array()
238            .ok_or_else(|| vortex_err!("Expected input 0 to be an array"))?;
239        let options = value
240            .options
241            .as_any()
242            .downcast_ref::<IsConstantOpts>()
243            .ok_or_else(|| vortex_err!("Expected options to be of type IsConstantOpts"))?;
244        Ok(Self { array, options })
245    }
246}
247
248/// When calling `is_constant` the children are all checked for constantness.
249/// This enum decide at each precision/cost level the constant check should run as.
250/// The cost increase as we move down the list.
251#[derive(Clone, Copy, Debug, Eq, PartialEq)]
252pub enum Cost {
253    /// Only apply constant time computation to estimate constantness.
254    Negligible,
255    /// Allow the encoding to do a linear amount of work to determine is constant.
256    /// Each encoding should implement short-circuiting make the common case runtime well below
257    /// a linear scan.
258    Specialized,
259    /// Same as linear, but when necessary canonicalize the array and check is constant.
260    /// This *must* always return a known answer.
261    Canonicalize,
262}
263
264/// Configuration for [`is_constant_opts`] operations.
265#[derive(Clone, Debug)]
266pub struct IsConstantOpts {
267    /// What precision cost trade off should be used
268    pub cost: Cost,
269}
270
271impl Default for IsConstantOpts {
272    fn default() -> Self {
273        Self {
274            cost: Cost::Canonicalize,
275        }
276    }
277}
278
279impl Options for IsConstantOpts {
280    fn as_any(&self) -> &dyn Any {
281        self
282    }
283}
284
285impl IsConstantOpts {
286    pub fn is_negligible_cost(&self) -> bool {
287        self.cost == Cost::Negligible
288    }
289}
290
291#[cfg(test)]
292mod tests {
293    use vortex_buffer::buffer;
294
295    use crate::IntoArray as _;
296    use crate::arrays::PrimitiveArray;
297    use crate::stats::Stat;
298
299    #[test]
300    fn is_constant_min_max_no_nan() {
301        let arr = buffer![0, 1].into_array();
302        arr.statistics()
303            .compute_all(&[Stat::Min, Stat::Max])
304            .unwrap();
305        assert!(!arr.is_constant());
306
307        let arr = buffer![0, 0].into_array();
308        arr.statistics()
309            .compute_all(&[Stat::Min, Stat::Max])
310            .unwrap();
311        assert!(arr.is_constant());
312
313        let arr = PrimitiveArray::from_option_iter([Some(0), Some(0)]);
314        assert!(arr.is_constant());
315    }
316
317    #[test]
318    fn is_constant_min_max_with_nan() {
319        let arr = PrimitiveArray::from_iter([0.0, 0.0, f32::NAN]);
320        arr.statistics()
321            .compute_all(&[Stat::Min, Stat::Max])
322            .unwrap();
323        assert!(!arr.is_constant());
324
325        let arr =
326            PrimitiveArray::from_option_iter([Some(f32::NEG_INFINITY), Some(f32::NEG_INFINITY)]);
327        arr.statistics()
328            .compute_all(&[Stat::Min, Stat::Max])
329            .unwrap();
330        assert!(arr.is_constant());
331    }
332}