Skip to main content

vortex_array/compute/
is_sorted.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::sync::LazyLock;
6
7use arcref::ArcRef;
8use vortex_error::VortexError;
9use vortex_error::VortexResult;
10use vortex_error::vortex_bail;
11use vortex_error::vortex_err;
12
13use crate::Array;
14use crate::ArrayRef;
15use crate::IntoArray as _;
16use crate::arrays::ConstantVTable;
17use crate::arrays::NullVTable;
18use crate::compute::ComputeFn;
19use crate::compute::ComputeFnVTable;
20use crate::compute::InvocationArgs;
21use crate::compute::Kernel;
22use crate::compute::Options;
23use crate::compute::Output;
24use crate::dtype::DType;
25use crate::dtype::Nullability;
26use crate::expr::stats::Precision;
27use crate::expr::stats::Stat;
28use crate::expr::stats::StatsProviderExt;
29use crate::scalar::Scalar;
30use crate::vtable::VTable;
31
32static IS_SORTED_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
33    let compute = ComputeFn::new("is_sorted".into(), ArcRef::new_ref(&IsSorted));
34    for kernel in inventory::iter::<IsSortedKernelRef> {
35        compute.register_kernel(kernel.0.clone());
36    }
37    compute
38});
39
40pub(crate) fn warm_up_vtable() -> usize {
41    IS_SORTED_FN.kernels().len()
42}
43
44pub fn is_sorted(array: &ArrayRef) -> VortexResult<Option<bool>> {
45    is_sorted_opts(array, false)
46}
47
48pub fn is_strict_sorted(array: &ArrayRef) -> VortexResult<Option<bool>> {
49    is_sorted_opts(array, true)
50}
51
52pub fn is_sorted_opts(array: &ArrayRef, strict: bool) -> VortexResult<Option<bool>> {
53    Ok(IS_SORTED_FN
54        .invoke(&InvocationArgs {
55            inputs: &[array.into()],
56            options: &IsSortedOptions { strict },
57        })?
58        .unwrap_scalar()?
59        .as_bool()
60        .value())
61}
62
63struct IsSorted;
64impl ComputeFnVTable for IsSorted {
65    fn invoke(
66        &self,
67        args: &InvocationArgs,
68        kernels: &[ArcRef<dyn Kernel>],
69    ) -> VortexResult<Output> {
70        let IsSortedArgs { array, strict } = IsSortedArgs::try_from(args)?;
71        let array = array.to_array();
72
73        // We currently don't support sorting struct arrays.
74        if array.dtype().is_struct() {
75            let scalar: Scalar = Some(false).into();
76            return Ok(scalar.into());
77        }
78
79        let is_sorted = if strict {
80            if let Some(Precision::Exact(value)) =
81                array.statistics().get_as::<bool>(Stat::IsStrictSorted)
82            {
83                let scalar: Scalar = Some(value).into();
84                return Ok(scalar.into());
85            }
86
87            let is_strict_sorted = is_sorted_impl(&array, kernels, true)?;
88            let array_stats = array.statistics();
89
90            if is_strict_sorted.is_some() {
91                if is_strict_sorted.unwrap_or(false) {
92                    array_stats.set(Stat::IsSorted, Precision::Exact(true.into()));
93                    array_stats.set(Stat::IsStrictSorted, Precision::Exact(true.into()));
94                } else {
95                    array_stats.set(Stat::IsStrictSorted, Precision::Exact(false.into()));
96                }
97            }
98
99            is_strict_sorted
100        } else {
101            if let Some(Precision::Exact(value)) = array.statistics().get_as::<bool>(Stat::IsSorted)
102            {
103                let scalar: Scalar = Some(value).into();
104                return Ok(scalar.into());
105            }
106
107            let is_sorted = is_sorted_impl(&array, kernels, false)?;
108            let array_stats = array.statistics();
109
110            if is_sorted.is_some() {
111                if is_sorted.unwrap_or(false) {
112                    array_stats.set(Stat::IsSorted, Precision::Exact(true.into()));
113                } else {
114                    array_stats.set(Stat::IsSorted, Precision::Exact(false.into()));
115                    array_stats.set(Stat::IsStrictSorted, Precision::Exact(false.into()));
116                }
117            }
118
119            is_sorted
120        };
121
122        let scalar: Scalar = is_sorted.into();
123        Ok(scalar.into())
124    }
125
126    fn return_dtype(&self, _args: &InvocationArgs) -> VortexResult<DType> {
127        // We always return a nullable boolean where `null` indicates we couldn't determine
128        // whether the array is constant.
129        Ok(DType::Bool(Nullability::Nullable))
130    }
131
132    fn return_len(&self, _args: &InvocationArgs) -> VortexResult<usize> {
133        Ok(1)
134    }
135
136    fn is_elementwise(&self) -> bool {
137        true
138    }
139}
140
141struct IsSortedArgs<'a> {
142    array: &'a dyn Array,
143    strict: bool,
144}
145
146impl<'a> TryFrom<&InvocationArgs<'a>> for IsSortedArgs<'a> {
147    type Error = VortexError;
148
149    fn try_from(value: &InvocationArgs<'a>) -> Result<Self, Self::Error> {
150        if value.inputs.len() != 1 {
151            vortex_bail!(
152                "IsSorted function requires exactly one argument, got {}",
153                value.inputs.len()
154            );
155        }
156        let array = value.inputs[0]
157            .array()
158            .ok_or_else(|| vortex_err!("Invalid argument type for is sorted function"))?;
159        let options = *value
160            .options
161            .as_any()
162            .downcast_ref::<IsSortedOptions>()
163            .ok_or_else(|| vortex_err!("Invalid options type for is sorted function"))?;
164
165        Ok(IsSortedArgs {
166            array,
167            strict: options.strict,
168        })
169    }
170}
171
172#[derive(Clone, Copy)]
173struct IsSortedOptions {
174    strict: bool,
175}
176
177impl Options for IsSortedOptions {
178    fn as_any(&self) -> &dyn Any {
179        self
180    }
181}
182
183pub struct IsSortedKernelRef(ArcRef<dyn Kernel>);
184inventory::collect!(IsSortedKernelRef);
185
186#[derive(Debug)]
187pub struct IsSortedKernelAdapter<V: VTable>(pub V);
188
189impl<V: VTable + IsSortedKernel> IsSortedKernelAdapter<V> {
190    pub const fn lift(&'static self) -> IsSortedKernelRef {
191        IsSortedKernelRef(ArcRef::new_ref(self))
192    }
193}
194
195impl<V: VTable + IsSortedKernel> Kernel for IsSortedKernelAdapter<V> {
196    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
197        let IsSortedArgs { array, strict } = IsSortedArgs::try_from(args)?;
198        let Some(array) = array.as_opt::<V>() else {
199            return Ok(None);
200        };
201
202        let is_sorted = if strict {
203            V::is_strict_sorted(&self.0, array)?
204        } else {
205            V::is_sorted(&self.0, array)?
206        };
207
208        let scalar: Scalar = is_sorted.into();
209        Ok(Some(scalar.into()))
210    }
211}
212
213pub trait IsSortedKernel: VTable {
214    /// # Preconditions
215    /// - The array's length is > 1.
216    /// - The array is not encoded as `NullArray` or `ConstantArray`.
217    /// - If doing a `strict` check, if the array is nullable, it'll have at most 1 null element
218    ///   as the first item in the array.
219    fn is_sorted(&self, array: &Self::Array) -> VortexResult<Option<bool>>;
220
221    fn is_strict_sorted(&self, array: &Self::Array) -> VortexResult<Option<bool>>;
222}
223
224#[expect(
225    clippy::wrong_self_convention,
226    reason = "is_* naming follows Iterator::is_sorted convention"
227)]
228/// Helper methods to check sortedness with strictness
229pub trait IsSortedIteratorExt: Iterator
230where
231    <Self as Iterator>::Item: PartialOrd,
232{
233    fn is_strict_sorted(self) -> bool
234    where
235        Self: Sized,
236        Self::Item: PartialOrd,
237    {
238        self.is_sorted_by(|a, b| a < b)
239    }
240}
241
242impl<T> IsSortedIteratorExt for T
243where
244    T: Iterator + ?Sized,
245    T::Item: PartialOrd,
246{
247}
248
249fn is_sorted_impl(
250    array: &ArrayRef,
251    kernels: &[ArcRef<dyn Kernel>],
252    strict: bool,
253) -> VortexResult<Option<bool>> {
254    // Arrays with 0 or 1 elements are strict sorted.
255    if array.len() <= 1 {
256        return Ok(Some(true));
257    }
258
259    // Constant and null arrays are always sorted, but not strict sorted.
260    if array.is::<ConstantVTable>() || array.is::<NullVTable>() {
261        return Ok(Some(!strict));
262    }
263
264    // Enforce strictness before we even try to check if the array is sorted.
265    if strict {
266        let invalid_count = array.invalid_count()?;
267        match invalid_count {
268            // We can keep going
269            0 => {}
270            // If we have a potential null value - it has to be the first one.
271            1 => {
272                if !array.is_invalid(0)? {
273                    return Ok(Some(false));
274                }
275            }
276            _ => return Ok(Some(false)),
277        }
278    }
279
280    let args = InvocationArgs {
281        inputs: &[array.into()],
282        options: &IsSortedOptions { strict },
283    };
284
285    for kernel in kernels {
286        if let Some(output) = kernel.invoke(&args)? {
287            return Ok(output.unwrap_scalar()?.as_bool().value());
288        }
289    }
290
291    if !array.is_canonical() {
292        tracing::debug!(
293            "No is_sorted implementation found for {}",
294            array.encoding_id()
295        );
296
297        // Recurse to canonical implementation
298        let array = array.to_canonical()?.into_array();
299
300        return if strict {
301            is_strict_sorted(&array)
302        } else {
303            is_sorted(&array)
304        };
305    }
306
307    vortex_bail!(
308        "No is_sorted function for canonical array: {}",
309        array.encoding_id(),
310    )
311}
312
313#[cfg(test)]
314mod tests {
315    use vortex_buffer::buffer;
316
317    use crate::IntoArray;
318    use crate::arrays::BoolArray;
319    use crate::arrays::PrimitiveArray;
320    use crate::compute::is_sorted;
321    use crate::compute::is_strict_sorted;
322    use crate::validity::Validity;
323    #[test]
324    fn test_is_sorted() {
325        let arr = PrimitiveArray::new(buffer!(0, 1, 2, 3), Validity::AllValid).into_array();
326        assert!(is_sorted(&arr).unwrap().unwrap());
327
328        let arr = PrimitiveArray::new(
329            buffer!(0, 1, 2, 3),
330            Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array()),
331        )
332        .into_array();
333        assert!(is_sorted(&arr).unwrap().unwrap());
334
335        let arr = PrimitiveArray::new(
336            buffer!(0, 1, 2, 3),
337            Validity::Array(BoolArray::from_iter([true, false, true, true]).into_array()),
338        )
339        .into_array();
340        assert!(!is_sorted(&arr).unwrap().unwrap());
341
342        let arr = PrimitiveArray::new(buffer!(0, 1, 3, 2), Validity::AllValid).into_array();
343        assert!(!is_sorted(&arr).unwrap().unwrap());
344
345        let arr = PrimitiveArray::new(
346            buffer!(0, 1, 3, 2),
347            Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array()),
348        )
349        .into_array();
350        assert!(!is_sorted(&arr).unwrap().unwrap());
351    }
352
353    #[test]
354    fn test_is_strict_sorted() {
355        let arr = PrimitiveArray::new(buffer!(0, 1, 2, 3), Validity::AllValid).into_array();
356        assert!(is_strict_sorted(&arr).unwrap().unwrap());
357
358        let arr = PrimitiveArray::new(
359            buffer!(0, 1, 2, 3),
360            Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array()),
361        )
362        .into_array();
363        assert!(is_strict_sorted(&arr).unwrap().unwrap());
364
365        let arr = PrimitiveArray::new(
366            buffer!(0, 1, 2, 3),
367            Validity::Array(BoolArray::from_iter([true, false, true, true]).into_array()),
368        )
369        .into_array();
370        assert!(!is_strict_sorted(&arr).unwrap().unwrap());
371
372        let arr = PrimitiveArray::new(
373            buffer!(0, 1, 3, 2),
374            Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array()),
375        )
376        .into_array();
377        assert!(!is_strict_sorted(&arr).unwrap().unwrap());
378    }
379}