vortex_array/compute/
is_sorted.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::sync::LazyLock;
6
7use arcref::ArcRef;
8use vortex_dtype::{DType, Nullability};
9use vortex_error::{VortexError, VortexExpect, VortexResult, vortex_bail, vortex_err};
10use vortex_scalar::Scalar;
11
12use crate::Array;
13use crate::arrays::{ConstantVTable, NullVTable};
14use crate::compute::{ComputeFn, ComputeFnVTable, InvocationArgs, Kernel, Options, Output};
15use crate::stats::{Precision, Stat, StatsProviderExt};
16use crate::vtable::VTable;
17
18static IS_SORTED_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
19    let compute = ComputeFn::new("is_sorted".into(), ArcRef::new_ref(&IsSorted));
20    for kernel in inventory::iter::<IsSortedKernelRef> {
21        compute.register_kernel(kernel.0.clone());
22    }
23    compute
24});
25
26pub fn is_sorted(array: &dyn Array) -> VortexResult<bool> {
27    is_sorted_opts(array, false)
28}
29
30pub fn is_strict_sorted(array: &dyn Array) -> VortexResult<bool> {
31    is_sorted_opts(array, true)
32}
33
34pub fn is_sorted_opts(array: &dyn Array, strict: bool) -> VortexResult<bool> {
35    Ok(IS_SORTED_FN
36        .invoke(&InvocationArgs {
37            inputs: &[array.into()],
38            options: &IsSortedOptions { strict },
39        })?
40        .unwrap_scalar()?
41        .as_bool()
42        .value()
43        .vortex_expect("non-nullable"))
44}
45
46struct IsSorted;
47impl ComputeFnVTable for IsSorted {
48    fn invoke(
49        &self,
50        args: &InvocationArgs,
51        kernels: &[ArcRef<dyn Kernel>],
52    ) -> VortexResult<Output> {
53        let IsSortedArgs { array, strict } = IsSortedArgs::try_from(args)?;
54
55        // We currently don't support sorting struct arrays.
56        if array.dtype().is_struct() {
57            return Ok(Scalar::from(false).into());
58        }
59
60        let is_sorted = if strict {
61            if let Some(Precision::Exact(value)) =
62                array.statistics().get_as::<bool>(Stat::IsStrictSorted)
63            {
64                return Ok(Scalar::from(value).into());
65            }
66
67            let is_strict_sorted = is_sorted_impl(array, kernels, true)?;
68            let array_stats = array.statistics();
69
70            if is_strict_sorted {
71                array_stats.set(Stat::IsSorted, Precision::Exact(true.into()));
72                array_stats.set(Stat::IsStrictSorted, Precision::Exact(true.into()));
73            } else {
74                array_stats.set(Stat::IsStrictSorted, Precision::Exact(false.into()));
75            }
76
77            is_strict_sorted
78        } else {
79            if let Some(Precision::Exact(value)) = array.statistics().get_as::<bool>(Stat::IsSorted)
80            {
81                return Ok(Scalar::from(value).into());
82            }
83
84            let is_sorted = is_sorted_impl(array, kernels, false)?;
85            let array_stats = array.statistics();
86
87            if is_sorted {
88                array_stats.set(Stat::IsSorted, Precision::Exact(true.into()));
89            } else {
90                array_stats.set(Stat::IsSorted, Precision::Exact(false.into()));
91                array_stats.set(Stat::IsStrictSorted, Precision::Exact(false.into()));
92            }
93
94            is_sorted
95        };
96
97        Ok(Scalar::from(is_sorted).into())
98    }
99
100    fn return_dtype(&self, _args: &InvocationArgs) -> VortexResult<DType> {
101        Ok(DType::Bool(Nullability::NonNullable))
102    }
103
104    fn return_len(&self, _args: &InvocationArgs) -> VortexResult<usize> {
105        Ok(1)
106    }
107
108    fn is_elementwise(&self) -> bool {
109        true
110    }
111}
112
113struct IsSortedArgs<'a> {
114    array: &'a dyn Array,
115    strict: bool,
116}
117
118impl<'a> TryFrom<&InvocationArgs<'a>> for IsSortedArgs<'a> {
119    type Error = VortexError;
120
121    fn try_from(value: &InvocationArgs<'a>) -> Result<Self, Self::Error> {
122        if value.inputs.len() != 1 {
123            vortex_bail!(
124                "IsSorted function requires exactly one argument, got {}",
125                value.inputs.len()
126            );
127        }
128        let array = value.inputs[0]
129            .array()
130            .ok_or_else(|| vortex_err!("Invalid argument type for is sorted function"))?;
131        let options = *value
132            .options
133            .as_any()
134            .downcast_ref::<IsSortedOptions>()
135            .ok_or_else(|| vortex_err!("Invalid options type for is sorted function"))?;
136
137        Ok(IsSortedArgs {
138            array,
139            strict: options.strict,
140        })
141    }
142}
143
144#[derive(Clone, Copy)]
145struct IsSortedOptions {
146    strict: bool,
147}
148
149impl Options for IsSortedOptions {
150    fn as_any(&self) -> &dyn Any {
151        self
152    }
153}
154
155pub struct IsSortedKernelRef(ArcRef<dyn Kernel>);
156inventory::collect!(IsSortedKernelRef);
157
158#[derive(Debug)]
159pub struct IsSortedKernelAdapter<V: VTable>(pub V);
160
161impl<V: VTable + IsSortedKernel> IsSortedKernelAdapter<V> {
162    pub const fn lift(&'static self) -> IsSortedKernelRef {
163        IsSortedKernelRef(ArcRef::new_ref(self))
164    }
165}
166
167impl<V: VTable + IsSortedKernel> Kernel for IsSortedKernelAdapter<V> {
168    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
169        let IsSortedArgs { array, strict } = IsSortedArgs::try_from(args)?;
170        let Some(array) = array.as_opt::<V>() else {
171            return Ok(None);
172        };
173
174        let is_sorted = if strict {
175            V::is_strict_sorted(&self.0, array)?
176        } else {
177            V::is_sorted(&self.0, array)?
178        };
179
180        Ok(Some(Scalar::from(is_sorted).into()))
181    }
182}
183
184pub trait IsSortedKernel: VTable {
185    /// # Preconditions
186    /// - The array's length is > 1.
187    /// - The array is not encoded as `NullArray` or `ConstantArray`.
188    /// - If doing a `strict` check, if the array is nullable, it'll have at most 1 null element
189    ///   as the first item in the array.
190    fn is_sorted(&self, array: &Self::Array) -> VortexResult<bool>;
191
192    fn is_strict_sorted(&self, array: &Self::Array) -> VortexResult<bool>;
193}
194
195#[allow(clippy::wrong_self_convention)]
196/// Helper methods to check sortedness with strictness
197pub trait IsSortedIteratorExt: Iterator
198where
199    <Self as Iterator>::Item: PartialOrd,
200{
201    fn is_strict_sorted(self) -> bool
202    where
203        Self: Sized,
204        Self::Item: PartialOrd,
205    {
206        self.is_sorted_by(|a, b| a < b)
207    }
208}
209
210impl<T> IsSortedIteratorExt for T
211where
212    T: Iterator + ?Sized,
213    T::Item: PartialOrd,
214{
215}
216
217fn is_sorted_impl(
218    array: &dyn Array,
219    kernels: &[ArcRef<dyn Kernel>],
220    strict: bool,
221) -> VortexResult<bool> {
222    // Arrays with 0 or 1 elements are strict sorted.
223    if array.len() <= 1 {
224        return Ok(true);
225    }
226
227    // Constant and null arrays are always sorted, but not strict sorted.
228    if array.is::<ConstantVTable>() || array.is::<NullVTable>() {
229        return Ok(!strict);
230    }
231
232    // Enforce strictness before we even try to check if the array is sorted.
233    if strict {
234        let invalid_count = array.invalid_count()?;
235        match invalid_count {
236            // We can keep going
237            0 => {}
238            // If we have a potential null value - it has to be the first one.
239            1 => {
240                if !array.is_invalid(0)? {
241                    return Ok(false);
242                }
243            }
244            _ => return Ok(false),
245        }
246    }
247
248    let args = InvocationArgs {
249        inputs: &[array.into()],
250        options: &IsSortedOptions { strict },
251    };
252
253    for kernel in kernels {
254        if let Some(output) = kernel.invoke(&args)? {
255            return Ok(output
256                .unwrap_scalar()?
257                .as_bool()
258                .value()
259                .vortex_expect("non-nullable"));
260        }
261    }
262    if let Some(output) = array.invoke(&IS_SORTED_FN, &args)? {
263        return Ok(output
264            .unwrap_scalar()?
265            .as_bool()
266            .value()
267            .vortex_expect("non-nullable"));
268    }
269
270    if !array.is_canonical() {
271        log::debug!(
272            "No is_sorted implementation found for {}",
273            array.encoding_id()
274        );
275
276        // Recurse to canonical implementation
277        let array = array.to_canonical()?;
278
279        return if strict {
280            is_strict_sorted(array.as_ref())
281        } else {
282            is_sorted(array.as_ref())
283        };
284    }
285
286    vortex_bail!(
287        "No is_sorted function for canonical array: {}",
288        array.encoding_id(),
289    )
290}
291
292#[cfg(test)]
293mod tests {
294    use vortex_buffer::buffer;
295
296    use crate::IntoArray;
297    use crate::arrays::{BoolArray, PrimitiveArray};
298    use crate::compute::{is_sorted, is_strict_sorted};
299    use crate::validity::Validity;
300    #[test]
301    fn test_is_sorted() {
302        assert!(
303            is_sorted(PrimitiveArray::new(buffer!(0, 1, 2, 3), Validity::AllValid).as_ref())
304                .unwrap()
305        );
306        assert!(
307            is_sorted(
308                PrimitiveArray::new(
309                    buffer!(0, 1, 2, 3),
310                    Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array())
311                )
312                .as_ref()
313            )
314            .unwrap()
315        );
316        assert!(
317            !is_sorted(
318                PrimitiveArray::new(
319                    buffer!(0, 1, 2, 3),
320                    Validity::Array(BoolArray::from_iter([true, false, true, true]).into_array())
321                )
322                .as_ref()
323            )
324            .unwrap()
325        );
326
327        assert!(
328            !is_sorted(PrimitiveArray::new(buffer!(0, 1, 3, 2), Validity::AllValid).as_ref())
329                .unwrap()
330        );
331        assert!(
332            !is_sorted(
333                PrimitiveArray::new(
334                    buffer!(0, 1, 3, 2),
335                    Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array()),
336                )
337                .as_ref()
338            )
339            .unwrap(),
340        );
341    }
342
343    #[test]
344    fn test_is_strict_sorted() {
345        assert!(
346            is_strict_sorted(PrimitiveArray::new(buffer!(0, 1, 2, 3), Validity::AllValid).as_ref())
347                .unwrap()
348        );
349        assert!(
350            is_strict_sorted(
351                PrimitiveArray::new(
352                    buffer!(0, 1, 2, 3),
353                    Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array())
354                )
355                .as_ref()
356            )
357            .unwrap()
358        );
359        assert!(
360            !is_strict_sorted(
361                PrimitiveArray::new(
362                    buffer!(0, 1, 2, 3),
363                    Validity::Array(BoolArray::from_iter([true, false, true, true]).into_array()),
364                )
365                .as_ref()
366            )
367            .unwrap(),
368        );
369
370        assert!(
371            !is_strict_sorted(
372                PrimitiveArray::new(
373                    buffer!(0, 1, 3, 2),
374                    Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array()),
375                )
376                .as_ref()
377            )
378            .unwrap(),
379        );
380    }
381}