vortex_array/compute/
is_sorted.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::sync::LazyLock;
6
7use arcref::ArcRef;
8use vortex_dtype::{DType, Nullability};
9use vortex_error::{VortexError, VortexExpect, VortexResult, vortex_bail, vortex_err};
10use vortex_scalar::Scalar;
11
12use crate::Array;
13use crate::arrays::{ConstantVTable, NullVTable};
14use crate::compute::{ComputeFn, ComputeFnVTable, InvocationArgs, Kernel, Options, Output};
15use crate::stats::{Precision, Stat};
16use crate::vtable::VTable;
17
18pub fn is_sorted(array: &dyn Array) -> VortexResult<bool> {
19    is_sorted_opts(array, false)
20}
21
22pub fn is_strict_sorted(array: &dyn Array) -> VortexResult<bool> {
23    is_sorted_opts(array, true)
24}
25
26pub fn is_sorted_opts(array: &dyn Array, strict: bool) -> VortexResult<bool> {
27    Ok(IS_SORTED_FN
28        .invoke(&InvocationArgs {
29            inputs: &[array.into()],
30            options: &IsSortedOptions { strict },
31        })?
32        .unwrap_scalar()?
33        .as_bool()
34        .value()
35        .vortex_expect("non-nullable"))
36}
37
38struct IsSorted;
39impl ComputeFnVTable for IsSorted {
40    fn invoke(
41        &self,
42        args: &InvocationArgs,
43        kernels: &[ArcRef<dyn Kernel>],
44    ) -> VortexResult<Output> {
45        let IsSortedArgs { array, strict } = IsSortedArgs::try_from(args)?;
46
47        // We currently don't support sorting struct arrays.
48        if array.dtype().is_struct() {
49            return Ok(Scalar::from(false).into());
50        }
51
52        let is_sorted = if strict {
53            if let Some(Precision::Exact(value)) =
54                array.statistics().get_as::<bool>(Stat::IsStrictSorted)
55            {
56                return Ok(Scalar::from(value).into());
57            }
58
59            let is_strict_sorted = is_sorted_impl(array, kernels, true)?;
60            let array_stats = array.statistics();
61
62            if is_strict_sorted {
63                array_stats.set(Stat::IsSorted, Precision::Exact(true.into()));
64                array_stats.set(Stat::IsStrictSorted, Precision::Exact(true.into()));
65            } else {
66                array_stats.set(Stat::IsStrictSorted, Precision::Exact(false.into()));
67            }
68
69            is_strict_sorted
70        } else {
71            if let Some(Precision::Exact(value)) = array.statistics().get_as::<bool>(Stat::IsSorted)
72            {
73                return Ok(Scalar::from(value).into());
74            }
75
76            let is_sorted = is_sorted_impl(array, kernels, false)?;
77            let array_stats = array.statistics();
78
79            if is_sorted {
80                array_stats.set(Stat::IsSorted, Precision::Exact(true.into()));
81            } else {
82                array_stats.set(Stat::IsSorted, Precision::Exact(false.into()));
83                array_stats.set(Stat::IsStrictSorted, Precision::Exact(false.into()));
84            }
85
86            is_sorted
87        };
88
89        Ok(Scalar::from(is_sorted).into())
90    }
91
92    fn return_dtype(&self, _args: &InvocationArgs) -> VortexResult<DType> {
93        Ok(DType::Bool(Nullability::NonNullable))
94    }
95
96    fn return_len(&self, _args: &InvocationArgs) -> VortexResult<usize> {
97        Ok(1)
98    }
99
100    fn is_elementwise(&self) -> bool {
101        true
102    }
103}
104
105struct IsSortedArgs<'a> {
106    array: &'a dyn Array,
107    strict: bool,
108}
109
110impl<'a> TryFrom<&InvocationArgs<'a>> for IsSortedArgs<'a> {
111    type Error = VortexError;
112
113    fn try_from(value: &InvocationArgs<'a>) -> Result<Self, Self::Error> {
114        if value.inputs.len() != 1 {
115            vortex_bail!(
116                "IsSorted function requires exactly one argument, got {}",
117                value.inputs.len()
118            );
119        }
120        let array = value.inputs[0]
121            .array()
122            .ok_or_else(|| vortex_err!("Invalid argument type for is sorted function"))?;
123        let options = *value
124            .options
125            .as_any()
126            .downcast_ref::<IsSortedOptions>()
127            .ok_or_else(|| vortex_err!("Invalid options type for is sorted function"))?;
128
129        Ok(IsSortedArgs {
130            array,
131            strict: options.strict,
132        })
133    }
134}
135
136#[derive(Clone, Copy)]
137struct IsSortedOptions {
138    strict: bool,
139}
140
141impl Options for IsSortedOptions {
142    fn as_any(&self) -> &dyn Any {
143        self
144    }
145}
146
147pub static IS_SORTED_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
148    let compute = ComputeFn::new("is_sorted".into(), ArcRef::new_ref(&IsSorted));
149    for kernel in inventory::iter::<IsSortedKernelRef> {
150        compute.register_kernel(kernel.0.clone());
151    }
152    compute
153});
154
155pub struct IsSortedKernelRef(ArcRef<dyn Kernel>);
156inventory::collect!(IsSortedKernelRef);
157
158#[derive(Debug)]
159pub struct IsSortedKernelAdapter<V: VTable>(pub V);
160
161impl<V: VTable + IsSortedKernel> IsSortedKernelAdapter<V> {
162    pub const fn lift(&'static self) -> IsSortedKernelRef {
163        IsSortedKernelRef(ArcRef::new_ref(self))
164    }
165}
166
167impl<V: VTable + IsSortedKernel> Kernel for IsSortedKernelAdapter<V> {
168    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
169        let IsSortedArgs { array, strict } = IsSortedArgs::try_from(args)?;
170        let Some(array) = array.as_opt::<V>() else {
171            return Ok(None);
172        };
173
174        let is_sorted = if strict {
175            V::is_strict_sorted(&self.0, array)?
176        } else {
177            V::is_sorted(&self.0, array)?
178        };
179
180        Ok(Some(Scalar::from(is_sorted).into()))
181    }
182}
183
184pub trait IsSortedKernel: VTable {
185    /// # Preconditions
186    /// - The array's length is > 1.
187    /// - The array is not encoded as `NullArray` or `ConstantArray`.
188    /// - If doing a `strict` check, if the array is nullable, it'll have at most 1 null element
189    ///   as the first item in the array.
190    fn is_sorted(&self, array: &Self::Array) -> VortexResult<bool>;
191
192    fn is_strict_sorted(&self, array: &Self::Array) -> VortexResult<bool>;
193}
194
195#[allow(clippy::wrong_self_convention)]
196/// Helper methods to check sortedness with strictness
197pub trait IsSortedIteratorExt: Iterator
198where
199    <Self as Iterator>::Item: PartialOrd,
200{
201    fn is_strict_sorted(self) -> bool
202    where
203        Self: Sized,
204        Self::Item: PartialOrd,
205    {
206        self.is_sorted_by(|a, b| a < b)
207    }
208}
209
210impl<T> IsSortedIteratorExt for T
211where
212    T: Iterator + ?Sized,
213    T::Item: PartialOrd,
214{
215}
216
217fn is_sorted_impl(
218    array: &dyn Array,
219    kernels: &[ArcRef<dyn Kernel>],
220    strict: bool,
221) -> VortexResult<bool> {
222    // Arrays with 0 or 1 elements are strict sorted.
223    if array.len() <= 1 {
224        return Ok(true);
225    }
226
227    // Constant and null arrays are always sorted, but not strict sorted.
228    if array.is::<ConstantVTable>() || array.is::<NullVTable>() {
229        return Ok(!strict);
230    }
231
232    let invalid_count = array.invalid_count()?;
233
234    // Enforce strictness before we even try to check if the array is sorted.
235    if strict {
236        match invalid_count {
237            // We can keep going
238            0 => {}
239            // If we have a potential null value - it has to be the first one.
240            1 => {
241                if !array.is_invalid(0)? {
242                    return Ok(false);
243                }
244            }
245            _ => return Ok(false),
246        }
247    }
248
249    let args = InvocationArgs {
250        inputs: &[array.into()],
251        options: &IsSortedOptions { strict },
252    };
253
254    for kernel in kernels {
255        if let Some(output) = kernel.invoke(&args)? {
256            return Ok(output
257                .unwrap_scalar()?
258                .as_bool()
259                .value()
260                .vortex_expect("non-nullable"));
261        }
262    }
263    if let Some(output) = array.invoke(&IS_SORTED_FN, &args)? {
264        return Ok(output
265            .unwrap_scalar()?
266            .as_bool()
267            .value()
268            .vortex_expect("non-nullable"));
269    }
270
271    if !array.is_canonical() {
272        log::debug!(
273            "No is_sorted implementation found for {}",
274            array.encoding_id()
275        );
276
277        // Recurse to canonical implementation
278        let array = array.to_canonical()?;
279
280        return if strict {
281            is_strict_sorted(array.as_ref())
282        } else {
283            is_sorted(array.as_ref())
284        };
285    }
286
287    vortex_bail!(
288        "No is_sorted function for canonical array: {}",
289        array.encoding_id(),
290    )
291}
292
293#[cfg(test)]
294mod tests {
295    use vortex_buffer::buffer;
296
297    use crate::IntoArray;
298    use crate::arrays::{BoolArray, PrimitiveArray};
299    use crate::compute::{is_sorted, is_strict_sorted};
300    use crate::validity::Validity;
301    #[test]
302    fn test_is_sorted() {
303        assert!(
304            is_sorted(PrimitiveArray::new(buffer!(0, 1, 2, 3), Validity::AllValid).as_ref())
305                .unwrap()
306        );
307        assert!(
308            is_sorted(
309                PrimitiveArray::new(
310                    buffer!(0, 1, 2, 3),
311                    Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array())
312                )
313                .as_ref()
314            )
315            .unwrap()
316        );
317        assert!(
318            !is_sorted(
319                PrimitiveArray::new(
320                    buffer!(0, 1, 2, 3),
321                    Validity::Array(BoolArray::from_iter([true, false, true, true]).into_array())
322                )
323                .as_ref()
324            )
325            .unwrap()
326        );
327
328        assert!(
329            !is_sorted(PrimitiveArray::new(buffer!(0, 1, 3, 2), Validity::AllValid).as_ref())
330                .unwrap()
331        );
332        assert!(
333            !is_sorted(
334                PrimitiveArray::new(
335                    buffer!(0, 1, 3, 2),
336                    Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array()),
337                )
338                .as_ref()
339            )
340            .unwrap(),
341        );
342    }
343
344    #[test]
345    fn test_is_strict_sorted() {
346        assert!(
347            is_strict_sorted(PrimitiveArray::new(buffer!(0, 1, 2, 3), Validity::AllValid).as_ref())
348                .unwrap()
349        );
350        assert!(
351            is_strict_sorted(
352                PrimitiveArray::new(
353                    buffer!(0, 1, 2, 3),
354                    Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array())
355                )
356                .as_ref()
357            )
358            .unwrap()
359        );
360        assert!(
361            !is_strict_sorted(
362                PrimitiveArray::new(
363                    buffer!(0, 1, 2, 3),
364                    Validity::Array(BoolArray::from_iter([true, false, true, true]).into_array()),
365                )
366                .as_ref()
367            )
368            .unwrap(),
369        );
370
371        assert!(
372            !is_strict_sorted(
373                PrimitiveArray::new(
374                    buffer!(0, 1, 3, 2),
375                    Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array()),
376                )
377                .as_ref()
378            )
379            .unwrap(),
380        );
381    }
382}