vortex_array/compute/
is_sorted.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::sync::LazyLock;
6
7use arcref::ArcRef;
8use vortex_dtype::DType;
9use vortex_dtype::Nullability;
10use vortex_error::VortexError;
11use vortex_error::VortexResult;
12use vortex_error::vortex_bail;
13use vortex_error::vortex_err;
14use vortex_scalar::Scalar;
15
16use crate::Array;
17use crate::arrays::ConstantVTable;
18use crate::arrays::NullVTable;
19use crate::compute::ComputeFn;
20use crate::compute::ComputeFnVTable;
21use crate::compute::InvocationArgs;
22use crate::compute::Kernel;
23use crate::compute::Options;
24use crate::compute::Output;
25use crate::expr::stats::Precision;
26use crate::expr::stats::Stat;
27use crate::expr::stats::StatsProviderExt;
28use crate::vtable::VTable;
29
30static IS_SORTED_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
31    let compute = ComputeFn::new("is_sorted".into(), ArcRef::new_ref(&IsSorted));
32    for kernel in inventory::iter::<IsSortedKernelRef> {
33        compute.register_kernel(kernel.0.clone());
34    }
35    compute
36});
37
38pub(crate) fn warm_up_vtable() -> usize {
39    IS_SORTED_FN.kernels().len()
40}
41
42pub fn is_sorted(array: &dyn Array) -> VortexResult<Option<bool>> {
43    is_sorted_opts(array, false)
44}
45
46pub fn is_strict_sorted(array: &dyn Array) -> VortexResult<Option<bool>> {
47    is_sorted_opts(array, true)
48}
49
50pub fn is_sorted_opts(array: &dyn Array, strict: bool) -> VortexResult<Option<bool>> {
51    Ok(IS_SORTED_FN
52        .invoke(&InvocationArgs {
53            inputs: &[array.into()],
54            options: &IsSortedOptions { strict },
55        })?
56        .unwrap_scalar()?
57        .as_bool()
58        .value())
59}
60
61struct IsSorted;
62impl ComputeFnVTable for IsSorted {
63    fn invoke(
64        &self,
65        args: &InvocationArgs,
66        kernels: &[ArcRef<dyn Kernel>],
67    ) -> VortexResult<Output> {
68        let IsSortedArgs { array, strict } = IsSortedArgs::try_from(args)?;
69
70        // We currently don't support sorting struct arrays.
71        if array.dtype().is_struct() {
72            return Ok(Scalar::from(Some(false)).into());
73        }
74
75        let is_sorted = if strict {
76            if let Some(Precision::Exact(value)) =
77                array.statistics().get_as::<bool>(Stat::IsStrictSorted)
78            {
79                return Ok(Scalar::from(Some(value)).into());
80            }
81
82            let is_strict_sorted = is_sorted_impl(array, kernels, true)?;
83            let array_stats = array.statistics();
84
85            if is_strict_sorted.is_some() {
86                if is_strict_sorted.unwrap_or(false) {
87                    array_stats.set(Stat::IsSorted, Precision::Exact(true.into()));
88                    array_stats.set(Stat::IsStrictSorted, Precision::Exact(true.into()));
89                } else {
90                    array_stats.set(Stat::IsStrictSorted, Precision::Exact(false.into()));
91                }
92            }
93
94            is_strict_sorted
95        } else {
96            if let Some(Precision::Exact(value)) = array.statistics().get_as::<bool>(Stat::IsSorted)
97            {
98                return Ok(Scalar::from(Some(value)).into());
99            }
100
101            let is_sorted = is_sorted_impl(array, kernels, false)?;
102            let array_stats = array.statistics();
103
104            if is_sorted.is_some() {
105                if is_sorted.unwrap_or(false) {
106                    array_stats.set(Stat::IsSorted, Precision::Exact(true.into()));
107                } else {
108                    array_stats.set(Stat::IsSorted, Precision::Exact(false.into()));
109                    array_stats.set(Stat::IsStrictSorted, Precision::Exact(false.into()));
110                }
111            }
112
113            is_sorted
114        };
115
116        Ok(Scalar::from(is_sorted).into())
117    }
118
119    fn return_dtype(&self, _args: &InvocationArgs) -> VortexResult<DType> {
120        // We always return a nullable boolean where `null` indicates we couldn't determine
121        // whether the array is constant.
122        Ok(DType::Bool(Nullability::Nullable))
123    }
124
125    fn return_len(&self, _args: &InvocationArgs) -> VortexResult<usize> {
126        Ok(1)
127    }
128
129    fn is_elementwise(&self) -> bool {
130        true
131    }
132}
133
134struct IsSortedArgs<'a> {
135    array: &'a dyn Array,
136    strict: bool,
137}
138
139impl<'a> TryFrom<&InvocationArgs<'a>> for IsSortedArgs<'a> {
140    type Error = VortexError;
141
142    fn try_from(value: &InvocationArgs<'a>) -> Result<Self, Self::Error> {
143        if value.inputs.len() != 1 {
144            vortex_bail!(
145                "IsSorted function requires exactly one argument, got {}",
146                value.inputs.len()
147            );
148        }
149        let array = value.inputs[0]
150            .array()
151            .ok_or_else(|| vortex_err!("Invalid argument type for is sorted function"))?;
152        let options = *value
153            .options
154            .as_any()
155            .downcast_ref::<IsSortedOptions>()
156            .ok_or_else(|| vortex_err!("Invalid options type for is sorted function"))?;
157
158        Ok(IsSortedArgs {
159            array,
160            strict: options.strict,
161        })
162    }
163}
164
165#[derive(Clone, Copy)]
166struct IsSortedOptions {
167    strict: bool,
168}
169
170impl Options for IsSortedOptions {
171    fn as_any(&self) -> &dyn Any {
172        self
173    }
174}
175
176pub struct IsSortedKernelRef(ArcRef<dyn Kernel>);
177inventory::collect!(IsSortedKernelRef);
178
179#[derive(Debug)]
180pub struct IsSortedKernelAdapter<V: VTable>(pub V);
181
182impl<V: VTable + IsSortedKernel> IsSortedKernelAdapter<V> {
183    pub const fn lift(&'static self) -> IsSortedKernelRef {
184        IsSortedKernelRef(ArcRef::new_ref(self))
185    }
186}
187
188impl<V: VTable + IsSortedKernel> Kernel for IsSortedKernelAdapter<V> {
189    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
190        let IsSortedArgs { array, strict } = IsSortedArgs::try_from(args)?;
191        let Some(array) = array.as_opt::<V>() else {
192            return Ok(None);
193        };
194
195        let is_sorted = if strict {
196            V::is_strict_sorted(&self.0, array)?
197        } else {
198            V::is_sorted(&self.0, array)?
199        };
200
201        Ok(Some(Scalar::from(is_sorted).into()))
202    }
203}
204
205pub trait IsSortedKernel: VTable {
206    /// # Preconditions
207    /// - The array's length is > 1.
208    /// - The array is not encoded as `NullArray` or `ConstantArray`.
209    /// - If doing a `strict` check, if the array is nullable, it'll have at most 1 null element
210    ///   as the first item in the array.
211    fn is_sorted(&self, array: &Self::Array) -> VortexResult<Option<bool>>;
212
213    fn is_strict_sorted(&self, array: &Self::Array) -> VortexResult<Option<bool>>;
214}
215
216#[expect(
217    clippy::wrong_self_convention,
218    reason = "is_* naming follows Iterator::is_sorted convention"
219)]
220/// Helper methods to check sortedness with strictness
221pub trait IsSortedIteratorExt: Iterator
222where
223    <Self as Iterator>::Item: PartialOrd,
224{
225    fn is_strict_sorted(self) -> bool
226    where
227        Self: Sized,
228        Self::Item: PartialOrd,
229    {
230        self.is_sorted_by(|a, b| a < b)
231    }
232}
233
234impl<T> IsSortedIteratorExt for T
235where
236    T: Iterator + ?Sized,
237    T::Item: PartialOrd,
238{
239}
240
241fn is_sorted_impl(
242    array: &dyn Array,
243    kernels: &[ArcRef<dyn Kernel>],
244    strict: bool,
245) -> VortexResult<Option<bool>> {
246    // Arrays with 0 or 1 elements are strict sorted.
247    if array.len() <= 1 {
248        return Ok(Some(true));
249    }
250
251    // Constant and null arrays are always sorted, but not strict sorted.
252    if array.is::<ConstantVTable>() || array.is::<NullVTable>() {
253        return Ok(Some(!strict));
254    }
255
256    // Enforce strictness before we even try to check if the array is sorted.
257    if strict {
258        let invalid_count = array.invalid_count();
259        match invalid_count {
260            // We can keep going
261            0 => {}
262            // If we have a potential null value - it has to be the first one.
263            1 => {
264                if !array.is_invalid(0) {
265                    return Ok(Some(false));
266                }
267            }
268            _ => return Ok(Some(false)),
269        }
270    }
271
272    let args = InvocationArgs {
273        inputs: &[array.into()],
274        options: &IsSortedOptions { strict },
275    };
276
277    for kernel in kernels {
278        if let Some(output) = kernel.invoke(&args)? {
279            return Ok(output.unwrap_scalar()?.as_bool().value());
280        }
281    }
282    if let Some(output) = array.invoke(&IS_SORTED_FN, &args)? {
283        return Ok(output.unwrap_scalar()?.as_bool().value());
284    }
285
286    if !array.is_canonical() {
287        log::debug!(
288            "No is_sorted implementation found for {}",
289            array.encoding_id()
290        );
291
292        // Recurse to canonical implementation
293        let array = array.to_canonical();
294
295        return if strict {
296            is_strict_sorted(array.as_ref())
297        } else {
298            is_sorted(array.as_ref())
299        };
300    }
301
302    vortex_bail!(
303        "No is_sorted function for canonical array: {}",
304        array.encoding_id(),
305    )
306}
307
308#[cfg(test)]
309mod tests {
310    use vortex_buffer::buffer;
311
312    use crate::IntoArray;
313    use crate::arrays::BoolArray;
314    use crate::arrays::PrimitiveArray;
315    use crate::compute::is_sorted;
316    use crate::compute::is_strict_sorted;
317    use crate::validity::Validity;
318    #[test]
319    fn test_is_sorted() {
320        assert!(
321            is_sorted(PrimitiveArray::new(buffer!(0, 1, 2, 3), Validity::AllValid).as_ref())
322                .unwrap()
323                .unwrap()
324        );
325        assert!(
326            is_sorted(
327                PrimitiveArray::new(
328                    buffer!(0, 1, 2, 3),
329                    Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array())
330                )
331                .as_ref()
332            )
333            .unwrap()
334            .unwrap()
335        );
336        assert!(
337            !is_sorted(
338                PrimitiveArray::new(
339                    buffer!(0, 1, 2, 3),
340                    Validity::Array(BoolArray::from_iter([true, false, true, true]).into_array())
341                )
342                .as_ref()
343            )
344            .unwrap()
345            .unwrap()
346        );
347
348        assert!(
349            !is_sorted(PrimitiveArray::new(buffer!(0, 1, 3, 2), Validity::AllValid).as_ref())
350                .unwrap()
351                .unwrap()
352        );
353        assert!(
354            !is_sorted(
355                PrimitiveArray::new(
356                    buffer!(0, 1, 3, 2),
357                    Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array()),
358                )
359                .as_ref()
360            )
361            .unwrap()
362            .unwrap()
363        );
364    }
365
366    #[test]
367    fn test_is_strict_sorted() {
368        assert!(
369            is_strict_sorted(PrimitiveArray::new(buffer!(0, 1, 2, 3), Validity::AllValid).as_ref())
370                .unwrap()
371                .unwrap()
372        );
373        assert!(
374            is_strict_sorted(
375                PrimitiveArray::new(
376                    buffer!(0, 1, 2, 3),
377                    Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array())
378                )
379                .as_ref()
380            )
381            .unwrap()
382            .unwrap()
383        );
384        assert!(
385            !is_strict_sorted(
386                PrimitiveArray::new(
387                    buffer!(0, 1, 2, 3),
388                    Validity::Array(BoolArray::from_iter([true, false, true, true]).into_array()),
389                )
390                .as_ref()
391            )
392            .unwrap()
393            .unwrap()
394        );
395
396        assert!(
397            !is_strict_sorted(
398                PrimitiveArray::new(
399                    buffer!(0, 1, 3, 2),
400                    Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array()),
401                )
402                .as_ref()
403            )
404            .unwrap()
405            .unwrap()
406        );
407    }
408}