Skip to main content

vortex_array/compute/
is_sorted.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::sync::LazyLock;
6
7use arcref::ArcRef;
8use vortex_dtype::DType;
9use vortex_dtype::Nullability;
10use vortex_error::VortexError;
11use vortex_error::VortexResult;
12use vortex_error::vortex_bail;
13use vortex_error::vortex_err;
14
15use crate::Array;
16use crate::arrays::ConstantVTable;
17use crate::arrays::NullVTable;
18use crate::compute::ComputeFn;
19use crate::compute::ComputeFnVTable;
20use crate::compute::InvocationArgs;
21use crate::compute::Kernel;
22use crate::compute::Options;
23use crate::compute::Output;
24use crate::expr::stats::Precision;
25use crate::expr::stats::Stat;
26use crate::expr::stats::StatsProviderExt;
27use crate::scalar::Scalar;
28use crate::vtable::VTable;
29
30static IS_SORTED_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
31    let compute = ComputeFn::new("is_sorted".into(), ArcRef::new_ref(&IsSorted));
32    for kernel in inventory::iter::<IsSortedKernelRef> {
33        compute.register_kernel(kernel.0.clone());
34    }
35    compute
36});
37
38pub(crate) fn warm_up_vtable() -> usize {
39    IS_SORTED_FN.kernels().len()
40}
41
42pub fn is_sorted(array: &dyn Array) -> VortexResult<Option<bool>> {
43    is_sorted_opts(array, false)
44}
45
46pub fn is_strict_sorted(array: &dyn Array) -> VortexResult<Option<bool>> {
47    is_sorted_opts(array, true)
48}
49
50pub fn is_sorted_opts(array: &dyn Array, strict: bool) -> VortexResult<Option<bool>> {
51    Ok(IS_SORTED_FN
52        .invoke(&InvocationArgs {
53            inputs: &[array.into()],
54            options: &IsSortedOptions { strict },
55        })?
56        .unwrap_scalar()?
57        .as_bool()
58        .value())
59}
60
61struct IsSorted;
62impl ComputeFnVTable for IsSorted {
63    fn invoke(
64        &self,
65        args: &InvocationArgs,
66        kernels: &[ArcRef<dyn Kernel>],
67    ) -> VortexResult<Output> {
68        let IsSortedArgs { array, strict } = IsSortedArgs::try_from(args)?;
69
70        // We currently don't support sorting struct arrays.
71        if array.dtype().is_struct() {
72            let scalar: Scalar = Some(false).into();
73            return Ok(scalar.into());
74        }
75
76        let is_sorted = if strict {
77            if let Some(Precision::Exact(value)) =
78                array.statistics().get_as::<bool>(Stat::IsStrictSorted)
79            {
80                let scalar: Scalar = Some(value).into();
81                return Ok(scalar.into());
82            }
83
84            let is_strict_sorted = is_sorted_impl(array, kernels, true)?;
85            let array_stats = array.statistics();
86
87            if is_strict_sorted.is_some() {
88                if is_strict_sorted.unwrap_or(false) {
89                    array_stats.set(Stat::IsSorted, Precision::Exact(true.into()));
90                    array_stats.set(Stat::IsStrictSorted, Precision::Exact(true.into()));
91                } else {
92                    array_stats.set(Stat::IsStrictSorted, Precision::Exact(false.into()));
93                }
94            }
95
96            is_strict_sorted
97        } else {
98            if let Some(Precision::Exact(value)) = array.statistics().get_as::<bool>(Stat::IsSorted)
99            {
100                let scalar: Scalar = Some(value).into();
101                return Ok(scalar.into());
102            }
103
104            let is_sorted = is_sorted_impl(array, kernels, false)?;
105            let array_stats = array.statistics();
106
107            if is_sorted.is_some() {
108                if is_sorted.unwrap_or(false) {
109                    array_stats.set(Stat::IsSorted, Precision::Exact(true.into()));
110                } else {
111                    array_stats.set(Stat::IsSorted, Precision::Exact(false.into()));
112                    array_stats.set(Stat::IsStrictSorted, Precision::Exact(false.into()));
113                }
114            }
115
116            is_sorted
117        };
118
119        let scalar: Scalar = is_sorted.into();
120        Ok(scalar.into())
121    }
122
123    fn return_dtype(&self, _args: &InvocationArgs) -> VortexResult<DType> {
124        // We always return a nullable boolean where `null` indicates we couldn't determine
125        // whether the array is constant.
126        Ok(DType::Bool(Nullability::Nullable))
127    }
128
129    fn return_len(&self, _args: &InvocationArgs) -> VortexResult<usize> {
130        Ok(1)
131    }
132
133    fn is_elementwise(&self) -> bool {
134        true
135    }
136}
137
138struct IsSortedArgs<'a> {
139    array: &'a dyn Array,
140    strict: bool,
141}
142
143impl<'a> TryFrom<&InvocationArgs<'a>> for IsSortedArgs<'a> {
144    type Error = VortexError;
145
146    fn try_from(value: &InvocationArgs<'a>) -> Result<Self, Self::Error> {
147        if value.inputs.len() != 1 {
148            vortex_bail!(
149                "IsSorted function requires exactly one argument, got {}",
150                value.inputs.len()
151            );
152        }
153        let array = value.inputs[0]
154            .array()
155            .ok_or_else(|| vortex_err!("Invalid argument type for is sorted function"))?;
156        let options = *value
157            .options
158            .as_any()
159            .downcast_ref::<IsSortedOptions>()
160            .ok_or_else(|| vortex_err!("Invalid options type for is sorted function"))?;
161
162        Ok(IsSortedArgs {
163            array,
164            strict: options.strict,
165        })
166    }
167}
168
169#[derive(Clone, Copy)]
170struct IsSortedOptions {
171    strict: bool,
172}
173
174impl Options for IsSortedOptions {
175    fn as_any(&self) -> &dyn Any {
176        self
177    }
178}
179
180pub struct IsSortedKernelRef(ArcRef<dyn Kernel>);
181inventory::collect!(IsSortedKernelRef);
182
183#[derive(Debug)]
184pub struct IsSortedKernelAdapter<V: VTable>(pub V);
185
186impl<V: VTable + IsSortedKernel> IsSortedKernelAdapter<V> {
187    pub const fn lift(&'static self) -> IsSortedKernelRef {
188        IsSortedKernelRef(ArcRef::new_ref(self))
189    }
190}
191
192impl<V: VTable + IsSortedKernel> Kernel for IsSortedKernelAdapter<V> {
193    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
194        let IsSortedArgs { array, strict } = IsSortedArgs::try_from(args)?;
195        let Some(array) = array.as_opt::<V>() else {
196            return Ok(None);
197        };
198
199        let is_sorted = if strict {
200            V::is_strict_sorted(&self.0, array)?
201        } else {
202            V::is_sorted(&self.0, array)?
203        };
204
205        let scalar: Scalar = is_sorted.into();
206        Ok(Some(scalar.into()))
207    }
208}
209
210pub trait IsSortedKernel: VTable {
211    /// # Preconditions
212    /// - The array's length is > 1.
213    /// - The array is not encoded as `NullArray` or `ConstantArray`.
214    /// - If doing a `strict` check, if the array is nullable, it'll have at most 1 null element
215    ///   as the first item in the array.
216    fn is_sorted(&self, array: &Self::Array) -> VortexResult<Option<bool>>;
217
218    fn is_strict_sorted(&self, array: &Self::Array) -> VortexResult<Option<bool>>;
219}
220
221#[expect(
222    clippy::wrong_self_convention,
223    reason = "is_* naming follows Iterator::is_sorted convention"
224)]
225/// Helper methods to check sortedness with strictness
226pub trait IsSortedIteratorExt: Iterator
227where
228    <Self as Iterator>::Item: PartialOrd,
229{
230    fn is_strict_sorted(self) -> bool
231    where
232        Self: Sized,
233        Self::Item: PartialOrd,
234    {
235        self.is_sorted_by(|a, b| a < b)
236    }
237}
238
239impl<T> IsSortedIteratorExt for T
240where
241    T: Iterator + ?Sized,
242    T::Item: PartialOrd,
243{
244}
245
246fn is_sorted_impl(
247    array: &dyn Array,
248    kernels: &[ArcRef<dyn Kernel>],
249    strict: bool,
250) -> VortexResult<Option<bool>> {
251    // Arrays with 0 or 1 elements are strict sorted.
252    if array.len() <= 1 {
253        return Ok(Some(true));
254    }
255
256    // Constant and null arrays are always sorted, but not strict sorted.
257    if array.is::<ConstantVTable>() || array.is::<NullVTable>() {
258        return Ok(Some(!strict));
259    }
260
261    // Enforce strictness before we even try to check if the array is sorted.
262    if strict {
263        let invalid_count = array.invalid_count()?;
264        match invalid_count {
265            // We can keep going
266            0 => {}
267            // If we have a potential null value - it has to be the first one.
268            1 => {
269                if !array.is_invalid(0)? {
270                    return Ok(Some(false));
271                }
272            }
273            _ => return Ok(Some(false)),
274        }
275    }
276
277    let args = InvocationArgs {
278        inputs: &[array.into()],
279        options: &IsSortedOptions { strict },
280    };
281
282    for kernel in kernels {
283        if let Some(output) = kernel.invoke(&args)? {
284            return Ok(output.unwrap_scalar()?.as_bool().value());
285        }
286    }
287
288    if !array.is_canonical() {
289        tracing::debug!(
290            "No is_sorted implementation found for {}",
291            array.encoding_id()
292        );
293
294        // Recurse to canonical implementation
295        let array = array.to_canonical()?;
296
297        return if strict {
298            is_strict_sorted(array.as_ref())
299        } else {
300            is_sorted(array.as_ref())
301        };
302    }
303
304    vortex_bail!(
305        "No is_sorted function for canonical array: {}",
306        array.encoding_id(),
307    )
308}
309
310#[cfg(test)]
311mod tests {
312    use vortex_buffer::buffer;
313
314    use crate::IntoArray;
315    use crate::arrays::BoolArray;
316    use crate::arrays::PrimitiveArray;
317    use crate::compute::is_sorted;
318    use crate::compute::is_strict_sorted;
319    use crate::validity::Validity;
320    #[test]
321    fn test_is_sorted() {
322        assert!(
323            is_sorted(PrimitiveArray::new(buffer!(0, 1, 2, 3), Validity::AllValid).as_ref())
324                .unwrap()
325                .unwrap()
326        );
327        assert!(
328            is_sorted(
329                PrimitiveArray::new(
330                    buffer!(0, 1, 2, 3),
331                    Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array())
332                )
333                .as_ref()
334            )
335            .unwrap()
336            .unwrap()
337        );
338        assert!(
339            !is_sorted(
340                PrimitiveArray::new(
341                    buffer!(0, 1, 2, 3),
342                    Validity::Array(BoolArray::from_iter([true, false, true, true]).into_array())
343                )
344                .as_ref()
345            )
346            .unwrap()
347            .unwrap()
348        );
349
350        assert!(
351            !is_sorted(PrimitiveArray::new(buffer!(0, 1, 3, 2), Validity::AllValid).as_ref())
352                .unwrap()
353                .unwrap()
354        );
355        assert!(
356            !is_sorted(
357                PrimitiveArray::new(
358                    buffer!(0, 1, 3, 2),
359                    Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array()),
360                )
361                .as_ref()
362            )
363            .unwrap()
364            .unwrap()
365        );
366    }
367
368    #[test]
369    fn test_is_strict_sorted() {
370        assert!(
371            is_strict_sorted(PrimitiveArray::new(buffer!(0, 1, 2, 3), Validity::AllValid).as_ref())
372                .unwrap()
373                .unwrap()
374        );
375        assert!(
376            is_strict_sorted(
377                PrimitiveArray::new(
378                    buffer!(0, 1, 2, 3),
379                    Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array())
380                )
381                .as_ref()
382            )
383            .unwrap()
384            .unwrap()
385        );
386        assert!(
387            !is_strict_sorted(
388                PrimitiveArray::new(
389                    buffer!(0, 1, 2, 3),
390                    Validity::Array(BoolArray::from_iter([true, false, true, true]).into_array()),
391                )
392                .as_ref()
393            )
394            .unwrap()
395            .unwrap()
396        );
397
398        assert!(
399            !is_strict_sorted(
400                PrimitiveArray::new(
401                    buffer!(0, 1, 3, 2),
402                    Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array()),
403                )
404                .as_ref()
405            )
406            .unwrap()
407            .unwrap()
408        );
409    }
410}