vortex-array 0.62.0

Vortex in memory columnar data format
Documentation
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use std::any::Any;
use std::sync::LazyLock;

use arcref::ArcRef;
use vortex_error::VortexError;
use vortex_error::VortexResult;
use vortex_error::vortex_bail;
use vortex_error::vortex_err;

use crate::ArrayRef;
use crate::DynArray;
use crate::IntoArray as _;
use crate::arrays::ConstantVTable;
use crate::arrays::NullVTable;
use crate::compute::ComputeFn;
use crate::compute::ComputeFnVTable;
use crate::compute::InvocationArgs;
use crate::compute::Kernel;
use crate::compute::Options;
use crate::compute::Output;
use crate::dtype::DType;
use crate::dtype::Nullability;
use crate::expr::stats::Precision;
use crate::expr::stats::Stat;
use crate::expr::stats::StatsProviderExt;
use crate::scalar::Scalar;
use crate::vtable::VTable;

static IS_SORTED_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
    let compute = ComputeFn::new("is_sorted".into(), ArcRef::new_ref(&IsSorted));
    for kernel in inventory::iter::<IsSortedKernelRef> {
        compute.register_kernel(kernel.0.clone());
    }
    compute
});

pub(crate) fn warm_up_vtable() -> usize {
    IS_SORTED_FN.kernels().len()
}

pub fn is_sorted(array: &ArrayRef) -> VortexResult<Option<bool>> {
    is_sorted_opts(array, false)
}

pub fn is_strict_sorted(array: &ArrayRef) -> VortexResult<Option<bool>> {
    is_sorted_opts(array, true)
}

pub fn is_sorted_opts(array: &ArrayRef, strict: bool) -> VortexResult<Option<bool>> {
    Ok(IS_SORTED_FN
        .invoke(&InvocationArgs {
            inputs: &[array.into()],
            options: &IsSortedOptions { strict },
        })?
        .unwrap_scalar()?
        .as_bool()
        .value())
}

struct IsSorted;
impl ComputeFnVTable for IsSorted {
    fn invoke(
        &self,
        args: &InvocationArgs,
        kernels: &[ArcRef<dyn Kernel>],
    ) -> VortexResult<Output> {
        let IsSortedArgs { array, strict } = IsSortedArgs::try_from(args)?;
        let array = array.to_array();

        // We currently don't support sorting struct arrays.
        if array.dtype().is_struct() {
            let scalar: Scalar = Some(false).into();
            return Ok(scalar.into());
        }

        let is_sorted = if strict {
            if let Some(Precision::Exact(value)) =
                array.statistics().get_as::<bool>(Stat::IsStrictSorted)
            {
                let scalar: Scalar = Some(value).into();
                return Ok(scalar.into());
            }

            let is_strict_sorted = is_sorted_impl(&array, kernels, true)?;
            let array_stats = array.statistics();

            if is_strict_sorted.is_some() {
                if is_strict_sorted.unwrap_or(false) {
                    array_stats.set(Stat::IsSorted, Precision::Exact(true.into()));
                    array_stats.set(Stat::IsStrictSorted, Precision::Exact(true.into()));
                } else {
                    array_stats.set(Stat::IsStrictSorted, Precision::Exact(false.into()));
                }
            }

            is_strict_sorted
        } else {
            if let Some(Precision::Exact(value)) = array.statistics().get_as::<bool>(Stat::IsSorted)
            {
                let scalar: Scalar = Some(value).into();
                return Ok(scalar.into());
            }

            let is_sorted = is_sorted_impl(&array, kernels, false)?;
            let array_stats = array.statistics();

            if is_sorted.is_some() {
                if is_sorted.unwrap_or(false) {
                    array_stats.set(Stat::IsSorted, Precision::Exact(true.into()));
                } else {
                    array_stats.set(Stat::IsSorted, Precision::Exact(false.into()));
                    array_stats.set(Stat::IsStrictSorted, Precision::Exact(false.into()));
                }
            }

            is_sorted
        };

        let scalar: Scalar = is_sorted.into();
        Ok(scalar.into())
    }

    fn return_dtype(&self, _args: &InvocationArgs) -> VortexResult<DType> {
        // We always return a nullable boolean where `null` indicates we couldn't determine
        // whether the array is constant.
        Ok(DType::Bool(Nullability::Nullable))
    }

    fn return_len(&self, _args: &InvocationArgs) -> VortexResult<usize> {
        Ok(1)
    }

    fn is_elementwise(&self) -> bool {
        true
    }
}

struct IsSortedArgs<'a> {
    array: &'a dyn DynArray,
    strict: bool,
}

impl<'a> TryFrom<&InvocationArgs<'a>> for IsSortedArgs<'a> {
    type Error = VortexError;

    fn try_from(value: &InvocationArgs<'a>) -> Result<Self, Self::Error> {
        if value.inputs.len() != 1 {
            vortex_bail!(
                "IsSorted function requires exactly one argument, got {}",
                value.inputs.len()
            );
        }
        let array = value.inputs[0]
            .array()
            .ok_or_else(|| vortex_err!("Invalid argument type for is sorted function"))?;
        let options = *value
            .options
            .as_any()
            .downcast_ref::<IsSortedOptions>()
            .ok_or_else(|| vortex_err!("Invalid options type for is sorted function"))?;

        Ok(IsSortedArgs {
            array,
            strict: options.strict,
        })
    }
}

#[derive(Clone, Copy)]
struct IsSortedOptions {
    strict: bool,
}

impl Options for IsSortedOptions {
    fn as_any(&self) -> &dyn Any {
        self
    }
}

pub struct IsSortedKernelRef(ArcRef<dyn Kernel>);
inventory::collect!(IsSortedKernelRef);

#[derive(Debug)]
pub struct IsSortedKernelAdapter<V: VTable>(pub V);

impl<V: VTable + IsSortedKernel> IsSortedKernelAdapter<V> {
    pub const fn lift(&'static self) -> IsSortedKernelRef {
        IsSortedKernelRef(ArcRef::new_ref(self))
    }
}

impl<V: VTable + IsSortedKernel> Kernel for IsSortedKernelAdapter<V> {
    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
        let IsSortedArgs { array, strict } = IsSortedArgs::try_from(args)?;
        let Some(array) = array.as_opt::<V>() else {
            return Ok(None);
        };

        let is_sorted = if strict {
            V::is_strict_sorted(&self.0, array)?
        } else {
            V::is_sorted(&self.0, array)?
        };

        let scalar: Scalar = is_sorted.into();
        Ok(Some(scalar.into()))
    }
}

pub trait IsSortedKernel: VTable {
    /// # Preconditions
    /// - The array's length is > 1.
    /// - The array is not encoded as `NullArray` or `ConstantArray`.
    /// - If doing a `strict` check, if the array is nullable, it'll have at most 1 null element
    ///   as the first item in the array.
    fn is_sorted(&self, array: &Self::Array) -> VortexResult<Option<bool>>;

    fn is_strict_sorted(&self, array: &Self::Array) -> VortexResult<Option<bool>>;
}

#[expect(
    clippy::wrong_self_convention,
    reason = "is_* naming follows Iterator::is_sorted convention"
)]
/// Helper methods to check sortedness with strictness
pub trait IsSortedIteratorExt: Iterator
where
    <Self as Iterator>::Item: PartialOrd,
{
    fn is_strict_sorted(self) -> bool
    where
        Self: Sized,
        Self::Item: PartialOrd,
    {
        self.is_sorted_by(|a, b| a < b)
    }
}

impl<T> IsSortedIteratorExt for T
where
    T: Iterator + ?Sized,
    T::Item: PartialOrd,
{
}

fn is_sorted_impl(
    array: &ArrayRef,
    kernels: &[ArcRef<dyn Kernel>],
    strict: bool,
) -> VortexResult<Option<bool>> {
    // Arrays with 0 or 1 elements are strict sorted.
    if array.len() <= 1 {
        return Ok(Some(true));
    }

    // Constant and null arrays are always sorted, but not strict sorted.
    if array.is::<ConstantVTable>() || array.is::<NullVTable>() {
        return Ok(Some(!strict));
    }

    // Enforce strictness before we even try to check if the array is sorted.
    if strict {
        let invalid_count = array.invalid_count()?;
        match invalid_count {
            // We can keep going
            0 => {}
            // If we have a potential null value - it has to be the first one.
            1 => {
                if !array.is_invalid(0)? {
                    return Ok(Some(false));
                }
            }
            _ => return Ok(Some(false)),
        }
    }

    let args = InvocationArgs {
        inputs: &[array.into()],
        options: &IsSortedOptions { strict },
    };

    for kernel in kernels {
        if let Some(output) = kernel.invoke(&args)? {
            return Ok(output.unwrap_scalar()?.as_bool().value());
        }
    }

    if !array.is_canonical() {
        tracing::debug!(
            "No is_sorted implementation found for {}",
            array.encoding_id()
        );

        // Recurse to canonical implementation
        let array = array.to_canonical()?.into_array();

        return if strict {
            is_strict_sorted(&array)
        } else {
            is_sorted(&array)
        };
    }

    vortex_bail!(
        "No is_sorted function for canonical array: {}",
        array.encoding_id(),
    )
}

#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;

    use crate::IntoArray;
    use crate::arrays::BoolArray;
    use crate::arrays::PrimitiveArray;
    use crate::compute::is_sorted;
    use crate::compute::is_strict_sorted;
    use crate::validity::Validity;
    #[test]
    fn test_is_sorted() {
        let arr = PrimitiveArray::new(buffer!(0, 1, 2, 3), Validity::AllValid).into_array();
        assert!(is_sorted(&arr).unwrap().unwrap());

        let arr = PrimitiveArray::new(
            buffer!(0, 1, 2, 3),
            Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array()),
        )
        .into_array();
        assert!(is_sorted(&arr).unwrap().unwrap());

        let arr = PrimitiveArray::new(
            buffer!(0, 1, 2, 3),
            Validity::Array(BoolArray::from_iter([true, false, true, true]).into_array()),
        )
        .into_array();
        assert!(!is_sorted(&arr).unwrap().unwrap());

        let arr = PrimitiveArray::new(buffer!(0, 1, 3, 2), Validity::AllValid).into_array();
        assert!(!is_sorted(&arr).unwrap().unwrap());

        let arr = PrimitiveArray::new(
            buffer!(0, 1, 3, 2),
            Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array()),
        )
        .into_array();
        assert!(!is_sorted(&arr).unwrap().unwrap());
    }

    #[test]
    fn test_is_strict_sorted() {
        let arr = PrimitiveArray::new(buffer!(0, 1, 2, 3), Validity::AllValid).into_array();
        assert!(is_strict_sorted(&arr).unwrap().unwrap());

        let arr = PrimitiveArray::new(
            buffer!(0, 1, 2, 3),
            Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array()),
        )
        .into_array();
        assert!(is_strict_sorted(&arr).unwrap().unwrap());

        let arr = PrimitiveArray::new(
            buffer!(0, 1, 2, 3),
            Validity::Array(BoolArray::from_iter([true, false, true, true]).into_array()),
        )
        .into_array();
        assert!(!is_strict_sorted(&arr).unwrap().unwrap());

        let arr = PrimitiveArray::new(
            buffer!(0, 1, 3, 2),
            Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array()),
        )
        .into_array();
        assert!(!is_strict_sorted(&arr).unwrap().unwrap());
    }
}