vortex-array 0.54.0

Vortex in-memory columnar data format
Documentation
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#[cfg(test)]
mod test;

use std::sync::Arc;

use vortex_dtype::datetime::{DATE_ID, TIME_ID, TIMESTAMP_ID, TemporalMetadata, TimeUnit};
use vortex_dtype::{DType, ExtDType};
use vortex_error::{VortexError, vortex_err, vortex_panic};

use crate::arrays::{ExtensionArray, ExtensionVTable};
use crate::{Array, ArrayRef, IntoArray};

/// A wrapper around an [`ExtensionArray`] whose primitive values have an associated temporal
/// meaning.
///
/// The wrapped extension array contains numeric values, each of which corresponds to either a
/// timestamp or Julian date (both referenced to the UNIX epoch), OR a time since midnight.
///
/// ## Arrow compatibility
///
/// `TemporalArray` can be created from Arrow arrays containing the following datatypes:
/// * `Time32`
/// * `Time64`
/// * `Timestamp`
/// * `Date32`
/// * `Date64`
///
/// Anything that can be constructed and held in a `TemporalArray` can also be zero-copy converted
/// back to the relevant Arrow datatype.
#[derive(Clone, Debug)]
pub struct TemporalArray {
    /// The underlying Vortex extension array holding all the numeric values.
    ext: ExtensionArray,

    /// In-memory representation of the metadata that is held by the underlying extension array.
    ///
    /// It is stored next to `ext` so that properties such as the time unit and timezone can be
    /// read without deserializing the extension metadata on every access.
    temporal_metadata: TemporalMetadata,
}

/// Asserts that `$array` has a primitive dtype whose ptype is the one of the native type `$width`.
///
/// # Panics
///
/// Panics with "array must have primitive type" if the dtype of `$array` is not
/// `DType::Primitive`, and with a message naming the actual and the expected ptype if the two
/// differ.
macro_rules! assert_width {
    ($width:ty, $array:expr) => {{
        let DType::Primitive(ptype, _) = $array.dtype() else {
            panic!("array must have primitive type");
        };

        // Message arguments: first the ptype found on the array, then the ptype that was required.
        assert_eq!(
            <$width as vortex_dtype::NativePType>::PTYPE,
            *ptype,
            "invalid ptype {} for array, expected {}",
            *ptype,
            <$width as vortex_dtype::NativePType>::PTYPE
        );
    }};
}

impl TemporalArray {
    /// Build a date-typed `TemporalArray` (extension ID `DATE_ID`) over `array`.
    ///
    /// The storage holds either `i32` day offsets or `i64` millisecond offsets. Millisecond
    /// values are expected to be whole multiples of 86,400,000 (one day); that is not verified
    /// here.
    ///
    /// This is equivalent to the data described by the `Date32` and `Date64` data types from
    /// Arrow.
    ///
    /// # Panics
    ///
    /// * If the time unit is days and the array is not of primitive I32 type.
    /// * If the time unit is milliseconds and the array is not of primitive I64 type.
    /// * If any other time unit is provided.
    pub fn new_date(array: ArrayRef, time_unit: TimeUnit) -> Self {
        match time_unit {
            TimeUnit::Days => assert_width!(i32, array),
            TimeUnit::Milliseconds => assert_width!(i64, array),
            TimeUnit::Nanoseconds | TimeUnit::Microseconds | TimeUnit::Seconds => {
                vortex_panic!("invalid TimeUnit {time_unit} for vortex.date")
            }
        }

        // Build the metadata once: a clone is serialized into the extension dtype and the
        // original is kept on the wrapper.
        let temporal_metadata = TemporalMetadata::Date(time_unit);
        let storage_dtype = Arc::new(array.dtype().clone());
        let ext_dtype = Arc::new(ExtDType::new(
            DATE_ID.clone(),
            storage_dtype,
            Some(temporal_metadata.clone().into()),
        ));

        Self {
            ext: ExtensionArray::new(ext_dtype, array),
            temporal_metadata,
        }
    }

    /// Build a time-of-day `TemporalArray` (extension ID `TIME_ID`) over `array`.
    ///
    /// Accepted combinations of storage type and unit:
    ///
    /// * `i32` values representing seconds since midnight
    /// * `i32` values representing milliseconds since midnight
    /// * `i64` values representing microseconds since midnight
    /// * `i64` values representing nanoseconds since midnight
    ///
    /// This is equivalent to the set of values represented by the `Time32` and `Time64` types
    /// from Arrow.
    ///
    /// # Panics
    ///
    /// * If the time unit is seconds or milliseconds and the array is not of primitive I32 type.
    /// * If the time unit is microseconds or nanoseconds and the array is not of primitive I64
    ///   type.
    /// * If the time unit is days.
    pub fn new_time(array: ArrayRef, time_unit: TimeUnit) -> Self {
        match time_unit {
            TimeUnit::Seconds | TimeUnit::Milliseconds => {
                assert_width!(i32, array);
            }
            TimeUnit::Microseconds | TimeUnit::Nanoseconds => {
                assert_width!(i64, array);
            }
            TimeUnit::Days => {
                vortex_panic!("invalid unit D for vortex.time data")
            }
        }

        let temporal_metadata = TemporalMetadata::Time(time_unit);
        let ext_dtype = ExtDType::new(
            TIME_ID.clone(),
            Arc::new(array.dtype().clone()),
            Some(temporal_metadata.clone().into()),
        );

        Self {
            ext: ExtensionArray::new(Arc::new(ext_dtype), array),
            temporal_metadata,
        }
    }

    /// Create a new `TemporalArray` holding Arrow spec compliant Timestamp data, with an
    /// optional timezone.
    ///
    /// # Panics
    ///
    /// If `array` does not hold Primitive i64 data, the function will panic.
    ///
    /// If the time_unit is days, the function will panic.
    pub fn new_timestamp(array: ArrayRef, time_unit: TimeUnit, time_zone: Option<String>) -> Self {
        assert_width!(i64, array);

        let temporal_metadata = TemporalMetadata::Timestamp(time_unit, time_zone);

        Self {
            ext: ExtensionArray::new(
                Arc::new(ExtDType::new(
                    TIMESTAMP_ID.clone(),
                    Arc::new(array.dtype().clone()),
                    Some(temporal_metadata.clone().into()),
                )),
                array,
            ),
            temporal_metadata,
        }
    }
}

impl TemporalArray {
    /// Borrow the storage array of the underlying `ExtensionArray`, i.e. the raw temporal values.
    ///
    /// These values are to be interpreted based on the time unit and optional time-zone stored
    /// in the `TemporalMetadata`.
    pub fn temporal_values(&self) -> &ArrayRef {
        self.ext.storage()
    }

    /// Borrow the temporal metadata held by this wrapper.
    ///
    /// The metadata provides semantic meaning to the temporal values array, for example the
    /// granularity of the samples and whether they have an associated timezone.
    pub fn temporal_metadata(&self) -> &TemporalMetadata {
        &self.temporal_metadata
    }

    /// Return a clone of the `Arc<ExtDType>` reported by the underlying extension array.
    pub fn ext_dtype(&self) -> Arc<ExtDType> {
        self.ext.ext_dtype().clone()
    }

    /// Borrow the `DType` reported by the underlying extension array.
    ///
    /// NOTE(review): expected to always be a `DType::Extension` variant — confirm against
    /// `ExtensionArray::dtype`.
    pub fn dtype(&self) -> &DType {
        self.ext.dtype()
    }
}

impl From<TemporalArray> for ArrayRef {
    fn from(value: TemporalArray) -> Self {
        value.ext.into_array()
    }
}

impl IntoArray for TemporalArray {
    fn into_array(self) -> ArrayRef {
        self.into()
    }
}

impl TryFrom<ArrayRef> for TemporalArray {
    type Error = VortexError;

    /// Attempt to view a generic Vortex array as a `TemporalArray`.
    ///
    /// # Errors
    ///
    /// Returns an error if `value` is not an `ExtensionArray`.
    ///
    /// Returns an error if the extension dtype of the array cannot be parsed into one of the
    /// known `TemporalMetadata` variants.
    fn try_from(value: ArrayRef) -> Result<Self, Self::Error> {
        // Guard: only extension-encoded arrays can carry temporal metadata.
        let Some(ext) = value.as_opt::<ExtensionVTable>() else {
            return Err(vortex_err!("array must be an ExtensionArray"));
        };

        let temporal_metadata = TemporalMetadata::try_from(ext.ext_dtype())?;

        Ok(Self {
            ext: ext.clone(),
            temporal_metadata,
        })
    }
}

// Conversions to/from ExtensionArray
impl From<&TemporalArray> for ExtensionArray {
    fn from(value: &TemporalArray) -> Self {
        value.ext.clone()
    }
}

impl From<TemporalArray> for ExtensionArray {
    fn from(value: TemporalArray) -> Self {
        value.ext
    }
}

impl TryFrom<ExtensionArray> for TemporalArray {
    type Error = VortexError;

    /// Wrap an `ExtensionArray` as a `TemporalArray`.
    ///
    /// # Errors
    ///
    /// Returns an error if the extension dtype of `ext` cannot be parsed into a
    /// `TemporalMetadata`.
    fn try_from(ext: ExtensionArray) -> Result<Self, Self::Error> {
        let parsed = TemporalMetadata::try_from(ext.ext_dtype().as_ref())?;

        Ok(Self {
            ext,
            temporal_metadata: parsed,
        })
    }
}