vortex_array/arrays/datetime/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4#[cfg(test)]
5mod test;
6
7use std::sync::Arc;
8
9use vortex_dtype::datetime::{DATE_ID, TIME_ID, TIMESTAMP_ID, TemporalMetadata, TimeUnit};
10use vortex_dtype::{DType, ExtDType};
11use vortex_error::{VortexError, vortex_err, vortex_panic};
12
13use crate::arrays::{ExtensionArray, ExtensionVTable};
14use crate::{Array, ArrayRef, IntoArray};
15
16/// An array wrapper for primitive values that have an associated temporal meaning.
17///
18/// This is a wrapper around ExtensionArrays containing numeric types, each of which corresponds to
19/// either a timestamp or julian date (both referenced to UNIX epoch), OR a time since midnight.
20///
21/// ## Arrow compatibility
22///
23/// TemporalArray can be created from Arrow arrays containing the following datatypes:
24/// * `Time32`
25/// * `Time64`
26/// * `Timestamp`
27/// * `Date32`
28/// * `Date64`
29///
30/// Anything that can be constructed and held in a `TemporalArray` can also be zero-copy converted
31/// back to the relevant Arrow datatype.
32#[derive(Clone, Debug)]
33pub struct TemporalArray {
34    /// The underlying Vortex extension array holding all the numeric values.
35    ext: ExtensionArray,
36
37    /// In-memory representation of the ExtMetadata that is held by the underlying extension array.
38    ///
39    /// We hold this directly to avoid needing to deserialize the metadata to access things like
40    /// timezone and TimeUnit of the underlying array.
41    temporal_metadata: TemporalMetadata,
42}
43
/// Asserts that `$array` has a primitive dtype whose ptype is the one belonging to the native
/// type `$width`.
///
/// Panics with "array must have primitive type" when the dtype is not `DType::Primitive`, and
/// with a message naming the actual and the expected ptype when the two differ.
macro_rules! assert_width {
    ($width:ty, $array:expr) => {{
        let DType::Primitive(ptype, _) = $array.dtype() else {
            panic!("array must have primitive type");
        };

        // The message reads "invalid ptype <actual> ..., expected <expected>", so the array's
        // own ptype must be formatted first and the required ptype second.
        assert_eq!(
            <$width as vortex_dtype::NativePType>::PTYPE,
            *ptype,
            "invalid ptype {} for array, expected {}",
            *ptype,
            <$width as vortex_dtype::NativePType>::PTYPE
        );
    }};
}
59
60impl TemporalArray {
61    /// Create a new `TemporalArray` holding either i32 day offsets, or i64 millisecond offsets
62    /// that are evenly divisible by the number of 86,400,000.
63    ///
64    /// This is equivalent to the data described by either of the `Date32` or `Date64` data types
65    /// from Arrow.
66    ///
67    /// # Panics
68    ///
69    /// If the time unit is milliseconds, and the array is not of primitive I64 type, it panics.
70    ///
71    /// If the time unit is days, and the array is not of primitive I32 type, it panics.
72    ///
73    /// If any other time unit is provided, it panics.
74    pub fn new_date(array: ArrayRef, time_unit: TimeUnit) -> Self {
75        match time_unit {
76            TimeUnit::D => {
77                assert_width!(i32, array);
78            }
79            TimeUnit::Ms => {
80                assert_width!(i64, array);
81            }
82            TimeUnit::Ns | TimeUnit::Us | TimeUnit::S => {
83                vortex_panic!("invalid TimeUnit {time_unit} for vortex.date")
84            }
85        };
86
87        let ext_dtype = ExtDType::new(
88            DATE_ID.clone(),
89            Arc::new(array.dtype().clone()),
90            Some(TemporalMetadata::Date(time_unit).into()),
91        );
92
93        Self {
94            ext: ExtensionArray::new(Arc::new(ext_dtype), array),
95            temporal_metadata: TemporalMetadata::Date(time_unit),
96        }
97    }
98
99    /// Create a new `TemporalArray` holding one of the following values:
100    ///
101    /// * `i32` values representing seconds since midnight
102    /// * `i32` values representing milliseconds since midnight
103    /// * `i64` values representing microseconds since midnight
104    /// * `i64` values representing nanoseconds since midnight
105    ///
106    /// Note, this is equivalent to the set of values represented by the Time32 or Time64 types
107    /// from Arrow.
108    ///
109    /// # Panics
110    ///
111    /// If the time unit is seconds, and the array is not of primitive I32 type, it panics.
112    ///
113    /// If the time unit is milliseconds, and the array is not of primitive I32 type, it panics.
114    ///
115    /// If the time unit is microseconds, and the array is not of primitive I64 type, it panics.
116    ///
117    /// If the time unit is nanoseconds, and the array is not of primitive I64 type, it panics.
118    pub fn new_time(array: ArrayRef, time_unit: TimeUnit) -> Self {
119        match time_unit {
120            TimeUnit::S | TimeUnit::Ms => assert_width!(i32, array),
121            TimeUnit::Us | TimeUnit::Ns => assert_width!(i64, array),
122            TimeUnit::D => vortex_panic!("invalid unit D for vortex.time data"),
123        }
124
125        let temporal_metadata = TemporalMetadata::Time(time_unit);
126        Self {
127            ext: ExtensionArray::new(
128                Arc::new(ExtDType::new(
129                    TIME_ID.clone(),
130                    Arc::new(array.dtype().clone()),
131                    Some(temporal_metadata.clone().into()),
132                )),
133                array,
134            ),
135            temporal_metadata,
136        }
137    }
138
139    /// Create a new `TemporalArray` holding Arrow spec compliant Timestamp data, with an
140    /// optional timezone.
141    ///
142    /// # Panics
143    ///
144    /// If `array` does not hold Primitive i64 data, the function will panic.
145    ///
146    /// If the time_unit is days, the function will panic.
147    pub fn new_timestamp(array: ArrayRef, time_unit: TimeUnit, time_zone: Option<String>) -> Self {
148        assert_width!(i64, array);
149
150        let temporal_metadata = TemporalMetadata::Timestamp(time_unit, time_zone);
151
152        Self {
153            ext: ExtensionArray::new(
154                Arc::new(ExtDType::new(
155                    TIMESTAMP_ID.clone(),
156                    Arc::new(array.dtype().clone()),
157                    Some(temporal_metadata.clone().into()),
158                )),
159                array,
160            ),
161            temporal_metadata,
162        }
163    }
164}
165
166impl TemporalArray {
167    /// Access the underlying temporal values in the underlying ExtensionArray storage.
168    ///
169    /// These values are to be interpreted based on the time unit and optional time-zone stored
170    /// in the TemporalMetadata.
171    pub fn temporal_values(&self) -> &ArrayRef {
172        self.ext.storage()
173    }
174
175    /// Retrieve the temporal metadata.
176    ///
177    /// The metadata is used to provide semantic meaning to the temporal values Array, for example
178    /// to understand the granularity of the samples and if they have an associated timezone.
179    pub fn temporal_metadata(&self) -> &TemporalMetadata {
180        &self.temporal_metadata
181    }
182
183    /// Retrieve the extension DType associated with the underlying array.
184    pub fn ext_dtype(&self) -> Arc<ExtDType> {
185        self.ext.ext_dtype().clone()
186    }
187
188    /// Retrieve the DType of the array. This will be a `DType::Extension` variant.
189    pub fn dtype(&self) -> &DType {
190        self.ext.dtype()
191    }
192}
193
194impl From<TemporalArray> for ArrayRef {
195    fn from(value: TemporalArray) -> Self {
196        value.ext.into_array()
197    }
198}
199
200impl IntoArray for TemporalArray {
201    fn into_array(self) -> ArrayRef {
202        self.into()
203    }
204}
205
206impl TryFrom<ArrayRef> for TemporalArray {
207    type Error = VortexError;
208
209    /// Try to specialize a generic Vortex array as a TemporalArray.
210    ///
211    /// # Errors
212    ///
213    /// If the provided Array does not have `vortex.ext` encoding, an error will be returned.
214    ///
215    /// If the provided Array does not have recognized ExtMetadata corresponding to one of the known
216    /// `TemporalMetadata` variants, an error is returned.
217    fn try_from(value: ArrayRef) -> Result<Self, Self::Error> {
218        let ext = value
219            .as_opt::<ExtensionVTable>()
220            .ok_or_else(|| vortex_err!("array must be an ExtensionArray"))?;
221        let temporal_metadata = TemporalMetadata::try_from(ext.ext_dtype())?;
222        Ok(Self {
223            ext: ext.clone(),
224            temporal_metadata,
225        })
226    }
227}
228
229// Conversions to/from ExtensionArray
230impl From<&TemporalArray> for ExtensionArray {
231    fn from(value: &TemporalArray) -> Self {
232        value.ext.clone()
233    }
234}
235
236impl From<TemporalArray> for ExtensionArray {
237    fn from(value: TemporalArray) -> Self {
238        value.ext
239    }
240}
241
242impl TryFrom<ExtensionArray> for TemporalArray {
243    type Error = VortexError;
244
245    fn try_from(ext: ExtensionArray) -> Result<Self, Self::Error> {
246        let temporal_metadata = TemporalMetadata::try_from(ext.ext_dtype().as_ref())?;
247        Ok(Self {
248            ext,
249            temporal_metadata,
250        })
251    }
252}