vortex_array/arrays/datetime/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4#[cfg(test)]
5mod test;
6
7use std::sync::Arc;
8
9use vortex_dtype::DType;
10use vortex_dtype::ExtDType;
11use vortex_dtype::datetime::DATE_ID;
12use vortex_dtype::datetime::TIME_ID;
13use vortex_dtype::datetime::TIMESTAMP_ID;
14use vortex_dtype::datetime::TemporalMetadata;
15use vortex_dtype::datetime::TimeUnit;
16use vortex_error::VortexError;
17use vortex_error::vortex_err;
18use vortex_error::vortex_panic;
19
20use crate::Array;
21use crate::ArrayRef;
22use crate::IntoArray;
23use crate::arrays::ExtensionArray;
24use crate::arrays::ExtensionVTable;
25
26/// An array wrapper for primitive values that have an associated temporal meaning.
27///
28/// This is a wrapper around ExtensionArrays containing numeric types, each of which corresponds to
29/// either a timestamp or julian date (both referenced to UNIX epoch), OR a time since midnight.
30///
31/// ## Arrow compatibility
32///
33/// TemporalArray can be created from Arrow arrays containing the following datatypes:
34/// * `Time32`
35/// * `Time64`
36/// * `Timestamp`
37/// * `Date32`
38/// * `Date64`
39///
40/// Anything that can be constructed and held in a `TemporalArray` can also be zero-copy converted
41/// back to the relevant Arrow datatype.
42#[derive(Clone, Debug)]
43pub struct TemporalArray {
44    /// The underlying Vortex extension array holding all the numeric values.
45    ext: ExtensionArray,
46
47    /// In-memory representation of the ExtMetadata that is held by the underlying extension array.
48    ///
49    /// We hold this directly to avoid needing to deserialize the metadata to access things like
50    /// timezone and TimeUnit of the underlying array.
51    temporal_metadata: TemporalMetadata,
52}
53
/// Asserts that `$array` is a primitive array whose ptype matches the native type `$width`.
///
/// # Panics
///
/// Panics if the array's dtype is not `DType::Primitive`, or if its ptype differs from
/// `<$width as NativePType>::PTYPE`.
macro_rules! assert_width {
    ($width:ty, $array:expr) => {{
        let DType::Primitive(ptype, _) = $array.dtype() else {
            panic!("array must have primitive type");
        };

        // The message template reads "invalid ptype <actual> ..., expected <expected>", so the
        // format arguments are passed in that order: the array's ptype first, then the required
        // one.
        assert_eq!(
            <$width as vortex_dtype::NativePType>::PTYPE,
            *ptype,
            "invalid ptype {} for array, expected {}",
            *ptype,
            <$width as vortex_dtype::NativePType>::PTYPE
        );
    }};
}
69
70impl TemporalArray {
71    /// Create a new `TemporalArray` holding either i32 day offsets, or i64 millisecond offsets
72    /// that are evenly divisible by the number of 86,400,000.
73    ///
74    /// This is equivalent to the data described by either of the `Date32` or `Date64` data types
75    /// from Arrow.
76    ///
77    /// # Panics
78    ///
79    /// If the time unit is milliseconds, and the array is not of primitive I64 type, it panics.
80    ///
81    /// If the time unit is days, and the array is not of primitive I32 type, it panics.
82    ///
83    /// If any other time unit is provided, it panics.
84    pub fn new_date(array: ArrayRef, time_unit: TimeUnit) -> Self {
85        match time_unit {
86            TimeUnit::Days => {
87                assert_width!(i32, array);
88            }
89            TimeUnit::Milliseconds => {
90                assert_width!(i64, array);
91            }
92            TimeUnit::Nanoseconds | TimeUnit::Microseconds | TimeUnit::Seconds => {
93                vortex_panic!("invalid TimeUnit {time_unit} for vortex.date")
94            }
95        };
96
97        let ext_dtype = ExtDType::new(
98            DATE_ID.clone(),
99            Arc::new(array.dtype().clone()),
100            Some(TemporalMetadata::Date(time_unit).into()),
101        );
102
103        Self {
104            ext: ExtensionArray::new(Arc::new(ext_dtype), array),
105            temporal_metadata: TemporalMetadata::Date(time_unit),
106        }
107    }
108
109    /// Create a new `TemporalArray` holding one of the following values:
110    ///
111    /// * `i32` values representing seconds since midnight
112    /// * `i32` values representing milliseconds since midnight
113    /// * `i64` values representing microseconds since midnight
114    /// * `i64` values representing nanoseconds since midnight
115    ///
116    /// Note, this is equivalent to the set of values represented by the Time32 or Time64 types
117    /// from Arrow.
118    ///
119    /// # Panics
120    ///
121    /// If the time unit is seconds, and the array is not of primitive I32 type, it panics.
122    ///
123    /// If the time unit is milliseconds, and the array is not of primitive I32 type, it panics.
124    ///
125    /// If the time unit is microseconds, and the array is not of primitive I64 type, it panics.
126    ///
127    /// If the time unit is nanoseconds, and the array is not of primitive I64 type, it panics.
128    pub fn new_time(array: ArrayRef, time_unit: TimeUnit) -> Self {
129        match time_unit {
130            TimeUnit::Seconds | TimeUnit::Milliseconds => assert_width!(i32, array),
131            TimeUnit::Microseconds | TimeUnit::Nanoseconds => assert_width!(i64, array),
132            TimeUnit::Days => vortex_panic!("invalid unit D for vortex.time data"),
133        }
134
135        let temporal_metadata = TemporalMetadata::Time(time_unit);
136        Self {
137            ext: ExtensionArray::new(
138                Arc::new(ExtDType::new(
139                    TIME_ID.clone(),
140                    Arc::new(array.dtype().clone()),
141                    Some(temporal_metadata.clone().into()),
142                )),
143                array,
144            ),
145            temporal_metadata,
146        }
147    }
148
149    /// Create a new `TemporalArray` holding Arrow spec compliant Timestamp data, with an
150    /// optional timezone.
151    ///
152    /// # Panics
153    ///
154    /// If `array` does not hold Primitive i64 data, the function will panic.
155    ///
156    /// If the time_unit is days, the function will panic.
157    pub fn new_timestamp(array: ArrayRef, time_unit: TimeUnit, time_zone: Option<String>) -> Self {
158        assert_width!(i64, array);
159
160        let temporal_metadata = TemporalMetadata::Timestamp(time_unit, time_zone);
161
162        Self {
163            ext: ExtensionArray::new(
164                Arc::new(ExtDType::new(
165                    TIMESTAMP_ID.clone(),
166                    Arc::new(array.dtype().clone()),
167                    Some(temporal_metadata.clone().into()),
168                )),
169                array,
170            ),
171            temporal_metadata,
172        }
173    }
174}
175
176impl TemporalArray {
177    /// Access the underlying temporal values in the underlying ExtensionArray storage.
178    ///
179    /// These values are to be interpreted based on the time unit and optional time-zone stored
180    /// in the TemporalMetadata.
181    pub fn temporal_values(&self) -> &ArrayRef {
182        self.ext.storage()
183    }
184
185    /// Retrieve the temporal metadata.
186    ///
187    /// The metadata is used to provide semantic meaning to the temporal values Array, for example
188    /// to understand the granularity of the samples and if they have an associated timezone.
189    pub fn temporal_metadata(&self) -> &TemporalMetadata {
190        &self.temporal_metadata
191    }
192
193    /// Retrieve the extension DType associated with the underlying array.
194    pub fn ext_dtype(&self) -> Arc<ExtDType> {
195        self.ext.ext_dtype().clone()
196    }
197
198    /// Retrieve the DType of the array. This will be a `DType::Extension` variant.
199    pub fn dtype(&self) -> &DType {
200        self.ext.dtype()
201    }
202}
203
204impl From<TemporalArray> for ArrayRef {
205    fn from(value: TemporalArray) -> Self {
206        value.ext.into_array()
207    }
208}
209
210impl IntoArray for TemporalArray {
211    fn into_array(self) -> ArrayRef {
212        self.into()
213    }
214}
215
216impl TryFrom<ArrayRef> for TemporalArray {
217    type Error = VortexError;
218
219    /// Try to specialize a generic Vortex array as a TemporalArray.
220    ///
221    /// # Errors
222    ///
223    /// If the provided Array does not have `vortex.ext` encoding, an error will be returned.
224    ///
225    /// If the provided Array does not have recognized ExtMetadata corresponding to one of the known
226    /// `TemporalMetadata` variants, an error is returned.
227    fn try_from(value: ArrayRef) -> Result<Self, Self::Error> {
228        let ext = value
229            .as_opt::<ExtensionVTable>()
230            .ok_or_else(|| vortex_err!("array must be an ExtensionArray"))?;
231        let temporal_metadata = TemporalMetadata::try_from(ext.ext_dtype())?;
232        Ok(Self {
233            ext: ext.clone(),
234            temporal_metadata,
235        })
236    }
237}
238
239// Conversions to/from ExtensionArray
240impl From<&TemporalArray> for ExtensionArray {
241    fn from(value: &TemporalArray) -> Self {
242        value.ext.clone()
243    }
244}
245
246impl From<TemporalArray> for ExtensionArray {
247    fn from(value: TemporalArray) -> Self {
248        value.ext
249    }
250}
251
252impl TryFrom<ExtensionArray> for TemporalArray {
253    type Error = VortexError;
254
255    fn try_from(ext: ExtensionArray) -> Result<Self, Self::Error> {
256        let temporal_metadata = TemporalMetadata::try_from(ext.ext_dtype().as_ref())?;
257        Ok(Self {
258            ext,
259            temporal_metadata,
260        })
261    }
262}