vortex_array/arrays/datetime/mod.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4#[cfg(test)]
5mod test;
6
7use std::sync::Arc;
8
9use vortex_dtype::datetime::{DATE_ID, TIME_ID, TIMESTAMP_ID, TemporalMetadata, TimeUnit};
10use vortex_dtype::{DType, ExtDType};
11use vortex_error::{VortexError, vortex_err, vortex_panic};
12
13use crate::arrays::{ExtensionArray, ExtensionVTable};
14use crate::{Array, ArrayRef, IntoArray};
15
/// An array wrapper for primitive values that have an associated temporal meaning.
///
/// This is a wrapper around an [`ExtensionArray`] containing numeric types, each of which
/// corresponds to either a timestamp or julian date (both referenced to UNIX epoch), OR a time
/// since midnight.
///
/// ## Arrow compatibility
///
/// `TemporalArray` can be created from Arrow arrays containing the following datatypes:
/// * `Time32`
/// * `Time64`
/// * `Timestamp`
/// * `Date32`
/// * `Date64`
///
/// Anything that can be constructed and held in a `TemporalArray` can also be zero-copy converted
/// back to the relevant Arrow datatype.
#[derive(Clone, Debug)]
pub struct TemporalArray {
    /// The underlying Vortex extension array holding all the numeric values.
    ext: ExtensionArray,

    /// In-memory representation of the ExtMetadata that is held by the underlying extension array.
    ///
    /// We hold this directly to avoid needing to deserialize the metadata to access things like
    /// timezone and TimeUnit of the underlying array. It is built from the same metadata that is
    /// attached to the extension dtype of `ext`.
    temporal_metadata: TemporalMetadata,
}
43
/// Assert that `$array` has a primitive dtype whose ptype is the one associated with the native
/// type `$width`.
///
/// # Panics
///
/// Panics if the dtype of `$array` is not `DType::Primitive`, or if its ptype differs from
/// `<$width as NativePType>::PTYPE`.
macro_rules! assert_width {
    ($width:ty, $array:expr) => {{
        let DType::Primitive(ptype, _) = $array.dtype() else {
            panic!("array must have primitive type");
        };

        // The message reports the ptype that was actually found first, then the ptype that was
        // required, matching the wording of the format string.
        assert_eq!(
            <$width as vortex_dtype::NativePType>::PTYPE,
            *ptype,
            "invalid ptype {} for array, expected {}",
            *ptype,
            <$width as vortex_dtype::NativePType>::PTYPE
        );
    }};
}
59
60impl TemporalArray {
61 /// Create a new `TemporalArray` holding either i32 day offsets, or i64 millisecond offsets
62 /// that are evenly divisible by the number of 86,400,000.
63 ///
64 /// This is equivalent to the data described by either of the `Date32` or `Date64` data types
65 /// from Arrow.
66 ///
67 /// # Panics
68 ///
69 /// If the time unit is milliseconds, and the array is not of primitive I64 type, it panics.
70 ///
71 /// If the time unit is days, and the array is not of primitive I32 type, it panics.
72 ///
73 /// If any other time unit is provided, it panics.
74 pub fn new_date(array: ArrayRef, time_unit: TimeUnit) -> Self {
75 match time_unit {
76 TimeUnit::D => {
77 assert_width!(i32, array);
78 }
79 TimeUnit::Ms => {
80 assert_width!(i64, array);
81 }
82 _ => vortex_panic!("invalid TimeUnit {time_unit} for vortex.date"),
83 };
84
85 let ext_dtype = ExtDType::new(
86 DATE_ID.clone(),
87 Arc::new(array.dtype().clone()),
88 Some(TemporalMetadata::Date(time_unit).into()),
89 );
90
91 Self {
92 ext: ExtensionArray::new(Arc::new(ext_dtype), array),
93 temporal_metadata: TemporalMetadata::Date(time_unit),
94 }
95 }
96
97 /// Create a new `TemporalArray` holding one of the following values:
98 ///
99 /// * `i32` values representing seconds since midnight
100 /// * `i32` values representing milliseconds since midnight
101 /// * `i64` values representing microseconds since midnight
102 /// * `i64` values representing nanoseconds since midnight
103 ///
104 /// Note, this is equivalent to the set of values represented by the Time32 or Time64 types
105 /// from Arrow.
106 ///
107 /// # Panics
108 ///
109 /// If the time unit is seconds, and the array is not of primitive I32 type, it panics.
110 ///
111 /// If the time unit is milliseconds, and the array is not of primitive I32 type, it panics.
112 ///
113 /// If the time unit is microseconds, and the array is not of primitive I64 type, it panics.
114 ///
115 /// If the time unit is nanoseconds, and the array is not of primitive I64 type, it panics.
116 pub fn new_time(array: ArrayRef, time_unit: TimeUnit) -> Self {
117 match time_unit {
118 TimeUnit::S | TimeUnit::Ms => assert_width!(i32, array),
119 TimeUnit::Us | TimeUnit::Ns => assert_width!(i64, array),
120 TimeUnit::D => vortex_panic!("invalid unit D for vortex.time data"),
121 }
122
123 let temporal_metadata = TemporalMetadata::Time(time_unit);
124 Self {
125 ext: ExtensionArray::new(
126 Arc::new(ExtDType::new(
127 TIME_ID.clone(),
128 Arc::new(array.dtype().clone()),
129 Some(temporal_metadata.clone().into()),
130 )),
131 array,
132 ),
133 temporal_metadata,
134 }
135 }
136
137 /// Create a new `TemporalArray` holding Arrow spec compliant Timestamp data, with an
138 /// optional timezone.
139 ///
140 /// # Panics
141 ///
142 /// If `array` does not hold Primitive i64 data, the function will panic.
143 ///
144 /// If the time_unit is days, the function will panic.
145 pub fn new_timestamp(array: ArrayRef, time_unit: TimeUnit, time_zone: Option<String>) -> Self {
146 assert_width!(i64, array);
147
148 let temporal_metadata = TemporalMetadata::Timestamp(time_unit, time_zone);
149
150 Self {
151 ext: ExtensionArray::new(
152 Arc::new(ExtDType::new(
153 TIMESTAMP_ID.clone(),
154 Arc::new(array.dtype().clone()),
155 Some(temporal_metadata.clone().into()),
156 )),
157 array,
158 ),
159 temporal_metadata,
160 }
161 }
162}
163
164impl TemporalArray {
165 /// Access the underlying temporal values in the underlying ExtensionArray storage.
166 ///
167 /// These values are to be interpreted based on the time unit and optional time-zone stored
168 /// in the TemporalMetadata.
169 pub fn temporal_values(&self) -> &ArrayRef {
170 self.ext.storage()
171 }
172
173 /// Retrieve the temporal metadata.
174 ///
175 /// The metadata is used to provide semantic meaning to the temporal values Array, for example
176 /// to understand the granularity of the samples and if they have an associated timezone.
177 pub fn temporal_metadata(&self) -> &TemporalMetadata {
178 &self.temporal_metadata
179 }
180
181 /// Retrieve the extension DType associated with the underlying array.
182 pub fn ext_dtype(&self) -> Arc<ExtDType> {
183 self.ext.ext_dtype().clone()
184 }
185
186 /// Retrieve the DType of the array. This will be a `DType::Extension` variant.
187 pub fn dtype(&self) -> &DType {
188 self.ext.dtype()
189 }
190}
191
192impl From<TemporalArray> for ArrayRef {
193 fn from(value: TemporalArray) -> Self {
194 value.ext.into_array()
195 }
196}
197
198impl IntoArray for TemporalArray {
199 fn into_array(self) -> ArrayRef {
200 self.into()
201 }
202}
203
204impl TryFrom<ArrayRef> for TemporalArray {
205 type Error = VortexError;
206
207 /// Try to specialize a generic Vortex array as a TemporalArray.
208 ///
209 /// # Errors
210 ///
211 /// If the provided Array does not have `vortex.ext` encoding, an error will be returned.
212 ///
213 /// If the provided Array does not have recognized ExtMetadata corresponding to one of the known
214 /// `TemporalMetadata` variants, an error is returned.
215 fn try_from(value: ArrayRef) -> Result<Self, Self::Error> {
216 let ext = value
217 .as_opt::<ExtensionVTable>()
218 .ok_or_else(|| vortex_err!("array must be an ExtensionArray"))?;
219 let temporal_metadata = TemporalMetadata::try_from(ext.ext_dtype())?;
220 Ok(Self {
221 ext: ext.clone(),
222 temporal_metadata,
223 })
224 }
225}
226
227// Conversions to/from ExtensionArray
228impl From<&TemporalArray> for ExtensionArray {
229 fn from(value: &TemporalArray) -> Self {
230 value.ext.clone()
231 }
232}
233
234impl From<TemporalArray> for ExtensionArray {
235 fn from(value: TemporalArray) -> Self {
236 value.ext
237 }
238}
239
240impl TryFrom<ExtensionArray> for TemporalArray {
241 type Error = VortexError;
242
243 fn try_from(ext: ExtensionArray) -> Result<Self, Self::Error> {
244 let temporal_metadata = TemporalMetadata::try_from(ext.ext_dtype().as_ref())?;
245 Ok(Self {
246 ext,
247 temporal_metadata,
248 })
249 }
250}