vortex_array/arrays/datetime/mod.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4#[cfg(test)]
5mod test;
6
7use std::sync::Arc;
8
9use vortex_dtype::DType;
10use vortex_dtype::ExtDType;
11use vortex_dtype::datetime::DATE_ID;
12use vortex_dtype::datetime::TIME_ID;
13use vortex_dtype::datetime::TIMESTAMP_ID;
14use vortex_dtype::datetime::TemporalMetadata;
15use vortex_dtype::datetime::TimeUnit;
16use vortex_error::VortexError;
17use vortex_error::vortex_err;
18use vortex_error::vortex_panic;
19
20use crate::Array;
21use crate::ArrayRef;
22use crate::IntoArray;
23use crate::arrays::ExtensionArray;
24use crate::arrays::ExtensionVTable;
25
26/// An array wrapper for primitive values that have an associated temporal meaning.
27///
28/// This is a wrapper around ExtensionArrays containing numeric types, each of which corresponds to
29/// either a timestamp or julian date (both referenced to UNIX epoch), OR a time since midnight.
30///
31/// ## Arrow compatibility
32///
33/// TemporalArray can be created from Arrow arrays containing the following datatypes:
34/// * `Time32`
35/// * `Time64`
36/// * `Timestamp`
37/// * `Date32`
38/// * `Date64`
39///
40/// Anything that can be constructed and held in a `TemporalArray` can also be zero-copy converted
41/// back to the relevant Arrow datatype.
42#[derive(Clone, Debug)]
43pub struct TemporalArray {
44 /// The underlying Vortex extension array holding all the numeric values.
45 ext: ExtensionArray,
46
47 /// In-memory representation of the ExtMetadata that is held by the underlying extension array.
48 ///
49 /// We hold this directly to avoid needing to deserialize the metadata to access things like
50 /// timezone and TimeUnit of the underlying array.
51 temporal_metadata: TemporalMetadata,
52}
53
/// Assert that `$array` has a primitive `DType` whose `PType` matches the native type `$width`.
///
/// # Panics
///
/// Panics if the array's `DType` is not `DType::Primitive`, or if its `PType` differs from
/// `<$width as NativePType>::PTYPE`.
macro_rules! assert_width {
    ($width:ty, $array:expr) => {{
        let DType::Primitive(ptype, _) = $array.dtype() else {
            panic!("array must have primitive type");
        };

        // The message names the array's actual ptype first and the required ptype second, so
        // the format arguments are deliberately in the opposite order to the compared operands.
        assert_eq!(
            <$width as vortex_dtype::NativePType>::PTYPE,
            *ptype,
            "invalid ptype {} for array, expected {}",
            *ptype,
            <$width as vortex_dtype::NativePType>::PTYPE
        );
    }};
}
69
70impl TemporalArray {
71 /// Create a new `TemporalArray` holding either i32 day offsets, or i64 millisecond offsets
72 /// that are evenly divisible by the number of 86,400,000.
73 ///
74 /// This is equivalent to the data described by either of the `Date32` or `Date64` data types
75 /// from Arrow.
76 ///
77 /// # Panics
78 ///
79 /// If the time unit is milliseconds, and the array is not of primitive I64 type, it panics.
80 ///
81 /// If the time unit is days, and the array is not of primitive I32 type, it panics.
82 ///
83 /// If any other time unit is provided, it panics.
84 pub fn new_date(array: ArrayRef, time_unit: TimeUnit) -> Self {
85 match time_unit {
86 TimeUnit::Days => {
87 assert_width!(i32, array);
88 }
89 TimeUnit::Milliseconds => {
90 assert_width!(i64, array);
91 }
92 TimeUnit::Nanoseconds | TimeUnit::Microseconds | TimeUnit::Seconds => {
93 vortex_panic!("invalid TimeUnit {time_unit} for vortex.date")
94 }
95 };
96
97 let ext_dtype = ExtDType::new(
98 DATE_ID.clone(),
99 Arc::new(array.dtype().clone()),
100 Some(TemporalMetadata::Date(time_unit).into()),
101 );
102
103 Self {
104 ext: ExtensionArray::new(Arc::new(ext_dtype), array),
105 temporal_metadata: TemporalMetadata::Date(time_unit),
106 }
107 }
108
109 /// Create a new `TemporalArray` holding one of the following values:
110 ///
111 /// * `i32` values representing seconds since midnight
112 /// * `i32` values representing milliseconds since midnight
113 /// * `i64` values representing microseconds since midnight
114 /// * `i64` values representing nanoseconds since midnight
115 ///
116 /// Note, this is equivalent to the set of values represented by the Time32 or Time64 types
117 /// from Arrow.
118 ///
119 /// # Panics
120 ///
121 /// If the time unit is seconds, and the array is not of primitive I32 type, it panics.
122 ///
123 /// If the time unit is milliseconds, and the array is not of primitive I32 type, it panics.
124 ///
125 /// If the time unit is microseconds, and the array is not of primitive I64 type, it panics.
126 ///
127 /// If the time unit is nanoseconds, and the array is not of primitive I64 type, it panics.
128 pub fn new_time(array: ArrayRef, time_unit: TimeUnit) -> Self {
129 match time_unit {
130 TimeUnit::Seconds | TimeUnit::Milliseconds => assert_width!(i32, array),
131 TimeUnit::Microseconds | TimeUnit::Nanoseconds => assert_width!(i64, array),
132 TimeUnit::Days => vortex_panic!("invalid unit D for vortex.time data"),
133 }
134
135 let temporal_metadata = TemporalMetadata::Time(time_unit);
136 Self {
137 ext: ExtensionArray::new(
138 Arc::new(ExtDType::new(
139 TIME_ID.clone(),
140 Arc::new(array.dtype().clone()),
141 Some(temporal_metadata.clone().into()),
142 )),
143 array,
144 ),
145 temporal_metadata,
146 }
147 }
148
149 /// Create a new `TemporalArray` holding Arrow spec compliant Timestamp data, with an
150 /// optional timezone.
151 ///
152 /// # Panics
153 ///
154 /// If `array` does not hold Primitive i64 data, the function will panic.
155 ///
156 /// If the time_unit is days, the function will panic.
157 pub fn new_timestamp(array: ArrayRef, time_unit: TimeUnit, time_zone: Option<String>) -> Self {
158 assert_width!(i64, array);
159
160 let temporal_metadata = TemporalMetadata::Timestamp(time_unit, time_zone);
161
162 Self {
163 ext: ExtensionArray::new(
164 Arc::new(ExtDType::new(
165 TIMESTAMP_ID.clone(),
166 Arc::new(array.dtype().clone()),
167 Some(temporal_metadata.clone().into()),
168 )),
169 array,
170 ),
171 temporal_metadata,
172 }
173 }
174}
175
176impl TemporalArray {
177 /// Access the underlying temporal values in the underlying ExtensionArray storage.
178 ///
179 /// These values are to be interpreted based on the time unit and optional time-zone stored
180 /// in the TemporalMetadata.
181 pub fn temporal_values(&self) -> &ArrayRef {
182 self.ext.storage()
183 }
184
185 /// Retrieve the temporal metadata.
186 ///
187 /// The metadata is used to provide semantic meaning to the temporal values Array, for example
188 /// to understand the granularity of the samples and if they have an associated timezone.
189 pub fn temporal_metadata(&self) -> &TemporalMetadata {
190 &self.temporal_metadata
191 }
192
193 /// Retrieve the extension DType associated with the underlying array.
194 pub fn ext_dtype(&self) -> Arc<ExtDType> {
195 self.ext.ext_dtype().clone()
196 }
197
198 /// Retrieve the DType of the array. This will be a `DType::Extension` variant.
199 pub fn dtype(&self) -> &DType {
200 self.ext.dtype()
201 }
202}
203
204impl From<TemporalArray> for ArrayRef {
205 fn from(value: TemporalArray) -> Self {
206 value.ext.into_array()
207 }
208}
209
210impl IntoArray for TemporalArray {
211 fn into_array(self) -> ArrayRef {
212 self.into()
213 }
214}
215
216impl TryFrom<ArrayRef> for TemporalArray {
217 type Error = VortexError;
218
219 /// Try to specialize a generic Vortex array as a TemporalArray.
220 ///
221 /// # Errors
222 ///
223 /// If the provided Array does not have `vortex.ext` encoding, an error will be returned.
224 ///
225 /// If the provided Array does not have recognized ExtMetadata corresponding to one of the known
226 /// `TemporalMetadata` variants, an error is returned.
227 fn try_from(value: ArrayRef) -> Result<Self, Self::Error> {
228 let ext = value
229 .as_opt::<ExtensionVTable>()
230 .ok_or_else(|| vortex_err!("array must be an ExtensionArray"))?;
231 let temporal_metadata = TemporalMetadata::try_from(ext.ext_dtype())?;
232 Ok(Self {
233 ext: ext.clone(),
234 temporal_metadata,
235 })
236 }
237}
238
239// Conversions to/from ExtensionArray
240impl From<&TemporalArray> for ExtensionArray {
241 fn from(value: &TemporalArray) -> Self {
242 value.ext.clone()
243 }
244}
245
246impl From<TemporalArray> for ExtensionArray {
247 fn from(value: TemporalArray) -> Self {
248 value.ext
249 }
250}
251
252impl TryFrom<ExtensionArray> for TemporalArray {
253 type Error = VortexError;
254
255 fn try_from(ext: ExtensionArray) -> Result<Self, Self::Error> {
256 let temporal_metadata = TemporalMetadata::try_from(ext.ext_dtype().as_ref())?;
257 Ok(Self {
258 ext,
259 temporal_metadata,
260 })
261 }
262}