vortex_array/arrays/datetime/mod.rs
1#[cfg(test)]
2mod test;
3
4use std::sync::Arc;
5
6use vortex_dtype::datetime::{DATE_ID, TIME_ID, TIMESTAMP_ID, TemporalMetadata, TimeUnit};
7use vortex_dtype::{DType, ExtDType};
8use vortex_error::{VortexError, vortex_err, vortex_panic};
9
10use crate::arrays::{ExtensionArray, ExtensionVTable};
11use crate::{Array, ArrayRef, IntoArray};
12
/// An array wrapper for primitive values that have an associated temporal meaning.
///
/// This is a wrapper around ExtensionArrays containing numeric types, each of which corresponds to
/// either a timestamp or julian date (both referenced to UNIX epoch), OR a time since midnight.
///
/// Instances are built with [`TemporalArray::new_date`], [`TemporalArray::new_time`] or
/// [`TemporalArray::new_timestamp`], or recovered from an existing extension array through the
/// `TryFrom` implementations in this module.
///
/// ## Arrow compatibility
///
/// TemporalArray can be created from Arrow arrays containing the following datatypes:
/// * `Time32`
/// * `Time64`
/// * `Timestamp`
/// * `Date32`
/// * `Date64`
///
/// Anything that can be constructed and held in a `TemporalArray` can also be zero-copy converted
/// back to the relevant Arrow datatype.
#[derive(Clone, Debug)]
pub struct TemporalArray {
    /// The underlying Vortex extension array holding all the numeric values.
    ext: ExtensionArray,

    /// In-memory representation of the ExtMetadata that is held by the underlying extension array.
    ///
    /// We hold this directly to avoid needing to deserialize the metadata to access things like
    /// timezone and TimeUnit of the underlying array. It is expected to always describe the same
    /// metadata as the one attached to `ext`'s extension dtype.
    temporal_metadata: TemporalMetadata,
}
40
/// Assert that `$array` has a primitive dtype whose ptype is the one associated with the native
/// type `$width`.
///
/// # Panics
///
/// Panics if the dtype of `$array` is not `DType::Primitive`, or if its ptype differs from
/// `<$width as NativePType>::PTYPE`.
macro_rules! assert_width {
    ($width:ty, $array:expr) => {{
        let DType::Primitive(ptype, _) = $array.dtype() else {
            panic!("array must have primitive type");
        };

        // The message reports the array's actual ptype first and the required ptype second,
        // matching the order of the placeholders in the format string.
        assert_eq!(
            <$width as vortex_dtype::NativePType>::PTYPE,
            *ptype,
            "invalid ptype {} for array, expected {}",
            *ptype,
            <$width as vortex_dtype::NativePType>::PTYPE
        );
    }};
}
56
57impl TemporalArray {
58 /// Create a new `TemporalArray` holding either i32 day offsets, or i64 millisecond offsets
59 /// that are evenly divisible by the number of 86,400,000.
60 ///
61 /// This is equivalent to the data described by either of the `Date32` or `Date64` data types
62 /// from Arrow.
63 ///
64 /// # Panics
65 ///
66 /// If the time unit is milliseconds, and the array is not of primitive I64 type, it panics.
67 ///
68 /// If the time unit is days, and the array is not of primitive I32 type, it panics.
69 ///
70 /// If any other time unit is provided, it panics.
71 pub fn new_date(array: ArrayRef, time_unit: TimeUnit) -> Self {
72 match time_unit {
73 TimeUnit::D => {
74 assert_width!(i32, array);
75 }
76 TimeUnit::Ms => {
77 assert_width!(i64, array);
78 }
79 _ => vortex_panic!("invalid TimeUnit {time_unit} for vortex.date"),
80 };
81
82 let ext_dtype = ExtDType::new(
83 DATE_ID.clone(),
84 Arc::new(array.dtype().clone()),
85 Some(TemporalMetadata::Date(time_unit).into()),
86 );
87
88 Self {
89 ext: ExtensionArray::new(Arc::new(ext_dtype), array),
90 temporal_metadata: TemporalMetadata::Date(time_unit),
91 }
92 }
93
94 /// Create a new `TemporalArray` holding one of the following values:
95 ///
96 /// * `i32` values representing seconds since midnight
97 /// * `i32` values representing milliseconds since midnight
98 /// * `i64` values representing microseconds since midnight
99 /// * `i64` values representing nanoseconds since midnight
100 ///
101 /// Note, this is equivalent to the set of values represented by the Time32 or Time64 types
102 /// from Arrow.
103 ///
104 /// # Panics
105 ///
106 /// If the time unit is seconds, and the array is not of primitive I32 type, it panics.
107 ///
108 /// If the time unit is milliseconds, and the array is not of primitive I32 type, it panics.
109 ///
110 /// If the time unit is microseconds, and the array is not of primitive I64 type, it panics.
111 ///
112 /// If the time unit is nanoseconds, and the array is not of primitive I64 type, it panics.
113 pub fn new_time(array: ArrayRef, time_unit: TimeUnit) -> Self {
114 match time_unit {
115 TimeUnit::S | TimeUnit::Ms => assert_width!(i32, array),
116 TimeUnit::Us | TimeUnit::Ns => assert_width!(i64, array),
117 TimeUnit::D => vortex_panic!("invalid unit D for vortex.time data"),
118 }
119
120 let temporal_metadata = TemporalMetadata::Time(time_unit);
121 Self {
122 ext: ExtensionArray::new(
123 Arc::new(ExtDType::new(
124 TIME_ID.clone(),
125 Arc::new(array.dtype().clone()),
126 Some(temporal_metadata.clone().into()),
127 )),
128 array,
129 ),
130 temporal_metadata,
131 }
132 }
133
134 /// Create a new `TemporalArray` holding Arrow spec compliant Timestamp data, with an
135 /// optional timezone.
136 ///
137 /// # Panics
138 ///
139 /// If `array` does not hold Primitive i64 data, the function will panic.
140 ///
141 /// If the time_unit is days, the function will panic.
142 pub fn new_timestamp(array: ArrayRef, time_unit: TimeUnit, time_zone: Option<String>) -> Self {
143 assert_width!(i64, array);
144
145 let temporal_metadata = TemporalMetadata::Timestamp(time_unit, time_zone);
146
147 Self {
148 ext: ExtensionArray::new(
149 Arc::new(ExtDType::new(
150 TIMESTAMP_ID.clone(),
151 Arc::new(array.dtype().clone()),
152 Some(temporal_metadata.clone().into()),
153 )),
154 array,
155 ),
156 temporal_metadata,
157 }
158 }
159}
160
161impl TemporalArray {
162 /// Access the underlying temporal values in the underlying ExtensionArray storage.
163 ///
164 /// These values are to be interpreted based on the time unit and optional time-zone stored
165 /// in the TemporalMetadata.
166 pub fn temporal_values(&self) -> &ArrayRef {
167 self.ext.storage()
168 }
169
170 /// Retrieve the temporal metadata.
171 ///
172 /// The metadata is used to provide semantic meaning to the temporal values Array, for example
173 /// to understand the granularity of the samples and if they have an associated timezone.
174 pub fn temporal_metadata(&self) -> &TemporalMetadata {
175 &self.temporal_metadata
176 }
177
178 /// Retrieve the extension DType associated with the underlying array.
179 pub fn ext_dtype(&self) -> Arc<ExtDType> {
180 self.ext.ext_dtype().clone()
181 }
182
183 /// Retrieve the DType of the array. This will be a `DType::Extension` variant.
184 pub fn dtype(&self) -> &DType {
185 self.ext.dtype()
186 }
187}
188
189impl From<TemporalArray> for ArrayRef {
190 fn from(value: TemporalArray) -> Self {
191 value.ext.into_array()
192 }
193}
194
195impl IntoArray for TemporalArray {
196 fn into_array(self) -> ArrayRef {
197 self.into()
198 }
199}
200
201impl TryFrom<ArrayRef> for TemporalArray {
202 type Error = VortexError;
203
204 /// Try to specialize a generic Vortex array as a TemporalArray.
205 ///
206 /// # Errors
207 ///
208 /// If the provided Array does not have `vortex.ext` encoding, an error will be returned.
209 ///
210 /// If the provided Array does not have recognized ExtMetadata corresponding to one of the known
211 /// `TemporalMetadata` variants, an error is returned.
212 fn try_from(value: ArrayRef) -> Result<Self, Self::Error> {
213 let ext = value
214 .as_opt::<ExtensionVTable>()
215 .ok_or_else(|| vortex_err!("array must be an ExtensionArray"))?;
216 let temporal_metadata = TemporalMetadata::try_from(ext.ext_dtype())?;
217 Ok(Self {
218 ext: ext.clone(),
219 temporal_metadata,
220 })
221 }
222}
223
224// Conversions to/from ExtensionArray
225impl From<&TemporalArray> for ExtensionArray {
226 fn from(value: &TemporalArray) -> Self {
227 value.ext.clone()
228 }
229}
230
231impl From<TemporalArray> for ExtensionArray {
232 fn from(value: TemporalArray) -> Self {
233 value.ext
234 }
235}
236
237impl TryFrom<ExtensionArray> for TemporalArray {
238 type Error = VortexError;
239
240 fn try_from(ext: ExtensionArray) -> Result<Self, Self::Error> {
241 let temporal_metadata = TemporalMetadata::try_from(ext.ext_dtype().as_ref())?;
242 Ok(Self {
243 ext,
244 temporal_metadata,
245 })
246 }
247}