vortex_datetime_parts/
compress.rs

1use vortex_array::arrays::{PrimitiveArray, TemporalArray};
2use vortex_array::compute::cast;
3use vortex_array::vtable::ValidityHelper;
4use vortex_array::{ArrayRef, IntoArray, ToCanonical};
5use vortex_buffer::BufferMut;
6use vortex_dtype::{DType, PType};
7use vortex_error::{VortexError, VortexResult};
8
9use crate::{DateTimePartsArray, timestamp};
10
11pub struct TemporalParts {
12    pub days: ArrayRef,
13    pub seconds: ArrayRef,
14    pub subseconds: ArrayRef,
15}
16
17/// Compress a `TemporalArray` into day, second, and subseconds components.
18///
19/// Splitting the components by granularity creates more small values, which enables better
20/// cascading compression.
21pub fn split_temporal(array: TemporalArray) -> VortexResult<TemporalParts> {
22    let temporal_values = array.temporal_values().to_primitive()?;
23
24    // After this operation, timestamps will be a PrimitiveArray<i64>
25    let timestamps = cast(
26        temporal_values.as_ref(),
27        &DType::Primitive(PType::I64, temporal_values.dtype().nullability()),
28    )?
29    .to_primitive()?;
30
31    let length = timestamps.len();
32    let mut days = BufferMut::with_capacity(length);
33    let mut seconds = BufferMut::with_capacity(length);
34    let mut subseconds = BufferMut::with_capacity(length);
35
36    for &ts in timestamps.as_slice::<i64>() {
37        let ts_parts = timestamp::split(ts, array.temporal_metadata().time_unit())?;
38        days.push(ts_parts.days);
39        seconds.push(ts_parts.seconds);
40        subseconds.push(ts_parts.subseconds);
41    }
42
43    Ok(TemporalParts {
44        days: PrimitiveArray::new(days, temporal_values.validity().clone()).into_array(),
45        seconds: seconds.into_array(),
46        subseconds: subseconds.into_array(),
47    })
48}
49
50impl TryFrom<TemporalArray> for DateTimePartsArray {
51    type Error = VortexError;
52
53    fn try_from(array: TemporalArray) -> Result<Self, Self::Error> {
54        let ext_dtype = array.ext_dtype();
55        let TemporalParts {
56            days,
57            seconds,
58            subseconds,
59        } = split_temporal(array)?;
60        DateTimePartsArray::try_new(DType::Extension(ext_dtype), days, seconds, subseconds)
61    }
62}
63
#[cfg(test)]
mod tests {
    use rstest::rstest;
    use vortex_array::arrays::{PrimitiveArray, TemporalArray};
    use vortex_array::validity::Validity;
    use vortex_array::vtable::ValidityHelper;
    use vortex_array::{IntoArray, ToCanonical};
    use vortex_buffer::buffer;
    use vortex_dtype::datetime::TimeUnit;

    use crate::{TemporalParts, split_temporal};

    /// Checks how `split_temporal` distributes validity: `days` keeps the input validity,
    /// while `seconds` and `subseconds` are always non-nullable.
    #[rstest]
    #[case(Validity::NonNullable)]
    #[case(Validity::AllValid)]
    #[case(Validity::AllInvalid)]
    #[case(Validity::from_iter([true, false, true]))]
    fn test_split_temporal(#[case] validity: Validity) {
        // Values are in milliseconds (see `TimeUnit::Ms` below): one day is 86_400_000 ms
        // and one second is 1000 ms.
        let milliseconds = PrimitiveArray::new(
            buffer![
                86_400_000i64,            // element with only day component
                86_400_000i64 + 1000,     // element with day + second components
                86_400_000i64 + 1000 + 1, // element with day + second + sub-second components
            ],
            validity.clone(),
        )
        .into_array();
        let temporal_array =
            TemporalArray::new_timestamp(milliseconds, TimeUnit::Ms, Some("UTC".to_string()));
        let TemporalParts {
            days,
            seconds,
            subseconds,
        } = split_temporal(temporal_array).unwrap();
        assert_eq!(days.to_primitive().unwrap().validity(), &validity);
        assert_eq!(
            seconds.to_primitive().unwrap().validity(),
            &Validity::NonNullable
        );
        assert_eq!(
            subseconds.to_primitive().unwrap().validity(),
            &Validity::NonNullable
        );
    }
}