vortex_datetime_parts/
compress.rs

1use vortex_array::arrays::{PrimitiveArray, TemporalArray};
2use vortex_array::compute::cast;
3use vortex_array::vtable::ValidityHelper;
4use vortex_array::{ArrayRef, IntoArray, ToCanonical};
5use vortex_buffer::BufferMut;
6use vortex_dtype::{DType, PType};
7use vortex_error::{VortexError, VortexResult};
8
9use crate::{DateTimePartsArray, timestamp};
10
11pub struct TemporalParts {
12    pub days: ArrayRef,
13    pub seconds: ArrayRef,
14    pub subseconds: ArrayRef,
15}
16
17/// Compress a `TemporalArray` into day, second, and subseconds components.
18///
19/// Splitting the components by granularity creates more small values, which enables better
20/// cascading compression.
21pub fn split_temporal(array: TemporalArray) -> VortexResult<TemporalParts> {
22    let temporal_values = array.temporal_values().to_primitive()?;
23    let validity = temporal_values.validity().clone();
24
25    // After this operation, timestamps will be non-nullable PrimitiveArray<i64>
26    let timestamps = cast(
27        temporal_values.as_ref(),
28        &DType::Primitive(PType::I64, temporal_values.dtype().nullability()),
29    )?
30    .to_primitive()?;
31
32    let length = timestamps.len();
33    let mut days = BufferMut::with_capacity(length);
34    let mut seconds = BufferMut::with_capacity(length);
35    let mut subseconds = BufferMut::with_capacity(length);
36
37    for &ts in timestamps.as_slice::<i64>().iter() {
38        let ts_parts = timestamp::split(ts, array.temporal_metadata().time_unit())?;
39        days.push(ts_parts.days);
40        seconds.push(ts_parts.seconds);
41        subseconds.push(ts_parts.subseconds);
42    }
43
44    Ok(TemporalParts {
45        days: PrimitiveArray::new(days, validity).into_array(),
46        seconds: seconds.into_array(),
47        subseconds: subseconds.into_array(),
48    })
49}
50
51impl TryFrom<TemporalArray> for DateTimePartsArray {
52    type Error = VortexError;
53
54    fn try_from(array: TemporalArray) -> Result<Self, Self::Error> {
55        let ext_dtype = array.ext_dtype();
56        let TemporalParts {
57            days,
58            seconds,
59            subseconds,
60        } = split_temporal(array)?;
61        DateTimePartsArray::try_new(DType::Extension(ext_dtype), days, seconds, subseconds)
62    }
63}
64
#[cfg(test)]
mod tests {
    use rstest::rstest;
    use vortex_array::arrays::{PrimitiveArray, TemporalArray};
    use vortex_array::validity::Validity;
    use vortex_array::vtable::ValidityHelper;
    use vortex_array::{IntoArray, ToCanonical};
    use vortex_buffer::buffer;
    use vortex_dtype::datetime::TimeUnit;

    use crate::{TemporalParts, split_temporal};

    /// Number of milliseconds in one day (24 * 60 * 60 * 1000).
    const MS_PER_DAY: i64 = 86_400_000;

    /// Checks that splitting keeps the input validity on `days` and that `seconds` and
    /// `subseconds` come back non-nullable, for every kind of input validity.
    #[rstest]
    #[case(Validity::NonNullable)]
    #[case(Validity::AllValid)]
    #[case(Validity::AllInvalid)]
    #[case(Validity::from_iter([true, false, true]))]
    fn test_split_temporal(#[case] validity: Validity) {
        // The values are in milliseconds, so a whole day is MS_PER_DAY and a whole
        // second is 1000.
        let milliseconds = PrimitiveArray::new(
            buffer![
                MS_PER_DAY,            // element with only day component
                MS_PER_DAY + 1000,     // element with day + second components
                MS_PER_DAY + 1000 + 1, // element with day + second + sub-second components
            ],
            validity.clone(),
        )
        .into_array();
        let temporal_array =
            TemporalArray::new_timestamp(milliseconds, TimeUnit::Ms, Some("UTC".to_string()));
        let TemporalParts {
            days,
            seconds,
            subseconds,
        } = split_temporal(temporal_array).unwrap();
        assert_eq!(days.to_primitive().unwrap().validity(), &validity);
        assert_eq!(
            seconds.to_primitive().unwrap().validity(),
            &Validity::NonNullable
        );
        assert_eq!(
            subseconds.to_primitive().unwrap().validity(),
            &Validity::NonNullable
        );
    }
}