Skip to main content

vortex_datetime_parts/
compress.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_array::ArrayRef;
5use vortex_array::IntoArray;
6use vortex_array::ToCanonical;
7use vortex_array::arrays::PrimitiveArray;
8use vortex_array::arrays::TemporalArray;
9use vortex_array::builtins::ArrayBuiltins;
10use vortex_array::dtype::DType;
11use vortex_array::dtype::PType;
12use vortex_buffer::BufferMut;
13use vortex_error::VortexError;
14use vortex_error::VortexResult;
15
16use crate::DateTimePartsData;
17use crate::timestamp;
18pub struct TemporalParts {
19    pub days: ArrayRef,
20    pub seconds: ArrayRef,
21    pub subseconds: ArrayRef,
22}
23
24/// Compress a `TemporalArray` into day, second, and subseconds components.
25///
26/// Splitting the components by granularity creates more small values, which enables better
27/// cascading compression.
28pub fn split_temporal(array: TemporalArray) -> VortexResult<TemporalParts> {
29    let temporal_values = array.temporal_values().to_primitive();
30
31    // After this operation, timestamps will be a PrimitiveArray<i64>
32    let timestamps = temporal_values
33        .clone()
34        .into_array()
35        .cast(DType::Primitive(
36            PType::I64,
37            temporal_values.dtype().nullability(),
38        ))?
39        .to_primitive();
40
41    let length = timestamps.len();
42    let mut days = BufferMut::with_capacity(length);
43    let mut seconds = BufferMut::with_capacity(length);
44    let mut subseconds = BufferMut::with_capacity(length);
45
46    for &ts in timestamps.as_slice::<i64>() {
47        let ts_parts = timestamp::split(ts, array.temporal_metadata().time_unit())?;
48        days.push(ts_parts.days);
49        seconds.push(ts_parts.seconds);
50        subseconds.push(ts_parts.subseconds);
51    }
52
53    Ok(TemporalParts {
54        days: PrimitiveArray::new(days, temporal_values.validity()?).into_array(),
55        seconds: seconds.into_array(),
56        subseconds: subseconds.into_array(),
57    })
58}
59
60impl TryFrom<TemporalArray> for DateTimePartsData {
61    type Error = VortexError;
62
63    fn try_from(array: TemporalArray) -> Result<Self, Self::Error> {
64        let ext_dtype = array.ext_dtype();
65        let TemporalParts {
66            days,
67            seconds,
68            subseconds,
69        } = split_temporal(array)?;
70        DateTimePartsData::validate(
71            &DType::Extension(ext_dtype),
72            &days,
73            &seconds,
74            &subseconds,
75            days.len(),
76        )?;
77        Ok(DateTimePartsData {})
78    }
79}
80
#[cfg(test)]
mod tests {
    use rstest::rstest;
    use vortex_array::IntoArray;
    use vortex_array::LEGACY_SESSION;
    use vortex_array::ToCanonical;
    use vortex_array::VortexSessionExecute;
    use vortex_array::arrays::PrimitiveArray;
    use vortex_array::arrays::TemporalArray;
    use vortex_array::extension::datetime::TimeUnit;
    use vortex_array::validity::Validity;
    use vortex_buffer::buffer;
    use vortex_error::VortexExpect;

    use crate::TemporalParts;
    use crate::split_temporal;

    /// Checks how validity is propagated by `split_temporal`: `days` must carry the input
    /// validity, while `seconds` and `subseconds` must be non-nullable.
    #[rstest]
    #[case(Validity::NonNullable)]
    #[case(Validity::AllValid)]
    #[case(Validity::AllInvalid)]
    #[case(Validity::from_iter([true, false, true]))]
    fn test_split_temporal(#[case] validity: Validity) {
        // The values are interpreted as milliseconds, so one day is 86_400_000 and one
        // second is 1000.
        let milliseconds = PrimitiveArray::new(
            buffer![
                86_400_000i64,            // element with only day component
                86_400_000i64 + 1000,     // element with day + second components
                86_400_000i64 + 1000 + 1, // element with day + second + sub-second components
            ],
            validity.clone(),
        )
        .into_array();
        let temporal_array =
            TemporalArray::new_timestamp(milliseconds, TimeUnit::Milliseconds, Some("UTC".into()));
        let TemporalParts {
            days,
            seconds,
            subseconds,
        } = split_temporal(temporal_array).unwrap();

        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        assert!(
            days.to_primitive()
                .validity()
                .vortex_expect("days validity should be derivable")
                .mask_eq(&validity, &mut ctx)
                .unwrap()
        );
        assert!(matches!(
            seconds
                .to_primitive()
                .validity()
                .vortex_expect("seconds validity should be derivable"),
            Validity::NonNullable
        ));
        assert!(matches!(
            subseconds
                .to_primitive()
                .validity()
                .vortex_expect("subseconds validity should be derivable"),
            Validity::NonNullable
        ));
    }
}
144}