vortex_datetime_parts/
compress.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_array::arrays::{PrimitiveArray, TemporalArray};
5use vortex_array::compute::cast;
6use vortex_array::vtable::ValidityHelper;
7use vortex_array::{ArrayRef, IntoArray, ToCanonical};
8use vortex_buffer::BufferMut;
9use vortex_dtype::{DType, PType};
10use vortex_error::{VortexError, VortexResult};
11
12use crate::{DateTimePartsArray, timestamp};
13
/// The three component arrays produced by [`split_temporal`].
///
/// Each field holds one element per element of the temporal array that was split.
pub struct TemporalParts {
    /// Day component of each timestamp. `split_temporal` attaches the validity of the
    /// original temporal values to this array.
    pub days: ArrayRef,
    /// Seconds component of each timestamp.
    pub seconds: ArrayRef,
    /// Sub-second component of each timestamp.
    pub subseconds: ArrayRef,
}
19
20/// Compress a `TemporalArray` into day, second, and subseconds components.
21///
22/// Splitting the components by granularity creates more small values, which enables better
23/// cascading compression.
24pub fn split_temporal(array: TemporalArray) -> VortexResult<TemporalParts> {
25    let temporal_values = array.temporal_values().to_primitive()?;
26
27    // After this operation, timestamps will be a PrimitiveArray<i64>
28    let timestamps = cast(
29        temporal_values.as_ref(),
30        &DType::Primitive(PType::I64, temporal_values.dtype().nullability()),
31    )?
32    .to_primitive()?;
33
34    let length = timestamps.len();
35    let mut days = BufferMut::with_capacity(length);
36    let mut seconds = BufferMut::with_capacity(length);
37    let mut subseconds = BufferMut::with_capacity(length);
38
39    for &ts in timestamps.as_slice::<i64>() {
40        let ts_parts = timestamp::split(ts, array.temporal_metadata().time_unit())?;
41        days.push(ts_parts.days);
42        seconds.push(ts_parts.seconds);
43        subseconds.push(ts_parts.subseconds);
44    }
45
46    Ok(TemporalParts {
47        days: PrimitiveArray::new(days, temporal_values.validity().clone()).into_array(),
48        seconds: seconds.into_array(),
49        subseconds: subseconds.into_array(),
50    })
51}
52
53impl TryFrom<TemporalArray> for DateTimePartsArray {
54    type Error = VortexError;
55
56    fn try_from(array: TemporalArray) -> Result<Self, Self::Error> {
57        let ext_dtype = array.ext_dtype();
58        let TemporalParts {
59            days,
60            seconds,
61            subseconds,
62        } = split_temporal(array)?;
63        DateTimePartsArray::try_new(DType::Extension(ext_dtype), days, seconds, subseconds)
64    }
65}
66
#[cfg(test)]
mod tests {
    use rstest::rstest;
    use vortex_array::arrays::{PrimitiveArray, TemporalArray};
    use vortex_array::validity::Validity;
    use vortex_array::vtable::ValidityHelper;
    use vortex_array::{IntoArray, ToCanonical};
    use vortex_buffer::buffer;
    use vortex_dtype::datetime::TimeUnit;

    use crate::{TemporalParts, split_temporal};

    /// Milliseconds in one day. The fixture below is declared as `TimeUnit::Ms`, so a value
    /// needs to be at least this large to have a non-zero day component.
    const MILLIS_PER_DAY: i64 = 86_400_000;

    /// Checks how `split_temporal` distributes validity: the days component must carry the
    /// validity of the input, while seconds and subseconds must be non-nullable.
    #[rstest]
    #[case(Validity::NonNullable)]
    #[case(Validity::AllValid)]
    #[case(Validity::AllInvalid)]
    #[case(Validity::from_iter([true, false, true]))]
    fn test_split_temporal(#[case] validity: Validity) {
        let milliseconds = PrimitiveArray::new(
            buffer![
                MILLIS_PER_DAY,            // element with only day component
                MILLIS_PER_DAY + 1000,     // element with day + second components
                MILLIS_PER_DAY + 1000 + 1, // element with day + second + sub-second components
            ],
            validity.clone(),
        )
        .into_array();
        let temporal_array =
            TemporalArray::new_timestamp(milliseconds, TimeUnit::Ms, Some("UTC".to_string()));
        let TemporalParts {
            days,
            seconds,
            subseconds,
        } = split_temporal(temporal_array).unwrap();
        assert_eq!(days.to_primitive().unwrap().validity(), &validity);
        assert_eq!(
            seconds.to_primitive().unwrap().validity(),
            &Validity::NonNullable
        );
        assert_eq!(
            subseconds.to_primitive().unwrap().validity(),
            &Validity::NonNullable
        );
    }
}