vortex_array/
compress.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4// TODO(ngates): make this a function on a PrimitiveArray
5use vortex_dtype::{DType, PType};
6use vortex_error::{VortexExpect, VortexResult};
7use vortex_scalar::Scalar;
8
9use crate::arrays::{ConstantArray, PrimitiveArray, PrimitiveEncoding, PrimitiveVTable};
10use crate::compute::{cast, min_max};
11use crate::{Array, ArrayRef, IntoArray, ToCanonical};
12
13/// Downscale a primitive array to the narrowest PType that fits all the values.
14pub fn downscale_integer_array(array: ArrayRef) -> VortexResult<ArrayRef> {
15    if !array.is_encoding(PrimitiveEncoding.id()) {
16        // This can happen if e.g. the array is ConstantArray.
17        return Ok(array);
18    }
19    if array.is_empty() {
20        return Ok(array);
21    }
22    let array = array
23        .as_opt::<PrimitiveVTable>()
24        .vortex_expect("Checked earlier");
25
26    let Some(min_max) = min_max(array.as_ref())? else {
27        // This array but be all nulls.
28        return Ok(
29            ConstantArray::new(Scalar::null(array.dtype().clone()), array.len()).into_array(),
30        );
31    };
32
33    // If we can't cast to i64, then leave the array as its original type.
34    // It's too big to downcast anyway.
35    let Ok(min) = i64::try_from(&min_max.min.cast(&PType::I64.into())?) else {
36        return Ok(array.to_array());
37    };
38    let Ok(max) = i64::try_from(&min_max.max.cast(&PType::I64.into())?) else {
39        return Ok(array.to_array());
40    };
41
42    downscale_primitive_integer_array(array.clone(), min, max).map(|a| a.into_array())
43}
44
45/// Downscale a primitive array to the narrowest PType that fits all the values.
46fn downscale_primitive_integer_array(
47    array: PrimitiveArray,
48    min: i64,
49    max: i64,
50) -> VortexResult<PrimitiveArray> {
51    if min < 0 || max < 0 {
52        // Signed
53        if min >= i8::MIN as i64 && max <= i8::MAX as i64 {
54            return cast(
55                array.as_ref(),
56                &DType::Primitive(PType::I8, array.dtype().nullability()),
57            )?
58            .to_primitive();
59        }
60
61        if min >= i16::MIN as i64 && max <= i16::MAX as i64 {
62            return cast(
63                array.as_ref(),
64                &DType::Primitive(PType::I16, array.dtype().nullability()),
65            )?
66            .to_primitive();
67        }
68
69        if min >= i32::MIN as i64 && max <= i32::MAX as i64 {
70            return cast(
71                array.as_ref(),
72                &DType::Primitive(PType::I32, array.dtype().nullability()),
73            )?
74            .to_primitive();
75        }
76    } else {
77        // Unsigned
78        if max <= u8::MAX as i64 {
79            return cast(
80                array.as_ref(),
81                &DType::Primitive(PType::U8, array.dtype().nullability()),
82            )?
83            .to_primitive();
84        }
85
86        if max <= u16::MAX as i64 {
87            return cast(
88                array.as_ref(),
89                &DType::Primitive(PType::U16, array.dtype().nullability()),
90            )?
91            .to_primitive();
92        }
93
94        if max <= u32::MAX as i64 {
95            return cast(
96                array.as_ref(),
97                &DType::Primitive(PType::U32, array.dtype().nullability()),
98            )?
99            .to_primitive();
100        }
101    }
102
103    Ok(array)
104}