vortex_array/
compress.rs

1// TODO(ngates): make this a function on a PrimitiveArray
2use vortex_dtype::{DType, PType};
3use vortex_error::{VortexExpect, VortexResult};
4use vortex_scalar::Scalar;
5
6use crate::arrays::{ConstantArray, PrimitiveArray, PrimitiveEncoding, PrimitiveVTable};
7use crate::compute::{cast, min_max};
8use crate::{Array, ArrayRef, IntoArray, ToCanonical};
9
10/// Downscale a primitive array to the narrowest PType that fits all the values.
11pub fn downscale_integer_array(array: ArrayRef) -> VortexResult<ArrayRef> {
12    if !array.is_encoding(PrimitiveEncoding.id()) {
13        // This can happen if e.g. the array is ConstantArray.
14        return Ok(array);
15    }
16    if array.is_empty() {
17        return Ok(array);
18    }
19    let array = array
20        .as_opt::<PrimitiveVTable>()
21        .vortex_expect("Checked earlier");
22
23    let Some(min_max) = min_max(array.as_ref())? else {
24        // This array but be all nulls.
25        return Ok(
26            ConstantArray::new(Scalar::null(array.dtype().clone()), array.len()).into_array(),
27        );
28    };
29
30    // If we can't cast to i64, then leave the array as its original type.
31    // It's too big to downcast anyway.
32    let Ok(min) = i64::try_from(min_max.min.value()) else {
33        return Ok(array.to_array());
34    };
35    let Ok(max) = i64::try_from(min_max.max.value()) else {
36        return Ok(array.to_array());
37    };
38
39    downscale_primitive_integer_array(array.clone(), min, max).map(|a| a.into_array())
40}
41
42/// Downscale a primitive array to the narrowest PType that fits all the values.
43fn downscale_primitive_integer_array(
44    array: PrimitiveArray,
45    min: i64,
46    max: i64,
47) -> VortexResult<PrimitiveArray> {
48    if min < 0 || max < 0 {
49        // Signed
50        if min >= i8::MIN as i64 && max <= i8::MAX as i64 {
51            return cast(
52                array.as_ref(),
53                &DType::Primitive(PType::I8, array.dtype().nullability()),
54            )?
55            .to_primitive();
56        }
57
58        if min >= i16::MIN as i64 && max <= i16::MAX as i64 {
59            return cast(
60                array.as_ref(),
61                &DType::Primitive(PType::I16, array.dtype().nullability()),
62            )?
63            .to_primitive();
64        }
65
66        if min >= i32::MIN as i64 && max <= i32::MAX as i64 {
67            return cast(
68                array.as_ref(),
69                &DType::Primitive(PType::I32, array.dtype().nullability()),
70            )?
71            .to_primitive();
72        }
73    } else {
74        // Unsigned
75        if max <= u8::MAX as i64 {
76            return cast(
77                array.as_ref(),
78                &DType::Primitive(PType::U8, array.dtype().nullability()),
79            )?
80            .to_primitive();
81        }
82
83        if max <= u16::MAX as i64 {
84            return cast(
85                array.as_ref(),
86                &DType::Primitive(PType::U16, array.dtype().nullability()),
87            )?
88            .to_primitive();
89        }
90
91        if max <= u32::MAX as i64 {
92            return cast(
93                array.as_ref(),
94                &DType::Primitive(PType::U32, array.dtype().nullability()),
95            )?
96            .to_primitive();
97        }
98    }
99
100    Ok(array)
101}