vortex_btrblocks/float/
dictionary.rs1use vortex_array::IntoArray;
7use vortex_array::arrays::{DictArray, PrimitiveArray};
8use vortex_array::validity::Validity;
9use vortex_array::vtable::ValidityHelper;
10use vortex_buffer::Buffer;
11use vortex_dtype::half::f16;
12
13use crate::float::stats::{ErasedDistinctValues, FloatStats};
14
/// Expands to an expression that dictionary-encodes `$stats.src` for one concrete float type.
///
/// Arguments:
/// * `$stats` - identifier of the stats value; `$stats.src` supplies the raw values via
///   `as_slice::<$typ>()`.
/// * `$typed` - identifier of the typed distinct-value set; each entry's `.0` field becomes
///   one dictionary value.
/// * `$validity` - identifier of the source validity; it is cloned onto the codes array.
/// * `$typ` - the concrete float type of the values.
///
/// The expansion evaluates to a [`DictArray`] built from the codes and the dictionary values.
macro_rules! typed_encode {
    ($stats:ident, $typed:ident, $validity:ident, $typ:ty) => {{
        // Dictionary values, in whatever order the distinct-value set iterates.
        let dictionary: Buffer<$typ> = $typed.values.iter().map(|entry| entry.0).collect();
        let source = $stats.src.as_slice::<$typ>();

        // Select the narrowest unsigned code type based on the number of dictionary entries.
        let codes = match dictionary.len() {
            size if size <= u8::MAX as usize => {
                let encoded = <DictEncoder as Encode<$typ, u8>>::encode(&dictionary, source);
                PrimitiveArray::new(encoded, $validity.clone()).into_array()
            }
            size if size <= u16::MAX as usize => {
                let encoded = <DictEncoder as Encode<$typ, u16>>::encode(&dictionary, source);
                PrimitiveArray::new(encoded, $validity.clone()).into_array()
            }
            _ => {
                let encoded = <DictEncoder as Encode<$typ, u32>>::encode(&dictionary, source);
                PrimitiveArray::new(encoded, $validity.clone()).into_array()
            }
        };

        // The dictionary itself never carries nulls: a non-nullable source keeps a
        // non-nullable dictionary, every other validity becomes "all valid".
        let dictionary_validity = if matches!($validity, Validity::NonNullable) {
            Validity::NonNullable
        } else {
            Validity::AllValid
        };
        let dictionary = PrimitiveArray::new(dictionary, dictionary_validity).into_array();

        // SAFETY: every code is either an index into `dictionary` assigned by `DictEncoder`
        // or the default code 0 for a value that was not found.
        // NOTE(review): this assumes that is sufficient for `DictArray::new_unchecked`, and
        // that an empty dictionary never reaches this point - confirm against its contract.
        unsafe { DictArray::new_unchecked(codes, dictionary) }
    }};
}
44
45pub fn dictionary_encode(stats: &FloatStats) -> DictArray {
47 let validity = stats.src.validity();
48 match &stats.distinct_values {
49 ErasedDistinctValues::F16(typed) => typed_encode!(stats, typed, validity, f16),
50 ErasedDistinctValues::F32(typed) => typed_encode!(stats, typed, validity, f32),
51 ErasedDistinctValues::F64(typed) => typed_encode!(stats, typed, validity, f64),
52 }
53}
54
55struct DictEncoder;
56
57trait Encode<T, I> {
58 fn encode(distinct: &[T], values: &[T]) -> Buffer<I>;
60}
61
62macro_rules! impl_encode {
63 ($typ:ty, $utyp:ty) => { impl_encode!($typ, $utyp, u8, u16, u32); };
64 ($typ:ty, $utyp:ty, $($ityp:ty),+) => {
65 $(
66 impl Encode<$typ, $ityp> for DictEncoder {
67 #[allow(clippy::cast_possible_truncation)]
68 fn encode(distinct: &[$typ], values: &[$typ]) -> Buffer<$ityp> {
69 let mut codes =
70 vortex_utils::aliases::hash_map::HashMap::<$utyp, $ityp>::with_capacity(
71 distinct.len(),
72 );
73 for (code, &value) in distinct.iter().enumerate() {
74 codes.insert(value.to_bits(), code as $ityp);
75 }
76
77 let mut output = vortex_buffer::BufferMut::with_capacity(values.len());
78 for value in values {
79 output.push(codes.get(&value.to_bits()).copied().unwrap_or_default());
82 }
83
84 return output.freeze();
85 }
86 }
87 )*
88 };
89}
90
91impl_encode!(f16, u16);
92impl_encode!(f32, u32);
93impl_encode!(f64, u64);
94
#[cfg(test)]
mod tests {
    use vortex_array::arrays::{BoolArray, PrimitiveArray};
    use vortex_array::validity::Validity;
    use vortex_array::{Array, IntoArray, assert_arrays_eq};
    use vortex_buffer::buffer;

    use crate::CompressorStats;
    use crate::float::dictionary::dictionary_encode;
    use crate::float::stats::FloatStats;

    /// Validity mask for five slots in which only the fourth one is null.
    fn fourth_slot_null() -> Validity {
        Validity::Array(BoolArray::from_iter([true, true, true, false, true]).into_array())
    }

    /// Encoding an array with a null slot yields a two-entry dictionary and one code per slot.
    #[test]
    fn test_float_dict_encode() {
        // The fourth slot is null; its backing value (0.0) is not expected in the dictionary.
        let input = PrimitiveArray::new(buffer![1f32, 2f32, 2f32, 0f32, 1f32], fourth_slot_null());

        let stats = FloatStats::generate(&input);
        let encoded = dictionary_encode(&stats);
        assert_eq!(encoded.values().len(), 2);
        assert_eq!(encoded.codes().len(), 5);

        // NOTE(review): the value written under the null slot (1.0 here) presumably does not
        // affect the comparison, since that slot is masked out by the validity - confirm.
        let expected =
            PrimitiveArray::new(buffer![1f32, 2f32, 2f32, 1f32, 1f32], fourth_slot_null())
                .into_array();
        assert_arrays_eq!(encoded.as_ref(), expected.as_ref());
    }
}