vortex_btrblocks/float/
dictionary.rs1use vortex_array::IntoArray;
7use vortex_array::arrays::DictArray;
8use vortex_array::arrays::PrimitiveArray;
9use vortex_array::validity::Validity;
10use vortex_array::vtable::ValidityHelper;
11use vortex_buffer::Buffer;
12use vortex_dtype::half::f16;
13
14use crate::float::stats::ErasedDistinctValues;
15use crate::float::stats::FloatStats;
16
/// Expands to an expression that dictionary-encodes `$stats.src` for one concrete float type.
///
/// * `$stats` - identifier of the stats value; its `src` field supplies the elements to encode.
/// * `$typed` - identifier of the typed distinct-value set; the `.0` field of every entry in
///   `$typed.values` becomes one dictionary value.
/// * `$validity` - identifier of the source validity; it is cloned onto the codes array.
/// * `$typ` - the float element type.
///
/// The codes use `u8` when there are at most `u8::MAX` distinct values, `u16` when there are at
/// most `u16::MAX`, and `u32` otherwise. The dictionary values are `NonNullable` when the source
/// is, and `AllValid` in every other case.
macro_rules! typed_encode {
    ($stats:ident, $typed:ident, $validity:ident, $typ:ty) => {{
        let dictionary: Buffer<$typ> = $typed.values.iter().map(|entry| entry.0).collect();
        let source = $stats.src.as_slice::<$typ>();

        // Choose the code width from the number of distinct values.
        let codes = match dictionary.len() {
            n if n <= u8::MAX as usize => PrimitiveArray::new(
                <DictEncoder as Encode<$typ, u8>>::encode(&dictionary, source),
                $validity.clone(),
            )
            .into_array(),
            n if n <= u16::MAX as usize => PrimitiveArray::new(
                <DictEncoder as Encode<$typ, u16>>::encode(&dictionary, source),
                $validity.clone(),
            )
            .into_array(),
            _ => PrimitiveArray::new(
                <DictEncoder as Encode<$typ, u32>>::encode(&dictionary, source),
                $validity.clone(),
            )
            .into_array(),
        };

        // Nullability lives on the codes; the dictionary itself never holds a null entry.
        let dictionary_validity = if matches!($validity, Validity::NonNullable) {
            Validity::NonNullable
        } else {
            Validity::AllValid
        };
        let dictionary = PrimitiveArray::new(dictionary, dictionary_validity).into_array();

        // SAFETY (NOTE(review)): the codes come from `DictEncoder`, which emits the position of
        // each value within `dictionary`, or 0 for a value it cannot find. Presumably only null
        // slots miss the lookup; confirm this satisfies `DictArray::new_unchecked`'s contract.
        unsafe { DictArray::new_unchecked(codes, dictionary).set_all_values_referenced(true) }
    }};
}
46
47pub fn dictionary_encode(stats: &FloatStats) -> DictArray {
49 let validity = stats.src.validity();
50 match &stats.distinct_values {
51 ErasedDistinctValues::F16(typed) => typed_encode!(stats, typed, validity, f16),
52 ErasedDistinctValues::F32(typed) => typed_encode!(stats, typed, validity, f32),
53 ErasedDistinctValues::F64(typed) => typed_encode!(stats, typed, validity, f64),
54 }
55}
56
57struct DictEncoder;
58
59trait Encode<T, I> {
60 fn encode(distinct: &[T], values: &[T]) -> Buffer<I>;
62}
63
64macro_rules! impl_encode {
65 ($typ:ty, $utyp:ty) => { impl_encode!($typ, $utyp, u8, u16, u32); };
66 ($typ:ty, $utyp:ty, $($ityp:ty),+) => {
67 $(
68 impl Encode<$typ, $ityp> for DictEncoder {
69 #[allow(clippy::cast_possible_truncation)]
70 fn encode(distinct: &[$typ], values: &[$typ]) -> Buffer<$ityp> {
71 let mut codes =
72 vortex_utils::aliases::hash_map::HashMap::<$utyp, $ityp>::with_capacity(
73 distinct.len(),
74 );
75 for (code, &value) in distinct.iter().enumerate() {
76 codes.insert(value.to_bits(), code as $ityp);
77 }
78
79 let mut output = vortex_buffer::BufferMut::with_capacity(values.len());
80 for value in values {
81 output.push(codes.get(&value.to_bits()).copied().unwrap_or_default());
84 }
85
86 return output.freeze();
87 }
88 }
89 )*
90 };
91}
92
93impl_encode!(f16, u16);
94impl_encode!(f32, u32);
95impl_encode!(f64, u64);
96
#[cfg(test)]
mod tests {
    use vortex_array::Array;
    use vortex_array::IntoArray;
    use vortex_array::arrays::BoolArray;
    use vortex_array::arrays::PrimitiveArray;
    use vortex_array::assert_arrays_eq;
    use vortex_array::validity::Validity;
    use vortex_buffer::buffer;

    use crate::CompressorStats;
    use crate::float::dictionary::dictionary_encode;
    use crate::float::stats::FloatStats;

    /// Encodes a nullable `f32` array holding two distinct valid values and one null slot.
    #[test]
    fn test_float_dict_encode() {
        // Index 3 is marked invalid; the 0.0 stored at that position is only a placeholder.
        let null_mask =
            || Validity::Array(BoolArray::from_iter([true, true, true, false, true]).into_array());

        let input = PrimitiveArray::new(buffer![1f32, 2f32, 2f32, 0f32, 1f32], null_mask());
        let stats = FloatStats::generate(&input);
        let encoded = dictionary_encode(&stats);

        // Only 1.0 and 2.0 reach the dictionary, while every input slot still gets a code.
        assert_eq!(encoded.values().len(), 2);
        assert_eq!(encoded.codes().len(), 5);

        // The value written at the null position is arbitrary here.
        // NOTE(review): presumably the comparison ignores values under an invalid slot.
        let expected =
            PrimitiveArray::new(buffer![1f32, 2f32, 2f32, 1f32, 1f32], null_mask()).into_array();
        assert_arrays_eq!(encoded.as_ref(), expected.as_ref());
    }
}