// vortex_btrblocks/float/dictionary.rs
use vortex_array::IntoArray;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::validity::Validity;
use vortex_array::vtable::ValidityHelper;
use vortex_buffer::Buffer;
use vortex_dict::DictArray;
use vortex_dtype::half::f16;

use crate::float::stats::{ErasedDistinctValues, FloatStats};
15
/// Builds a [`DictArray`] for one concrete float type.
///
/// Arguments:
/// * `$stats` - the stats object whose `src` array is being encoded.
/// * `$typed` - the typed distinct-value set; its `values` become the dictionary.
/// * `$validity` - validity of the source array; it is cloned onto the codes array.
/// * `$typ` - the native float type of the source (`f16`, `f32` or `f64`).
///
/// The codes use the narrowest of `u8`, `u16` and `u32` whose maximum value is at least
/// the number of dictionary entries.
macro_rules! typed_encode {
    ($stats:ident, $typed:ident, $validity:ident, $typ:ty) => {{
        // Unwrap each distinct value (`.0`) into a plain buffer: this is the dictionary.
        let values: Buffer<$typ> = $typed.values.iter().map(|x| x.0).collect();
        // The source slice is the same for every code width, so fetch it once.
        let source = $stats.src.as_slice::<$typ>();

        let max_code = values.len();
        let codes = if max_code <= u8::MAX as usize {
            let buf = <DictEncoder as Encode<$typ, u8>>::encode(&values, source);
            PrimitiveArray::new(buf, $validity.clone()).into_array()
        } else if max_code <= u16::MAX as usize {
            let buf = <DictEncoder as Encode<$typ, u16>>::encode(&values, source);
            PrimitiveArray::new(buf, $validity.clone()).into_array()
        } else {
            let buf = <DictEncoder as Encode<$typ, u32>>::encode(&values, source);
            PrimitiveArray::new(buf, $validity.clone()).into_array()
        };

        // Nullability is carried by the codes; the dictionary itself never holds nulls,
        // but it keeps the nullable/non-nullable flavour of the source.
        let values_validity = match $validity {
            Validity::NonNullable => Validity::NonNullable,
            _ => Validity::AllValid,
        };
        let values = PrimitiveArray::new(values, values_validity).into_array();

        // SAFETY: `codes` holds unsigned integers produced by `DictEncoder`. Each one is
        // either a position obtained by enumerating `values`, or the default code 0 for a
        // source slot whose bit pattern is not in the dictionary.
        // NOTE(review): confirm `new_unchecked` tolerates code 0 when the dictionary is
        // empty (e.g. an all-null source).
        unsafe { DictArray::new_unchecked(codes, values) }
    }};
}
45
46pub fn dictionary_encode(stats: &FloatStats) -> DictArray {
48 let validity = stats.src.validity();
49 match &stats.distinct_values {
50 ErasedDistinctValues::F16(typed) => typed_encode!(stats, typed, validity, f16),
51 ErasedDistinctValues::F32(typed) => typed_encode!(stats, typed, validity, f32),
52 ErasedDistinctValues::F64(typed) => typed_encode!(stats, typed, validity, f64),
53 }
54}
55
/// Stateless marker type that carries the [`Encode`] implementations for every
/// supported (float type, code type) pair.
struct DictEncoder;
57
58trait Encode<T, I> {
59 fn encode(distinct: &[T], values: &[T]) -> Buffer<I>;
61}
62
/// Implements [`Encode`] on [`DictEncoder`] for a float type.
///
/// * `$typ` - the float type being encoded.
/// * `$utyp` - the unsigned integer type returned by `$typ::to_bits`, used as the hash key.
/// * `$ityp` - one or more code types; the two-argument form expands to `u8`, `u16`, `u32`.
macro_rules! impl_encode {
    ($typ:ty, $utyp:ty) => { impl_encode!($typ, $utyp, u8, u16, u32); };
    ($typ:ty, $utyp:ty, $($ityp:ty),+) => {
        $(
            impl Encode<$typ, $ityp> for DictEncoder {
                // The `as` cast below narrows `usize`; the caller is expected to pick a
                // code type wide enough for `distinct.len()`.
                #[allow(clippy::cast_possible_truncation)]
                fn encode(distinct: &[$typ], values: &[$typ]) -> Buffer<$ityp> {
                    // Key on the raw bit pattern: floats implement neither `Eq` nor `Hash`.
                    let mut codes =
                        vortex_utils::aliases::hash_map::HashMap::<$utyp, $ityp>::with_capacity(
                            distinct.len(),
                        );
                    for (code, &value) in distinct.iter().enumerate() {
                        codes.insert(value.to_bits(), code as $ityp);
                    }

                    let mut output = vortex_buffer::BufferMut::with_capacity(values.len());
                    for value in values {
                        // A value missing from `distinct` falls back to code 0.
                        // NOTE(review): presumably this only happens for null slots,
                        // whose physical value is irrelevant - confirm against FloatStats.
                        output.push(codes.get(&value.to_bits()).copied().unwrap_or_default());
                    }

                    output.freeze()
                }
            }
        )+
    };
}
91
92impl_encode!(f16, u16);
93impl_encode!(f32, u32);
94impl_encode!(f64, u64);
95
#[cfg(test)]
mod tests {
    use vortex_array::arrays::{BoolArray, PrimitiveArray};
    use vortex_array::validity::Validity;
    use vortex_array::{Array, IntoArray, ToCanonical};
    use vortex_buffer::buffer;

    use crate::CompressorStats;
    use crate::float::dictionary::dictionary_encode;
    use crate::float::stats::FloatStats;

    /// Encodes a nullable `f32` array and checks the dictionary size, the number of
    /// codes, and the values obtained after decoding.
    #[test]
    fn test_float_dict_encode() {
        // The fourth slot physically holds 0.0 but is masked out as null below.
        let values = buffer![1f32, 2f32, 2f32, 0f32, 1f32];
        let validity =
            Validity::Array(BoolArray::from_iter([true, true, true, false, true]).into_array());
        let array = PrimitiveArray::new(values, validity);

        let stats = FloatStats::generate(&array);
        let dict_array = dictionary_encode(&stats);
        // Two dictionary entries are expected: the null slot's 0.0 is not one of them.
        assert_eq!(dict_array.values().len(), 2);
        // One code per source element, nulls included.
        assert_eq!(dict_array.codes().len(), 5);

        let undict = dict_array.to_primitive();

        // The null slot's physical value is expected to decode to 1.0 rather than the
        // original 0.0: the encoder assigns code 0 to values missing from the dictionary.
        assert_eq!(undict.as_slice::<f32>(), &[1f32, 2f32, 2f32, 1f32, 1f32]);
    }
}