vortex_dict/compute/
mod.rs

1mod binary_numeric;
2mod compare;
3mod fill_null;
4mod is_constant;
5mod is_sorted;
6mod like;
7mod min_max;
8
9use vortex_array::compute::{
10    FilterKernel, FilterKernelAdapter, TakeKernel, TakeKernelAdapter, filter, take,
11};
12use vortex_array::{Array, ArrayRef, IntoArray, register_kernel};
13use vortex_error::VortexResult;
14use vortex_mask::Mask;
15
16use crate::{DictArray, DictVTable};
17
18impl TakeKernel for DictVTable {
19    fn take(&self, array: &DictArray, indices: &dyn Array) -> VortexResult<ArrayRef> {
20        let codes = take(array.codes(), indices)?;
21        DictArray::try_new(codes, array.values().clone()).map(|a| a.into_array())
22    }
23}
24
25register_kernel!(TakeKernelAdapter(DictVTable).lift());
26
27impl FilterKernel for DictVTable {
28    fn filter(&self, array: &DictArray, mask: &Mask) -> VortexResult<ArrayRef> {
29        let codes = filter(array.codes(), mask)?;
30        DictArray::try_new(codes, array.values().clone()).map(|a| a.into_array())
31    }
32}
33
34register_kernel!(FilterKernelAdapter(DictVTable).lift());
35
// Tests for dictionary arrays produced by `dict_encode`: canonicalisation, comparison of a
// sliced array, and the mask conformance suite.
#[cfg(test)]
mod test {
    use vortex_array::accessor::ArrayAccessor;
    use vortex_array::arrays::{ConstantArray, PrimitiveArray, VarBinArray, VarBinViewArray};
    use vortex_array::compute::conformance::mask::test_mask;
    use vortex_array::compute::{Operator, compare};
    use vortex_array::{Array, ArrayRef, IntoArray, ToCanonical};
    use vortex_dtype::{DType, Nullability};
    use vortex_scalar::Scalar;

    use crate::builders::dict_encode;

    /// Dictionary-encodes 1000 non-nullable `i32`s that cycle through `0..distinct` and asserts
    /// that converting back to a primitive array yields the original values.
    fn assert_cyclic_round_trip(distinct: i32) {
        let expected: Vec<i32> = (0..distinct).cycle().take(1000).collect();

        let input = PrimitiveArray::from_iter(expected.iter().copied());
        let encoded = dict_encode(input.as_ref()).unwrap();
        let decoded = encoded.to_primitive().unwrap();

        assert_eq!(decoded.as_slice::<i32>(), expected.as_slice());
    }

    /// A nullable primitive array keeps its valid count through encode + canonicalise.
    #[test]
    fn canonicalise_nullable_primitive() {
        // 65 elements repeating the pattern 42, -9, null.
        let values: Vec<Option<i32>> = [Some(42), Some(-9), None]
            .iter()
            .copied()
            .cycle()
            .take(65)
            .collect();

        let input = PrimitiveArray::from_option_iter(values.iter().copied());
        let encoded = dict_encode(input.as_ref()).unwrap();
        let decoded = encoded.to_primitive().unwrap();

        // The compressor assigns code 0 to invalid entries and that code is used by the take
        // during decompression, so every null slot holds whatever sits at position 0 of the
        // dictionary (42 here).
        let expected: Vec<i32> = [42, -9, 42].iter().copied().cycle().take(65).collect();
        assert_eq!(decoded.as_slice::<i32>(), expected.as_slice());

        let valid_count = values.iter().flatten().count();
        let decoded_validity = decoded.validity_mask().unwrap();
        assert_eq!(decoded_validity.true_count(), valid_count);
    }

    /// Round trip with 32 distinct non-nullable values.
    #[test]
    fn canonicalise_non_nullable_primitive_32_unique_values() {
        assert_cyclic_round_trip(32);
    }

    /// Round trip with 100 distinct non-nullable values.
    #[test]
    fn canonicalise_non_nullable_primitive_100_unique_values() {
        assert_cyclic_round_trip(100);
    }

    /// A nullable UTF-8 view array survives encode + canonicalise element for element.
    #[test]
    fn canonicalise_nullable_varbin() {
        let reference = VarBinViewArray::from_iter(
            [Some("a"), Some("b"), None, Some("a"), None, Some("b")],
            DType::Utf8(Nullability::Nullable),
        );
        assert_eq!(reference.len(), 6);

        let encoded = dict_encode(reference.as_ref()).unwrap();
        let flattened = encoded.to_varbinview().unwrap();

        // Copy both sides into owned byte vectors so they can be compared directly.
        let decoded_bytes = flattened
            .with_iterator(|iter| {
                iter.map(|slice| slice.map(|s| s.to_vec()))
                    .collect::<Vec<_>>()
            })
            .unwrap();
        let reference_bytes = reference
            .with_iterator(|iter| {
                iter.map(|slice| slice.map(|s| s.to_vec()))
                    .collect::<Vec<_>>()
            })
            .unwrap();

        assert_eq!(decoded_bytes, reference_bytes);
    }

    /// Encodes `[42, -9, null, 42, 1, 5]` and returns the slice `1..4` of the encoded array.
    fn sliced_dict_array() -> ArrayRef {
        let source =
            PrimitiveArray::from_option_iter([Some(42), Some(-9), None, Some(42), Some(1), Some(5)]);
        let encoded = dict_encode(source.as_ref()).unwrap();
        encoded.slice(1, 4).unwrap()
    }

    /// Comparing the sliced array against the constant 42 gives `false`, null, `true`.
    #[test]
    fn compare_sliced_dict() {
        let sliced = sliced_dict_array();
        let rhs = ConstantArray::new(42, 3);
        let compared = compare(&sliced, rhs.as_ref(), Operator::Eq).unwrap();

        let expected = vec![
            Scalar::bool(false, Nullability::Nullable),
            Scalar::null(DType::Bool(Nullability::Nullable)),
            Scalar::bool(true, Nullability::Nullable),
        ];
        for (position, want) in expected.into_iter().enumerate() {
            assert_eq!(compared.scalar_at(position).unwrap(), want);
        }
    }

    /// Runs the mask conformance suite over non-nullable, nullable and UTF-8 dictionary arrays.
    #[test]
    fn test_mask_dict_array() {
        // Non-nullable primitives.
        let non_nullable = PrimitiveArray::from_iter([2, 0, 2, 0, 10]).into_array();
        let encoded = dict_encode(&non_nullable).unwrap();
        test_mask(encoded.as_ref());

        // Nullable primitives.
        let nullable =
            PrimitiveArray::from_option_iter([Some(2), None, Some(2), Some(0), Some(10)]);
        let encoded = dict_encode(nullable.as_ref()).unwrap();
        test_mask(encoded.as_ref());

        // Nullable UTF-8 strings.
        let strings = VarBinArray::from_iter(
            [
                Some("hello"),
                None,
                Some("hello"),
                Some("good"),
                Some("good"),
            ],
            DType::Utf8(Nullability::Nullable),
        )
        .into_array();
        let encoded = dict_encode(&strings).unwrap();
        test_mask(encoded.as_ref());
    }
}