vortex_array/arrays/dict/compute/
fill_null.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_error::VortexResult;
5use vortex_scalar::Scalar;
6use vortex_scalar::ScalarValue;
7
8use super::DictArray;
9use super::DictVTable;
10use crate::Array;
11use crate::ArrayRef;
12use crate::IntoArray;
13use crate::ToCanonical;
14use crate::arrays::ConstantArray;
15use crate::compute::FillNullKernel;
16use crate::compute::FillNullKernelAdapter;
17use crate::compute::Operator;
18use crate::compute::compare;
19use crate::compute::fill_null;
20use crate::register_kernel;
21
22impl FillNullKernel for DictVTable {
23    fn fill_null(&self, array: &DictArray, fill_value: &Scalar) -> VortexResult<ArrayRef> {
24        // If the fill value exists in the dictionary, we can simply rewrite the null codes to
25        // point to the value.
26        let found_fill_values = compare(
27            array.values(),
28            ConstantArray::new(fill_value.clone(), array.values().len()).as_ref(),
29            Operator::Eq,
30        )?
31        .to_bool();
32
33        let Some(first_fill_value) = found_fill_values.bit_buffer().set_indices().next() else {
34            // No fill values found, so we must canonicalize and fill_null.
35            // TODO(ngates): compute kernels should all return Option<ArrayRef> to support this
36            //  fall back.
37            return fill_null(&array.to_canonical().into_array(), fill_value);
38        };
39
40        // Now we rewrite the nullable codes to point at the fill value.
41        let codes = fill_null(
42            array.codes(),
43            &Scalar::new(
44                array
45                    .codes()
46                    .dtype()
47                    .with_nullability(fill_value.dtype().nullability()),
48                ScalarValue::from(first_fill_value),
49            ),
50        )?;
51        // And fill nulls in the values
52        let values = fill_null(array.values(), fill_value)?;
53
54        // SAFETY: invariants are still satisfied after patching nulls
55        unsafe {
56            Ok(DictArray::new_unchecked(codes, values)
57                .set_all_values_referenced(array.has_all_values_referenced())
58                .into_array())
59        }
60    }
61}
62
// Registers the `FillNullKernelAdapter` wrapping `DictVTable` via `register_kernel!`.
// NOTE(review): presumably this is what lets the generic `fill_null` compute function
// dispatch to the `FillNullKernel` impl above — confirm against the macro definition.
63register_kernel!(FillNullKernelAdapter(DictVTable).lift());
64
#[cfg(test)]
mod tests {
    use vortex_buffer::BitBuffer;
    use vortex_buffer::buffer;
    use vortex_dtype::Nullability;
    use vortex_error::VortexUnwrap;
    use vortex_scalar::Scalar;

    use crate::IntoArray;
    use crate::ToCanonical;
    use crate::arrays::PrimitiveArray;
    use crate::arrays::dict::DictArray;
    use crate::assert_arrays_eq;
    use crate::compute::fill_null;
    use crate::validity::Validity;

    /// Filling a dictionary whose codes contain a null, using a fill value that is already
    /// present in the dictionary values, yields a fully valid array.
    #[test]
    fn nullable_codes_fill_in_values() {
        // Codes [0, null, 2] over values [10, 20, 20].
        let code_validity = Validity::from(BitBuffer::from(vec![true, false, true]));
        let codes = PrimitiveArray::new(buffer![0u32, 1, 2], code_validity).into_array();
        let values = PrimitiveArray::new(buffer![10, 20, 20], Validity::AllValid).into_array();
        let dict = DictArray::try_new(codes, values).vortex_unwrap();

        // 20 already exists in the dictionary values.
        let fill_value = Scalar::primitive(20, Nullability::NonNullable);
        let result = fill_null(dict.as_ref(), &fill_value)
            .vortex_unwrap()
            .to_primitive();

        assert_arrays_eq!(result, PrimitiveArray::from_iter([10, 20, 20]));
        assert!(result.all_valid());
    }
}