vortex_dict/compute/
fill_null.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_array::arrays::ConstantArray;
5use vortex_array::compute::{FillNullKernel, FillNullKernelAdapter, Operator, compare, fill_null};
6use vortex_array::{Array, ArrayRef, IntoArray, ToCanonical, register_kernel};
7use vortex_error::VortexResult;
8use vortex_scalar::{Scalar, ScalarValue};
9
10use crate::{DictArray, DictVTable};
11
12impl FillNullKernel for DictVTable {
13    fn fill_null(&self, array: &DictArray, fill_value: &Scalar) -> VortexResult<ArrayRef> {
14        // If the fill value exists in the dictionary, we can simply rewrite the null codes to
15        // point to the value.
16        let found_fill_values = compare(
17            array.values(),
18            ConstantArray::new(fill_value.clone(), array.values().len()).as_ref(),
19            Operator::Eq,
20        )?
21        .to_bool()?;
22
23        let Some(first_fill_value) = found_fill_values.boolean_buffer().set_indices().next() else {
24            // No fill values found, so we must canonicalize and fill_null.
25            // TODO(ngates): compute kernels should all return Option<ArrayRef> to support this
26            //  fall back.
27            return fill_null(&array.to_canonical()?.into_array(), fill_value);
28        };
29
30        // Now we rewrite the nullable codes to point at the fill value.
31        let codes = fill_null(
32            array.codes(),
33            &Scalar::new(
34                array
35                    .codes()
36                    .dtype()
37                    .with_nullability(fill_value.dtype().nullability()),
38                ScalarValue::from(first_fill_value),
39            ),
40        )?;
41        // And fill nulls in the values
42        let values = fill_null(array.values(), fill_value)?;
43
44        // SAFETY: invariants are still satisfied after patching nulls
45        unsafe { Ok(DictArray::new_unchecked(codes, values).into_array()) }
46    }
47}
48
49register_kernel!(FillNullKernelAdapter(DictVTable).lift());
50
#[cfg(test)]
mod tests {
    use arrow_buffer::BooleanBuffer;
    use vortex_array::arrays::PrimitiveArray;
    use vortex_array::compute::fill_null;
    use vortex_array::validity::Validity;
    use vortex_array::{IntoArray, ToCanonical};
    use vortex_buffer::buffer;
    use vortex_dtype::Nullability;
    use vortex_error::VortexUnwrap;
    use vortex_scalar::Scalar;

    use crate::DictArray;

    /// Filling a dictionary array whose codes contain a null, using a value that is already
    /// present in the dictionary, must yield a fully valid array.
    #[test]
    fn nullable_codes_fill_in_values() {
        // Codes are [0, null, 2]; only the middle code is invalid.
        let code_validity = Validity::from(BooleanBuffer::from(vec![true, false, true]));
        let codes = PrimitiveArray::new(buffer![0u32, 1, 2], code_validity).into_array();
        // The fill value 20 appears twice in the dictionary values.
        let values = PrimitiveArray::new(buffer![10, 20, 20], Validity::AllValid).into_array();
        let dict = DictArray::try_new(codes, values).vortex_unwrap();

        let fill_with = Scalar::primitive(20, Nullability::NonNullable);
        let filled_primitive = fill_null(dict.as_ref(), &fill_with)
            .vortex_unwrap()
            .to_primitive()
            .vortex_unwrap();

        // The null slot decodes to the fill value and no nulls remain.
        assert_eq!(filled_primitive.as_slice::<i32>(), [10, 20, 20]);
        assert!(filled_primitive.all_valid().vortex_unwrap());
    }
}