Skip to main content

vortex_array/arrays/dict/compute/
fill_null.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_error::VortexResult;
5
6use super::Dict;
7use super::DictArray;
8use crate::ArrayRef;
9use crate::Canonical;
10use crate::ExecutionCtx;
11use crate::IntoArray;
12use crate::array::ArrayView;
13use crate::arrays::BoolArray;
14use crate::arrays::ConstantArray;
15use crate::arrays::bool::BoolArrayExt;
16use crate::arrays::dict::DictArrayExt;
17use crate::arrays::dict::DictArraySlotsExt;
18use crate::builtins::ArrayBuiltins;
19use crate::match_each_integer_ptype;
20use crate::scalar::Scalar;
21use crate::scalar::ScalarValue;
22use crate::scalar_fn::fns::fill_null::FillNullKernel;
23use crate::scalar_fn::fns::operators::Operator;
24
25impl FillNullKernel for Dict {
26    fn fill_null(
27        array: ArrayView<'_, Dict>,
28        fill_value: &Scalar,
29        ctx: &mut ExecutionCtx,
30    ) -> VortexResult<Option<ArrayRef>> {
31        // If the fill value already exists in the dictionary, we can simply rewrite the null codes
32        // to point to the value.
33        let found_fill_values = array
34            .values()
35            .clone()
36            .binary(
37                ConstantArray::new(fill_value.clone(), array.values().len()).into_array(),
38                Operator::Eq,
39            )?
40            .execute::<BoolArray>(ctx)?;
41
42        // We found the fill value already in the values at this given index.
43        let Some(existing_fill_value_index) =
44            found_fill_values.to_bit_buffer().set_indices().next()
45        else {
46            // No fill values found, so we must canonicalize and fill_null.
47            return Ok(Some(
48                array
49                    .array()
50                    .clone()
51                    .execute::<Canonical>(ctx)?
52                    .into_array()
53                    .fill_null(fill_value.clone())?,
54            ));
55        };
56
57        // Now we rewrite the nullable codes to point at the fill value.
58        let codes = array.codes();
59
60        // Cast the index to the correct unsigned integer type matching the codes' ptype.
61        let codes_ptype = codes.dtype().as_ptype();
62
63        #[expect(
64            clippy::cast_possible_truncation,
65            reason = "The existing index must be representable by the existing ptype"
66        )]
67        let fill_scalar_value = match_each_integer_ptype!(codes_ptype, |P| {
68            ScalarValue::from(existing_fill_value_index as P)
69        });
70
71        // Fill nulls in both the codes and the values. Note that the precondition of this function
72        // states that the fill value is non-null, so we do not have to worry about the nullability.
73        let codes = codes.clone().fill_null(Scalar::try_new(
74            codes.dtype().as_nonnullable(),
75            Some(fill_scalar_value),
76        )?)?;
77        let values = array.values().clone().fill_null(fill_value.clone())?;
78
79        // SAFETY: invariants are still satisfied after patching nulls.
80        unsafe {
81            Ok(Some(
82                DictArray::new_unchecked(codes, values)
83                    .set_all_values_referenced(array.has_all_values_referenced())
84                    .into_array(),
85            ))
86        }
87    }
88}
89
#[cfg(test)]
mod tests {
    use vortex_buffer::BitBuffer;
    use vortex_buffer::buffer;
    use vortex_error::VortexExpect;

    use crate::IntoArray;
    use crate::ToCanonical;
    use crate::arrays::DictArray;
    use crate::arrays::PrimitiveArray;
    use crate::assert_arrays_eq;
    use crate::builtins::ArrayBuiltins;
    use crate::dtype::Nullability;
    use crate::scalar::Scalar;
    use crate::validity::Validity;

    /// Filling a dictionary whose codes contain a null, using a fill value that is already
    /// present among the dictionary values, yields a fully valid array.
    #[test]
    fn nullable_codes_fill_in_values() {
        // Codes [0, <null>, 2]: only the middle slot is invalid.
        let code_validity = Validity::from(BitBuffer::from(vec![true, false, true]));
        let codes = PrimitiveArray::new(buffer![0u32, 1, 2], code_validity).into_array();

        // The fill value (20) appears twice in the dictionary values.
        let values = PrimitiveArray::new(buffer![10, 20, 20], Validity::AllValid).into_array();

        let dict =
            DictArray::try_new(codes, values).vortex_expect("operation should succeed in test");

        let fill = Scalar::primitive(20, Nullability::NonNullable);
        let filled = dict
            .into_array()
            .fill_null(fill)
            .vortex_expect("operation should succeed in test");

        let result = filled.to_primitive();
        let expected = PrimitiveArray::from_iter([10, 20, 20]);

        assert_arrays_eq!(result, expected);
        assert!(result.all_valid().unwrap());
    }
}