Skip to main content

vortex_array/arrays/dict/compute/
fill_null.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_error::VortexResult;
5
6use super::DictArray;
7use super::DictVTable;
8use crate::Array;
9use crate::ArrayRef;
10use crate::Canonical;
11use crate::ExecutionCtx;
12use crate::IntoArray;
13use crate::arrays::BoolArray;
14use crate::arrays::ConstantArray;
15use crate::builtins::ArrayBuiltins;
16use crate::match_each_integer_ptype;
17use crate::scalar::Scalar;
18use crate::scalar::ScalarValue;
19use crate::scalar_fn::fns::fill_null::FillNullKernel;
20use crate::scalar_fn::fns::operators::Operator;
21
22impl FillNullKernel for DictVTable {
23    fn fill_null(
24        array: &DictArray,
25        fill_value: &Scalar,
26        ctx: &mut ExecutionCtx,
27    ) -> VortexResult<Option<ArrayRef>> {
28        // If the fill value already exists in the dictionary, we can simply rewrite the null codes
29        // to point to the value.
30        let found_fill_values = array
31            .values()
32            .to_array()
33            .binary(
34                ConstantArray::new(fill_value.clone(), array.values().len()).to_array(),
35                Operator::Eq,
36            )?
37            .execute::<BoolArray>(ctx)?;
38
39        // We found the fill value already in the values at this given index.
40        let Some(existing_fill_value_index) =
41            found_fill_values.to_bit_buffer().set_indices().next()
42        else {
43            // No fill values found, so we must canonicalize and fill_null.
44            return Ok(Some(
45                array
46                    .to_array()
47                    .execute::<Canonical>(ctx)?
48                    .into_array()
49                    .fill_null(fill_value.clone())?,
50            ));
51        };
52
53        // Now we rewrite the nullable codes to point at the fill value.
54        let codes = array.codes();
55
56        // Cast the index to the correct unsigned integer type matching the codes' ptype.
57        let codes_ptype = codes.dtype().as_ptype();
58
59        #[expect(
60            clippy::cast_possible_truncation,
61            reason = "The existing index must be representable by the existing ptype"
62        )]
63        let fill_scalar_value = match_each_integer_ptype!(codes_ptype, |P| {
64            ScalarValue::from(existing_fill_value_index as P)
65        });
66
67        // Fill nulls in both the codes and the values. Note that the precondition of this function
68        // states that the fill value is non-null, so we do not have to worry about the nullability.
69        let codes = codes.to_array().fill_null(Scalar::try_new(
70            codes.dtype().as_nonnullable(),
71            Some(fill_scalar_value),
72        )?)?;
73        let values = array.values().to_array().fill_null(fill_value.clone())?;
74
75        // SAFETY: invariants are still satisfied after patching nulls.
76        unsafe {
77            Ok(Some(
78                DictArray::new_unchecked(codes, values)
79                    .set_all_values_referenced(array.has_all_values_referenced())
80                    .into_array(),
81            ))
82        }
83    }
84}
85
#[cfg(test)]
mod tests {
    use vortex_buffer::BitBuffer;
    use vortex_buffer::buffer;
    use vortex_error::VortexExpect;

    use crate::IntoArray;
    use crate::ToCanonical;
    use crate::arrays::PrimitiveArray;
    use crate::arrays::dict::DictArray;
    use crate::assert_arrays_eq;
    use crate::builtins::ArrayBuiltins;
    use crate::dtype::Nullability;
    use crate::scalar::Scalar;
    use crate::validity::Validity;

    /// A dictionary whose middle code is null is filled with a value (20) that is already
    /// present in the dictionary values; the result must decode to fully valid `[10, 20, 20]`.
    #[test]
    fn nullable_codes_fill_in_values() {
        // Codes 0, 1, 2 where only the middle position is marked invalid.
        let code_validity = Validity::from(BitBuffer::from(vec![true, false, true]));
        let codes = PrimitiveArray::new(buffer![0u32, 1, 2], code_validity).into_array();

        // All dictionary values are valid, and the fill value 20 occurs among them.
        let values = PrimitiveArray::new(buffer![10, 20, 20], Validity::AllValid).into_array();

        let dict =
            DictArray::try_new(codes, values).vortex_expect("operation should succeed in test");

        let fill_scalar = Scalar::primitive(20, Nullability::NonNullable);
        let filled = dict
            .to_array()
            .fill_null(fill_scalar)
            .vortex_expect("operation should succeed in test");

        let decoded = filled.to_primitive();
        assert_arrays_eq!(decoded, PrimitiveArray::from_iter([10, 20, 20]));
        assert!(decoded.all_valid().unwrap());
    }
}
122}