vortex_array/compute/
fill_null.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::LazyLock;
5
6use arcref::ArcRef;
7use vortex_dtype::DType;
8use vortex_error::VortexError;
9use vortex_error::VortexResult;
10use vortex_error::vortex_bail;
11use vortex_error::vortex_err;
12use vortex_scalar::Scalar;
13
14use crate::Array;
15use crate::ArrayRef;
16use crate::IntoArray;
17use crate::arrays::ConstantArray;
18use crate::compute::ComputeFn;
19use crate::compute::ComputeFnVTable;
20use crate::compute::InvocationArgs;
21use crate::compute::Kernel;
22use crate::compute::Output;
23use crate::compute::cast;
24use crate::vtable::VTable;
25
26static FILL_NULL_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
27    let compute = ComputeFn::new("fill_null".into(), ArcRef::new_ref(&FillNull));
28    for kernel in inventory::iter::<FillNullKernelRef> {
29        compute.register_kernel(kernel.0.clone());
30    }
31    compute
32});
33
34pub(crate) fn warm_up_vtable() -> usize {
35    FILL_NULL_FN.kernels().len()
36}
37
38/// Replace nulls in the array with another value.
39///
40/// # Examples
41///
42/// ```
43/// use vortex_array::arrays::{PrimitiveArray};
44/// use vortex_array::compute::{fill_null};
45/// use vortex_scalar::Scalar;
46///
47/// let array =
48///     PrimitiveArray::from_option_iter([Some(0i32), None, Some(1i32), None, Some(2i32)]);
49/// let array = fill_null(array.as_ref(), &Scalar::from(42i32)).unwrap();
50/// assert_eq!(array.display_values().to_string(), "[0i32, 42i32, 1i32, 42i32, 2i32]");
51/// ```
52pub fn fill_null(array: &dyn Array, fill_value: &Scalar) -> VortexResult<ArrayRef> {
53    FILL_NULL_FN
54        .invoke(&InvocationArgs {
55            inputs: &[array.into(), fill_value.into()],
56            options: &(),
57        })?
58        .unwrap_array()
59}
60
61pub trait FillNullKernel: VTable {
62    /// Kernel for replacing null values in an array with a fill value.
63    ///
64    /// TODO(connor): Actually enforce these constraints (so that casts do not fail).
65    ///
66    /// Implementations can assume that:
67    /// - The array has at least one null value (not all valid, not all invalid).
68    /// - The fill value is non-null.
69    /// - For decimal arrays, the fill value can be successfully cast to the array's storage type.
70    fn fill_null(&self, array: &Self::Array, fill_value: &Scalar) -> VortexResult<ArrayRef>;
71}
72
73pub struct FillNullKernelRef(ArcRef<dyn Kernel>);
74inventory::collect!(FillNullKernelRef);
75
76#[derive(Debug)]
77pub struct FillNullKernelAdapter<V: VTable>(pub V);
78
79impl<V: VTable + FillNullKernel> FillNullKernelAdapter<V> {
80    pub const fn lift(&'static self) -> FillNullKernelRef {
81        FillNullKernelRef(ArcRef::new_ref(self))
82    }
83}
84
85impl<V: VTable + FillNullKernel> Kernel for FillNullKernelAdapter<V> {
86    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
87        let inputs = FillNullArgs::try_from(args)?;
88        let Some(array) = inputs.array.as_opt::<V>() else {
89            return Ok(None);
90        };
91        Ok(Some(
92            V::fill_null(&self.0, array, inputs.fill_value)?.into(),
93        ))
94    }
95}
96
97struct FillNull;
98
99impl ComputeFnVTable for FillNull {
100    fn invoke(
101        &self,
102        args: &InvocationArgs,
103        kernels: &[ArcRef<dyn Kernel>],
104    ) -> VortexResult<Output> {
105        let FillNullArgs { array, fill_value } = FillNullArgs::try_from(args)?;
106
107        if !array.dtype().is_nullable() || array.all_valid() {
108            return Ok(cast(array, fill_value.dtype())?.into());
109        }
110
111        if array.all_invalid() {
112            return Ok(ConstantArray::new(fill_value.clone(), array.len())
113                .into_array()
114                .into());
115        }
116
117        if fill_value.is_null() {
118            vortex_bail!("Cannot fill_null with a null value")
119        }
120
121        for kernel in kernels {
122            if let Some(output) = kernel.invoke(args)? {
123                return Ok(output);
124            }
125        }
126        if let Some(output) = array.invoke(&FILL_NULL_FN, args)? {
127            return Ok(output);
128        }
129
130        tracing::debug!("FillNullFn not implemented for {}", array.encoding_id());
131        if !array.is_canonical() {
132            let canonical_arr = array.to_canonical().into_array();
133            return Ok(fill_null(canonical_arr.as_ref(), fill_value)?.into());
134        }
135
136        // TODO(joe): update fuzzer when fixed
137        vortex_bail!("fill null not implemented for DType {}", array.dtype())
138    }
139
140    fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType> {
141        let FillNullArgs { array, fill_value } = FillNullArgs::try_from(args)?;
142        if !array.dtype().eq_ignore_nullability(fill_value.dtype()) {
143            vortex_bail!("FillNull value must match array type (ignoring nullability)");
144        }
145        Ok(fill_value.dtype().clone())
146    }
147
148    fn return_len(&self, args: &InvocationArgs) -> VortexResult<usize> {
149        let FillNullArgs { array, .. } = FillNullArgs::try_from(args)?;
150        Ok(array.len())
151    }
152
153    fn is_elementwise(&self) -> bool {
154        true
155    }
156}
157
158struct FillNullArgs<'a> {
159    array: &'a dyn Array,
160    fill_value: &'a Scalar,
161}
162
163impl<'a> TryFrom<&InvocationArgs<'a>> for FillNullArgs<'a> {
164    type Error = VortexError;
165
166    fn try_from(value: &InvocationArgs<'a>) -> Result<Self, Self::Error> {
167        if value.inputs.len() != 2 {
168            vortex_bail!("FillNull requires 2 arguments");
169        }
170
171        let array = value.inputs[0]
172            .array()
173            .ok_or_else(|| vortex_err!("FillNull requires an array"))?;
174        let fill_value = value.inputs[1]
175            .scalar()
176            .ok_or_else(|| vortex_err!("FillNull requires a scalar"))?;
177
178        Ok(FillNullArgs { array, fill_value })
179    }
180}