vortex_array/expr/exprs/
is_null.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Formatter;
5use std::ops::Not;
6
7use vortex_dtype::DType;
8use vortex_dtype::Nullability;
9use vortex_error::VortexExpect;
10use vortex_error::VortexResult;
11use vortex_mask::Mask;
12use vortex_vector::Datum;
13use vortex_vector::ScalarOps;
14use vortex_vector::VectorOps;
15use vortex_vector::bool::BoolScalar;
16use vortex_vector::bool::BoolVector;
17
18use crate::Array;
19use crate::ArrayRef;
20use crate::IntoArray;
21use crate::arrays::BoolArray;
22use crate::arrays::ConstantArray;
23use crate::expr::Arity;
24use crate::expr::ChildName;
25use crate::expr::EmptyOptions;
26use crate::expr::ExecutionArgs;
27use crate::expr::ExprId;
28use crate::expr::Expression;
29use crate::expr::StatsCatalog;
30use crate::expr::VTable;
31use crate::expr::VTableExt;
32use crate::expr::exprs::binary::eq;
33use crate::expr::exprs::literal::lit;
34use crate::expr::stats::Stat;
35
36/// Expression that checks for null values.
37pub struct IsNull;
38
39impl VTable for IsNull {
40    type Options = EmptyOptions;
41
42    fn id(&self) -> ExprId {
43        ExprId::new_ref("is_null")
44    }
45
46    fn serialize(&self, _instance: &Self::Options) -> VortexResult<Option<Vec<u8>>> {
47        Ok(Some(vec![]))
48    }
49
50    fn deserialize(&self, _metadata: &[u8]) -> VortexResult<Self::Options> {
51        Ok(EmptyOptions)
52    }
53
54    fn arity(&self, _options: &Self::Options) -> Arity {
55        Arity::Exact(1)
56    }
57
58    fn child_name(&self, _instance: &Self::Options, child_idx: usize) -> ChildName {
59        match child_idx {
60            0 => ChildName::from("input"),
61            _ => unreachable!("Invalid child index {} for IsNull expression", child_idx),
62        }
63    }
64
65    fn fmt_sql(
66        &self,
67        _options: &Self::Options,
68        expr: &Expression,
69        f: &mut Formatter<'_>,
70    ) -> std::fmt::Result {
71        write!(f, "is_null(")?;
72        expr.child(0).fmt_sql(f)?;
73        write!(f, ")")
74    }
75
76    fn return_dtype(&self, _options: &Self::Options, _arg_dtypes: &[DType]) -> VortexResult<DType> {
77        Ok(DType::Bool(Nullability::NonNullable))
78    }
79
80    fn evaluate(
81        &self,
82        _options: &Self::Options,
83        expr: &Expression,
84        scope: &ArrayRef,
85    ) -> VortexResult<ArrayRef> {
86        let array = expr.child(0).evaluate(scope)?;
87        match array.validity_mask() {
88            Mask::AllTrue(len) => Ok(ConstantArray::new(false, len).into_array()),
89            Mask::AllFalse(len) => Ok(ConstantArray::new(true, len).into_array()),
90            Mask::Values(mask) => Ok(BoolArray::from(mask.bit_buffer().not()).into_array()),
91        }
92    }
93
94    fn execute(&self, _data: &Self::Options, mut args: ExecutionArgs) -> VortexResult<Datum> {
95        let child = args.datums.pop().vortex_expect("Missing input child");
96        Ok(match child {
97            Datum::Scalar(s) => Datum::Scalar(BoolScalar::new(Some(s.is_null())).into()),
98            Datum::Vector(v) => Datum::Vector(
99                BoolVector::new(v.validity().to_bit_buffer().not(), Mask::new_true(v.len())).into(),
100            ),
101        })
102    }
103
104    fn stat_falsification(
105        &self,
106        _options: &Self::Options,
107        expr: &Expression,
108        catalog: &dyn StatsCatalog,
109    ) -> Option<Expression> {
110        let null_count_expr = expr.child(0).stat_expression(Stat::NullCount, catalog)?;
111        Some(eq(null_count_expr, lit(0u64)))
112    }
113
114    fn is_null_sensitive(&self, _instance: &Self::Options) -> bool {
115        true
116    }
117
118    fn is_fallible(&self, _instance: &Self::Options) -> bool {
119        false
120    }
121}
122
123/// Creates an expression that checks for null values.
124///
125/// Returns a boolean array indicating which positions contain null values.
126///
127/// ```rust
128/// # use vortex_array::expr::{is_null, root};
129/// let expr = is_null(root());
130/// ```
131pub fn is_null(child: Expression) -> Expression {
132    IsNull.new_expr(EmptyOptions, vec![child])
133}
134
#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;
    use vortex_dtype::DType;
    use vortex_dtype::Field;
    use vortex_dtype::FieldPath;
    use vortex_dtype::FieldPathSet;
    use vortex_dtype::Nullability;
    use vortex_error::VortexExpect as _;
    use vortex_scalar::Scalar;
    use vortex_utils::aliases::hash_map::HashMap;
    use vortex_utils::aliases::hash_set::HashSet;

    use super::is_null;
    use crate::IntoArray;
    use crate::arrays::PrimitiveArray;
    use crate::arrays::StructArray;
    use crate::expr::exprs::binary::eq;
    use crate::expr::exprs::get_item::col;
    use crate::expr::exprs::get_item::get_item;
    use crate::expr::exprs::literal::lit;
    use crate::expr::exprs::root::root;
    use crate::expr::pruning::checked_pruning_expr;
    use crate::expr::stats::Stat;
    use crate::expr::test_harness;

    /// The return dtype is a non-nullable boolean.
    #[test]
    fn dtype() {
        let dtype = test_harness::struct_dtype();
        assert_eq!(
            is_null(root()).return_dtype(&dtype).unwrap(),
            DType::Bool(Nullability::NonNullable)
        );
    }

    /// Replacing the single child with another single child succeeds.
    #[test]
    fn replace_children() {
        let expr = is_null(root());
        expr.with_children([root()])
            .vortex_expect("operation should succeed in test");
    }

    /// A mix of valid and null values yields the element-wise null indicator.
    #[test]
    fn evaluate_mask() {
        let test_array =
            PrimitiveArray::from_option_iter(vec![Some(1), None, Some(2), None, Some(3)])
                .into_array();
        let expected = [false, true, false, true, false];

        // `evaluate` only borrows the scope, so no clone of the array is needed.
        let result = is_null(root()).evaluate(&test_array).unwrap();

        assert_eq!(result.len(), test_array.len());
        assert_eq!(result.dtype(), &DType::Bool(Nullability::NonNullable));

        for (i, expected_value) in expected.iter().enumerate() {
            assert_eq!(
                result.scalar_at(i),
                Scalar::bool(*expected_value, Nullability::NonNullable)
            );
        }
    }

    /// An input without nulls yields a constant `false` result.
    #[test]
    fn evaluate_all_false() {
        let test_array = buffer![1, 2, 3, 4, 5].into_array();

        let result = is_null(root()).evaluate(&test_array).unwrap();

        assert_eq!(result.len(), test_array.len());
        assert_eq!(
            result.as_constant().unwrap(),
            Scalar::bool(false, Nullability::NonNullable)
        );
    }

    /// An input consisting only of nulls yields a constant `true` result.
    #[test]
    fn evaluate_all_true() {
        let test_array =
            PrimitiveArray::from_option_iter(vec![None::<i32>, None, None, None, None])
                .into_array();

        let result = is_null(root()).evaluate(&test_array).unwrap();

        assert_eq!(result.len(), test_array.len());
        assert_eq!(
            result.as_constant().unwrap(),
            Scalar::bool(true, Nullability::NonNullable)
        );
    }

    /// `is_null` composes with `get_item` to inspect a struct field.
    #[test]
    fn evaluate_struct() {
        let test_array = StructArray::from_fields(&[(
            "a",
            PrimitiveArray::from_option_iter(vec![Some(1), None, Some(2), None, Some(3)])
                .into_array(),
        )])
        .unwrap()
        .into_array();
        let expected = [false, true, false, true, false];

        let result = is_null(get_item("a", root()))
            .evaluate(&test_array)
            .unwrap();

        assert_eq!(result.len(), test_array.len());
        assert_eq!(result.dtype(), &DType::Bool(Nullability::NonNullable));

        for (i, expected_value) in expected.iter().enumerate() {
            assert_eq!(
                result.scalar_at(i),
                Scalar::bool(*expected_value, Nullability::NonNullable)
            );
        }
    }

    /// The display form is `is_null(<child>)`.
    #[test]
    fn test_display() {
        let expr = is_null(get_item("name", root()));
        assert_eq!(expr.to_string(), "is_null($.name)");

        let expr2 = is_null(root());
        assert_eq!(expr2.to_string(), "is_null($)");
    }

    /// The pruning expression for `is_null(a)` is `a_null_count == 0` and requires only
    /// the `NullCount` stat of `a`.
    #[test]
    fn test_is_null_falsification() {
        let expr = is_null(col("a"));

        let (pruning_expr, st) = checked_pruning_expr(
            &expr,
            &FieldPathSet::from_iter([FieldPath::from_iter([
                Field::Name("a".into()),
                Field::Name("null_count".into()),
            ])]),
        )
        .unwrap();

        assert_eq!(&pruning_expr, &eq(col("a_null_count"), lit(0u64)));
        assert_eq!(
            st.map(),
            &HashMap::from_iter([(FieldPath::from_name("a"), HashSet::from([Stat::NullCount]))])
        );
    }

    /// `is_null` itself is null-sensitive.
    #[test]
    fn test_is_null_sensitive() {
        assert!(is_null(col("a")).signature().is_null_sensitive());
    }
}