Skip to main content

vortex_array/scalar_fn/fns/
is_not_null.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Formatter;
5
6use vortex_error::VortexResult;
7use vortex_session::VortexSession;
8
9use crate::ArrayRef;
10use crate::ExecutionCtx;
11use crate::IntoArray;
12use crate::arrays::ConstantArray;
13use crate::dtype::DType;
14use crate::dtype::Nullability;
15use crate::expr::Expression;
16use crate::expr::StatsCatalog;
17use crate::expr::and;
18use crate::expr::eq;
19use crate::expr::gt;
20use crate::expr::lit;
21use crate::expr::stats::Stat;
22use crate::scalar_fn::Arity;
23use crate::scalar_fn::ChildName;
24use crate::scalar_fn::EmptyOptions;
25use crate::scalar_fn::ExecutionArgs;
26use crate::scalar_fn::ScalarFnId;
27use crate::scalar_fn::ScalarFnVTable;
28use crate::validity::Validity;
29
/// Scalar function that checks for non-null values.
///
/// This is a stateless marker type; all behavior lives in its
/// `ScalarFnVTable` implementation. `Debug` is derived so the type can be
/// printed in diagnostics and embedded in other `Debug`-deriving types.
#[derive(Clone, Debug)]
pub struct IsNotNull;
33
34impl ScalarFnVTable for IsNotNull {
35    type Options = EmptyOptions;
36
37    fn id(&self) -> ScalarFnId {
38        ScalarFnId::new("vortex.is_not_null")
39    }
40
41    fn serialize(&self, _instance: &Self::Options) -> VortexResult<Option<Vec<u8>>> {
42        Ok(Some(vec![]))
43    }
44
45    fn deserialize(
46        &self,
47        _metadata: &[u8],
48        _session: &VortexSession,
49    ) -> VortexResult<Self::Options> {
50        Ok(EmptyOptions)
51    }
52
53    fn arity(&self, _options: &Self::Options) -> Arity {
54        Arity::Exact(1)
55    }
56
57    fn child_name(&self, _instance: &Self::Options, child_idx: usize) -> ChildName {
58        match child_idx {
59            0 => ChildName::from("input"),
60            _ => unreachable!("Invalid child index {} for IsNotNull expression", child_idx),
61        }
62    }
63
64    fn fmt_sql(
65        &self,
66        _options: &Self::Options,
67        expr: &Expression,
68        f: &mut Formatter<'_>,
69    ) -> std::fmt::Result {
70        write!(f, "is_not_null(")?;
71        expr.child(0).fmt_sql(f)?;
72        write!(f, ")")
73    }
74
75    fn return_dtype(&self, _options: &Self::Options, _arg_dtypes: &[DType]) -> VortexResult<DType> {
76        Ok(DType::Bool(Nullability::NonNullable))
77    }
78
79    fn execute(
80        &self,
81        _data: &Self::Options,
82        args: &dyn ExecutionArgs,
83        _ctx: &mut ExecutionCtx,
84    ) -> VortexResult<ArrayRef> {
85        let child = args.get(0)?;
86        match child.validity()? {
87            Validity::NonNullable | Validity::AllValid => {
88                Ok(ConstantArray::new(true, args.row_count()).into_array())
89            }
90            Validity::AllInvalid => Ok(ConstantArray::new(false, args.row_count()).into_array()),
91            Validity::Array(a) => Ok(a),
92        }
93    }
94
95    fn is_null_sensitive(&self, _instance: &Self::Options) -> bool {
96        true
97    }
98
99    fn is_fallible(&self, _instance: &Self::Options) -> bool {
100        false
101    }
102
103    fn stat_falsification(
104        &self,
105        _options: &Self::Options,
106        expr: &Expression,
107        catalog: &dyn StatsCatalog,
108    ) -> Option<Expression> {
109        // is_not_null is falsified when ALL values are null, i.e. null_count == len.
110        // Since there is no len stat in the zone map, we approximate using IsConstant:
111        // if the zone is constant and has any nulls, then all values must be null.
112        //
113        // TODO(#7187): Add a len stat to enable the more general falsification:
114        //   null_count == len => is_not_null is all false.
115        let null_count_expr = expr.child(0).stat_expression(Stat::NullCount, catalog)?;
116        let is_constant_expr = expr.child(0).stat_expression(Stat::IsConstant, catalog)?;
117        // If the zone is constant (is_constant == true) and has nulls (null_count > 0),
118        // then all values must be null, so is_not_null is all false.
119        Some(and(
120            eq(is_constant_expr, lit(true)),
121            gt(null_count_expr, lit(0u64)),
122        ))
123    }
124}
125
#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;
    use vortex_error::VortexExpect as _;
    use vortex_utils::aliases::hash_map::HashMap;
    use vortex_utils::aliases::hash_set::HashSet;

    use crate::IntoArray;
    use crate::LEGACY_SESSION;
    use crate::VortexSessionExecute;
    use crate::arrays::PrimitiveArray;
    use crate::arrays::StructArray;
    use crate::dtype::DType;
    use crate::dtype::Field;
    use crate::dtype::FieldPath;
    use crate::dtype::FieldPathSet;
    use crate::dtype::Nullability;
    use crate::expr::and;
    use crate::expr::col;
    use crate::expr::eq;
    use crate::expr::get_item;
    use crate::expr::gt;
    use crate::expr::is_not_null;
    use crate::expr::lit;
    use crate::expr::pruning::checked_pruning_expr;
    use crate::expr::root;
    use crate::expr::stats::Stat;
    use crate::expr::test_harness;
    use crate::scalar::Scalar;

    /// The return dtype for the test-harness struct dtype is a non-nullable bool.
    #[test]
    fn dtype() {
        let input_dtype = test_harness::struct_dtype();
        let returned = is_not_null(root()).return_dtype(&input_dtype).unwrap();
        assert_eq!(returned, DType::Bool(Nullability::NonNullable));
    }

    /// Replacing the single child with another expression succeeds.
    #[test]
    fn replace_children() {
        is_not_null(root())
            .with_children([root()])
            .vortex_expect("operation should succeed in test");
    }

    /// A primitive array with interleaved nulls yields the matching boolean mask.
    #[test]
    fn evaluate_mask() {
        let input = PrimitiveArray::from_option_iter(vec![Some(1), None, Some(2), None, Some(3)])
            .into_array();
        let wanted = [true, false, true, false, true];

        let mask = input.clone().apply(&is_not_null(root())).unwrap();

        assert_eq!(mask.len(), input.len());
        assert_eq!(mask.dtype(), &DType::Bool(Nullability::NonNullable));

        for (row, want) in wanted.iter().copied().enumerate() {
            let mut ctx = LEGACY_SESSION.create_execution_ctx();
            let got = mask.execute_scalar(row, &mut ctx).unwrap();
            assert_eq!(got, Scalar::bool(want, Nullability::NonNullable));
        }
    }

    /// An array built from a plain buffer (no nulls) yields `true` for every row.
    #[test]
    fn evaluate_all_true() {
        let input = buffer![1, 2, 3, 4, 5].into_array();

        let mask = input.clone().apply(&is_not_null(root())).unwrap();

        assert_eq!(mask.len(), input.len());
        for row in 0..mask.len() {
            let mut ctx = LEGACY_SESSION.create_execution_ctx();
            let got = mask.execute_scalar(row, &mut ctx).unwrap();
            assert_eq!(got, Scalar::bool(true, Nullability::NonNullable));
        }
    }

    /// An array consisting solely of nulls yields `false` for every row.
    #[test]
    fn evaluate_all_false() {
        let input = PrimitiveArray::from_option_iter(vec![None::<i32>, None, None, None, None])
            .into_array();

        let mask = input.clone().apply(&is_not_null(root())).unwrap();

        assert_eq!(mask.len(), input.len());
        for row in 0..mask.len() {
            let mut ctx = LEGACY_SESSION.create_execution_ctx();
            let got = mask.execute_scalar(row, &mut ctx).unwrap();
            assert_eq!(got, Scalar::bool(false, Nullability::NonNullable));
        }
    }

    /// `is_not_null` applied to a struct field reflects that field's nulls.
    #[test]
    fn evaluate_struct() {
        let field_a = PrimitiveArray::from_option_iter(vec![Some(1), None, Some(2), None, Some(3)])
            .into_array();
        let input = StructArray::from_fields(&[("a", field_a)])
            .unwrap()
            .into_array();
        let wanted = [true, false, true, false, true];

        let field_is_set = is_not_null(get_item("a", root()));
        let mask = input.clone().apply(&field_is_set).unwrap();

        assert_eq!(mask.len(), input.len());
        assert_eq!(mask.dtype(), &DType::Bool(Nullability::NonNullable));

        for (row, want) in wanted.iter().copied().enumerate() {
            let mut ctx = LEGACY_SESSION.create_execution_ctx();
            let got = mask.execute_scalar(row, &mut ctx).unwrap();
            assert_eq!(got, Scalar::bool(want, Nullability::NonNullable));
        }
    }

    /// The display form is `is_not_null(<child>)`.
    #[test]
    fn test_display() {
        let on_field = is_not_null(get_item("name", root()));
        assert_eq!(on_field.to_string(), "is_not_null($.name)");

        let on_root = is_not_null(root());
        assert_eq!(on_root.to_string(), "is_not_null($)");
    }

    /// The expression's signature reports it as null-sensitive.
    #[test]
    fn test_is_not_null_sensitive() {
        let signature = is_not_null(col("a")).signature();
        assert!(signature.is_null_sensitive());
    }

    /// The pruning expression is `a_is_constant == true AND a_null_count > 0`,
    /// and both stats are recorded as required for field `a`.
    #[test]
    fn test_is_not_null_falsification() {
        let expr = is_not_null(col("a"));

        let available_stats = FieldPathSet::from_iter([
            FieldPath::from_iter([Field::Name("a".into()), Field::Name("null_count".into())]),
            FieldPath::from_iter([Field::Name("a".into()), Field::Name("is_constant".into())]),
        ]);
        let (pruning_expr, st) = checked_pruning_expr(&expr, &available_stats).unwrap();

        let wanted_expr = and(
            eq(col("a_is_constant"), lit(true)),
            gt(col("a_null_count"), lit(0u64)),
        );
        assert_eq!(&pruning_expr, &wanted_expr);

        let wanted_stats = HashMap::from_iter([(
            FieldPath::from_name("a"),
            HashSet::from([Stat::NullCount, Stat::IsConstant]),
        )]);
        assert_eq!(st.map(), &wanted_stats);
    }
}