Skip to main content

vortex_array/scalar_fn/fns/
is_not_null.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Formatter;
5
6use vortex_array::scalar_fn::internal::row_count::RowCount;
7use vortex_error::VortexResult;
8use vortex_session::VortexSession;
9use vortex_session::registry::CachedId;
10
11use crate::ArrayRef;
12use crate::ExecutionCtx;
13use crate::IntoArray;
14use crate::arrays::ConstantArray;
15use crate::dtype::DType;
16use crate::dtype::Nullability;
17use crate::expr::Expression;
18use crate::expr::StatsCatalog;
19use crate::expr::eq;
20use crate::expr::stats::Stat;
21use crate::scalar_fn::Arity;
22use crate::scalar_fn::ChildName;
23use crate::scalar_fn::EmptyOptions;
24use crate::scalar_fn::ExecutionArgs;
25use crate::scalar_fn::ScalarFnId;
26use crate::scalar_fn::ScalarFnVTable;
27use crate::scalar_fn::ScalarFnVTableExt;
28use crate::validity::Validity;
29
/// Scalar function marker for the `vortex.is_not_null` expression.
///
/// Takes exactly one input and yields a non-nullable boolean per row that is
/// `true` where the input is valid (non-null) and `false` where it is null.
#[derive(Clone)]
pub struct IsNotNull;
33
34impl ScalarFnVTable for IsNotNull {
35    type Options = EmptyOptions;
36
37    fn id(&self) -> ScalarFnId {
38        static ID: CachedId = CachedId::new("vortex.is_not_null");
39        *ID
40    }
41
42    fn serialize(&self, _instance: &Self::Options) -> VortexResult<Option<Vec<u8>>> {
43        Ok(Some(vec![]))
44    }
45
46    fn deserialize(
47        &self,
48        _metadata: &[u8],
49        _session: &VortexSession,
50    ) -> VortexResult<Self::Options> {
51        Ok(EmptyOptions)
52    }
53
54    fn arity(&self, _options: &Self::Options) -> Arity {
55        Arity::Exact(1)
56    }
57
58    fn child_name(&self, _instance: &Self::Options, child_idx: usize) -> ChildName {
59        match child_idx {
60            0 => ChildName::from("input"),
61            _ => unreachable!("Invalid child index {} for IsNotNull expression", child_idx),
62        }
63    }
64
65    fn fmt_sql(
66        &self,
67        _options: &Self::Options,
68        expr: &Expression,
69        f: &mut Formatter<'_>,
70    ) -> std::fmt::Result {
71        write!(f, "is_not_null(")?;
72        expr.child(0).fmt_sql(f)?;
73        write!(f, ")")
74    }
75
76    fn return_dtype(&self, _options: &Self::Options, _arg_dtypes: &[DType]) -> VortexResult<DType> {
77        Ok(DType::Bool(Nullability::NonNullable))
78    }
79
80    fn execute(
81        &self,
82        _data: &Self::Options,
83        args: &dyn ExecutionArgs,
84        _ctx: &mut ExecutionCtx,
85    ) -> VortexResult<ArrayRef> {
86        let child = args.get(0)?;
87        match child.validity()? {
88            Validity::NonNullable | Validity::AllValid => {
89                Ok(ConstantArray::new(true, args.row_count()).into_array())
90            }
91            Validity::AllInvalid => Ok(ConstantArray::new(false, args.row_count()).into_array()),
92            Validity::Array(a) => Ok(a),
93        }
94    }
95
96    fn is_null_sensitive(&self, _instance: &Self::Options) -> bool {
97        true
98    }
99
100    fn is_fallible(&self, _instance: &Self::Options) -> bool {
101        false
102    }
103
104    fn stat_falsification(
105        &self,
106        _options: &Self::Options,
107        expr: &Expression,
108        catalog: &dyn StatsCatalog,
109    ) -> Option<Expression> {
110        // is_not_null is falsified when ALL values are null, i.e. null_count == row_count.
111        let child = expr.child(0);
112        let null_count_expr = child.stat_expression(Stat::NullCount, catalog)?;
113        Some(eq(null_count_expr, RowCount.new_expr(EmptyOptions, [])))
114    }
115}
116
#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;
    use vortex_error::VortexExpect as _;
    use vortex_utils::aliases::hash_map::HashMap;
    use vortex_utils::aliases::hash_set::HashSet;

    use crate::IntoArray;
    use crate::LEGACY_SESSION;
    use crate::VortexSessionExecute;
    use crate::arrays::PrimitiveArray;
    use crate::arrays::StructArray;
    use crate::dtype::DType;
    use crate::dtype::Field;
    use crate::dtype::FieldPath;
    use crate::dtype::FieldPathSet;
    use crate::dtype::Nullability;
    use crate::expr::col;
    use crate::expr::eq;
    use crate::expr::get_item;
    use crate::expr::is_not_null;
    use crate::expr::pruning::checked_pruning_expr;
    use crate::expr::root;
    use crate::expr::stats::Stat;
    use crate::expr::test_harness;
    use crate::scalar::Scalar;
    use crate::scalar_fn::EmptyOptions;
    use crate::scalar_fn::internal::row_count::RowCount;
    use crate::scalar_fn::vtable::ScalarFnVTableExt;

    /// The expression always resolves to a non-nullable boolean dtype.
    #[test]
    fn dtype() {
        let scope = test_harness::struct_dtype();
        let resolved = is_not_null(root()).return_dtype(&scope).unwrap();
        assert_eq!(resolved, DType::Bool(Nullability::NonNullable));
    }

    /// Swapping in a new single child must succeed.
    #[test]
    fn replace_children() {
        let original = is_not_null(root());
        let replaced = original.with_children([root()]);
        replaced.vortex_expect("operation should succeed in test");
    }

    /// Mixed nulls and values produce a matching per-row boolean mask.
    #[test]
    fn evaluate_mask() {
        let input = PrimitiveArray::from_option_iter(vec![Some(1), None, Some(2), None, Some(3)])
            .into_array();
        let input_len = input.len();
        let expected = [true, false, true, false, true];

        let mask = input.apply(&is_not_null(root())).unwrap();

        assert_eq!(mask.len(), input_len);
        assert_eq!(mask.dtype(), &DType::Bool(Nullability::NonNullable));

        for (row, want) in expected.iter().copied().enumerate() {
            let mut ctx = LEGACY_SESSION.create_execution_ctx();
            let got = mask.execute_scalar(row, &mut ctx).unwrap();
            assert_eq!(got, Scalar::bool(want, Nullability::NonNullable));
        }
    }

    /// A non-nullable input is non-null everywhere.
    #[test]
    fn evaluate_all_true() {
        let input = buffer![1, 2, 3, 4, 5].into_array();
        let input_len = input.len();

        let mask = input.apply(&is_not_null(root())).unwrap();

        assert_eq!(mask.len(), input_len);
        let want = Scalar::bool(true, Nullability::NonNullable);
        for row in 0..mask.len() {
            let mut ctx = LEGACY_SESSION.create_execution_ctx();
            let got = mask.execute_scalar(row, &mut ctx).unwrap();
            assert_eq!(got, want);
        }
    }

    /// An all-null input is non-null nowhere.
    #[test]
    fn evaluate_all_false() {
        let input = PrimitiveArray::from_option_iter(vec![None::<i32>, None, None, None, None])
            .into_array();
        let input_len = input.len();

        let mask = input.apply(&is_not_null(root())).unwrap();

        assert_eq!(mask.len(), input_len);
        let want = Scalar::bool(false, Nullability::NonNullable);
        for row in 0..mask.len() {
            let mut ctx = LEGACY_SESSION.create_execution_ctx();
            let got = mask.execute_scalar(row, &mut ctx).unwrap();
            assert_eq!(got, want);
        }
    }

    /// The check also works on a field projected out of a struct array.
    #[test]
    fn evaluate_struct() {
        let field_a = PrimitiveArray::from_option_iter(vec![Some(1), None, Some(2), None, Some(3)])
            .into_array();
        let input = StructArray::from_fields(&[("a", field_a)])
            .unwrap()
            .into_array();
        let input_len = input.len();
        let expected = [true, false, true, false, true];

        let mask = input.apply(&is_not_null(get_item("a", root()))).unwrap();

        assert_eq!(mask.len(), input_len);
        assert_eq!(mask.dtype(), &DType::Bool(Nullability::NonNullable));

        for (row, want) in expected.iter().copied().enumerate() {
            let mut ctx = LEGACY_SESSION.create_execution_ctx();
            let got = mask.execute_scalar(row, &mut ctx).unwrap();
            assert_eq!(got, Scalar::bool(want, Nullability::NonNullable));
        }
    }

    /// Display output wraps the child's rendering in `is_not_null(...)`.
    #[test]
    fn test_display() {
        let cases = [
            (is_not_null(get_item("name", root())), "is_not_null($.name)"),
            (is_not_null(root()), "is_not_null($)"),
        ];

        for (expr, rendered) in cases {
            assert_eq!(expr.to_string(), rendered);
        }
    }

    /// The expression's signature reports null sensitivity.
    #[test]
    fn test_is_not_null_sensitive() {
        let expr = is_not_null(col("a"));
        assert!(expr.signature().is_null_sensitive());
    }

    /// Pruning rewrites `is_not_null(a)` into `a_null_count == row_count` and
    /// records that only the `NullCount` stat of `a` is required.
    #[test]
    fn test_is_not_null_falsification() {
        let expr = is_not_null(col("a"));
        let null_count_path = FieldPath::from_iter([
            Field::Name("a".into()),
            Field::Name("null_count".into()),
        ]);
        let available_stats = FieldPathSet::from_iter([null_count_path]);

        let (pruning_expr, required_stats) =
            checked_pruning_expr(&expr, &available_stats).unwrap();

        let expected_expr = eq(col("a_null_count"), RowCount.new_expr(EmptyOptions, []));
        assert_eq!(&pruning_expr, &expected_expr);
        assert_eq!(
            required_stats.map(),
            &HashMap::from_iter([(FieldPath::from_name("a"), HashSet::from([Stat::NullCount]))])
        );
    }
}