datafusion_physical_expr/expressions/
is_null.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! IS NULL expression
19
20use crate::PhysicalExpr;
21use arrow::datatypes::FieldRef;
22use arrow::{
23    datatypes::{DataType, Schema},
24    record_batch::RecordBatch,
25};
26use datafusion_common::Result;
27use datafusion_common::ScalarValue;
28use datafusion_expr::ColumnarValue;
29use std::hash::Hash;
30use std::{any::Any, sync::Arc};
31
32/// IS NULL expression
33#[derive(Debug, Eq)]
34pub struct IsNullExpr {
35    /// Input expression
36    arg: Arc<dyn PhysicalExpr>,
37}
38
39// Manually derive PartialEq and Hash to work around https://github.com/rust-lang/rust/issues/78808
40impl PartialEq for IsNullExpr {
41    fn eq(&self, other: &Self) -> bool {
42        self.arg.eq(&other.arg)
43    }
44}
45
46impl Hash for IsNullExpr {
47    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
48        self.arg.hash(state);
49    }
50}
51
52impl IsNullExpr {
53    /// Create new not expression
54    pub fn new(arg: Arc<dyn PhysicalExpr>) -> Self {
55        Self { arg }
56    }
57
58    /// Get the input expression
59    pub fn arg(&self) -> &Arc<dyn PhysicalExpr> {
60        &self.arg
61    }
62}
63
64impl std::fmt::Display for IsNullExpr {
65    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
66        write!(f, "{} IS NULL", self.arg)
67    }
68}
69
70impl PhysicalExpr for IsNullExpr {
71    /// Return a reference to Any that can be used for downcasting
72    fn as_any(&self) -> &dyn Any {
73        self
74    }
75
76    fn data_type(&self, _input_schema: &Schema) -> Result<DataType> {
77        Ok(DataType::Boolean)
78    }
79
80    fn nullable(&self, _input_schema: &Schema) -> Result<bool> {
81        Ok(false)
82    }
83
84    fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
85        let arg = self.arg.evaluate(batch)?;
86        match arg {
87            ColumnarValue::Array(array) => Ok(ColumnarValue::Array(Arc::new(
88                arrow::compute::is_null(&array)?,
89            ))),
90            ColumnarValue::Scalar(scalar) => Ok(ColumnarValue::Scalar(
91                ScalarValue::Boolean(Some(scalar.is_null())),
92            )),
93        }
94    }
95
96    fn return_field(&self, input_schema: &Schema) -> Result<FieldRef> {
97        self.arg.return_field(input_schema)
98    }
99
100    fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {
101        vec![&self.arg]
102    }
103
104    fn with_new_children(
105        self: Arc<Self>,
106        children: Vec<Arc<dyn PhysicalExpr>>,
107    ) -> Result<Arc<dyn PhysicalExpr>> {
108        Ok(Arc::new(IsNullExpr::new(Arc::clone(&children[0]))))
109    }
110
111    fn fmt_sql(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
112        self.arg.fmt_sql(f)?;
113        write!(f, " IS NULL")
114    }
115}
116
117/// Create an IS NULL expression
118pub fn is_null(arg: Arc<dyn PhysicalExpr>) -> Result<Arc<dyn PhysicalExpr>> {
119    Ok(Arc::new(IsNullExpr::new(arg)))
120}
121
#[cfg(test)]
mod tests {
    use super::*;
    use crate::expressions::col;
    use arrow::array::{
        Array, BooleanArray, Float64Array, Int32Array, StringArray, UnionArray,
    };
    use arrow::buffer::ScalarBuffer;
    use arrow::datatypes::*;
    use datafusion_common::cast::as_boolean_array;
    use datafusion_physical_expr_common::physical_expr::fmt_sql;

    /// Schema with a single nullable Utf8 column named `a`.
    fn utf8_schema() -> Schema {
        Schema::new(vec![Field::new("a", DataType::Utf8, true)])
    }

    /// Evaluating `a IS NULL` over a string column flags exactly the null row.
    #[test]
    fn is_null_op() -> Result<()> {
        let schema = utf8_schema();

        // expression: "a is null"
        let expr = is_null(col("a", &schema)?).unwrap();

        let column: Arc<dyn Array> =
            Arc::new(StringArray::from(vec![Some("foo"), None]));
        let batch = RecordBatch::try_new(Arc::new(schema), vec![column])?;

        let evaluated = expr.evaluate(&batch)?;
        let as_array = evaluated
            .into_array(batch.num_rows())
            .expect("Failed to convert to array");
        let actual =
            as_boolean_array(&as_array).expect("failed to downcast to BooleanArray");

        let expected = BooleanArray::from(vec![false, true]);
        assert_eq!(&expected, actual);

        Ok(())
    }

    /// Union layout shared by the union tests: A: Int32, B: Float64, C: Utf8,
    /// with type ids 0, 1 and 2 respectively.
    fn union_fields() -> UnionFields {
        let members = [
            (0, "A", DataType::Int32),
            (1, "B", DataType::Float64),
            (2, "C", DataType::Utf8),
        ];
        members
            .into_iter()
            .map(|(type_id, name, data_type)| {
                (type_id, Arc::new(Field::new(name, data_type, true)))
            })
            .collect()
    }

    /// `is_null` on a sparse union reports the nullness of the selected child.
    #[test]
    fn sparse_union_is_null() {
        // union of [{A=1}, {A=}, {B=1.1}, {B=1.2}, {B=}, {C=}, {C="a"}]
        let ints = Int32Array::from(vec![Some(1), None, None, None, None, None, None]);
        let floats =
            Float64Array::from(vec![None, None, Some(1.1), Some(1.2), None, None, None]);
        let strings =
            StringArray::from(vec![None, None, None, None, None, None, Some("a")]);
        let type_ids: ScalarBuffer<i8> = [0, 0, 1, 1, 1, 2, 2].into_iter().collect();

        let children = vec![
            Arc::new(ints) as Arc<dyn Array>,
            Arc::new(floats),
            Arc::new(strings),
        ];

        let union =
            UnionArray::try_new(union_fields(), type_ids, None, children).unwrap();

        let actual = arrow::compute::is_null(&union).unwrap();

        let expected =
            BooleanArray::from(vec![false, true, false, false, true, true, false]);
        assert_eq!(&expected, &actual);
    }

    /// `is_null` on a dense union follows the offsets into each child.
    #[test]
    fn dense_union_is_null() {
        // union of [{A=1}, {A=}, {B=3.2}, {B=}, {C="a"}, {C=}]
        let ints = Int32Array::from(vec![Some(1), None]);
        let floats = Float64Array::from(vec![Some(3.2), None]);
        let strings = StringArray::from(vec![Some("a"), None]);
        let type_ids: ScalarBuffer<i8> = [0, 0, 1, 1, 2, 2].into_iter().collect();
        let offsets: ScalarBuffer<i32> = [0, 1, 0, 1, 0, 1].into_iter().collect();

        let children = vec![
            Arc::new(ints) as Arc<dyn Array>,
            Arc::new(floats),
            Arc::new(strings),
        ];

        let union =
            UnionArray::try_new(union_fields(), type_ids, Some(offsets), children)
                .unwrap();

        let actual = arrow::compute::is_null(&union).unwrap();

        let expected = BooleanArray::from(vec![false, true, false, true, false, true]);
        assert_eq!(&expected, &actual);
    }

    /// `Display` includes the column index; the SQL rendering does not.
    #[test]
    fn test_fmt_sql() -> Result<()> {
        let schema = utf8_schema();

        // expression: "a is null"
        let expr = is_null(col("a", &schema)?).unwrap();

        let displayed = expr.to_string();
        assert_eq!(displayed, "a@0 IS NULL");

        let rendered_sql = fmt_sql(expr.as_ref()).to_string();
        assert_eq!(rendered_sql, "a IS NULL");

        Ok(())
    }
}