Skip to main content

datafusion_physical_expr/expressions/
is_null.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

//! IS NULL expression
19
20use crate::PhysicalExpr;
21use arrow::{
22    datatypes::{DataType, Schema},
23    record_batch::RecordBatch,
24};
25use datafusion_common::Result;
26use datafusion_common::ScalarValue;
27use datafusion_expr::ColumnarValue;
28use std::hash::Hash;
29use std::sync::Arc;
30
31/// IS NULL expression
32#[derive(Debug, Eq)]
33pub struct IsNullExpr {
34    /// Input expression
35    arg: Arc<dyn PhysicalExpr>,
36}
37
38// Manually derive PartialEq and Hash to work around https://github.com/rust-lang/rust/issues/78808
39impl PartialEq for IsNullExpr {
40    fn eq(&self, other: &Self) -> bool {
41        self.arg.eq(&other.arg)
42    }
43}
44
45impl Hash for IsNullExpr {
46    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
47        self.arg.hash(state);
48    }
49}
50
51impl IsNullExpr {
52    /// Create new not expression
53    pub fn new(arg: Arc<dyn PhysicalExpr>) -> Self {
54        Self { arg }
55    }
56
57    /// Get the input expression
58    pub fn arg(&self) -> &Arc<dyn PhysicalExpr> {
59        &self.arg
60    }
61}
62
63impl std::fmt::Display for IsNullExpr {
64    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
65        write!(f, "{} IS NULL", self.arg)
66    }
67}
68
69impl PhysicalExpr for IsNullExpr {
70    fn data_type(&self, _input_schema: &Schema) -> Result<DataType> {
71        Ok(DataType::Boolean)
72    }
73
74    fn nullable(&self, _input_schema: &Schema) -> Result<bool> {
75        Ok(false)
76    }
77
78    fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
79        let arg = self.arg.evaluate(batch)?;
80        match arg {
81            ColumnarValue::Array(array) => Ok(ColumnarValue::Array(Arc::new(
82                arrow::compute::is_null(&array)?,
83            ))),
84            ColumnarValue::Scalar(scalar) => Ok(ColumnarValue::Scalar(
85                ScalarValue::Boolean(Some(scalar.is_null())),
86            )),
87        }
88    }
89
90    fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {
91        vec![&self.arg]
92    }
93
94    fn with_new_children(
95        self: Arc<Self>,
96        children: Vec<Arc<dyn PhysicalExpr>>,
97    ) -> Result<Arc<dyn PhysicalExpr>> {
98        Ok(Arc::new(IsNullExpr::new(Arc::clone(&children[0]))))
99    }
100
101    fn fmt_sql(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
102        self.arg.fmt_sql(f)?;
103        write!(f, " IS NULL")
104    }
105}
106
107/// Create an IS NULL expression
108pub fn is_null(arg: Arc<dyn PhysicalExpr>) -> Result<Arc<dyn PhysicalExpr>> {
109    Ok(Arc::new(IsNullExpr::new(arg)))
110}
111
#[cfg(test)]
mod tests {
    use super::*;
    use crate::expressions::col;
    use arrow::array::{
        Array, BooleanArray, Float64Array, Int32Array, StringArray, UnionArray,
    };
    use arrow::buffer::ScalarBuffer;
    use arrow::datatypes::*;
    use datafusion_common::cast::as_boolean_array;
    use datafusion_physical_expr_common::physical_expr::fmt_sql;

    /// `a IS NULL` over a nullable Utf8 column marks only the null row.
    #[test]
    fn is_null_op() -> Result<()> {
        let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]);
        let column = StringArray::from(vec![Some("foo"), None]);

        // expression: "a is null"
        let expr = is_null(col("a", &schema)?).unwrap();
        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(column)])?;

        let evaluated = expr.evaluate(&batch)?;
        let as_array = evaluated
            .into_array(batch.num_rows())
            .expect("Failed to convert to array");
        let actual =
            as_boolean_array(&as_array).expect("failed to downcast to BooleanArray");

        assert_eq!(&BooleanArray::from(vec![false, true]), actual);

        Ok(())
    }

    /// Union layout shared by the union tests: type id 0 => "A" (Int32),
    /// 1 => "B" (Float64), 2 => "C" (Utf8); every field is nullable.
    fn union_fields() -> UnionFields {
        let specs = [
            ("A", DataType::Int32),
            ("B", DataType::Float64),
            ("C", DataType::Utf8),
        ];
        (0_i8..)
            .zip(specs)
            .map(|(type_id, (name, data_type))| {
                (type_id, Arc::new(Field::new(name, data_type, true)))
            })
            .collect()
    }

    /// The arrow `is_null` kernel on a sparse union (no offsets buffer).
    #[test]
    fn sparse_union_is_null() {
        // union of [{A=1}, {A=}, {B=1.1}, {B=1.2}, {B=}, {C=}, {C="a"}]
        let ints = Int32Array::from(vec![Some(1), None, None, None, None, None, None]);
        let floats =
            Float64Array::from(vec![None, None, Some(1.1), Some(1.2), None, None, None]);
        let strings =
            StringArray::from(vec![None, None, None, None, None, None, Some("a")]);
        let type_ids: ScalarBuffer<i8> = [0, 0, 1, 1, 1, 2, 2].into_iter().collect();

        let children: Vec<Arc<dyn Array>> =
            vec![Arc::new(ints), Arc::new(floats), Arc::new(strings)];

        let union = UnionArray::try_new(union_fields(), type_ids, None, children).unwrap();

        let actual = arrow::compute::is_null(&union).unwrap();

        assert_eq!(
            BooleanArray::from(vec![false, true, false, false, true, true, false]),
            actual
        );
    }

    /// The arrow `is_null` kernel on a dense union (with an offsets buffer).
    #[test]
    fn dense_union_is_null() {
        // union of [{A=1}, {A=}, {B=3.2}, {B=}, {C="a"}, {C=}]
        let ints = Int32Array::from(vec![Some(1), None]);
        let floats = Float64Array::from(vec![Some(3.2), None]);
        let strings = StringArray::from(vec![Some("a"), None]);
        let type_ids: ScalarBuffer<i8> = [0, 0, 1, 1, 2, 2].into_iter().collect();
        let offsets: ScalarBuffer<i32> = [0, 1, 0, 1, 0, 1].into_iter().collect();

        let children: Vec<Arc<dyn Array>> =
            vec![Arc::new(ints), Arc::new(floats), Arc::new(strings)];

        let union =
            UnionArray::try_new(union_fields(), type_ids, Some(offsets), children)
                .unwrap();

        let actual = arrow::compute::is_null(&union).unwrap();

        assert_eq!(
            BooleanArray::from(vec![false, true, false, true, false, true]),
            actual
        );
    }

    /// `Display` includes the column index; the SQL rendering does not.
    #[test]
    fn test_fmt_sql() -> Result<()> {
        let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]);

        // expression: "a is null"
        let expr = is_null(col("a", &schema)?).unwrap();

        let displayed = expr.to_string();
        assert_eq!(displayed, "a@0 IS NULL");

        let as_sql = fmt_sql(expr.as_ref()).to_string();
        assert_eq!(as_sql, "a IS NULL");

        Ok(())
    }
}