datafusion_physical_expr/expressions/
is_not_null.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! IS NOT NULL expression
19
20use crate::PhysicalExpr;
21use arrow::{
22    datatypes::{DataType, Schema},
23    record_batch::RecordBatch,
24};
25use datafusion_common::Result;
26use datafusion_common::ScalarValue;
27use datafusion_expr::ColumnarValue;
28use std::hash::Hash;
29use std::{any::Any, sync::Arc};
30
31/// IS NOT NULL expression
32#[derive(Debug, Eq)]
33pub struct IsNotNullExpr {
34    /// The input expression
35    arg: Arc<dyn PhysicalExpr>,
36}
37
38// Manually derive PartialEq and Hash to work around https://github.com/rust-lang/rust/issues/78808
39impl PartialEq for IsNotNullExpr {
40    fn eq(&self, other: &Self) -> bool {
41        self.arg.eq(&other.arg)
42    }
43}
44
45impl Hash for IsNotNullExpr {
46    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
47        self.arg.hash(state);
48    }
49}
50
51impl IsNotNullExpr {
52    /// Create new not expression
53    pub fn new(arg: Arc<dyn PhysicalExpr>) -> Self {
54        Self { arg }
55    }
56
57    /// Get the input expression
58    pub fn arg(&self) -> &Arc<dyn PhysicalExpr> {
59        &self.arg
60    }
61}
62
63impl std::fmt::Display for IsNotNullExpr {
64    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
65        write!(f, "{} IS NOT NULL", self.arg)
66    }
67}
68
69impl PhysicalExpr for IsNotNullExpr {
70    /// Return a reference to Any that can be used for downcasting
71    fn as_any(&self) -> &dyn Any {
72        self
73    }
74
75    fn data_type(&self, _input_schema: &Schema) -> Result<DataType> {
76        Ok(DataType::Boolean)
77    }
78
79    fn nullable(&self, _input_schema: &Schema) -> Result<bool> {
80        Ok(false)
81    }
82
83    fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
84        let arg = self.arg.evaluate(batch)?;
85        match arg {
86            ColumnarValue::Array(array) => {
87                let is_not_null = arrow::compute::is_not_null(&array)?;
88                Ok(ColumnarValue::Array(Arc::new(is_not_null)))
89            }
90            ColumnarValue::Scalar(scalar) => Ok(ColumnarValue::Scalar(
91                ScalarValue::Boolean(Some(!scalar.is_null())),
92            )),
93        }
94    }
95
96    fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {
97        vec![&self.arg]
98    }
99
100    fn with_new_children(
101        self: Arc<Self>,
102        children: Vec<Arc<dyn PhysicalExpr>>,
103    ) -> Result<Arc<dyn PhysicalExpr>> {
104        Ok(Arc::new(IsNotNullExpr::new(Arc::clone(&children[0]))))
105    }
106
107    fn fmt_sql(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
108        self.arg.fmt_sql(f)?;
109        write!(f, " IS NOT NULL")
110    }
111}
112
113/// Create an IS NOT NULL expression
114pub fn is_not_null(arg: Arc<dyn PhysicalExpr>) -> Result<Arc<dyn PhysicalExpr>> {
115    Ok(Arc::new(IsNotNullExpr::new(arg)))
116}
117
#[cfg(test)]
mod tests {
    use super::*;
    use crate::expressions::col;
    use arrow::array::{
        Array, BooleanArray, Float64Array, Int32Array, StringArray, UnionArray,
    };
    use arrow::buffer::ScalarBuffer;
    use arrow::datatypes::*;
    use datafusion_common::cast::as_boolean_array;
    use datafusion_physical_expr_common::physical_expr::fmt_sql;

    /// Fields of a two-variant union: nullable `A: Int32` with type id 0 and
    /// nullable `B: Float64` with type id 1.
    fn int_float_union_fields() -> UnionFields {
        [
            (0, Arc::new(Field::new("A", DataType::Int32, true))),
            (1, Arc::new(Field::new("B", DataType::Float64, true))),
        ]
        .into_iter()
        .collect()
    }

    /// Nullable sparse-union field named `my_union` built from `fields`.
    fn my_union_field(fields: UnionFields) -> Field {
        Field::new("my_union", DataType::Union(fields, UnionMode::Sparse), true)
    }

    #[test]
    fn is_not_null_op() -> Result<()> {
        let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]);
        let expr = is_not_null(col("a", &schema)?).unwrap();
        let values = StringArray::from(vec![Some("foo"), None]);
        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(values)])?;

        // Evaluate "a IS NOT NULL" over the batch
        let evaluated = expr.evaluate(&batch)?;
        let array = evaluated
            .into_array(batch.num_rows())
            .expect("Failed to convert to array");
        let actual =
            as_boolean_array(&array).expect("failed to downcast to BooleanArray");

        let expected = BooleanArray::from(vec![true, false]);
        assert_eq!(&expected, actual);

        Ok(())
    }

    #[test]
    fn union_is_not_null_op() {
        // Sparse union holding [{A=1}, {A=}, {B=1.1}, {B=1.2}, {B=}]
        let a_values = Int32Array::from(vec![Some(1), None, None, None, None]);
        let b_values = Float64Array::from(vec![None, None, Some(1.1), Some(1.2), None]);
        let type_ids: ScalarBuffer<i8> = [0, 0, 1, 1, 1].into_iter().collect();
        let children = vec![Arc::new(a_values) as Arc<dyn Array>, Arc::new(b_values)];

        let fields = int_float_union_fields();
        let union_array =
            UnionArray::try_new(fields.clone(), type_ids, None, children).unwrap();

        let schema = Schema::new(vec![my_union_field(fields)]);
        let expr = is_not_null(col("my_union", &schema).unwrap()).unwrap();
        let batch =
            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(union_array)]).unwrap();

        // Evaluate "my_union IS NOT NULL" over the batch
        let evaluated = expr.evaluate(&batch).unwrap();
        let array = evaluated
            .into_array(batch.num_rows())
            .expect("Failed to convert to array");
        let actual = as_boolean_array(&array).unwrap();

        let expected = BooleanArray::from(vec![true, false, true, true, false]);
        assert_eq!(&expected, actual);
    }

    #[test]
    fn test_fmt_sql() -> Result<()> {
        let schema = Schema::new(vec![my_union_field(int_float_union_fields())]);
        let expr = is_not_null(col("my_union", &schema).unwrap()).unwrap();

        // `Display` includes the column index; the SQL form does not.
        let display_string = expr.to_string();
        assert_eq!(display_string, "my_union@0 IS NOT NULL");

        let sql_string = fmt_sql(expr.as_ref()).to_string();
        assert_eq!(sql_string, "my_union IS NOT NULL");

        Ok(())
    }
}