datafusion_functions/core/
nullif.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::datatypes::DataType;
19use datafusion_expr::{ColumnarValue, Documentation, ScalarFunctionArgs};
20
21use arrow::compute::kernels::cmp::eq;
22use arrow::compute::kernels::nullif::nullif;
23use datafusion_common::{utils::take_function_args, Result, ScalarValue};
24use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
25use datafusion_macros::user_doc;
26use std::any::Any;
27
28#[user_doc(
29    doc_section(label = "Conditional Functions"),
30    description = "Returns _null_ if _expression1_ equals _expression2_; otherwise it returns _expression1_.
31This can be used to perform the inverse operation of [`coalesce`](#coalesce).",
32    syntax_example = "nullif(expression1, expression2)",
33    sql_example = r#"```sql
34> select nullif('datafusion', 'data');
35+-----------------------------------------+
36| nullif(Utf8("datafusion"),Utf8("data")) |
37+-----------------------------------------+
38| datafusion                              |
39+-----------------------------------------+
40> select nullif('datafusion', 'datafusion');
41+-----------------------------------------------+
42| nullif(Utf8("datafusion"),Utf8("datafusion")) |
43+-----------------------------------------------+
44|                                               |
45+-----------------------------------------------+
46```"#,
47    argument(
48        name = "expression1",
49        description = "Expression to compare and return if equal to expression2. Can be a constant, column, or function, and any combination of operators."
50    ),
51    argument(
52        name = "expression2",
53        description = "Expression to compare to expression1. Can be a constant, column, or function, and any combination of operators."
54    )
55)]
56#[derive(Debug, PartialEq, Eq, Hash)]
57pub struct NullIfFunc {
58    signature: Signature,
59}
60
61impl Default for NullIfFunc {
62    fn default() -> Self {
63        Self::new()
64    }
65}
66
67impl NullIfFunc {
68    pub fn new() -> Self {
69        Self {
70            // Documentation mentioned in Postgres,
71            // The result has the same type as the first argument — but there is a subtlety.
72            // What is actually returned is the first argument of the implied = operator,
73            // and in some cases that will have been promoted to match the second argument's type.
74            // For example, NULLIF(1, 2.2) yields numeric, because there is no integer = numeric operator, only numeric = numeric
75            //
76            // We don't strictly follow Postgres or DuckDB for **simplicity**.
77            // In this function, we will coerce arguments to the same data type for comparison need. Unlike DuckDB
78            // we don't return the **original** first argument type but return the final coerced type.
79            //
80            // In Postgres, nullif('2', 2) returns Null but nullif('2::varchar', 2) returns error.
81            // While in DuckDB both query returns Null. We follow DuckDB in this case since I think they are equivalent thing and should
82            // have the same result as well.
83            signature: Signature::comparable(2, Volatility::Immutable),
84        }
85    }
86}
87
88impl ScalarUDFImpl for NullIfFunc {
89    fn as_any(&self) -> &dyn Any {
90        self
91    }
92    fn name(&self) -> &str {
93        "nullif"
94    }
95
96    fn signature(&self) -> &Signature {
97        &self.signature
98    }
99
100    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
101        Ok(arg_types[0].to_owned())
102    }
103
104    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
105        nullif_func(&args.args)
106    }
107
108    fn documentation(&self) -> Option<&Documentation> {
109        self.doc()
110    }
111}
112
113/// Implements NULLIF(expr1, expr2)
114/// Args: 0 - left expr is any array
115///       1 - if the left is equal to this expr2, then the result is NULL, otherwise left value is passed.
116fn nullif_func(args: &[ColumnarValue]) -> Result<ColumnarValue> {
117    let [lhs, rhs] = take_function_args("nullif", args)?;
118
119    match (lhs, rhs) {
120        (ColumnarValue::Array(lhs), ColumnarValue::Scalar(rhs)) => {
121            let rhs = rhs.to_scalar()?;
122            let array = nullif(lhs, &eq(&lhs, &rhs)?)?;
123
124            Ok(ColumnarValue::Array(array))
125        }
126        (ColumnarValue::Array(lhs), ColumnarValue::Array(rhs)) => {
127            let array = nullif(lhs, &eq(&lhs, &rhs)?)?;
128            Ok(ColumnarValue::Array(array))
129        }
130        (ColumnarValue::Scalar(lhs), ColumnarValue::Array(rhs)) => {
131            let lhs_s = lhs.to_scalar()?;
132            let lhs_a = lhs.to_array_of_size(rhs.len())?;
133            let array = nullif(
134                // nullif in arrow-select does not support Datum, so we need to convert to array
135                lhs_a.as_ref(),
136                &eq(&lhs_s, &rhs)?,
137            )?;
138            Ok(ColumnarValue::Array(array))
139        }
140        (ColumnarValue::Scalar(lhs), ColumnarValue::Scalar(rhs)) => {
141            let val: ScalarValue = match lhs.eq(rhs) {
142                true => lhs.data_type().try_into()?,
143                false => lhs.clone(),
144            };
145
146            Ok(ColumnarValue::Scalar(val))
147        }
148    }
149}
150
151#[cfg(test)]
152mod tests {
153    use std::sync::Arc;
154
155    use arrow::array::*;
156
157    use super::*;
158
159    #[test]
160    fn nullif_int32() -> Result<()> {
161        let a = Int32Array::from(vec![
162            Some(1),
163            Some(2),
164            None,
165            None,
166            Some(3),
167            None,
168            None,
169            Some(4),
170            Some(5),
171        ]);
172        let a = ColumnarValue::Array(Arc::new(a));
173
174        let lit_array = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32)));
175
176        let result = nullif_func(&[a, lit_array])?;
177        let result = result.into_array(0).expect("Failed to convert to array");
178
179        let expected = Arc::new(Int32Array::from(vec![
180            Some(1),
181            None,
182            None,
183            None,
184            Some(3),
185            None,
186            None,
187            Some(4),
188            Some(5),
189        ])) as ArrayRef;
190        assert_eq!(expected.as_ref(), result.as_ref());
191        Ok(())
192    }
193
194    #[test]
195    // Ensure that arrays with no nulls can also invoke NULLIF() correctly
196    fn nullif_int32_non_nulls() -> Result<()> {
197        let a = Int32Array::from(vec![1, 3, 10, 7, 8, 1, 2, 4, 5]);
198        let a = ColumnarValue::Array(Arc::new(a));
199
200        let lit_array = ColumnarValue::Scalar(ScalarValue::Int32(Some(1i32)));
201
202        let result = nullif_func(&[a, lit_array])?;
203        let result = result.into_array(0).expect("Failed to convert to array");
204
205        let expected = Arc::new(Int32Array::from(vec![
206            None,
207            Some(3),
208            Some(10),
209            Some(7),
210            Some(8),
211            None,
212            Some(2),
213            Some(4),
214            Some(5),
215        ])) as ArrayRef;
216        assert_eq!(expected.as_ref(), result.as_ref());
217        Ok(())
218    }
219
220    #[test]
221    fn nullif_boolean() -> Result<()> {
222        let a = BooleanArray::from(vec![Some(true), Some(false), None]);
223        let a = ColumnarValue::Array(Arc::new(a));
224
225        let lit_array = ColumnarValue::Scalar(ScalarValue::Boolean(Some(false)));
226
227        let result = nullif_func(&[a, lit_array])?;
228        let result = result.into_array(0).expect("Failed to convert to array");
229
230        let expected =
231            Arc::new(BooleanArray::from(vec![Some(true), None, None])) as ArrayRef;
232
233        assert_eq!(expected.as_ref(), result.as_ref());
234        Ok(())
235    }
236
237    #[test]
238    fn nullif_string() -> Result<()> {
239        let a = StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]);
240        let a = ColumnarValue::Array(Arc::new(a));
241
242        let lit_array = ColumnarValue::Scalar(ScalarValue::from("bar"));
243
244        let result = nullif_func(&[a, lit_array])?;
245        let result = result.into_array(0).expect("Failed to convert to array");
246
247        let expected = Arc::new(StringArray::from(vec![
248            Some("foo"),
249            None,
250            None,
251            Some("baz"),
252        ])) as ArrayRef;
253
254        assert_eq!(expected.as_ref(), result.as_ref());
255        Ok(())
256    }
257
258    #[test]
259    fn nullif_literal_first() -> Result<()> {
260        let a = Int32Array::from(vec![Some(1), Some(2), None, None, Some(3), Some(4)]);
261        let a = ColumnarValue::Array(Arc::new(a));
262
263        let lit_array = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32)));
264
265        let result = nullif_func(&[lit_array, a])?;
266        let result = result.into_array(0).expect("Failed to convert to array");
267
268        let expected = Arc::new(Int32Array::from(vec![
269            Some(2),
270            None,
271            Some(2),
272            Some(2),
273            Some(2),
274            Some(2),
275        ])) as ArrayRef;
276        assert_eq!(expected.as_ref(), result.as_ref());
277        Ok(())
278    }
279
280    #[test]
281    fn nullif_scalar() -> Result<()> {
282        let a_eq = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32)));
283        let b_eq = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32)));
284
285        let result_eq = nullif_func(&[a_eq, b_eq])?;
286        let result_eq = result_eq.into_array(1).expect("Failed to convert to array");
287
288        let expected_eq = Arc::new(Int32Array::from(vec![None])) as ArrayRef;
289
290        assert_eq!(expected_eq.as_ref(), result_eq.as_ref());
291
292        let a_neq = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32)));
293        let b_neq = ColumnarValue::Scalar(ScalarValue::Int32(Some(1i32)));
294
295        let result_neq = nullif_func(&[a_neq, b_neq])?;
296        let result_neq = result_neq
297            .into_array(1)
298            .expect("Failed to convert to array");
299
300        let expected_neq = Arc::new(Int32Array::from(vec![Some(2i32)])) as ArrayRef;
301        assert_eq!(expected_neq.as_ref(), result_neq.as_ref());
302
303        Ok(())
304    }
305}