datafusion_spark/function/string/
ilike.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::array::ArrayRef;
19use arrow::compute::ilike;
20use arrow::datatypes::{DataType, Field};
21use datafusion_common::{Result, exec_err, internal_err};
22use datafusion_expr::ColumnarValue;
23use datafusion_expr::{
24    ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
25};
26use datafusion_functions::utils::make_scalar_function;
27use std::any::Any;
28use std::sync::Arc;
29
30/// ILIKE function for case-insensitive pattern matching
31/// <https://spark.apache.org/docs/latest/api/sql/index.html#ilike>
32#[derive(Debug, PartialEq, Eq, Hash)]
33pub struct SparkILike {
34    signature: Signature,
35}
36
37impl Default for SparkILike {
38    fn default() -> Self {
39        Self::new()
40    }
41}
42
43impl SparkILike {
44    pub fn new() -> Self {
45        Self {
46            signature: Signature::string(2, Volatility::Immutable),
47        }
48    }
49}
50
51impl ScalarUDFImpl for SparkILike {
52    fn as_any(&self) -> &dyn Any {
53        self
54    }
55
56    fn name(&self) -> &str {
57        "ilike"
58    }
59
60    fn signature(&self) -> &Signature {
61        &self.signature
62    }
63
64    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
65        internal_err!("return_field_from_args should be used instead")
66    }
67
68    fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<Arc<Field>> {
69        // ILIKE returns a boolean value
70        // The result is nullable if any of the input arguments is nullable
71        let nullable = args.arg_fields.iter().any(|f| f.is_nullable());
72        Ok(Arc::new(Field::new("ilike", DataType::Boolean, nullable)))
73    }
74
75    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
76        make_scalar_function(spark_ilike, vec![])(&args.args)
77    }
78}
79
80/// Returns true if str matches pattern (case insensitive).
81pub fn spark_ilike(args: &[ArrayRef]) -> Result<ArrayRef> {
82    if args.len() != 2 {
83        return exec_err!("ilike function requires exactly 2 arguments");
84    }
85
86    let result = ilike(&args[0], &args[1])?;
87    Ok(Arc::new(result))
88}
89
#[cfg(test)]
mod tests {
    use super::*;
    use crate::function::utils::test::test_scalar_function;
    use arrow::array::{Array, BooleanArray};
    use arrow::datatypes::{DataType::Boolean, Field};
    use datafusion_common::{Result, ScalarValue};
    use datafusion_expr::{ColumnarValue, ReturnFieldArgs, ScalarUDFImpl};

    /// Runs one scalar `ilike` check with both arguments wrapped in the given
    /// `ScalarValue` string variant.
    macro_rules! check_ilike_scalar {
        ($VARIANT:ident, $STR:expr, $PATTERN:expr, $EXPECTED:expr) => {
            test_scalar_function!(
                SparkILike::new(),
                vec![
                    ColumnarValue::Scalar(ScalarValue::$VARIANT($STR)),
                    ColumnarValue::Scalar(ScalarValue::$VARIANT($PATTERN))
                ],
                $EXPECTED,
                bool,
                Boolean,
                BooleanArray
            );
        };
    }

    /// Repeats the same check for `Utf8`, `LargeUtf8` and `Utf8View` inputs.
    macro_rules! test_ilike_string_invoke {
        ($INPUT1:expr, $INPUT2:expr, $EXPECTED:expr) => {
            check_ilike_scalar!(Utf8, $INPUT1, $INPUT2, $EXPECTED);
            check_ilike_scalar!(LargeUtf8, $INPUT1, $INPUT2, $EXPECTED);
            check_ilike_scalar!(Utf8View, $INPUT1, $INPUT2, $EXPECTED);
        };
    }

    #[test]
    fn test_ilike_invoke() -> Result<()> {
        // (input, pattern, expected match) for non-null arguments.
        let cases = [
            ("Spark", "_park", true),
            ("Spark", "_PARK", true),
            ("SPARK", "_park", true),
            ("Spark", "sp%", true),
            ("Spark", "SP%", true),
            ("Spark", "%ARK", true),
            ("Spark", "xyz", false),
        ];
        for (text, pattern, is_match) in cases {
            test_ilike_string_invoke!(
                Some(String::from(text)),
                Some(String::from(pattern)),
                Ok(Some(is_match))
            );
        }

        // A null on either side (or both) yields a null result.
        test_ilike_string_invoke!(None, Some(String::from("_park")), Ok(None));
        test_ilike_string_invoke!(Some(String::from("Spark")), None, Ok(None));
        test_ilike_string_invoke!(None, None, Ok(None));

        Ok(())
    }

    #[test]
    fn test_ilike_nullability() {
        let udf = SparkILike::new();
        let utf8_field =
            |name: &str, nullable: bool| Arc::new(Field::new(name, DataType::Utf8, nullable));

        // (str nullable, pattern nullable, expected output nullability)
        let cases = [
            (false, false, false),
            (true, false, true),
            (false, true, true),
            (true, true, true),
        ];

        for (str_nullable, pattern_nullable, expected_nullable) in cases {
            let arg_fields = [
                utf8_field("str", str_nullable),
                utf8_field("pattern", pattern_nullable),
            ];

            let field = udf
                .return_field_from_args(ReturnFieldArgs {
                    arg_fields: &arg_fields,
                    scalar_arguments: &[None, None],
                })
                .unwrap();

            // The output is nullable exactly when at least one input is.
            assert_eq!(field.is_nullable(), expected_nullable);
            assert_eq!(field.data_type(), &Boolean);
        }
    }
}