Skip to main content

datafusion_spark/function/string/
ilike.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::array::ArrayRef;
19use arrow::compute::ilike;
20use arrow::datatypes::{DataType, Field};
21use datafusion_common::{Result, exec_err, internal_err};
22use datafusion_expr::ColumnarValue;
23use datafusion_expr::{
24    ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
25};
26use datafusion_functions::utils::make_scalar_function;
27use std::sync::Arc;
28
29/// ILIKE function for case-insensitive pattern matching
30/// <https://spark.apache.org/docs/latest/api/sql/index.html#ilike>
31#[derive(Debug, PartialEq, Eq, Hash)]
32pub struct SparkILike {
33    signature: Signature,
34}
35
36impl Default for SparkILike {
37    fn default() -> Self {
38        Self::new()
39    }
40}
41
42impl SparkILike {
43    pub fn new() -> Self {
44        Self {
45            signature: Signature::string(2, Volatility::Immutable),
46        }
47    }
48}
49
50impl ScalarUDFImpl for SparkILike {
51    fn name(&self) -> &str {
52        "ilike"
53    }
54
55    fn signature(&self) -> &Signature {
56        &self.signature
57    }
58
59    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
60        internal_err!("return_field_from_args should be used instead")
61    }
62
63    fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<Arc<Field>> {
64        // ILIKE returns a boolean value
65        // The result is nullable if any of the input arguments is nullable
66        let nullable = args.arg_fields.iter().any(|f| f.is_nullable());
67        Ok(Arc::new(Field::new("ilike", DataType::Boolean, nullable)))
68    }
69
70    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
71        make_scalar_function(spark_ilike, vec![])(&args.args)
72    }
73}
74
75/// Returns true if str matches pattern (case insensitive).
76pub fn spark_ilike(args: &[ArrayRef]) -> Result<ArrayRef> {
77    if args.len() != 2 {
78        return exec_err!("ilike function requires exactly 2 arguments");
79    }
80
81    let result = ilike(&args[0], &args[1])?;
82    Ok(Arc::new(result))
83}
84
#[cfg(test)]
mod tests {
    use super::*;
    use crate::function::utils::test::test_scalar_function;
    use arrow::array::{Array, BooleanArray};
    use arrow::datatypes::DataType::Boolean;
    use datafusion_common::ScalarValue;

    /// Checks one `(input, pattern) -> expected` case three times, once for
    /// each scalar string variant: `Utf8`, `LargeUtf8` and `Utf8View`.
    macro_rules! test_ilike_string_invoke {
        ($INPUT1:expr, $INPUT2:expr, $EXPECTED:expr) => {
            test_scalar_function!(
                SparkILike::new(),
                vec![
                    ColumnarValue::Scalar(ScalarValue::Utf8($INPUT1)),
                    ColumnarValue::Scalar(ScalarValue::Utf8($INPUT2))
                ],
                $EXPECTED,
                bool,
                Boolean,
                BooleanArray
            );

            test_scalar_function!(
                SparkILike::new(),
                vec![
                    ColumnarValue::Scalar(ScalarValue::LargeUtf8($INPUT1)),
                    ColumnarValue::Scalar(ScalarValue::LargeUtf8($INPUT2))
                ],
                $EXPECTED,
                bool,
                Boolean,
                BooleanArray
            );

            test_scalar_function!(
                SparkILike::new(),
                vec![
                    ColumnarValue::Scalar(ScalarValue::Utf8View($INPUT1)),
                    ColumnarValue::Scalar(ScalarValue::Utf8View($INPUT2))
                ],
                $EXPECTED,
                bool,
                Boolean,
                BooleanArray
            );
        };
    }

    #[test]
    fn test_ilike_invoke() -> Result<()> {
        // Each row is (input, pattern, expected result); `None` is a null scalar.
        let cases: [(Option<&str>, Option<&str>, Option<bool>); 10] = [
            (Some("Spark"), Some("_park"), Some(true)),
            (Some("Spark"), Some("_PARK"), Some(true)),
            (Some("SPARK"), Some("_park"), Some(true)),
            (Some("Spark"), Some("sp%"), Some(true)),
            (Some("Spark"), Some("SP%"), Some(true)),
            (Some("Spark"), Some("%ARK"), Some(true)),
            (Some("Spark"), Some("xyz"), Some(false)),
            (None, Some("_park"), None),
            (Some("Spark"), None, None),
            (None, None, None),
        ];

        for (input, pattern, want) in cases {
            test_ilike_string_invoke!(
                input.map(String::from),
                pattern.map(String::from),
                Ok(want)
            );
        }

        Ok(())
    }

    #[test]
    fn test_ilike_nullability() {
        let udf = SparkILike::new();

        // Every combination of (str nullable, pattern nullable): the output
        // must be nullable exactly when at least one input is, and is always
        // Boolean.
        let combinations = [(false, false), (true, false), (false, true), (true, true)];

        for (str_nullable, pattern_nullable) in combinations {
            let str_field = Arc::new(Field::new("str", DataType::Utf8, str_nullable));
            let pattern_field =
                Arc::new(Field::new("pattern", DataType::Utf8, pattern_nullable));

            let output = udf
                .return_field_from_args(ReturnFieldArgs {
                    arg_fields: &[str_field, pattern_field],
                    scalar_arguments: &[None, None],
                })
                .unwrap();

            assert_eq!(output.is_nullable(), str_nullable || pattern_nullable);
            assert_eq!(output.data_type(), &Boolean);
        }
    }
}